Hotfix of BCE-Embedding model (#12490)
parent 80f15e41f5
commit c59284418c

2 changed files with 3 additions and 3 deletions
@@ -766,7 +766,7 @@ class EmbeddingModel(_BaseAutoModelClass):
             optimize_llm_pre(model, qtype, mixed_precision,
                              quantization_group_size=quantization_group_size)
             cls.load_convert_fp16(qtype, model.encoder, "cpu", modules_to_not_convert,
-                                  quantization_group_size, None, *args, **kwargs)
+                                  quantization_group_size)
             create_npu_kernels(model.encoder)
         model = model.eval()
         logger.info(f"Finish to convert model")
@@ -781,7 +781,7 @@ class EmbeddingModel(_BaseAutoModelClass):
 
     @classmethod
     def load_convert_fp16(cls, q_k, optimize_model, device, modules_to_not_convert,
-                          group_size=0, imatrix_data=None, *arg, **kwarg):
+                          group_size=0, imatrix_data=None):
         from ipex_llm.transformers.npu_models.xlm_mp import replace_with_FP16Linear
         replace_with_FP16Linear(optimize_model, q_k, device=device,
                                 modules_to_not_convert=modules_to_not_convert,
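These two hunks change the `cls.load_convert_fp16(...)` call and the `load_convert_fp16` signature together, so the arguments the caller passes stay in step with what the classmethod declares. A minimal sketch of why that pairing matters, using hypothetical names rather than the ipex-llm source: a helper that does not declare `*args`/`**kwargs` rejects any extra arguments forwarded from a generic loader.

# Minimal sketch (hypothetical names, not the ipex-llm implementation).
class EmbeddingModelSketch:
    @classmethod
    def load_convert_fp16(cls, q_k, optimize_model, device, modules_to_not_convert,
                          group_size=0, imatrix_data=None):
        # Only the declared parameters exist; any extra argument is a TypeError.
        return q_k, device, group_size

def optimize_npu_model_sketch(model, qtype, quantization_group_size, *args, **kwargs):
    # Pass exactly the arguments the classmethod declares, instead of
    # forwarding the loader's *args/**kwargs.
    return EmbeddingModelSketch.load_convert_fp16(
        qtype, model, "cpu", [], quantization_group_size)

print(optimize_npu_model_sketch(object(), "sym_int4", 0, "extra", trust_remote_code=True))
# ('sym_int4', 'cpu', 0)

# Forwarding the loader's extras instead, e.g.
#   EmbeddingModelSketch.load_convert_fp16(qtype, model, "cpu", [], 0, None, "extra")
# raises TypeError (too many positional arguments), since the signature has no *args.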
@@ -711,7 +711,7 @@ class XLMLayerNorm(torch.nn.Module):
 
 @module_optimization
 def replace_with_Layernorm(layer, qtype=None, device='NPU',
-                           modules_to_not_convert=[], group_size=0):
+                           modules_to_not_convert=[], group_size=0, **kwargs):
     if isinstance(layer, torch.nn.LayerNorm):
         return XLMLayerNorm(
             weight=layer.weight.to(torch.float16),
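In this last hunk, `replace_with_Layernorm` gains a `**kwargs` parameter, so it can absorb keyword arguments it does not itself use when a shared driver forwards them to every replacement function. A self-contained sketch of that pattern follows; the decorator below only mimics the shape of `@module_optimization` and is not the ipex-llm implementation.

import torch

def module_optimization(func):
    # Hypothetical stand-in for the real decorator: walk the module tree and
    # apply `func` to every child, forwarding shared keyword arguments.
    def wrapper(model, qtype=None, device='NPU', modules_to_not_convert=None,
                group_size=0, **kwargs):
        modules_to_not_convert = modules_to_not_convert or []
        for name, child in model.named_children():
            replacement = func(child, qtype=qtype, device=device,
                               modules_to_not_convert=modules_to_not_convert,
                               group_size=group_size, **kwargs)
            if replacement is not None:
                setattr(model, name, replacement)
            else:
                wrapper(child, qtype=qtype, device=device,
                        modules_to_not_convert=modules_to_not_convert,
                        group_size=group_size, **kwargs)
    return wrapper

@module_optimization
def replace_with_Layernorm(layer, qtype=None, device='NPU',
                           modules_to_not_convert=[], group_size=0, **kwargs):
    # Without **kwargs, an extra keyword forwarded by the wrapper (for example
    # imatrix data used by other replacement functions) would raise TypeError here.
    if isinstance(layer, torch.nn.LayerNorm):
        return torch.nn.LayerNorm(layer.normalized_shape, eps=layer.eps)

model = torch.nn.Sequential(torch.nn.Linear(4, 4), torch.nn.LayerNorm(4))
# The extra `imatrix` keyword is simply absorbed and ignored by replace_with_Layernorm.
replace_with_Layernorm(model, qtype=None, device='NPU', group_size=0, imatrix=None)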