Hotfix of BCE-Embedding model (#12490)
parent 80f15e41f5
commit c59284418c
2 changed files with 3 additions and 3 deletions
@@ -766,7 +766,7 @@ class EmbeddingModel(_BaseAutoModelClass):
         optimize_llm_pre(model, qtype, mixed_precision,
                          quantization_group_size=quantization_group_size)
         cls.load_convert_fp16(qtype, model.encoder, "cpu", modules_to_not_convert,
-                              quantization_group_size, None, *args, **kwargs)
+                              quantization_group_size)
         create_npu_kernels(model.encoder)
         model = model.eval()
         logger.info(f"Finish to convert model")
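A note on this first hunk: the call site and the `load_convert_fp16` signature (next hunk) were trimmed together, since each only works with the other. A minimal sketch of that constraint, with hypothetical stand-in names, showing that once the catch-all parameters are gone a call that still forwards extras fails:

# Minimal sketch, hypothetical names: once the catch-all parameters are
# dropped from the callee, a call site that still forwards extras breaks.
def load_convert_fp16_new(q_k, model, device, skip, group_size=0, imatrix_data=None):
    return q_k

load_convert_fp16_new("fp16", object(), "cpu", [], 0)  # matches the fixed call site
# load_convert_fp16_new("fp16", object(), "cpu", [], 0, None, "extra")
#   -> TypeError: takes from 4 to 6 positional arguments but 7 were given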
@@ -781,7 +781,7 @@ class EmbeddingModel(_BaseAutoModelClass):
 
     @classmethod
     def load_convert_fp16(cls, q_k, optimize_model, device, modules_to_not_convert,
-                          group_size=0, imatrix_data=None, *arg, **kwarg):
+                          group_size=0, imatrix_data=None):
         from ipex_llm.transformers.npu_models.xlm_mp import replace_with_FP16Linear
         replace_with_FP16Linear(optimize_model, q_k, device=device,
                                 modules_to_not_convert=modules_to_not_convert,
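With `*arg, **kwarg` removed, `load_convert_fp16` no longer silently swallows unexpected arguments; a misspelled keyword now raises immediately instead of being collected and ignored. A small illustration using a standalone copy of the trimmed signature (the body here is a placeholder, not the real implementation):

import inspect

# Standalone copy of the trimmed signature (placeholder body, not the real
# implementation). Without **kwarg, a misspelled keyword fails loudly.
def load_convert_fp16(q_k, optimize_model, device, modules_to_not_convert,
                      group_size=0, imatrix_data=None):
    return optimize_model

print(inspect.signature(load_convert_fp16))

try:
    load_convert_fp16("fp16", None, "cpu", [], imatrx_data=None)  # typo in keyword
except TypeError as e:
    print(e)  # ... got an unexpected keyword argument 'imatrx_data'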
@@ -711,7 +711,7 @@ class XLMLayerNorm(torch.nn.Module):
 
 @module_optimization
 def replace_with_Layernorm(layer, qtype=None, device='NPU',
-                           modules_to_not_convert=[], group_size=0):
+                           modules_to_not_convert=[], group_size=0, **kwargs):
     if isinstance(layer, torch.nn.LayerNorm):
         return XLMLayerNorm(
             weight=layer.weight.to(torch.float16),
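A plausible reading of this last hunk: the `@module_optimization` conversion path invokes the various `replace_with_*` helpers with a shared keyword set, so `replace_with_Layernorm` gains `**kwargs` to absorb keywords it does not use. A sketch under that assumption, with a simplified stand-in for the helper:

import torch

# Sketch under the stated assumption; replace_with_layernorm is a simplified
# stand-in for the real helper. The **kwargs catch-all lets one shared
# keyword set drive every replace_with_* helper: extras such as
# imatrix_data land in kwargs and are simply ignored here.
def replace_with_layernorm(layer, qtype=None, device='NPU',
                           modules_to_not_convert=[], group_size=0, **kwargs):
    if isinstance(layer, torch.nn.LayerNorm):
        return torch.nn.LayerNorm(layer.normalized_shape).half()  # placeholder conversion
    return None

common_kwargs = dict(qtype="fp16", device="NPU", modules_to_not_convert=[],
                     group_size=0, imatrix_data=None)  # one set for all helpers
print(replace_with_layernorm(torch.nn.LayerNorm(8), **common_kwargs))

Without the added `**kwargs`, the `imatrix_data=None` keyword above would raise a TypeError, which matches the failure a shared caller would hit before this hotfix.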