diff --git a/python/llm/src/ipex_llm/transformers/convert_ipex.py b/python/llm/src/ipex_llm/transformers/convert_ipex.py index a934a1dd..b36d75fe 100644 --- a/python/llm/src/ipex_llm/transformers/convert_ipex.py +++ b/python/llm/src/ipex_llm/transformers/convert_ipex.py @@ -138,7 +138,7 @@ def _ipex_optimize_model(model, rms_classes, qtype): } qconfig = ipex.quantization.get_weight_only_quant_qconfig_mapping( weight_dtype=torch.qint8, # INT8 - lowp_mode=ipex.quantization.WoqLowpMode.INT8, + lowp_mode=ipex.quantization.WoqLowpMode.BF16, act_quant_mode=act_quant_mode_dict["PER_IC_BLOCK"], group_size=-1, )