LLM: Fix bigdl_ipex_int8 warning (#10890)

This commit is contained in:
Wang, Jian4 2024-04-26 11:18:44 +08:00 committed by GitHub
parent fb3c268d13
commit 3e8ed54270
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -138,7 +138,7 @@ def _ipex_optimize_model(model, rms_classes, qtype):
         }
         qconfig = ipex.quantization.get_weight_only_quant_qconfig_mapping(
             weight_dtype=torch.qint8,  # INT8
-            lowp_mode=ipex.quantization.WoqLowpMode.INT8,
+            lowp_mode=ipex.quantization.WoqLowpMode.BF16,
             act_quant_mode=act_quant_mode_dict["PER_IC_BLOCK"],
             group_size=-1,
         )