set woq_int4 as default int4 (#13021)
This commit is contained in:
parent
6693e8ab04
commit
10c30cdba9
1 changed file with 9 additions and 0 deletions
|
|
@ -1089,6 +1089,15 @@ def ggml_convert_low_bit(model, qtype, optimize_model=True,
|
||||||
embedding_qtype=None,
|
embedding_qtype=None,
|
||||||
mixed_precision=False,
|
mixed_precision=False,
|
||||||
disable_optimize_pre=False):
|
disable_optimize_pre=False):
|
||||||
|
if qtype == ggml_tensor_qtype["sym_int4"] and torch.__version__ >= "2.6":
|
||||||
|
logger.warning("sym_int4 is deprecated, use woq_int4 instead, "
|
||||||
|
"if you are loading a saved sym_int4 low bit model, "
|
||||||
|
"please re-save it with woq_int4")
|
||||||
|
qtype = ggml_tensor_qtype["woq_int4"]
|
||||||
|
elif qtype == ggml_tensor_qtype["woq_int4"] and torch.__version__ < "2.6":
|
||||||
|
logger.warning("woq_int4 is not supported with pytorch<2.6, "
|
||||||
|
"use sym_int4 instead or use ipex-llm with pytorch>=2.6")
|
||||||
|
qtype = ggml_tensor_qtype["sym_int4"]
|
||||||
if qtype in ggml_tensor_qtype.values():
|
if qtype in ggml_tensor_qtype.values():
|
||||||
index = list(ggml_tensor_qtype.values()).index(qtype)
|
index = list(ggml_tensor_qtype.values()).index(qtype)
|
||||||
logger.info(f"Converting the current model to "
|
logger.info(f"Converting the current model to "
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue