set woq_int4 as default int4 (#13021)
This commit is contained in:
parent
6693e8ab04
commit
10c30cdba9
1 changed file with 9 additions and 0 deletions
|
|
@ -1089,6 +1089,15 @@ def ggml_convert_low_bit(model, qtype, optimize_model=True,
|
||||||
embedding_qtype=None,
|
embedding_qtype=None,
|
||||||
mixed_precision=False,
|
mixed_precision=False,
|
||||||
disable_optimize_pre=False):
|
disable_optimize_pre=False):
|
||||||
|
if qtype == ggml_tensor_qtype["sym_int4"] and torch.__version__ >= "2.6":
|
||||||
|
logger.warning("sym_int4 is deprecated, use woq_int4 instead, "
|
||||||
|
"if you are loading a saved sym_int4 low bit model, "
|
||||||
|
"please re-save it with woq_int4")
|
||||||
|
qtype = ggml_tensor_qtype["woq_int4"]
|
||||||
|
elif qtype == ggml_tensor_qtype["woq_int4"] and torch.__version__ < "2.6":
|
||||||
|
logger.warning("woq_int4 is not supported with pytorch<2.6, "
|
||||||
|
"use sym_int4 instead or use ipex-llm with pytorch>=2.6")
|
||||||
|
qtype = ggml_tensor_qtype["sym_int4"]
|
||||||
if qtype in ggml_tensor_qtype.values():
|
if qtype in ggml_tensor_qtype.values():
|
||||||
index = list(ggml_tensor_qtype.values()).index(qtype)
|
index = list(ggml_tensor_qtype.values()).index(qtype)
|
||||||
logger.info(f"Converting the current model to "
|
logger.info(f"Converting the current model to "
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue