support hqq (#12518)

* support

* fix
This commit is contained in:
Ruonan Wang 2024-12-10 23:43:02 -08:00 committed by GitHub
parent 68f2873bd3
commit 588bfa24dc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -97,7 +97,8 @@ def replace_with_QuantizedLinear(layer, qtype, device, modules_to_not_convert,
     if (layer.in_features == 18944 and layer.out_features == 3584):
         qtype = "sym_int8_rtn"
     iqtype = ggml_tensor_qtype[qtype]
-    enable_scale_search = os.environ.get("IPEX_LLM_NPU_QUANTIZATION_OPT", "0") != "0"
+    enable_scale_search = (os.environ.get("IPEX_LLM_NPU_QUANTIZATION_OPT", "0") != "0" or
+                           os.environ.get("IPEX_LLM_NPU_QUANTIZATION_HQQ", "0") != "0")
     qweights, scale = ggml_convert_qtype(layer.weight.data.to(torch.float32),
                                          iqtype, device=device,
                                          enable_scale_search=enable_scale_search,
@ -123,7 +124,8 @@ def replace_with_DequantizedLinear(layer, qtype, device, modules_to_not_convert,
     if (layer.in_features == 3584 and layer.out_features == 152064):
         qtype = "sym_int8_rtn"
     iqtype = ggml_tensor_qtype[qtype]
-    enable_scale_search = os.environ.get("IPEX_LLM_NPU_QUANTIZATION_OPT", "0") != "0"
+    enable_scale_search = (os.environ.get("IPEX_LLM_NPU_QUANTIZATION_OPT", "0") != "0" or
+                           os.environ.get("IPEX_LLM_NPU_QUANTIZATION_HQQ", "0") != "0")
     qweights, scale = ggml_convert_qtype(layer.weight.data.to(torch.float32),
                                          iqtype, device=device,
                                          enable_scale_search=enable_scale_search,