parent 68f2873bd3
commit 588bfa24dc
1 changed file with 4 additions and 2 deletions
@@ -97,7 +97,8 @@ def replace_with_QuantizedLinear(layer, qtype, device, modules_to_not_convert,
         if (layer.in_features == 18944 and layer.out_features == 3584):
             qtype = "sym_int8_rtn"
             iqtype = ggml_tensor_qtype[qtype]
-        enable_scale_search = os.environ.get("IPEX_LLM_NPU_QUANTIZATION_OPT", "0") != "0"
+        enable_scale_search = (os.environ.get("IPEX_LLM_NPU_QUANTIZATION_OPT", "0") != "0" or
+                               os.environ.get("IPEX_LLM_NPU_QUANTIZATION_HQQ", "0") != "0")
         qweights, scale = ggml_convert_qtype(layer.weight.data.to(torch.float32),
                                              iqtype, device=device,
                                              enable_scale_search=enable_scale_search,
@@ -123,7 +124,8 @@ def replace_with_DequantizedLinear(layer, qtype, device, modules_to_not_convert,
         if (layer.in_features == 3584 and layer.out_features == 152064):
             qtype = "sym_int8_rtn"
             iqtype = ggml_tensor_qtype[qtype]
-        enable_scale_search = os.environ.get("IPEX_LLM_NPU_QUANTIZATION_OPT", "0") != "0"
+        enable_scale_search = (os.environ.get("IPEX_LLM_NPU_QUANTIZATION_OPT", "0") != "0" or
+                               os.environ.get("IPEX_LLM_NPU_QUANTIZATION_HQQ", "0") != "0")
         qweights, scale = ggml_convert_qtype(layer.weight.data.to(torch.float32),
                                              iqtype, device=device,
                                              enable_scale_search=enable_scale_search,
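
Note on the change above: both hunks widen the gate on enable_scale_search so that scale search is turned on when either IPEX_LLM_NPU_QUANTIZATION_OPT or IPEX_LLM_NPU_QUANTIZATION_HQQ is set to a non-"0" value. The sketch below is a minimal illustration of that gating in isolation; the helper name scale_search_enabled is hypothetical and not part of the patch.

import os

def scale_search_enabled() -> bool:
    # Mirrors the patched condition: either environment variable set to a
    # non-"0" value enables scale search during NPU quantization.
    return (os.environ.get("IPEX_LLM_NPU_QUANTIZATION_OPT", "0") != "0" or
            os.environ.get("IPEX_LLM_NPU_QUANTIZATION_HQQ", "0") != "0")

# Example: either of these would enable scale search before conversion.
#   IPEX_LLM_NPU_QUANTIZATION_OPT=1 python your_script.py
#   IPEX_LLM_NPU_QUANTIZATION_HQQ=1 python your_script.py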