Configurable TORCH_LINEAR_THRESHOLD from env (#9588)

* Read TORCH_LINEAR_THRESHOLD from the environment variable BIGDL_LLM_LINEAR_THRESHOLD
* Change the default from 96 to 512
This commit is contained in:
Qiyuan Gong 2023-12-05 13:19:47 +08:00 committed by GitHub
parent 1012507a40
commit f211f136b6

View file

@@ -59,7 +59,7 @@ import ctypes
 from bigdl.llm.ggml.quantize import ggml_tensor_qtype
 IS_SERVER = is_server()
 IS_SPR = is_spr()
-TORCH_LINEAR_THRESHOLD = 96
+TORCH_LINEAR_THRESHOLD = int(os.getenv("BIGDL_LLM_LINEAR_THRESHOLD", "512"))
 SYM_INT4 = ggml_tensor_qtype["sym_int4"]
 ASYM_INT4 = ggml_tensor_qtype["asym_int4"]
 SYM_INT8 = ggml_tensor_qtype["sym_int8"]