Configurable TORCH_LINEAR_THRESHOLD from env (#9588)
* Add TORCH_LINEAR_THRESHOLD from env (BIGDL_LLM_LINEAR_THRESHOLD)
* Change default to 512
This commit is contained in:
parent
1012507a40
commit
f211f136b6
1 changed file with 1 addition and 1 deletion
|
|
@@ -59,7 +59,7 @@ import ctypes
|
||||||
from bigdl.llm.ggml.quantize import ggml_tensor_qtype
|
from bigdl.llm.ggml.quantize import ggml_tensor_qtype
|
||||||
IS_SERVER = is_server()
|
IS_SERVER = is_server()
|
||||||
IS_SPR = is_spr()
|
IS_SPR = is_spr()
|
||||||
TORCH_LINEAR_THRESHOLD = 96
|
TORCH_LINEAR_THRESHOLD = int(os.getenv("BIGDL_LLM_LINEAR_THRESHOLD", "512"))
|
||||||
SYM_INT4 = ggml_tensor_qtype["sym_int4"]
|
SYM_INT4 = ggml_tensor_qtype["sym_int4"]
|
||||||
ASYM_INT4 = ggml_tensor_qtype["asym_int4"]
|
ASYM_INT4 = ggml_tensor_qtype["asym_int4"]
|
||||||
SYM_INT8 = ggml_tensor_qtype["sym_int8"]
|
SYM_INT8 = ggml_tensor_qtype["sym_int8"]
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue