From f211f136b683d5b07cb07bdf79116e5182a9e53d Mon Sep 17 00:00:00 2001
From: Qiyuan Gong
Date: Tue, 5 Dec 2023 13:19:47 +0800
Subject: [PATCH] Configurable TORCH_LINEAR_THRESHOLD from env (#9588)

* Add TORCH_LINEAR_THRESHOLD from env (BIGDL_LLM_LINEAR_THRESHOLD)
* Change default to 512
---
 python/llm/src/bigdl/llm/transformers/low_bit_linear.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/llm/src/bigdl/llm/transformers/low_bit_linear.py b/python/llm/src/bigdl/llm/transformers/low_bit_linear.py
index 833f11ac..a95623ff 100644
--- a/python/llm/src/bigdl/llm/transformers/low_bit_linear.py
+++ b/python/llm/src/bigdl/llm/transformers/low_bit_linear.py
@@ -59,7 +59,7 @@ import ctypes
 from bigdl.llm.ggml.quantize import ggml_tensor_qtype
 IS_SERVER = is_server()
 IS_SPR = is_spr()
-TORCH_LINEAR_THRESHOLD = 96
+TORCH_LINEAR_THRESHOLD = int(os.getenv("BIGDL_LLM_LINEAR_THRESHOLD", "512"))
 SYM_INT4 = ggml_tensor_qtype["sym_int4"]
 ASYM_INT4 = ggml_tensor_qtype["asym_int4"]
 SYM_INT8 = ggml_tensor_qtype["sym_int8"]