Configurable TORCH_LINEAR_THRESHOLD from env (#9588)
* Add TORCH_LINEAR_THRESHOLD from env (BIGDL_LLM_LINEAR_THRESHOLD)
* Change default from 96 to 512
This commit is contained in:
		
							parent
							
								
									1012507a40
								
							
						
					
					
						commit
						f211f136b6
					
				
					 1 changed file with 1 addition and 1 deletion
				
			
		| 
						 | 
					@ -59,7 +59,7 @@ import ctypes
 | 
				
			||||||
from bigdl.llm.ggml.quantize import ggml_tensor_qtype
 | 
					from bigdl.llm.ggml.quantize import ggml_tensor_qtype
 | 
				
			||||||
IS_SERVER = is_server()
 | 
					IS_SERVER = is_server()
 | 
				
			||||||
IS_SPR = is_spr()
 | 
					IS_SPR = is_spr()
 | 
				
			||||||
TORCH_LINEAR_THRESHOLD = 96
 | 
					TORCH_LINEAR_THRESHOLD = int(os.getenv("BIGDL_LLM_LINEAR_THRESHOLD", "512"))
 | 
				
			||||||
SYM_INT4 = ggml_tensor_qtype["sym_int4"]
 | 
					SYM_INT4 = ggml_tensor_qtype["sym_int4"]
 | 
				
			||||||
ASYM_INT4 = ggml_tensor_qtype["asym_int4"]
 | 
					ASYM_INT4 = ggml_tensor_qtype["asym_int4"]
 | 
				
			||||||
SYM_INT8 = ggml_tensor_qtype["sym_int8"]
 | 
					SYM_INT8 = ggml_tensor_qtype["sym_int8"]
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue