Fix vllm gptq awq error (#12863)
* fix gptq awq error
* fix python style
This commit is contained in:
parent a488981f3f
commit 348dc8056d
1 changed file with 6 additions and 4 deletions
@@ -93,10 +93,12 @@ def get_load_function(low_bit):
                 vllm_config=new_vllm_config
             )
             if self.vllm_config.model_config.low_bit_model_path is None:
-                if "qwen" in self.vllm_config.model_config.model.lower() or \
-                        "baichuan" in self.vllm_config.model_config.model.lower() or \
-                        "codegeex4-all" in self.vllm_config.model_config.model.lower() or \
-                        "chatglm" in self.vllm_config.model_config.model.lower():
+                if ("qwen" in self.vllm_config.model_config.model.lower() or
+                        "baichuan" in self.vllm_config.model_config.model.lower() or
+                        "codegeex4-all" in self.vllm_config.model_config.model.lower() or
+                        "chatglm" in self.vllm_config.model_config.model.lower()) and \
+                        "gptq" not in self.model_config.model.lower() and \
+                        "awq" not in self.model_config.model.lower():
                     self.model.apply(padding_mlp)
                 from ipex_llm import optimize_model
                 not_convert_last_mlp = os.getenv("IPEX_LLM_NOT_CONVERT_LAST_MLP", None)
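The intent of the change: MLP padding should only be applied to the listed model families (qwen, baichuan, codegeex4-all, chatglm) when the checkpoint is not GPTQ- or AWQ-quantized. Below is a minimal, standalone sketch of that condition for illustration only; the helper name should_apply_padding_mlp is hypothetical and not part of the ipex-llm codebase, which performs this check inline as shown in the diff above.

# Illustrative sketch (assumed helper, not actual ipex-llm code):
# padding_mlp runs only for the listed model families, and is skipped
# for GPTQ/AWQ quantized checkpoints, matching the condition added
# by this commit.
def should_apply_padding_mlp(model_name: str) -> bool:
    name = model_name.lower()
    is_padded_family = any(
        key in name for key in ("qwen", "baichuan", "codegeex4-all", "chatglm")
    )
    is_quantized = "gptq" in name or "awq" in name
    return is_padded_family and not is_quantized

# Example: a plain Qwen checkpoint still gets padded; a GPTQ variant does not.
assert should_apply_padding_mlp("Qwen2-7B-Instruct")
assert not should_apply_padding_mlp("Qwen2-7B-Instruct-GPTQ-Int4")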