diff --git a/python/llm/src/ipex_llm/vllm/xpu/model_convert.py b/python/llm/src/ipex_llm/vllm/xpu/model_convert.py
index 15a9818a..73783c97 100644
--- a/python/llm/src/ipex_llm/vllm/xpu/model_convert.py
+++ b/python/llm/src/ipex_llm/vllm/xpu/model_convert.py
@@ -93,10 +93,12 @@ def get_load_function(low_bit):
             vllm_config=new_vllm_config
         )
         if self.vllm_config.model_config.low_bit_model_path is None:
-            if "qwen" in self.vllm_config.model_config.model.lower() or \
-                    "baichuan" in self.vllm_config.model_config.model.lower() or \
-                    "codegeex4-all" in self.vllm_config.model_config.model.lower() or \
-                    "chatglm" in self.vllm_config.model_config.model.lower():
+            if ("qwen" in self.vllm_config.model_config.model.lower() or
+                    "baichuan" in self.vllm_config.model_config.model.lower() or
+                    "codegeex4-all" in self.vllm_config.model_config.model.lower() or
+                    "chatglm" in self.vllm_config.model_config.model.lower()) and \
+                    "gptq" not in self.model_config.model.lower() and \
+                    "awq" not in self.model_config.model.lower():
                 self.model.apply(padding_mlp)
             from ipex_llm import optimize_model
             not_convert_last_mlp = os.getenv("IPEX_LLM_NOT_CONVERT_LAST_MLP", None)
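
Reviewer note: below is a minimal standalone sketch of the new gating condition, assuming the intent of this hunk is to keep the MLP-padding pass for the listed model families while skipping it for GPTQ-/AWQ-quantized checkpoints (padding would presumably change weight shapes that a pre-quantized checkpoint has fixed). The helper name should_pad_mlp is illustrative and not part of ipex_llm.

# Hypothetical helper mirroring the condition in the hunk above; not part of ipex_llm.
PADDED_FAMILIES = ("qwen", "baichuan", "codegeex4-all", "chatglm")
QUANT_MARKERS = ("gptq", "awq")

def should_pad_mlp(model_name: str) -> bool:
    """Return True if the padding_mlp pass should be applied to this model."""
    name = model_name.lower()
    # Never pad pre-quantized (GPTQ/AWQ) checkpoints.
    if any(marker in name for marker in QUANT_MARKERS):
        return False
    # Pad only the listed model families.
    return any(family in name for family in PADDED_FAMILIES)

# Usage: a GPTQ variant of Qwen is no longer padded.
assert should_pad_mlp("Qwen/Qwen2-7B-Instruct")
assert not should_pad_mlp("Qwen/Qwen2-7B-Instruct-GPTQ-Int4")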