diff --git a/python/llm/src/ipex_llm/vllm/xpu/model_convert.py b/python/llm/src/ipex_llm/vllm/xpu/model_convert.py index b842c2a8..b6e73cde 100644 --- a/python/llm/src/ipex_llm/vllm/xpu/model_convert.py +++ b/python/llm/src/ipex_llm/vllm/xpu/model_convert.py @@ -126,6 +126,12 @@ def get_load_function(low_bit): modules = ["down_proj"] if "whisper" in self.vllm_config.model_config.model.lower(): modules = ["proj_out"] + if "glm-4v" in self.vllm_config.model_config.model.lower() and \ + low_bit in ("sym_int4", "woq_int4"): + modules = ["dense_4h_to_h"] + if low_bit == "fp16": + # to fix qwen2.5-vl and glm-4v + modules = ["vision", "visual"] optimize_model(self.model, low_bit=low_bit, torch_dtype=self.vllm_config.model_config.dtype,