Fix multimodal errors (#13178)

* fix glm4v int4 output error
* fix glm-4v qwen2.5-vl fp16 error
* update
2025-05-22 15:39:27 +08:00 · 2025-05-22 15:39:27 +08:00 · e3130a06ed
commit e3130a06ed
parent 154af7d7f7
1 changed file with 6 additions and 0 deletions
--- a/python/llm/src/ipex_llm/vllm/xpu/model_convert.py
+++ b/python/llm/src/ipex_llm/vllm/xpu/model_convert.py
@@ -126,6 +126,12 @@ def get_load_function(low_bit):
                    modules = ["down_proj"]
                if "whisper" in self.vllm_config.model_config.model.lower():
                    modules = ["proj_out"]
                if "glm-4v" in self.vllm_config.model_config.model.lower() and \
                        low_bit in ("sym_int4", "woq_int4"):
                    modules = ["dense_4h_to_h"]
                if low_bit == "fp16":
                    # to fix qwen2.5-vl and glm-4v
                    modules = ["vision", "visual"]
                optimize_model(self.model,
                               low_bit=low_bit,
                               torch_dtype=self.vllm_config.model_config.dtype,