From e3130a06ed3bc9528e3c3d1cecc1b51732a860e2 Mon Sep 17 00:00:00 2001
From: "Wang, Jian4" <61138589+hzjane@users.noreply.github.com>
Date: Thu, 22 May 2025 15:39:27 +0800
Subject: [PATCH] Fix multimodal errors (#13178)

* fix glm4v int4 output error

* fix glm-4v qwen2.5-vl fp16 error

* update
---
 python/llm/src/ipex_llm/vllm/xpu/model_convert.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/python/llm/src/ipex_llm/vllm/xpu/model_convert.py b/python/llm/src/ipex_llm/vllm/xpu/model_convert.py
index b842c2a8..b6e73cde 100644
--- a/python/llm/src/ipex_llm/vllm/xpu/model_convert.py
+++ b/python/llm/src/ipex_llm/vllm/xpu/model_convert.py
@@ -126,6 +126,12 @@ def get_load_function(low_bit):
             modules = ["down_proj"]
         if "whisper" in self.vllm_config.model_config.model.lower():
             modules = ["proj_out"]
+        if "glm-4v" in self.vllm_config.model_config.model.lower() and \
+                low_bit in ("sym_int4", "woq_int4"):
+            modules = ["dense_4h_to_h"]
+        if low_bit == "fp16":
+            # to fix qwen2.5-vl and glm-4v
+            modules = ["vision", "visual"]
         optimize_model(self.model,
                        low_bit=low_bit,
                        torch_dtype=self.vllm_config.model_config.dtype,
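
Note: for context on what the `modules` list above does, here is a minimal standalone sketch of the same exclusion pattern using ipex_llm's documented `optimize_model(..., modules_to_not_convert=...)` API (which the patched code appears to feed `modules` into). The model id "THUDM/glm-4v-9b" is illustrative only, not taken from the patch.

```python
# Minimal sketch: keep specific submodules out of low-bit conversion,
# mirroring the glm-4v / sym_int4 branch added in this patch.
import torch
from transformers import AutoModelForCausalLM
from ipex_llm import optimize_model

model = AutoModelForCausalLM.from_pretrained(
    "THUDM/glm-4v-9b",              # illustrative model id
    torch_dtype=torch.float16,
    trust_remote_code=True,
)

# Quantize to sym_int4 but leave the "dense_4h_to_h" projection layers
# in their original precision, analogous to the patched logic that
# avoids the glm-4v int4 output error.
model = optimize_model(
    model,
    low_bit="sym_int4",
    modules_to_not_convert=["dense_4h_to_h"],
)
```

The fp16 branch works the same way: passing `["vision", "visual"]` excludes the vision towers of glm-4v and qwen2.5-vl from conversion, which is what this patch relies on to fix the fp16 errors.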