Fix multimodal errors (#13178)

* fix glm4v int4 output error

* fix glm-4v qwen2.5-vl fp16 error

* update
This commit is contained in:
Wang, Jian4 2025-05-22 15:39:27 +08:00 committed by GitHub
parent 154af7d7f7
commit e3130a06ed
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -126,6 +126,12 @@ def get_load_function(low_bit):
# Pick which submodule names get special handling during low-bit optimization,
# keyed on model family and quantization mode.
# NOTE(review): this hunk is the interior of get_load_function(); the def line
# and earlier branches are outside the visible diff. Presumably `modules` lists
# layers to exclude from low-bit conversion — confirm against optimize_model.
modules = ["down_proj"]
if "whisper" in self.vllm_config.model_config.model.lower():
    modules = ["proj_out"]
# Per the commit message, glm-4v int4 output was wrong; special-casing
# dense_4h_to_h under sym_int4/woq_int4 is the fix.
if "glm-4v" in self.vllm_config.model_config.model.lower() and \
        low_bit in ("sym_int4", "woq_int4"):
    modules = ["dense_4h_to_h"]
if low_bit == "fp16":
    # to fix qwen2.5-vl and glm-4v: special-case the vision towers
    # ("vision"/"visual") when running in fp16
    modules = ["vision", "visual"]
# NOTE(review): call is truncated in this hunk — remaining arguments (likely
# including `modules`) lie outside the visible diff.
optimize_model(self.model,
               low_bit=low_bit,
               torch_dtype=self.vllm_config.model_config.dtype,