Fix multimodal errors (#13178)
* Fix glm-4v int4 output error * Fix glm-4v and qwen2.5-vl fp16 error * Update
This commit is contained in:
parent
154af7d7f7
commit
e3130a06ed
1 changed file with 6 additions and 0 deletions
|
|
@ -126,6 +126,12 @@ def get_load_function(low_bit):
|
|||
modules = ["down_proj"]
|
||||
if "whisper" in self.vllm_config.model_config.model.lower():
|
||||
modules = ["proj_out"]
|
||||
if "glm-4v" in self.vllm_config.model_config.model.lower() and \
|
||||
low_bit in ("sym_int4", "woq_int4"):
|
||||
modules = ["dense_4h_to_h"]
|
||||
if low_bit == "fp16":
|
||||
# to fix qwen2.5-vl and glm-4v
|
||||
modules = ["vision", "visual"]
|
||||
optimize_model(self.model,
|
||||
low_bit=low_bit,
|
||||
torch_dtype=self.vllm_config.model_config.dtype,
|
||||
|
|
|
|||
Loading…
Reference in a new issue