Fix multimodal errors (#13178)
* fix glm-4v int4 output error * fix glm-4v and qwen2.5-vl fp16 errors * update
This commit is contained in:
		
							parent
							
								
									154af7d7f7
								
							
						
					
					
						commit
						e3130a06ed
					
				
					 1 changed file with 6 additions and 0 deletions
				
			
		| 
						 | 
					@ -126,6 +126,12 @@ def get_load_function(low_bit):
 | 
				
			||||||
                    modules = ["down_proj"]
 | 
					                    modules = ["down_proj"]
 | 
				
			||||||
                if "whisper" in self.vllm_config.model_config.model.lower():
 | 
					                if "whisper" in self.vllm_config.model_config.model.lower():
 | 
				
			||||||
                    modules = ["proj_out"]
 | 
					                    modules = ["proj_out"]
 | 
				
			||||||
 | 
					                if "glm-4v" in self.vllm_config.model_config.model.lower() and \
 | 
				
			||||||
 | 
					                        low_bit in ("sym_int4", "woq_int4"):
 | 
				
			||||||
 | 
					                    modules = ["dense_4h_to_h"]
 | 
				
			||||||
 | 
					                if low_bit == "fp16":
 | 
				
			||||||
 | 
					                    # to fix qwen2.5-vl and glm-4v
 | 
				
			||||||
 | 
					                    modules = ["vision", "visual"]
 | 
				
			||||||
                optimize_model(self.model,
 | 
					                optimize_model(self.model,
 | 
				
			||||||
                               low_bit=low_bit,
 | 
					                               low_bit=low_bit,
 | 
				
			||||||
                               torch_dtype=self.vllm_config.model_config.dtype,
 | 
					                               torch_dtype=self.vllm_config.model_config.dtype,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue