add vLLM glm4 fix (#12474)
This commit is contained in:
parent 4b6c3160be
commit 59bd4a214f

1 changed file with 1 addition and 3 deletions
@@ -94,9 +94,7 @@ def get_load_function(low_bit):
         from ipex_llm import optimize_model
         import os
         not_convert_last_mlp = os.getenv("IPEX_LLM_NOT_CONVERT_LAST_MLP", None)
-        is_glm4_model = "glm-4" in self.model_config.model.lower()
-        is_codegeex4_model = "codegeex4-all" in self.model_config.model.lower()
-        if not_convert_last_mlp is not None or is_glm4_model or is_codegeex4_model:
+        if not_convert_last_mlp is not None:
             # only use to avoid nan value in last mlp forward running glm4-9b-chat
             modules = ["35.mlp", "36.mlp", "37.mlp", "38.mlp", "39.mlp"]
         else:
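
In effect, the commit drops the automatic model-name detection for "glm-4" and "codegeex4-all", so skipping conversion of the last MLP layers is now controlled solely by the IPEX_LLM_NOT_CONVERT_LAST_MLP environment variable. A minimal standalone sketch of the resulting gate (illustrative, not the repo's code; note that only the variable's presence matters, its value is ignored):

import os

# Illustrative sketch (assumption: a standalone rendering of the gate in the
# diff above, not the actual model_convert.py). Setting the variable to any
# value, even an empty string, enables the skip after this commit.
os.environ["IPEX_LLM_NOT_CONVERT_LAST_MLP"] = "1"

not_convert_last_mlp = os.getenv("IPEX_LLM_NOT_CONVERT_LAST_MLP", None)
if not_convert_last_mlp is not None:
    # leave the last five MLP layers unconverted to avoid NaN values in
    # the forward pass of glm4-9b-chat
    modules = ["35.mlp", "36.mlp", "37.mlp", "38.mlp", "39.mlp"]
else:
    modules = None
print(modules)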