Add basic glm4v support (#12345)
parent 69e3a56943
commit c8b7265359

1 changed file with 26 additions and 36 deletions
@@ -1422,52 +1422,42 @@ def _optimize_post(model, lightweight_bmm=False):
                             module.SelfAttention,
                             chatglm_attention_forward
                             )
-        elif model.config.num_layers == 40 and hasattr(model.config, 'rope_ratio'):
+        elif isinstance(model.config.eos_token_id, list):
+            # glm4 family
             modeling_module_name = model.__class__.__module__
             module = importlib.import_module(modeling_module_name)
+
+            from ipex_llm.transformers.models.chatglm2 import chatglm_rms_norm_forward
+            convert_forward(model, module.RMSNorm, chatglm_rms_norm_forward)
+
             if hasattr(model.transformer, "vision"):
-                # glm-4v-9b
+                # glm4 vision family
                 modeling_module_name = model.transformer.vision.__class__.__module__
                 vision_module = importlib.import_module(modeling_module_name)
-
                 from ipex_llm.transformers.models.chatglm4v import chatglm4v_attention_forward
                 from ipex_llm.transformers.models.chatglm4v import chatglm4v_model_forward
-                from ipex_llm.transformers.models.chatglm4v import visual_attention_forward
-                from ipex_llm.transformers.models.chatglm4v import patch_embedding_forward
-                from ipex_llm.transformers.models.chatglm2 import chatglm_rms_norm_forward
-                convert_forward(model,
-                                module.SelfAttention,
-                                chatglm4v_attention_forward)
-                convert_forward(model,
-                                module.ChatGLMModel,
-                                chatglm4v_model_forward)
-                convert_forward(model,
-                                module.RMSNorm,
-                                chatglm_rms_norm_forward)
-                convert_forward(model,
-                                vision_module.Attention,
-                                visual_attention_forward)
-                convert_forward(model,
-                                vision_module.PatchEmbedding,
-                                patch_embedding_forward)
-            else:
-                # glm-4-9b-chat
+                convert_forward(model, module.SelfAttention, chatglm4v_attention_forward)
+                convert_forward(model, module.ChatGLMModel, chatglm4v_model_forward)
+
+                if model.config.num_layers == 40:
+                    # glm-4v-9b
+                    from ipex_llm.transformers.models.chatglm4v import visual_attention_forward
+                    from ipex_llm.transformers.models.chatglm4v import patch_embedding_forward
+                    convert_forward(model, vision_module.Attention, visual_attention_forward)
+                    convert_forward(model, vision_module.PatchEmbedding, patch_embedding_forward)
+                else:
+                    # todo
+                    pass
+
+            elif model.config.num_layers == 40:
+                # glm-4-9b
                 from ipex_llm.transformers.models.chatglm4 import chatglm4_attention_forward
                 from ipex_llm.transformers.models.chatglm4 import chatglm4_model_forward
-                from ipex_llm.transformers.models.chatglm2 import chatglm_rms_norm_forward
                 from ipex_llm.transformers.models.chatglm4 import chatglm4_encoder_forward
-                convert_forward(model,
-                                module.SelfAttention,
-                                chatglm4_attention_forward)
-                convert_forward(model,
-                                module.ChatGLMModel,
-                                chatglm4_model_forward)
-                convert_forward(model,
-                                module.RMSNorm,
-                                chatglm_rms_norm_forward)
-                convert_forward(model,
-                                module.GLMTransformer,
-                                chatglm4_encoder_forward)
+
+                convert_forward(model, module.SelfAttention, chatglm4_attention_forward)
+                convert_forward(model, module.ChatGLMModel, chatglm4_model_forward)
+                convert_forward(model, module.GLMTransformer, chatglm4_encoder_forward)
     elif "mpt" in model.config.model_type:
         if model.config.architectures is not None:
             modeling_module_name = model.__class__.__module__
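The hunk leans entirely on convert_forward, which is defined earlier in convert.py and not shown here. The technique it relies on is forward-method rebinding: walk the loaded model's module tree and rebind forward on every instance of a target class. A minimal sketch of that pattern, assuming plain PyTorch; the helper below is illustrative and is not ipex-llm's actual definition:

# Illustrative sketch only -- ipex-llm's real convert_forward lives elsewhere
# in convert.py; this shows the forward-rebinding technique the hunk relies on.
import torch
import torch.nn as nn

def convert_forward(m: nn.Module, target_m: type, new_forward) -> None:
    # Rebind `forward` on this module if it is an instance of the target class.
    if isinstance(m, target_m):
        m.forward = new_forward.__get__(m, m.__class__)  # bind function as a method
    # Recurse into children so nested occurrences are patched too.
    for child in m.children():
        convert_forward(child, target_m, new_forward)

# Hypothetical replacement forward, purely for demonstration.
def relu_forward(self, x: torch.Tensor) -> torch.Tensor:
    return x.clamp(min=0)

model = nn.Sequential(nn.Linear(4, 4), nn.Tanh())
convert_forward(model, nn.Tanh, relu_forward)
print(model(torch.randn(2, 4)).min() >= 0)  # tensor(True): Tanh now runs relu_forward

Patching instances at load time, rather than editing the upstream modeling file, is what lets one codepath cover trust_remote_code checkpoints: the target classes only exist after importlib.import_module resolves the checkpoint's own modeling module, which is exactly what the hunk does before each convert_forward call.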
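For reference, the new dispatch keys off properties of the loaded checkpoint rather than layer counts alone: glm4-family configs carry a list-valued eos_token_id, and glm-4v additionally exposes a transformer.vision submodule. A hypothetical load that would exercise the new glm4v path; the model id and kwargs follow ipex-llm's usual transformers-style API and do not appear in this commit, so treat this as a sketch rather than a verified recipe:

# Hypothetical usage sketch: assumes ipex-llm's transformers-style loader and
# the public THUDM/glm-4v-9b checkpoint; neither is part of this commit.
from ipex_llm.transformers import AutoModelForCausalLM
from transformers import AutoTokenizer

model = AutoModelForCausalLM.from_pretrained(
    "THUDM/glm-4v-9b",
    load_in_4bit=True,       # triggers ipex-llm's optimization pass on load
    trust_remote_code=True,  # glm4 modeling code ships inside the checkpoint
)
tokenizer = AutoTokenizer.from_pretrained("THUDM/glm-4v-9b", trust_remote_code=True)

# During loading, _optimize_post sees the list-valued eos_token_id, finds the
# `transformer.vision` submodule, and swaps in the chatglm4v forwards above.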