Add fused_mlp to glm4v models (#12378)

Yuwen Hu 2024-11-11 17:10:25 +08:00 committed by GitHub
parent dc34e8c51f
commit e0918934c8

@@ -1056,13 +1056,14 @@ def _optimize_pre(model, qtype=None):
             from ipex_llm.transformers.models.chatglm2 import split_mlp
             model.apply(split_mlp)
         elif isinstance(model.config.eos_token_id, list):
+            from ipex_llm.transformers.models.chatglm2 import split_mlp
             # glm4 family
             if hasattr(model.transformer, "vision"):
                 if model.config.num_layers != 40:
                     from ipex_llm.transformers.models.chatglm4v import merge_qkv
                     model.apply(merge_qkv)
+                model.apply(split_mlp)
             elif model.config.num_layers in [40, 28]:
-                from ipex_llm.transformers.models.chatglm2 import split_mlp
                 model.apply(split_mlp)
     return model
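
For context: model.apply(fn) runs fn recursively on every submodule, so split_mlp visits each transformer block's MLP. A minimal sketch of what such a split looks like, assuming a chatglm-style MLP whose dense_h_to_4h linear fuses the gate and up projections; the attribute names and weight layout here are assumptions for illustration, not ipex_llm's actual implementation:

import torch.nn as nn

def split_mlp_sketch(module: nn.Module):
    # Hypothetical sketch of the split_mlp pattern: break a fused
    # dense_h_to_4h linear into separate gate/up projections so a fused
    # SiLU-mul MLP forward can consume them. Attribute names are assumed.
    if module.__class__.__name__ == "MLP" and hasattr(module, "dense_h_to_4h"):
        fused = module.dense_h_to_4h
        half = fused.out_features // 2
        has_bias = fused.bias is not None
        gate_w, up_w = fused.weight.data.chunk(2, dim=0)

        gate_proj = nn.Linear(fused.in_features, half, bias=has_bias)
        up_proj = nn.Linear(fused.in_features, half, bias=has_bias)
        gate_proj.weight.data = gate_w
        up_proj.weight.data = up_w
        if has_bias:
            gate_b, up_b = fused.bias.data.chunk(2, dim=0)
            gate_proj.bias.data = gate_b
            up_proj.bias.data = up_b

        module.gate_proj = gate_proj
        module.up_proj = up_proj
        del module.dense_h_to_4h

# Applied module-wise, as the diff does with the real helper:
# model.apply(split_mlp_sketch)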
@@ -1459,6 +1460,8 @@ def _optimize_post(model, lightweight_bmm=False):
             convert_forward(model, SiglipAttention, siglip_attention_forward)
             from ipex_llm.transformers.models.chatglm4v import vision_model_forward
             convert_forward(model, vision_module.VisionModel, vision_model_forward)
+            from ipex_llm.transformers.models.chatglm2 import mlp_forward
+            convert_forward(model, module.MLP, mlp_forward)
         elif model.config.num_layers in [40, 28]:
             # glm-4-9b
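
convert_forward is ipex_llm's helper for swapping an optimized forward onto every instance of a target module class. A minimal sketch of that monkey-patching pattern, assuming the usual rebind-per-instance approach (this mirrors how such helpers commonly work, not necessarily the exact ipex_llm code):

import torch.nn as nn

def convert_forward_sketch(model: nn.Module, target_cls: type, new_forward):
    # Walk every submodule and rebind `forward` on instances of target_cls,
    # so e.g. each module.MLP instance then runs the fused mlp_forward.
    for sub_module in model.modules():
        if isinstance(sub_module, target_cls):
            sub_module.forward = new_forward.__get__(sub_module, type(sub_module))

# Usage matching the diff (module.MLP comes from the model implementation,
# mlp_forward from ipex_llm.transformers.models.chatglm2):
# convert_forward_sketch(model, module.MLP, mlp_forward)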