Fix Qwen-14B incorrectly running into the registered attention forward (#11128)

* fix qwen 14b
This commit is contained in:
Zhao Changmin 2024-05-24 14:45:07 +08:00 committed by GitHub
parent 373f9e6c79
commit 65f4212f89
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -1249,7 +1249,8 @@ def _optimize_post(model, lightweight_bmm=False):
from ipex_llm.transformers.models.qwen import qwen_mlp_forward
from ipex_llm.transformers.models.chatglm2 import chatglm_rms_norm_forward
from ipex_llm.transformers.models.qwen import qwen_model_forward
-if model.config.max_position_embeddings == 8192:
+if model.config.max_position_embeddings == 8192 \
+and model.config.hidden_size == 4096:
convert_forward(model,
module.QWenAttention,
qwen_attention_forward_registered