update inter_pp of qwen2 (#12041)

This commit is contained in:
Ruonan Wang 2024-09-09 19:34:17 -07:00 committed by GitHub
parent 048b4590aa
commit 640998edea
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -156,7 +156,7 @@ def optimize_llm(
if intra_pp is None:
intra_pp = 2
if inter_pp is None:
- inter_pp = 4 if model.config.intermediate_size == 18944 else 1
+ inter_pp = 2 if model.config.intermediate_size == 18944 else 1
from ipex_llm.transformers.npu_models.qwen2_mp import gen_qwen2_fused_model_forward
from ipex_llm.transformers.npu_models.qwen2_mp import DecodeRunner, PrefillRunner