From e211a5b076f37071261db03e54fbe1b46ebde219 Mon Sep 17 00:00:00 2001
From: SONG Ge <38711238+sgwhat@users.noreply.github.com>
Date: Tue, 27 Aug 2024 15:08:01 +0800
Subject: [PATCH] update minicpm to meet latest refactor (#11937)

---
 .../llm/src/ipex_llm/transformers/npu_models/convert_mp.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py b/python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py
index 500c2e2d..698ec896 100644
--- a/python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py
+++ b/python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py
@@ -95,6 +95,12 @@ def optimize_llm(
         from ipex_llm.transformers.npu_models.qwen2_mp import qwen2_casullm_forward
         convert_forward(model, Qwen2ForCausalLM, qwen2_casullm_forward)
     elif model.config.model_type == "minicpm":
+        # for minicpm-1b
+        if intra_pp is None:
+            intra_pp = 2
+        if inter_pp is None:
+            inter_pp = 2
+
         from ipex_llm.transformers.npu_models.minicpm_mp import gen_minicpm_fused_model_forward
         from ipex_llm.transformers.npu_models.minicpm_mp import DecodeRunner, PrefillRunner