From d880e534d20d3cb22874eb6331eebda6edb8ba85 Mon Sep 17 00:00:00 2001
From: Yina Chen <33650826+cyita@users.noreply.github.com>
Date: Thu, 7 Nov 2024 05:19:55 +0200
Subject: [PATCH] [NPU] acclib llama3.2 support groupwise (#12355)

* change inter_pp
* add comment
---
 .../ipex_llm/transformers/npu_models/convert_mp.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py b/python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py
index d00a0885..f7e00bb8 100644
--- a/python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py
+++ b/python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py
@@ -337,7 +337,17 @@ def optimize_llm(
         if intra_pp is None:
             intra_pp = 2
         if inter_pp is None:
-            inter_pp = 2 if group_size == 0 else 8
+            if group_size == 0:
+                inter_pp = 2
+            # llama3.2
+            elif model.config.intermediate_size == 8192:
+                # llama3.2 1b
+                if model.config.hidden_size == 2048:
+                    inter_pp = 1
+                else:
+                    inter_pp = 2
+            else:
+                inter_pp = 8
         convert_llama(model,
                       max_output_len=max_context_len,
                       max_prompt_len=max_prompt_len,
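
For reference, the selection logic introduced by this hunk can be read as a small standalone helper. This is only an illustrative sketch of the heuristic in the diff; the function name `pick_inter_pp` and the example values below are assumptions for illustration, not part of the patch.

```python
def pick_inter_pp(group_size, intermediate_size, hidden_size):
    """Sketch of the patched heuristic for the inter-stage pipeline degree."""
    if group_size == 0:
        # channel-wise quantization keeps the original default
        return 2
    # llama3.2 family: intermediate_size == 8192
    if intermediate_size == 8192:
        # llama3.2 1b: hidden_size == 2048
        return 1 if hidden_size == 2048 else 2
    # other groupwise-quantized llama models
    return 8


# Hypothetical example: a groupwise-quantized llama3.2 model with
# hidden_size != 2048 would now get inter_pp = 2 instead of 8.
assert pick_inter_pp(group_size=64, intermediate_size=8192, hidden_size=3072) == 2
```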