diff --git a/python/llm/src/ipex_llm/transformers/npu_models/qwen2_mp.py b/python/llm/src/ipex_llm/transformers/npu_models/qwen2_mp.py index ab11c27b..eb001b6d 100644 --- a/python/llm/src/ipex_llm/transformers/npu_models/qwen2_mp.py +++ b/python/llm/src/ipex_llm/transformers/npu_models/qwen2_mp.py @@ -234,7 +234,7 @@ class LowBitQwenMultiDecoderlayer(LLMBaseNNFactory): print(f"{mode} start compiling") if ( group_size != 0 - and (mode == "prefill" or num_layers == 2) + and (mode == "prefill" or num_layers == 2 or num_layers == 3) and os.environ.get("IPEX_LLM_NPU_DISABLE_COMPILE_OPT", "0") != "1" ): self.compile(npu_dpu_groups=6)