[NPU] Update qwen2 compile config (#12383)

* update

* fix
This commit is contained in:
Ruonan Wang 2024-11-12 16:59:44 +08:00 committed by GitHub
parent 7a97fbb779
commit 6bf5a8c230
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -234,7 +234,7 @@ class LowBitQwenMultiDecoderlayer(LLMBaseNNFactory):
print(f"{mode} start compiling")
if (
group_size != 0
- and (mode == "prefill" or num_layers == 2)
+ and (mode == "prefill" or num_layers == 2 or num_layers == 3)
and os.environ.get("IPEX_LLM_NPU_DISABLE_COMPILE_OPT", "0") != "1"
):
self.compile(npu_dpu_groups=6)