Fix run_decoders bug (#11871)

2024-08-20 12:04:59 -07:00 · 2024-08-20 12:04:59 -07:00 · bdaeee1d63
commit bdaeee1d63
parent 32f0a77846
1 changed file with 1 addition and 1 deletion
--- a/python/llm/src/ipex_llm/transformers/npu_models/llama_mp.py
+++ b/python/llm/src/ipex_llm/transformers/npu_models/llama_mp.py
@@ -601,7 +601,7 @@ class FusedLlamaLowBitMultiDecoderlayer(torch.nn.Module):
                x_np[2].ctypes.data_as(ctypes.c_void_p),
            )
            t0 = time.perf_counter()
-            backend_lib.run_decoders(models_ptr, inputs_ptr, 2, 3)
+            backend_lib.run_decoders(models_ptr, inputs_ptr, self.intra_stages, 3)
            t1 = time.perf_counter()
        hidden_states = self.backend_decoders[-1].torch_out[0]