From 59b01fa7d2b2cc3f7ed1553dd75f8f974d039916 Mon Sep 17 00:00:00 2001
From: Yina Chen <33650826+cyita@users.noreply.github.com>
Date: Thu, 14 Nov 2024 04:03:36 +0200
Subject: [PATCH] small fix (#12397)

---
 python/llm/src/ipex_llm/transformers/npu_models/qwen2_mp.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/llm/src/ipex_llm/transformers/npu_models/qwen2_mp.py b/python/llm/src/ipex_llm/transformers/npu_models/qwen2_mp.py
index 173271c5..58c2632f 100644
--- a/python/llm/src/ipex_llm/transformers/npu_models/qwen2_mp.py
+++ b/python/llm/src/ipex_llm/transformers/npu_models/qwen2_mp.py
@@ -934,9 +934,9 @@ class PrefillRunner:
         hidden_states = F.pad(hidden_states.to(torch.float16), (0, 0, 0, pad_len), value=0.0)
         position_ids = F.pad(position_ids, (0, pad_len), value=0)
         attention_mask = F.pad(
-            attention_mask.to(torch.int64),
+            attention_mask.to(torch.float16),
             (0, pad_len, 0, pad_len),
-            value=torch.iinfo(torch.int64).min,
+            value=torch.finfo(torch.float16).min,
         )
 
         args = (hidden_states, position_ids, attention_mask, past_key_value)