From 59b01fa7d2b2cc3f7ed1553dd75f8f974d039916 Mon Sep 17 00:00:00 2001 From: Yina Chen <33650826+cyita@users.noreply.github.com> Date: Thu, 14 Nov 2024 04:03:36 +0200 Subject: [PATCH] small fix (#12397) --- python/llm/src/ipex_llm/transformers/npu_models/qwen2_mp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/llm/src/ipex_llm/transformers/npu_models/qwen2_mp.py b/python/llm/src/ipex_llm/transformers/npu_models/qwen2_mp.py index 173271c5..58c2632f 100644 --- a/python/llm/src/ipex_llm/transformers/npu_models/qwen2_mp.py +++ b/python/llm/src/ipex_llm/transformers/npu_models/qwen2_mp.py @@ -934,9 +934,9 @@ class PrefillRunner: hidden_states = F.pad(hidden_states.to(torch.float16), (0, 0, 0, pad_len), value=0.0) position_ids = F.pad(position_ids, (0, pad_len), value=0) attention_mask = F.pad( - attention_mask.to(torch.int64), + attention_mask.to(torch.float16), (0, pad_len, 0, pad_len), - value=torch.iinfo(torch.int64).min, + value=torch.finfo(torch.float16).min, ) args = (hidden_states, position_ids, attention_mask, past_key_value)