From 8803242f5ce8869474588e462eb6f5466193f7ac Mon Sep 17 00:00:00 2001
From: Guoqiong Song
Date: Wed, 4 Sep 2024 19:17:54 -0700
Subject: [PATCH] fix llama on cpu (#12018)

---
 python/llm/src/ipex_llm/transformers/models/llama.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/llm/src/ipex_llm/transformers/models/llama.py b/python/llm/src/ipex_llm/transformers/models/llama.py
index dfbbaf00..873407fb 100644
--- a/python/llm/src/ipex_llm/transformers/models/llama.py
+++ b/python/llm/src/ipex_llm/transformers/models/llama.py
@@ -1579,7 +1579,7 @@ def llama_attention_forward_4_41_original(
         past_key_value.key_cache[self.layer_idx] = key_states
         past_key_value.value_cache[self.layer_idx] = value_states
 
-        if cache_position is not None:
+        if attention_mask is not None:
             new_attention_mask = attention_mask[:, :, :, 0:kv_seq_len]
         else:
             new_attention_mask = attention_mask
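
For context, a minimal sketch of the guarded slicing that this patch introduces, assuming attention_mask is either None or a 4-D tensor of shape (batch, 1, q_len, padded_kv_len); on the CPU path the mask can presumably be None even when cache_position is set, so the old check would index a None mask. The helper name slice_attention_mask is illustrative only and is not part of the repository.

    from typing import Optional
    import torch

    def slice_attention_mask(attention_mask: Optional[torch.Tensor],
                             kv_seq_len: int) -> Optional[torch.Tensor]:
        # Mirror of the patched logic: only slice when a mask tensor actually
        # exists. Guarding on cache_position (as before the patch) would try
        # to index a None mask and raise a TypeError.
        if attention_mask is not None:
            return attention_mask[:, :, :, 0:kv_seq_len]
        return attention_mask  # None passes through unchanged

    # Illustrative check: a (1, 1, 1, 8) mask is trimmed to kv_seq_len=5,
    # and a None mask is returned as-is instead of crashing.
    mask = torch.zeros(1, 1, 1, 8)
    assert slice_attention_mask(mask, 5).shape == (1, 1, 1, 5)
    assert slice_attention_mask(None, 5) is None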