fix llama on cpu (#12018)
parent b3b2cd64b4
commit 8803242f5c
1 changed file with 1 addition and 1 deletion
@@ -1579,7 +1579,7 @@ def llama_attention_forward_4_41_original(
         past_key_value.key_cache[self.layer_idx] = key_states
         past_key_value.value_cache[self.layer_idx] = value_states
 
-        if cache_position is not None:
+        if attention_mask is not None:
             new_attention_mask = attention_mask[:, :, :, 0:kv_seq_len]
         else:
             new_attention_mask = attention_mask
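For reference, a minimal sketch of the guarded slicing this one-line change produces. Names follow the diff; the None-mask scenario is an assumption inferred from the commit title ("fix llama on cpu"), not taken from the surrounding file.

kv_seq_len = 4
attention_mask = None  # assumed failing case: no attention mask supplied on CPU

# The old guard checked cache_position, so a None attention_mask could reach
# the slice below and raise "TypeError: 'NoneType' object is not subscriptable".
# The new guard keys off the mask itself:
if attention_mask is not None:
    new_attention_mask = attention_mask[:, :, :, 0:kv_seq_len]
else:
    new_attention_mask = attention_mask  # stays None; downstream code handles it

print(new_attention_mask)  # -> None when no mask is supplied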