fix llama on cpu (#12018)

Guoqiong Song 2024-09-04 19:17:54 -07:00 committed by GitHub
parent b3b2cd64b4
commit 8803242f5c

@@ -1579,7 +1579,7 @@ def llama_attention_forward_4_41_original(
         past_key_value.key_cache[self.layer_idx] = key_states
         past_key_value.value_cache[self.layer_idx] = value_states
 
-    if cache_position is not None:
+    if attention_mask is not None:
         new_attention_mask = attention_mask[:, :, :, 0:kv_seq_len]
     else:
         new_attention_mask = attention_mask
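
Why this helps on CPU: the old guard keyed on cache_position, which the
model forward appears to set even when no attention mask was passed, so the
next line sliced None and raised a TypeError. Guarding on attention_mask
itself slices only when a mask actually exists. Below is a minimal
standalone sketch of the failure mode and the fix; kv_seq_len and the
decode-step state are illustrative assumptions, not values taken from the
model code.

import torch

# Illustrative decode-step state; kv_seq_len is an assumed value.
kv_seq_len = 16
attention_mask = None                       # no mask supplied on this path
cache_position = torch.arange(kv_seq_len)   # still set, so the old guard passes

# Old guard: checks the wrong variable, then slices None -> TypeError.
if cache_position is not None:
    try:
        new_attention_mask = attention_mask[:, :, :, 0:kv_seq_len]
    except TypeError as e:
        print("old guard fails:", e)

# Fixed guard: slice only when a mask is present; otherwise pass None through.
if attention_mask is not None:
    new_attention_mask = attention_mask[:, :, :, 0:kv_seq_len]
else:
    new_attention_mask = attention_mask
print("fixed guard ->", new_attention_mask)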