fix llama on cpu (#12018)
parent b3b2cd64b4
commit 8803242f5c
1 changed file with 1 addition and 1 deletion
@@ -1579,7 +1579,7 @@ def llama_attention_forward_4_41_original(
         past_key_value.key_cache[self.layer_idx] = key_states
         past_key_value.value_cache[self.layer_idx] = value_states
 
-        if cache_position is not None:
+        if attention_mask is not None:
             new_attention_mask = attention_mask[:, :, :, 0:kv_seq_len]
         else:
             new_attention_mask = attention_mask
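For reference, a minimal sketch of the guarded slicing this one-line change produces. Names follow the diff; the None-mask scenario is an assumption inferred from the commit title ("fix llama on cpu"), not taken from the surrounding file.

kv_seq_len = 4
attention_mask = None  # assumed failing case: no attention mask supplied on CPU

# The old guard checked cache_position, so a None attention_mask could reach
# the slice below and raise "TypeError: 'NoneType' object is not subscriptable".
# The new guard keys off the mask itself:
if attention_mask is not None:
    new_attention_mask = attention_mask[:, :, :, 0:kv_seq_len]
else:
    new_attention_mask = attention_mask  # stays None; downstream code handles it

print(new_attention_mask)  # -> None when no mask is supplied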