fix chatglm lookahead on ARC (#11320)

This commit is contained in:
Yishuo Wang 2024-06-14 16:26:11 +08:00 committed by GitHub
parent f5ef94046e
commit e8dd8e97ef
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 2 additions and 2 deletions

View file

@@ -244,7 +244,7 @@ def chatglm2_attention_forward(
key_states[..., :rot_dim] = k_rot[...] key_states[..., :rot_dim] = k_rot[...]
# IPEX-LLM OPT: kv cache and quantize kv # IPEX-LLM OPT: kv cache and quantize kv
use_quantize_kv = use_quantize_kv_cache(self.query_key_value, hidden_states) use_quantize_kv = use_quantize_kv_cache(self.query_key_value, query_states)
key_states, value_states = update_past_key_value( key_states, value_states = update_past_key_value(
past_key_value, key_states, value_states, past_key_value, key_states, value_states,
kv_seq_len, use_quantize_kv, hidden_states.device kv_seq_len, use_quantize_kv, hidden_states.device

View file

@@ -171,7 +171,7 @@ def chatglm4_attention_forward(
key_states[..., :rot_dim] = k_rot[...] key_states[..., :rot_dim] = k_rot[...]
# IPEX-LLM OPT: kv cache and quantize kv # IPEX-LLM OPT: kv cache and quantize kv
use_quantize_kv = use_quantize_kv_cache(self.query_key_value, hidden_states) use_quantize_kv = use_quantize_kv_cache(self.query_key_value, query_states)
key_states, value_states = update_past_key_value( key_states, value_states = update_past_key_value(
past_key_value, key_states, value_states, past_key_value, key_states, value_states,
kv_seq_len, use_quantize_kv, hidden_states.device kv_seq_len, use_quantize_kv, hidden_states.device