fix chatglm lookahead on ARC (#11320)
This commit is contained in:
parent
f5ef94046e
commit
e8dd8e97ef
2 changed files with 2 additions and 2 deletions
|
|
@@ -244,7 +244,7 @@ def chatglm2_attention_forward(
|
||||||
key_states[..., :rot_dim] = k_rot[...]
|
key_states[..., :rot_dim] = k_rot[...]
|
||||||
|
|
||||||
# IPEX-LLM OPT: kv cache and quantize kv
|
# IPEX-LLM OPT: kv cache and quantize kv
|
||||||
use_quantize_kv = use_quantize_kv_cache(self.query_key_value, hidden_states)
|
use_quantize_kv = use_quantize_kv_cache(self.query_key_value, query_states)
|
||||||
key_states, value_states = update_past_key_value(
|
key_states, value_states = update_past_key_value(
|
||||||
past_key_value, key_states, value_states,
|
past_key_value, key_states, value_states,
|
||||||
kv_seq_len, use_quantize_kv, hidden_states.device
|
kv_seq_len, use_quantize_kv, hidden_states.device
|
||||||
|
|
|
||||||
|
|
@@ -171,7 +171,7 @@ def chatglm4_attention_forward(
|
||||||
key_states[..., :rot_dim] = k_rot[...]
|
key_states[..., :rot_dim] = k_rot[...]
|
||||||
|
|
||||||
# IPEX-LLM OPT: kv cache and quantize kv
|
# IPEX-LLM OPT: kv cache and quantize kv
|
||||||
use_quantize_kv = use_quantize_kv_cache(self.query_key_value, hidden_states)
|
use_quantize_kv = use_quantize_kv_cache(self.query_key_value, query_states)
|
||||||
key_states, value_states = update_past_key_value(
|
key_states, value_states = update_past_key_value(
|
||||||
past_key_value, key_states, value_states,
|
past_key_value, key_states, value_states,
|
||||||
kv_seq_len, use_quantize_kv, hidden_states.device
|
kv_seq_len, use_quantize_kv, hidden_states.device
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue