From e8dd8e97ef96a6ccc060c57c0a672d8aa223d12e Mon Sep 17 00:00:00 2001 From: Yishuo Wang Date: Fri, 14 Jun 2024 16:26:11 +0800 Subject: [PATCH] fix chatglm lookahead on ARC (#11320) --- python/llm/src/ipex_llm/transformers/models/chatglm2.py | 2 +- python/llm/src/ipex_llm/transformers/models/chatglm4.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/llm/src/ipex_llm/transformers/models/chatglm2.py b/python/llm/src/ipex_llm/transformers/models/chatglm2.py index c83675c7..7eebf1d0 100644 --- a/python/llm/src/ipex_llm/transformers/models/chatglm2.py +++ b/python/llm/src/ipex_llm/transformers/models/chatglm2.py @@ -244,7 +244,7 @@ def chatglm2_attention_forward( key_states[..., :rot_dim] = k_rot[...] # IPEX-LLM OPT: kv cache and quantize kv - use_quantize_kv = use_quantize_kv_cache(self.query_key_value, hidden_states) + use_quantize_kv = use_quantize_kv_cache(self.query_key_value, query_states) key_states, value_states = update_past_key_value( past_key_value, key_states, value_states, kv_seq_len, use_quantize_kv, hidden_states.device diff --git a/python/llm/src/ipex_llm/transformers/models/chatglm4.py b/python/llm/src/ipex_llm/transformers/models/chatglm4.py index 5f0bd608..cfae9a37 100644 --- a/python/llm/src/ipex_llm/transformers/models/chatglm4.py +++ b/python/llm/src/ipex_llm/transformers/models/chatglm4.py @@ -171,7 +171,7 @@ def chatglm4_attention_forward( key_states[..., :rot_dim] = k_rot[...] # IPEX-LLM OPT: kv cache and quantize kv - use_quantize_kv = use_quantize_kv_cache(self.query_key_value, hidden_states) + use_quantize_kv = use_quantize_kv_cache(self.query_key_value, query_states) key_states, value_states = update_past_key_value( past_key_value, key_states, value_states, kv_seq_len, use_quantize_kv, hidden_states.device