From eb1e65f8a980d7ed9be4bd08ebfe1b59d83ae6e6 Mon Sep 17 00:00:00 2001
From: "Huang, Xinshengzi"
Date: Thu, 22 Aug 2024 15:14:47 +0800
Subject: [PATCH] add comment

---
 python/llm/src/ipex_llm/transformers/models/baichuan.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/llm/src/ipex_llm/transformers/models/baichuan.py b/python/llm/src/ipex_llm/transformers/models/baichuan.py
index 3944a948..9d412792 100644
--- a/python/llm/src/ipex_llm/transformers/models/baichuan.py
+++ b/python/llm/src/ipex_llm/transformers/models/baichuan.py
@@ -246,6 +246,7 @@ def baichuan_attention_forward_7b(
     bsz, q_len, _ = hidden_states.size()
     device = hidden_states.device
 
+    # [CompressKV]
     use_compresskv = isinstance(past_key_value, DynamicCompressCache)
 
     qkv = self.W_pack(hidden_states)
@@ -257,6 +258,7 @@
     kv_seq_len = key_states.shape[2]
     if past_key_value is not None:
+        # [CompressKV]
         if use_compresskv:
             kv_seq_len += past_key_value.get_usable_length(kv_seq_len, self.layer_idx)
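Note for reviewers: below is a minimal, self-contained sketch of the control flow
the two added "# [CompressKV]" markers annotate. DynamicCompressCache and
get_usable_length come from the hunks above; the helper function, the import
path, and the uncompressed else-branch are hypothetical illustrations added for
context, not part of this patch.

    # Sketch only (not part of the patch): shows how the CompressKV check
    # gates the kv_seq_len update in baichuan_attention_forward_7b.
    from ipex_llm.transformers.kv import DynamicCompressCache  # assumed import path

    def usable_kv_seq_len(key_states, past_key_value, layer_idx):
        # Hypothetical helper illustrating the branch the comments mark.
        kv_seq_len = key_states.shape[2]
        if past_key_value is not None:
            if isinstance(past_key_value, DynamicCompressCache):  # [CompressKV]
                # Compressed KV cache tracks its own usable length per layer.
                kv_seq_len += past_key_value.get_usable_length(kv_seq_len, layer_idx)
            else:
                # Assumed uncompressed path: legacy (key, value) tuple cache,
                # where dim 2 of the cached key tensor is the sequence length.
                kv_seq_len += past_key_value[0].shape[2]
        return kv_seq_len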