parent
a71ae7c22b
commit
d2abc9711b
2 changed files with 2 additions and 2 deletions
|
|
@ -152,7 +152,6 @@ def compress_kv(attn_config, key_states, query_states, value_states, attention_m
|
|||
if not hasattr(attn_config, 'pooling'):
|
||||
attn_config.pooling = 'maxpool'
|
||||
bsz, num_heads, q_len, head_dim = query_states.shape
|
||||
print(f"attn_config.max_capacity_prompt: ", attn_config.max_capacity_prompt, " ", q_len)
|
||||
if q_len <= attn_config.max_capacity_prompt:
|
||||
return key_states, value_states
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -127,7 +127,8 @@ def qwen2_model_forward(
|
|||
DynamicCompressCache):
|
||||
past_key_values = DynamicCompressCache.from_legacy_cache(past_key_values)
|
||||
if not use_quantize_kv and not use_compress_kv and not isinstance(past_key_values,
|
||||
DynamicNormalCache):
|
||||
(DynamicNormalCache,
|
||||
DynamicCompressCache)):
|
||||
past_key_values = DynamicNormalCache.from_legacy_cache(past_key_values)
|
||||
past_key_values_length = past_key_values.get_usable_length(seq_length)
|
||||
# ipex-llm changes end
|
||||
|
|
|
|||
Loading…
Reference in a new issue