diff --git a/python/llm/src/ipex_llm/transformers/models/qwen2.py b/python/llm/src/ipex_llm/transformers/models/qwen2.py index 91335574..0306bb94 100644 --- a/python/llm/src/ipex_llm/transformers/models/qwen2.py +++ b/python/llm/src/ipex_llm/transformers/models/qwen2.py @@ -123,8 +123,8 @@ def qwen2_model_forward( if use_cache: if use_quantize_kv and not isinstance(past_key_values, DynamicFp8Cache): past_key_values = DynamicFp8Cache.from_legacy_cache(past_key_values) - elif use_compress_kv and not isinstance(past_key_values, - DynamicCompressCache): + elif not use_quantize_kv and use_compress_kv and not isinstance(past_key_values, + DynamicCompressCache): past_key_values = DynamicCompressCache.from_legacy_cache(past_key_values) if not use_quantize_kv and not use_compress_kv and not isinstance(past_key_values, (DynamicNormalCache,