From e956e71fc1f922e01edc68bede1a309337a45504 Mon Sep 17 00:00:00 2001 From: Yina Chen <33650826+cyita@users.noreply.github.com> Date: Wed, 7 Aug 2024 13:10:30 +0300 Subject: [PATCH] fix conflict with quant kv (#11737) --- python/llm/src/ipex_llm/transformers/models/qwen2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/llm/src/ipex_llm/transformers/models/qwen2.py b/python/llm/src/ipex_llm/transformers/models/qwen2.py index 91335574..0306bb94 100644 --- a/python/llm/src/ipex_llm/transformers/models/qwen2.py +++ b/python/llm/src/ipex_llm/transformers/models/qwen2.py @@ -123,8 +123,8 @@ def qwen2_model_forward( if use_cache: if use_quantize_kv and not isinstance(past_key_values, DynamicFp8Cache): past_key_values = DynamicFp8Cache.from_legacy_cache(past_key_values) - elif use_compress_kv and not isinstance(past_key_values, - DynamicCompressCache): + elif not use_quantize_kv and use_compress_kv and not isinstance(past_key_values, + DynamicCompressCache): past_key_values = DynamicCompressCache.from_legacy_cache(past_key_values) if not use_quantize_kv and not use_compress_kv and not isinstance(past_key_values, (DynamicNormalCache,