fix conflict with quant kv (#11737)

2024-08-07 13:10:30 +03:00 · 2024-08-07 13:10:30 +03:00 · e956e71fc1
commit e956e71fc1
parent 00a5574c8a
1 changed files with 2 additions and 2 deletions
--- a/python/llm/src/ipex_llm/transformers/models/qwen2.py
+++ b/python/llm/src/ipex_llm/transformers/models/qwen2.py
@@ -123,7 +123,7 @@ def qwen2_model_forward(
    if use_cache:
        if use_quantize_kv and not isinstance(past_key_values, DynamicFp8Cache):
            past_key_values = DynamicFp8Cache.from_legacy_cache(past_key_values)
-        elif use_compress_kv and not isinstance(past_key_values,
+        elif not use_quantize_kv and use_compress_kv and not isinstance(past_key_values,
                                                                        DynamicCompressCache):
            past_key_values = DynamicCompressCache.from_legacy_cache(past_key_values)
        if not use_quantize_kv and not use_compress_kv and not isinstance(past_key_values,