Fix conflict between compressed KV cache and quantized KV cache (#11737)
This commit is contained in:
parent
00a5574c8a
commit
e956e71fc1
1 changed file with 2 additions and 2 deletions
|
|
@@ -123,7 +123,7 @@ def qwen2_model_forward(
|
||||||
if use_cache:
|
if use_cache:
|
||||||
if use_quantize_kv and not isinstance(past_key_values, DynamicFp8Cache):
|
if use_quantize_kv and not isinstance(past_key_values, DynamicFp8Cache):
|
||||||
past_key_values = DynamicFp8Cache.from_legacy_cache(past_key_values)
|
past_key_values = DynamicFp8Cache.from_legacy_cache(past_key_values)
|
||||||
elif use_compress_kv and not isinstance(past_key_values,
|
elif not use_quantize_kv and use_compress_kv and not isinstance(past_key_values,
|
||||||
DynamicCompressCache):
|
DynamicCompressCache):
|
||||||
past_key_values = DynamicCompressCache.from_legacy_cache(past_key_values)
|
past_key_values = DynamicCompressCache.from_legacy_cache(past_key_values)
|
||||||
if not use_quantize_kv and not use_compress_kv and not isinstance(past_key_values,
|
if not use_quantize_kv and not use_compress_kv and not isinstance(past_key_values,
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue