LLM: fix chatglm kv cache (#9215)

Ruonan Wang 2023-10-18 19:09:53 +08:00 committed by GitHub
parent 0765f94770
commit 942d6418e7


@@ -77,6 +77,8 @@ def attention_fn(
                                    device=device)
         new_cache_k[:] = cache_k
         new_cache_v[:] = cache_v
+        cache_k = new_cache_k
+        cache_v = new_cache_v
         key_layer, value_layer = append_kv_cache(cache_k, cache_v, key_layer, value_layer)
     elif use_cache:
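
For context: the hunk rebinds cache_k and cache_v to the newly allocated buffers, so the following append_kv_cache call writes into the enlarged cache instead of the old, smaller tensors. Below is a minimal PyTorch sketch of that pre-allocated KV-cache pattern; the helper name grow_cache, the extra_steps parameter, and the [batch, heads, seq, dim] layout are illustrative assumptions, not ChatGLM's or bigdl-llm's actual code.

import torch


def grow_cache(cache_k, cache_v, extra_steps=32):
    """Allocate larger KV buffers and copy the existing cache contents over."""
    b, h, t, d = cache_k.shape
    new_cache_k = torch.empty(b, h, t + extra_steps, d,
                              dtype=cache_k.dtype, device=cache_k.device)
    new_cache_v = torch.empty_like(new_cache_k)
    new_cache_k[:, :, :t] = cache_k
    new_cache_v[:, :, :t] = cache_v
    # Callers must continue with the returned tensors; the commit's fix is the
    # equivalent rebinding of cache_k / cache_v before appending, so new
    # key/value steps do not land in the old, undersized buffers.
    return new_cache_k, new_cache_v


if __name__ == "__main__":
    k = torch.zeros(1, 2, 4, 8)
    v = torch.zeros(1, 2, 4, 8)
    k, v = grow_cache(k, v)   # rebind, as the patch does
    print(k.shape)            # torch.Size([1, 2, 36, 8])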