LLM: fix chatglm kv cache (#9215)

parent 0765f94770
commit 942d6418e7

1 changed file with 2 additions and 0 deletions
@@ -77,6 +77,8 @@ def attention_fn(
                                       device=device)
             new_cache_k[:] = cache_k
             new_cache_v[:] = cache_v
+            cache_k = new_cache_k
+            cache_v = new_cache_v
             key_layer, value_layer = append_kv_cache(cache_k, cache_v, key_layer, value_layer)
 
     elif use_cache:
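The two added lines rebind cache_k and cache_v to the freshly allocated, larger buffers, so that append_kv_cache writes the new step's K/V into the enlarged cache rather than the old, exhausted one. A minimal runnable sketch of that pattern follows; the helper bodies for extend_kv_cache and append_kv_cache below are assumptions for illustration only, not the repository's implementations.

import torch

def extend_kv_cache(cache_k, cache_v, extra_len=8):
    # Allocate larger contiguous buffers along the sequence dim and return
    # views sized like the current caches, so `new_cache[:] = cache` is valid.
    bsz, n_head, seq_len, head_dim = cache_k.shape
    buf_k = torch.empty(bsz, n_head, seq_len + extra_len, head_dim,
                        dtype=cache_k.dtype, device=cache_k.device)
    buf_v = torch.empty_like(buf_k)
    return buf_k[:, :, :seq_len], buf_v[:, :, :seq_len]

def append_kv_cache(cache_k, cache_v, key_layer, value_layer):
    # Append in place: widen each view into its preallocated storage with
    # as_strided, then write the new step's K/V into the tail.
    old_len = cache_k.size(2)
    new_len = old_len + key_layer.size(2)
    size = (cache_k.size(0), cache_k.size(1), new_len, cache_k.size(3))
    cache_k = cache_k.as_strided(size, cache_k.stride())
    cache_v = cache_v.as_strided(size, cache_v.stride())
    cache_k[:, :, old_len:] = key_layer
    cache_v[:, :, old_len:] = value_layer
    return cache_k, cache_v

bsz, n_head, head_dim = 1, 2, 4
cache_k = torch.zeros(bsz, n_head, 3, head_dim)    # exhausted cache
cache_v = torch.zeros(bsz, n_head, 3, head_dim)
key_layer = torch.randn(bsz, n_head, 1, head_dim)  # new decoding step's K/V
value_layer = torch.randn(bsz, n_head, 1, head_dim)

new_cache_k, new_cache_v = extend_kv_cache(cache_k, cache_v)
new_cache_k[:] = cache_k    # copy old contents (the diff's context lines)
new_cache_v[:] = cache_v
cache_k = new_cache_k       # the two added lines: without this rebinding,
cache_v = new_cache_v       # append_kv_cache widens the stale, full tensors
key_layer, value_layer = append_kv_cache(cache_k, cache_v, key_layer, value_layer)

With the rebinding in place, as_strided widens views into storage that actually has room for the new step; without it, the widen step runs past the end of the old tensors' storage, which is exactly the failure mode the two-line fix removes.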