LLM: fix wrong length in gptj kv_cache optimization (#9210)

* fix wrong length in gptj kv cache

* update
Ruonan Wang 2023-10-18 14:59:02 +08:00 committed by GitHub
parent 6dad8d16df
commit 3555ebc148


@@ -134,7 +134,7 @@ def gptj_attention_forward(
     device = hidden_states.device
     if layer_past is not None:
-        kv_seq_len += layer_past[0].size(-2)
+        kv_seq_len += layer_past[0].size(1)
     if layer_past is not None:
         cache_k = layer_past[0]
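
Why the one-character change matters: with this kv_cache optimization, the cached key tensor keeps its sequence length on dim 1, so size(-2) reads the wrong axis (the heads dimension) and inflates kv_seq_len. A minimal sketch of the bug, assuming an illustrative (batch, seq_len, num_heads, head_dim) cache layout; all shapes and names below are hypothetical, not taken from the repo:

import torch

# Assumed layout for the optimized cache: (batch, seq_len, num_heads, head_dim).
batch, num_heads, head_dim = 1, 16, 256
past_len, cur_len = 8, 1

# Hypothetical cached key tensor, standing in for layer_past[0].
cache_k = torch.zeros(batch, past_len, num_heads, head_dim)

kv_seq_len = cur_len
kv_seq_len += cache_k.size(1)   # seq_len dim -> 8, so kv_seq_len == 9 (correct)
# cache_k.size(-2) would read num_heads (16) instead, yielding a wrong length of 17.

print(kv_seq_len)  # 9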