From 6fb65bb9d2852ec093b0786214d51ba093f4b3c9 Mon Sep 17 00:00:00 2001
From: Jiao Wang <jenniewang123@gmail.com>
Date: Wed, 28 Feb 2024 18:43:01 -0800
Subject: [PATCH] Fix Qwen KV cache seq dims in portable-zip chat.py for transformers 4.36 (#10150)

---
 python/llm/portable-zip/chat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/llm/portable-zip/chat.py b/python/llm/portable-zip/chat.py
index b5cc48fc..18bd319f 100644
--- a/python/llm/portable-zip/chat.py
+++ b/python/llm/portable-zip/chat.py
@@ -288,7 +288,7 @@ if __name__ == "__main__":
 
     if model.config.architectures is not None and model.config.architectures[0] == "QWenLMHeadModel":
         stop_words = get_stop_words_ids("Qwen", tokenizer=tokenizer)
-        kv_cache = StartRecentKVCache(start_size=start_size)
+        kv_cache = StartRecentKVCache(start_size=start_size, k_seq_dim=1, v_seq_dim=1)
         qwen_stream_chat(model=model, tokenizer=tokenizer,kv_cache=kv_cache, stop_words=stop_words)
     elif model.config.architectures is not None and model.config.architectures[0] == "ChatGLMModel":
         chatglm3_stream_chat(model=model, tokenizer=tokenizer)