From 6fb65bb9d2852ec093b0786214d51ba093f4b3c9 Mon Sep 17 00:00:00 2001 From: Jiao Wang Date: Wed, 28 Feb 2024 18:43:01 -0800 Subject: [PATCH] fix in transformers 4.36 (#10150) --- python/llm/portable-zip/chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/llm/portable-zip/chat.py b/python/llm/portable-zip/chat.py index b5cc48fc..18bd319f 100644 --- a/python/llm/portable-zip/chat.py +++ b/python/llm/portable-zip/chat.py @@ -288,7 +288,7 @@ if __name__ == "__main__": if model.config.architectures is not None and model.config.architectures[0] == "QWenLMHeadModel": stop_words = get_stop_words_ids("Qwen", tokenizer=tokenizer) - kv_cache = StartRecentKVCache(start_size=start_size) + kv_cache = StartRecentKVCache(start_size=start_size, k_seq_dim=1, v_seq_dim=1) qwen_stream_chat(model=model, tokenizer=tokenizer,kv_cache=kv_cache, stop_words=stop_words) elif model.config.architectures is not None and model.config.architectures[0] == "ChatGLMModel": chatglm3_stream_chat(model=model, tokenizer=tokenizer)