From d489775d2cb842b508ca6e234c2dc98513dcac32 Mon Sep 17 00:00:00 2001
From: binbin Deng <108676127+plusbang@users.noreply.github.com>
Date: Fri, 7 Jul 2023 17:31:05 +0800
Subject: [PATCH] LLM: fix inconsistency between output token number and
 `max_new_token` (#8479)

---
 python/llm/src/bigdl/llm/ggml/model/generation/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/llm/src/bigdl/llm/ggml/model/generation/utils.py b/python/llm/src/bigdl/llm/ggml/model/generation/utils.py
index 9cd12576..75749443 100644
--- a/python/llm/src/bigdl/llm/ggml/model/generation/utils.py
+++ b/python/llm/src/bigdl/llm/ggml/model/generation/utils.py
@@ -164,7 +164,7 @@ class GenerationMixin:
         res_list = []
         word_count = 0
         for token in tokens:
-            if word_count > max_new_tokens:
+            if word_count >= max_new_tokens:
                 break
             res_list.append(token)
             word_count += 1
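
Note on the fix: with the old `>` comparison the break fired one iteration
too late, so the loop collected max_new_tokens + 1 tokens before stopping;
`>=` caps the output at exactly max_new_tokens. Below is a minimal
standalone sketch of both behaviors (`truncate`, `fixed`, and the sample
inputs are illustrative names for this note only, not part of the BigDL
API):

    # Reproduces the truncation loop from GenerationMixin in isolation.
    def truncate(tokens, max_new_tokens, fixed=True):
        res_list = []
        word_count = 0
        for token in tokens:
            # The old check (`>`) breaks one iteration too late; the
            # fixed check (`>=`) stops as soon as the budget is reached.
            limit_hit = (word_count >= max_new_tokens if fixed
                         else word_count > max_new_tokens)
            if limit_hit:
                break
            res_list.append(token)
            word_count += 1
        return res_list

    tokens = list(range(10))
    print(len(truncate(tokens, max_new_tokens=5, fixed=False)))  # 6 (bug)
    print(len(truncate(tokens, max_new_tokens=5, fixed=True)))   # 5 (fixed)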