diff --git a/python/llm/src/bigdl/llm/ggml/model/generation/utils.py b/python/llm/src/bigdl/llm/ggml/model/generation/utils.py index 9cd12576..75749443 100644 --- a/python/llm/src/bigdl/llm/ggml/model/generation/utils.py +++ b/python/llm/src/bigdl/llm/ggml/model/generation/utils.py @@ -164,7 +164,7 @@ class GenerationMixin: res_list = [] word_count = 0 for token in tokens: - if word_count > max_new_tokens: + if word_count >= max_new_tokens: break res_list.append(token) word_count += 1