From d489775d2cb842b508ca6e234c2dc98513dcac32 Mon Sep 17 00:00:00 2001
From: binbin Deng <108676127+plusbang@users.noreply.github.com>
Date: Fri, 7 Jul 2023 17:31:05 +0800
Subject: [PATCH] LLM: fix inconsistency between output token number and
 `max_new_token` (#8479)

---
 python/llm/src/bigdl/llm/ggml/model/generation/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/llm/src/bigdl/llm/ggml/model/generation/utils.py b/python/llm/src/bigdl/llm/ggml/model/generation/utils.py
index 9cd12576..75749443 100644
--- a/python/llm/src/bigdl/llm/ggml/model/generation/utils.py
+++ b/python/llm/src/bigdl/llm/ggml/model/generation/utils.py
@@ -164,7 +164,7 @@ class GenerationMixin:
         res_list = []
         word_count = 0
         for token in tokens:
-            if word_count > max_new_tokens:
+            if word_count >= max_new_tokens:
                 break
             res_list.append(token)
             word_count += 1
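
Note on the fix: with the old `>` comparison the break fired one iteration
too late, so the loop collected max_new_tokens + 1 tokens before stopping;
`>=` caps the output at exactly max_new_tokens. Below is a minimal
standalone sketch of both behaviors (`truncate`, `fixed`, and the sample
inputs are illustrative names for this note only, not part of the BigDL
API):

    # Reproduces the truncation loop from GenerationMixin in isolation.
    def truncate(tokens, max_new_tokens, fixed=True):
        res_list = []
        word_count = 0
        for token in tokens:
            # The old check (`>`) breaks one iteration too late; the
            # fixed check (`>=`) stops as soon as the budget is reached.
            limit_hit = (word_count >= max_new_tokens if fixed
                         else word_count > max_new_tokens)
            if limit_hit:
                break
            res_list.append(token)
            word_count += 1
        return res_list

    tokens = list(range(10))
    print(len(truncate(tokens, max_new_tokens=5, fixed=False)))  # 6 (bug)
    print(len(truncate(tokens, max_new_tokens=5, fixed=True)))   # 5 (fixed)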