LLM: fix inconsistency between output token number and max_new_token (#8479)

binbin Deng 2023-07-07 17:31:05 +08:00 committed by GitHub
parent bcc1eae322
commit d489775d2c


@@ -164,7 +164,7 @@ class GenerationMixin:
         res_list = []
         word_count = 0
         for token in tokens:
-            if word_count > max_new_tokens:
+            if word_count >= max_new_tokens:
                 break
             res_list.append(token)
             word_count += 1
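
For context, the old `>` comparison let the loop run one extra iteration, so the output contained max_new_tokens + 1 tokens; `>=` stops after exactly max_new_tokens. A minimal standalone sketch of the fixed loop follows; only the loop body mirrors the diff, while the function name and harness around it are hypothetical:

    def take_new_tokens(tokens, max_new_tokens):
        # Mirrors the patched loop. With the old `>` check, the loop only
        # stopped once word_count had already exceeded max_new_tokens, so
        # max_new_tokens + 1 tokens were kept. `>=` stops after exactly
        # max_new_tokens tokens.
        res_list = []
        word_count = 0
        for token in tokens:
            if word_count >= max_new_tokens:
                break
            res_list.append(token)
            word_count += 1
        return res_list

    tokens = list(range(10))
    assert len(take_new_tokens(tokens, max_new_tokens=3)) == 3  # was 4 with `>`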