From 04baac5a2ef800153b1efae665f17d133a87d54a Mon Sep 17 00:00:00 2001
From: Guancheng Fu <110874468+gc-fu@users.noreply.github.com>
Date: Wed, 27 Mar 2024 16:01:58 +0800
Subject: [PATCH] Fix fastchat top_k (#10560)

* fix -1 top_k

* fix

* done
---
 python/llm/src/ipex_llm/serving/fastchat/ipex_llm_worker.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/llm/src/ipex_llm/serving/fastchat/ipex_llm_worker.py b/python/llm/src/ipex_llm/serving/fastchat/ipex_llm_worker.py
index b357c8d3..9fbe1cb7 100644
--- a/python/llm/src/ipex_llm/serving/fastchat/ipex_llm_worker.py
+++ b/python/llm/src/ipex_llm/serving/fastchat/ipex_llm_worker.py
@@ -100,7 +100,9 @@ class BigDLLLMWorker(BaseModelWorker):
         temperature = float(params.get("temperature", 1.0))
         repetition_penalty = float(params.get("repetition_penalty", 1.0))
         top_p = float(params.get("top_p", 1.0))
-        top_k = int(params.get("top_k", 0))  # 0 means disable
+        top_k = int(params.get("top_k", 1))
+        if top_k == -1:
+            top_k = 1
         max_new_tokens = int(params.get("max_new_tokens", 256))
         echo = bool(params.get("echo", True))
         stop_str = params.get("stop", None)
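
A minimal sketch of the sanitization the hunk above performs, extracted into a
standalone helper for illustration (the function name and the standalone form
are hypothetical, not part of the patch). The assumption here is that FastChat
clients send top_k=-1 as a "disabled" sentinel, a value that HF transformers'
sampling path does not accept, so the worker clamps it to 1 before generation:

    # Hypothetical helper mirroring the patched logic in ipex_llm_worker.py.
    def sanitize_top_k(params: dict) -> int:
        """Return a top_k value safe to forward to model.generate()."""
        top_k = int(params.get("top_k", 1))  # default of 1, as in the patch
        if top_k == -1:                      # FastChat's "disabled" sentinel
            top_k = 1                        # clamp to an accepted value
        return top_k

    # Usage examples:
    assert sanitize_top_k({}) == 1
    assert sanitize_top_k({"top_k": -1}) == 1
    assert sanitize_top_k({"top_k": 40}) == 40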