From 04baac5a2ef800153b1efae665f17d133a87d54a Mon Sep 17 00:00:00 2001
From: Guancheng Fu <110874468+gc-fu@users.noreply.github.com>
Date: Wed, 27 Mar 2024 16:01:58 +0800
Subject: [PATCH] Fix fastchat top_k (#10560)

* fix -1 top_k

* fix

* done
---
 python/llm/src/ipex_llm/serving/fastchat/ipex_llm_worker.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/llm/src/ipex_llm/serving/fastchat/ipex_llm_worker.py b/python/llm/src/ipex_llm/serving/fastchat/ipex_llm_worker.py
index b357c8d3..9fbe1cb7 100644
--- a/python/llm/src/ipex_llm/serving/fastchat/ipex_llm_worker.py
+++ b/python/llm/src/ipex_llm/serving/fastchat/ipex_llm_worker.py
@@ -100,7 +100,9 @@ class BigDLLLMWorker(BaseModelWorker):
         temperature = float(params.get("temperature", 1.0))
         repetition_penalty = float(params.get("repetition_penalty", 1.0))
         top_p = float(params.get("top_p", 1.0))
-        top_k = int(params.get("top_k", 0))  # 0 means disable
+        top_k = int(params.get("top_k", 1))
+        if top_k == -1:
+            top_k = 1
         max_new_tokens = int(params.get("max_new_tokens", 256))
         echo = bool(params.get("echo", True))
         stop_str = params.get("stop", None)
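
A minimal sketch of the sanitization the hunk above performs, extracted into a
standalone helper for illustration (the function name and the standalone form
are hypothetical, not part of the patch). The assumption here is that FastChat
clients send top_k=-1 as a "disabled" sentinel, a value that HF transformers'
sampling path does not accept, so the worker clamps it to 1 before generation:

    # Hypothetical helper mirroring the patched logic in ipex_llm_worker.py.
    def sanitize_top_k(params: dict) -> int:
        """Return a top_k value safe to forward to model.generate()."""
        top_k = int(params.get("top_k", 1))  # default of 1, as in the patch
        if top_k == -1:                      # FastChat's "disabled" sentinel
            top_k = 1                        # clamp to an accepted value
        return top_k

    # Usage examples:
    assert sanitize_top_k({}) == 1
    assert sanitize_top_k({"top_k": -1}) == 1
    assert sanitize_top_k({"top_k": 40}) == 40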