From cffb932f054128f69b0cf5350a38062d82a24fab Mon Sep 17 00:00:00 2001 From: Yuwen Hu <54161268+Oscilloscope98@users.noreply.github.com> Date: Wed, 12 Jun 2024 17:02:40 +0800 Subject: [PATCH] Expose timeout for streamer for fastchat worker (#11288) * Expose timeout for streamer for fastchat worker * Change to read from env variables --- python/llm/src/ipex_llm/serving/fastchat/ipex_llm_worker.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/llm/src/ipex_llm/serving/fastchat/ipex_llm_worker.py b/python/llm/src/ipex_llm/serving/fastchat/ipex_llm_worker.py index 0ee07ba2..10ea99a8 100644 --- a/python/llm/src/ipex_llm/serving/fastchat/ipex_llm_worker.py +++ b/python/llm/src/ipex_llm/serving/fastchat/ipex_llm_worker.py @@ -19,6 +19,7 @@ A model worker that executes the model based on BigDL-LLM. Relies on load_model method """ +import os import torch import torch.nn.functional as F import gc @@ -323,7 +324,7 @@ class BigDLLLMWorker(BaseModelWorker): # Use TextIteratorStreamer for streaming output streamer = TextIteratorStreamer( tokenizer=self.tokenizer, - timeout=60, + timeout=int(os.getenv("FASTCHAT_WORKER_API_TIMEOUT", 60)), skip_prompt=True, skip_special_tokens=True, )