From cffb932f054128f69b0cf5350a38062d82a24fab Mon Sep 17 00:00:00 2001 From: Yuwen Hu <54161268+Oscilloscope98@users.noreply.github.com> Date: Wed, 12 Jun 2024 17:02:40 +0800 Subject: [PATCH] Expose timeout for streamer for fastchat worker (#11288) * Expose timeout for streamer for fastchat worker * Change to read from env variables --- python/llm/src/ipex_llm/serving/fastchat/ipex_llm_worker.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/llm/src/ipex_llm/serving/fastchat/ipex_llm_worker.py b/python/llm/src/ipex_llm/serving/fastchat/ipex_llm_worker.py index 0ee07ba2..10ea99a8 100644 --- a/python/llm/src/ipex_llm/serving/fastchat/ipex_llm_worker.py +++ b/python/llm/src/ipex_llm/serving/fastchat/ipex_llm_worker.py @@ -19,6 +19,7 @@ A model worker that executes the model based on BigDL-LLM. Relies on load_model method """ +import os import torch import torch.nn.functional as F import gc @@ -323,7 +324,7 @@ class BigDLLLMWorker(BaseModelWorker): # Use TextIteratorStreamer for streaming output streamer = TextIteratorStreamer( tokenizer=self.tokenizer, - timeout=60, + timeout=int(os.getenv("FASTCHAT_WORKER_API_TIMEOUT", 60)), skip_prompt=True, skip_special_tokens=True, )