Expose timeout for streamer for fastchat worker (#11288)
* Expose timeout for streamer for fastchat worker
* Change to read from env variables
parent d99423b75a
commit cffb932f05

1 changed file with 2 additions and 1 deletion
@@ -19,6 +19,7 @@ A model worker that executes the model based on BigDL-LLM.
 Relies on load_model method
 """
 
+import os
 import torch
 import torch.nn.functional as F
 import gc
@@ -323,7 +324,7 @@ class BigDLLLMWorker(BaseModelWorker):
         # Use TextIteratorStreamer for streaming output
         streamer = TextIteratorStreamer(
             tokenizer=self.tokenizer,
-            timeout=60,
+            timeout=int(os.getenv("FASTCHAT_WORKER_API_TIMEOUT", 60)),
             skip_prompt=True,
             skip_special_tokens=True,
         )
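For context, the sketch below shows how the env-driven timeout behaves in isolation. It is not the worker code itself: it assumes the Hugging Face transformers TextIteratorStreamer API and uses a placeholder tokenizer ("gpt2") purely for illustration.

# Minimal sketch of the env-configurable streamer timeout, assuming the
# transformers TextIteratorStreamer API; the tokenizer below is a placeholder.
import os

from transformers import AutoTokenizer, TextIteratorStreamer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # placeholder tokenizer

# FASTCHAT_WORKER_API_TIMEOUT is the variable introduced by this commit;
# the streamer falls back to 60 seconds when it is unset.
streamer = TextIteratorStreamer(
    tokenizer=tokenizer,
    timeout=int(os.getenv("FASTCHAT_WORKER_API_TIMEOUT", 60)),
    skip_prompt=True,
    skip_special_tokens=True,
)

# Iterating over the streamer yields decoded text chunks as generation
# produces them; if no new text arrives within the timeout, the iterator
# raises queue.Empty instead of blocking forever.

With this change, exporting FASTCHAT_WORKER_API_TIMEOUT (for example, setting it to 300) before starting the worker should give slow first-token generation more headroom, while the previous hard-coded 60-second behavior remains the default.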