Expose timeout for streamer for fastchat worker (#11288)

* Expose timeout for streamer for fastchat worker

* Change to read from env variables
Author: Yuwen Hu, 2024-06-12 17:02:40 +08:00 (committed by GitHub)
Parent: d99423b75a
Commit: cffb932f05


@@ -19,6 +19,7 @@ A model worker that executes the model based on BigDL-LLM.
 Relies on load_model method
 """
+import os
 import torch
 import torch.nn.functional as F
 import gc
@@ -323,7 +324,7 @@ class BigDLLLMWorker(BaseModelWorker):
         # Use TextIteratorStreamer for streaming output
         streamer = TextIteratorStreamer(
             tokenizer=self.tokenizer,
-            timeout=60,
+            timeout=int(os.getenv("FASTCHAT_WORKER_API_TIMEOUT", 60)),
             skip_prompt=True,
             skip_special_tokens=True,
         )
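
With this change the streamer timeout is read from the FASTCHAT_WORKER_API_TIMEOUT environment variable, falling back to 60 seconds when it is unset. A minimal standalone sketch of the same resolution logic follows; the "gpt2" tokenizer name is only an illustrative stand-in and not part of this commit.

import os

from transformers import AutoTokenizer, TextIteratorStreamer

# Hypothetical tokenizer for illustration; the worker uses its own loaded tokenizer.
tokenizer = AutoTokenizer.from_pretrained("gpt2")

# Same resolution as in the diff: use the env variable if set, otherwise 60 seconds.
timeout = int(os.getenv("FASTCHAT_WORKER_API_TIMEOUT", 60))

streamer = TextIteratorStreamer(
    tokenizer=tokenizer,
    timeout=timeout,  # iterating the streamer raises queue.Empty if no new text arrives in time
    skip_prompt=True,
    skip_special_tokens=True,
)

For slower models, the timeout can then be raised by exporting, for example, FASTCHAT_WORKER_API_TIMEOUT=600 (an illustrative value) in the worker's environment before launching it.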