Expose timeout for streamer for fastchat worker (#11288)
* Expose timeout for streamer for fastchat worker
* Change to read from env variables
parent d99423b75a
commit cffb932f05

1 changed file with 2 additions and 1 deletion
@@ -19,6 +19,7 @@ A model worker that executes the model based on BigDL-LLM.
 Relies on load_model method
 """
 
+import os
 import torch
 import torch.nn.functional as F
 import gc
@@ -323,7 +324,7 @@ class BigDLLLMWorker(BaseModelWorker):
         # Use TextIteratorStreamer for streaming output
         streamer = TextIteratorStreamer(
             tokenizer=self.tokenizer,
-            timeout=60,
+            timeout=int(os.getenv("FASTCHAT_WORKER_API_TIMEOUT", 60)),
             skip_prompt=True,
             skip_special_tokens=True,
         )
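For context, the sketch below shows how the env-driven timeout behaves in isolation. It is not the worker code itself: it assumes the Hugging Face transformers TextIteratorStreamer API and uses a placeholder tokenizer ("gpt2") purely for illustration.

# Minimal sketch of the env-configurable streamer timeout, assuming the
# transformers TextIteratorStreamer API; the tokenizer below is a placeholder.
import os

from transformers import AutoTokenizer, TextIteratorStreamer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # placeholder tokenizer

# FASTCHAT_WORKER_API_TIMEOUT is the variable introduced by this commit;
# the streamer falls back to 60 seconds when it is unset.
streamer = TextIteratorStreamer(
    tokenizer=tokenizer,
    timeout=int(os.getenv("FASTCHAT_WORKER_API_TIMEOUT", 60)),
    skip_prompt=True,
    skip_special_tokens=True,
)

# Iterating over the streamer yields decoded text chunks as generation
# produces them; if no new text arrives within the timeout, the iterator
# raises queue.Empty instead of blocking forever.

With this change, exporting FASTCHAT_WORKER_API_TIMEOUT (for example, setting it to 300) before starting the worker should give slow first-token generation more headroom, while the previous hard-coded 60-second behavior remains the default.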