Expose timeout for streamer for fastchat worker (#11288)
* Expose timeout for streamer for fastchat worker
* Change to read from env variables
parent d99423b75a
commit cffb932f05
1 changed file with 2 additions and 1 deletion
@@ -19,6 +19,7 @@ A model worker that executes the model based on BigDL-LLM.
 Relies on load_model method
 """
 
+import os
 import torch
 import torch.nn.functional as F
 import gc
@@ -323,7 +324,7 @@ class BigDLLLMWorker(BaseModelWorker):
         # Use TextIteratorStreamer for streaming output
         streamer = TextIteratorStreamer(
            tokenizer=self.tokenizer,
-           timeout=60,
+           timeout=int(os.getenv("FASTCHAT_WORKER_API_TIMEOUT", 60)),
            skip_prompt=True,
            skip_special_tokens=True,
        )
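For context, a minimal standalone sketch of the pattern this change introduces: the streamer timeout is read from the FASTCHAT_WORKER_API_TIMEOUT environment variable with a 60-second fallback. Only the variable name and the streamer arguments come from the diff; the model, prompt, and generation settings below are illustrative assumptions, not part of the worker code.

```python
import os
from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Illustrative model; the real worker loads its model via load_model.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

streamer = TextIteratorStreamer(
    tokenizer=tokenizer,
    # Same pattern as the commit: env-configurable timeout, defaulting to 60s.
    timeout=int(os.getenv("FASTCHAT_WORKER_API_TIMEOUT", 60)),
    skip_prompt=True,
    skip_special_tokens=True,
)

inputs = tokenizer("Hello, my name is", return_tensors="pt")

# generate() runs in a background thread; iterating the streamer yields text
# chunks as they are produced and raises queue.Empty if no chunk arrives
# within the configured timeout.
Thread(
    target=model.generate,
    kwargs=dict(**inputs, streamer=streamer, max_new_tokens=32),
).start()

for text in streamer:
    print(text, end="", flush=True)
```

Reading the timeout from the environment lets deployments with slow first-token latency raise the limit without patching the worker source.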