Expose timeout for streamer for fastchat worker (#11288)
* Expose timeout for streamer for fastchat worker
* Change to read from env variables
This commit is contained in:

parent d99423b75a
commit cffb932f05

1 changed file with 2 additions and 1 deletion
@@ -19,6 +19,7 @@ A model worker that executes the model based on BigDL-LLM.
 Relies on load_model method
 """
 
+import os
 import torch
 import torch.nn.functional as F
 import gc
@@ -323,7 +324,7 @@ class BigDLLLMWorker(BaseModelWorker):
         # Use TextIteratorStreamer for streaming output
         streamer = TextIteratorStreamer(
             tokenizer=self.tokenizer,
-            timeout=60,
+            timeout=int(os.getenv("FASTCHAT_WORKER_API_TIMEOUT", 60)),
             skip_prompt=True,
             skip_special_tokens=True,
         )
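For context, TextIteratorStreamer comes from Hugging Face transformers, and its timeout bounds how long each read from the stream may block before raising queue.Empty. Below is a minimal standalone sketch of the pattern the commit configures, not part of the changed file; the model id "gpt2" and the prompt are placeholders chosen only for illustration, and the env-var-with-fallback line mirrors the diff above.

import os
from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Hypothetical small model used purely for illustration.
model_id = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# Same pattern as the change above: the streamer's per-chunk wait is read from
# FASTCHAT_WORKER_API_TIMEOUT and falls back to the previously hard-coded 60 s.
streamer = TextIteratorStreamer(
    tokenizer=tokenizer,
    timeout=int(os.getenv("FASTCHAT_WORKER_API_TIMEOUT", 60)),
    skip_prompt=True,
    skip_special_tokens=True,
)

inputs = tokenizer("Hello, my name is", return_tensors="pt")

# generate() runs in a background thread; the main thread consumes decoded text.
thread = Thread(
    target=model.generate,
    kwargs=dict(**inputs, max_new_tokens=32, streamer=streamer),
)
thread.start()

for text in streamer:  # queue.Empty is raised if no chunk arrives within the timeout
    print(text, end="", flush=True)
thread.join()

With the change, an operator can raise the limit for slow first tokens (for example, large models on first load) by exporting FASTCHAT_WORKER_API_TIMEOUT in the worker's environment; leaving it unset keeps the previous 60-second behaviour.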