Fix vLLM CPU api_server params (#11384)
parent 21fc781fce
commit b30bf7648e

1 changed file with 3 additions and 1 deletion
@@ -175,7 +175,9 @@ if __name__ == "__main__":
         served_model_names = [args.model]
     engine_args = AsyncEngineArgs.from_cli_args(args)
     engine = IPEXLLMAsyncLLMEngine.from_engine_args(
-        engine_args, usage_context=UsageContext.OPENAI_API_SERVER)
+        engine_args, usage_context=UsageContext.OPENAI_API_SERVER,
+        load_in_low_bit=args.load_in_low_bit,
+    )
     openai_serving_chat = OpenAIServingChat(engine, served_model_names,
                                             args.response_role,
                                             args.lora_modules,
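For context, here is the corrected engine construction as a self-contained sketch. Only the from_engine_args call is taken from the diff above; the import paths and the build_engine wrapper are assumptions about how the surrounding api_server.py is laid out.

    # Minimal sketch of the fixed engine construction. Import paths are
    # assumptions based on typical vLLM / IPEX-LLM package layouts.
    from vllm.engine.arg_utils import AsyncEngineArgs
    from vllm.usage.usage_lib import UsageContext
    from ipex_llm.vllm.cpu.engine import IPEXLLMAsyncLLMEngine  # assumed path

    def build_engine(args):
        # Build vLLM engine args from the parsed CLI namespace, then forward
        # the IPEX-LLM-specific low-bit option to the engine factory. Before
        # this fix, args.load_in_low_bit was parsed by the server's CLI but
        # never passed here, so the engine ignored the requested precision.
        engine_args = AsyncEngineArgs.from_cli_args(args)
        return IPEXLLMAsyncLLMEngine.from_engine_args(
            engine_args,
            usage_context=UsageContext.OPENAI_API_SERVER,
            load_in_low_bit=args.load_in_low_bit,
        )

The call is also reflowed so the closing parenthesis sits on its own line, which keeps any future keyword argument a one-line diff. With the fix, a launch can pass the option through the CLI, e.g. --load-in-low-bit sym_int4 (the value is illustrative).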