Fix vLLM CPU api_server params (#11384)
parent 21fc781fce
commit b30bf7648e
1 changed file with 3 additions and 1 deletion
@@ -175,7 +175,9 @@ if __name__ == "__main__":
     served_model_names = [args.model]
     engine_args = AsyncEngineArgs.from_cli_args(args)
     engine = IPEXLLMAsyncLLMEngine.from_engine_args(
-        engine_args, usage_context=UsageContext.OPENAI_API_SERVER)
+        engine_args, usage_context=UsageContext.OPENAI_API_SERVER,
+        load_in_low_bit=args.load_in_low_bit,
+    )
     openai_serving_chat = OpenAIServingChat(engine, served_model_names,
                                             args.response_role,
                                             args.lora_modules,
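The fix threads the server's low-bit CLI setting into the engine constructor instead of dropping it. Below is a minimal, self-contained sketch of that pattern, assuming illustrative names: `DemoAsyncLLMEngine`, the dict-based `engine_args`, and the `"sym_int4"` default are placeholders for this sketch, not the actual ipex-llm/vLLM API.

```python
# Hedged sketch of the pattern the fix relies on: the engine's
# from_engine_args() classmethod accepts an extra load_in_low_bit
# keyword so the API server can forward the value parsed from the
# command line. All names except load_in_low_bit are illustrative.

class DemoAsyncLLMEngine:
    def __init__(self, model: str, load_in_low_bit: str):
        self.model = model
        self.load_in_low_bit = load_in_low_bit

    @classmethod
    def from_engine_args(cls, engine_args, usage_context=None,
                         load_in_low_bit: str = "sym_int4"):
        # Forward the low-bit format chosen on the command line
        # into the constructed engine.
        return cls(engine_args["model"], load_in_low_bit)


if __name__ == "__main__":
    engine = DemoAsyncLLMEngine.from_engine_args(
        {"model": "demo-model"},
        load_in_low_bit="sym_int4",
    )
    print(engine.load_in_low_bit)  # -> sym_int4
```

Before the fix, the call site never passed `load_in_low_bit`, so the engine silently fell back to its default regardless of what the user requested; the three added lines close that gap.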