Fix vLLM CPU api_server params (#11384)
parent 21fc781fce
commit b30bf7648e

1 changed file with 3 additions and 1 deletion
@@ -175,7 +175,9 @@ if __name__ == "__main__":
     served_model_names = [args.model]
     engine_args = AsyncEngineArgs.from_cli_args(args)
     engine = IPEXLLMAsyncLLMEngine.from_engine_args(
-        engine_args, usage_context=UsageContext.OPENAI_API_SERVER)
+        engine_args, usage_context=UsageContext.OPENAI_API_SERVER,
+        load_in_low_bit=args.load_in_low_bit,
+    )
     openai_serving_chat = OpenAIServingChat(engine, served_model_names,
                                             args.response_role,
                                             args.lora_modules,
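The change threads args.load_in_low_bit into IPEXLLMAsyncLLMEngine.from_engine_args, so the low-bit format is set at engine construction time. For that attribute to exist, the server's argument parser must define a matching flag elsewhere in the file; this commit does not touch the parser. Below is a minimal sketch of what that wiring could look like. The flag name, default value, and help text are assumptions for illustration, not taken from this commit.

    # Sketch only: "--load-in-low-bit", its default, and the help text are
    # assumptions; the commit above only forwards args.load_in_low_bit to
    # IPEXLLMAsyncLLMEngine.from_engine_args, it does not define the flag.
    import argparse

    parser = argparse.ArgumentParser(
        description="IPEX-LLM vLLM OpenAI-compatible API server (sketch)")
    parser.add_argument(
        "--load-in-low-bit",
        type=str,
        default="sym_int4",  # assumed default low-bit quantization format
        help="Low-bit format used when loading the model; forwarded to "
             "IPEXLLMAsyncLLMEngine.from_engine_args as load_in_low_bit.")

    # argparse maps "--load-in-low-bit" to the attribute args.load_in_low_bit
    args = parser.parse_args(["--load-in-low-bit", "sym_int4"])
    assert args.load_in_low_bit == "sym_int4"

Passing the value as a keyword to from_engine_args keeps the quantization choice alongside the rest of the engine configuration rather than patching the engine after it is built, which appears to be the motivation for widening the call signature here.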