ipex-llm/docker/llm/serving/cpu/docker/start-vllm-service.sh
Xiangyu Tian ac3d53ff5d
LLM: Fix vLLM CPU version error (#11206)
Fix vLLM CPU version error
2024-06-04 19:10:23 +08:00

18 lines
No EOL
431 B
Bash

#!/bin/bash
# Launch the IPEX-LLM vLLM OpenAI-compatible API server on CPU.
#
# Usage: edit the two placeholder variables below, then run this script.
# The server listens on port 8000 and serves the model under the name
# given in $served_model_name.
set -euo pipefail

model="YOUR_MODEL_PATH"               # path to the local model directory
served_model_name="YOUR_MODEL_NAME"   # model name clients pass in API requests

# Fail fast if the placeholders were not replaced.
if [[ "$model" == "YOUR_MODEL_PATH" || "$served_model_name" == "YOUR_MODEL_NAME" ]]; then
  printf 'error: set model and served_model_name before running %s\n' "${0##*/}" >&2
  exit 1
fi

# Quote all expansions so paths/names with spaces or glob chars work (SC2086).
python -m ipex_llm.vllm.cpu.entrypoints.openai.api_server \
  --served-model-name "$served_model_name" \
  --port 8000 \
  --model "$model" \
  --trust-remote-code \
  --device cpu \
  --dtype bfloat16 \
  --enforce-eager \
  --load-in-low-bit bf16 \
  --max-model-len 4096 \
  --max-num-batched-tokens 10240 \
  --max-num-seqs 12 \
  --tensor-parallel-size 1