ipex-llm/docker/llm/serving/xpu/docker/start-vllm-service.sh
#!/bin/bash
# Replace these placeholders with the local path of your model and the name
# clients will use to address it through the API.
model="YOUR_MODEL_PATH"
served_model_name="YOUR_MODEL_NAME"

# Set up the oneCCL environment required for the XPU backend.
source /opt/intel/1ccl-wks/setvars.sh

# Launch the OpenAI-compatible vLLM API server on Intel XPU, serving the
# model with symmetric INT4 weight quantization (sym_int4) in eager mode.
python -m ipex_llm.vllm.xpu.entrypoints.openai.api_server \
  --served-model-name "$served_model_name" \
  --port 8000 \
  --model "$model" \
  --trust-remote-code \
  --gpu-memory-utilization 0.75 \
  --device xpu \
  --dtype float16 \
  --enforce-eager \
  --load-in-low-bit sym_int4 \
  --max-model-len 4096 \
  --max-num-batched-tokens 10240 \
  --max-num-seqs 12 \
  --tensor-parallel-size 1
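
# A minimal sanity check (a sketch, not part of the upstream script): once the
# server is up, it exposes the standard OpenAI-compatible HTTP API on the port
# configured above. From another shell, a completion request could look like
# the following; the "model" field must match served_model_name.
#
# curl http://localhost:8000/v1/completions \
#   -H "Content-Type: application/json" \
#   -d '{"model": "YOUR_MODEL_NAME", "prompt": "San Francisco is a", "max_tokens": 32}'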