Update vllm setting (#12059)

* revert

* update

* update

* update
Shaojun Liu 2024-09-11 11:45:08 +08:00 committed by GitHub
parent 30a8680645
commit 7e1e51d91a
3 changed files with 16 additions and 8 deletions

@@ -97,6 +97,5 @@ COPY ./start-pp_serving-service.sh /llm/
 COPY ./start-lightweight_serving-service.sh /llm/
 ENV LD_LIBRARY_PATH /usr/local/lib/python3.11/dist-packages/intel_extension_for_pytorch/lib/:/opt/intel/oneapi/tbb/2021.12/env/../lib/intel64/gcc4.8:/opt/intel/oneapi/mpi/2021.12/opt/mpi/libfabric/lib:/opt/intel/oneapi/mpi/2021.12/lib:/opt/intel/oneapi/mkl/2024.1/lib:/opt/intel/oneapi/ippcp/2021.11/lib/:/opt/intel/oneapi/ipp/2021.11/lib:/opt/intel/oneapi/dpl/2022.5/lib:/opt/intel/oneapi/dnnl/2024.1/lib:/opt/intel/oneapi/debugger/2024.1/opt/debugger/lib:/opt/intel/oneapi/dal/2024.2/lib:/opt/intel/oneapi/compiler/2024.1/opt/oclfpga/host/linux64/lib:/opt/intel/oneapi/compiler/2024.1/opt/compiler/lib:/opt/intel/oneapi/compiler/2024.1/lib:/opt/intel/oneapi/ccl/2021.12/lib/
-ENV BIGDL_LLM_SDP_IGNORE_MASK 0
 WORKDIR /llm/
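
This drops BIGDL_LLM_SDP_IGNORE_MASK from the image-wide environment; the serving script below exports it instead, so the setting now applies only to the vLLM service. A quick sketch for checking what a running container actually sees (vllm-serving is a hypothetical container name, not one defined in this repo):

# hypothetical container name; substitute your own
docker exec vllm-serving env | grep BIGDL_LLM_SDP_IGNORE_MASK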

@@ -1,21 +1,30 @@
 #!/bin/bash
 model="YOUR_MODEL_PATH"
+served_model_name="YOUR_MODEL_NAME"
 
+export CCL_WORKER_COUNT=2
+export FI_PROVIDER=shm
+export CCL_ATL_TRANSPORT=ofi
+export CCL_ZE_IPC_EXCHANGE=sockets
+export CCL_ATL_SHM=1
+
 export USE_XETLA=OFF
 export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=2
 export TORCH_LLM_ALLREDUCE=0
 
+source /opt/intel/1ccl-wks/setvars.sh
+export BIGDL_LLM_SDP_IGNORE_MASK=0
 python -m ipex_llm.vllm.xpu.entrypoints.openai.api_server \
+  --served-model-name $served_model_name \
   --port 8000 \
   --model $model \
   --trust-remote-code \
-  --gpu-memory-utilization 0.75 \
+  --gpu-memory-utilization 0.9 \
   --device xpu \
   --dtype float16 \
   --enforce-eager \
-  --load-in-low-bit sym_int4 \
-  --max-model-len 4096 \
-  --max-num-batched-tokens 10240 \
+  --load-in-low-bit fp8 \
+  --max-model-len 2048 \
+  --max-num-batched-tokens 4000 \
   --max-num-seqs 12 \
   --tensor-parallel-size 1
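
Once the script is running, the entrypoint serves the standard OpenAI-compatible REST API on port 8000. A minimal smoke test, as a sketch (assumes the server is up locally and that YOUR_MODEL_NAME matches the served_model_name placeholder above):

# query the OpenAI-compatible completions endpoint started by the script above
curl http://localhost:8000/v1/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "YOUR_MODEL_NAME", "prompt": "San Francisco is a", "max_tokens": 32}'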

@@ -176,7 +176,7 @@ def benchmark(llm_urls, model, prompt, num_requests, max_concurrent_requests, ma
     cur_len = len(cur_llm_urls)
     if dataset is None:
         payload = {
-            "model": model,
+            "model": model_name,
             "prompt": prompt,
             "n": 1,
             "best_of": 1,
@@ -193,7 +193,7 @@ def benchmark(llm_urls, model, prompt, num_requests, max_concurrent_requests, ma
     for index in range(num_requests):
         prompt, prompt_len, output_len = sampled_requests[index]
         payload = {
-            "model": model,
+            "model": model_name,
             "prompt": prompt,
             "n": 1,
             "best_of": 1,