Add vllm-example to docker inference image (#9570)

* add vllm-serving to cpu image

* add vllm-serving
Lilac09 authored 2023-11-30 17:04:53 +08:00, committed by GitHub
parent 66f5b45f57
commit b785376f5c
2 changed files with 17 additions and 3 deletions
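
For orientation: the first diff below touches the Dockerfile of the CPU inference image, the second the GPU (XPU) one. As a hedged sketch of exercising the change, the updated CPU image could be built and entered like this — the image tag is a placeholder for illustration, not something defined by this commit:

# Build the CPU inference image from its Dockerfile (tag is hypothetical)
docker build -t bigdl-llm-cpu-inference .
# The image's ENTRYPOINT is /bin/bash, so this drops into an interactive shell
docker run -it --rm bigdl-llm-cpu-inference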


@@ -36,10 +36,15 @@ RUN env DEBIAN_FRONTEND=noninteractive apt-get update && \
 # Download all-in-one benchmark
 git clone https://github.com/intel-analytics/BigDL && \
 cp -r ./BigDL/python/llm/dev/benchmark/ ./benchmark && \
-rm -rf ./BigDL && \
 # Install all-in-one dependencies
 apt-get install -y numactl && \
 pip install --upgrade omegaconf && \
-pip install --upgrade pandas
+pip install --upgrade pandas && \
+# Install vllm dependencies
+pip install --upgrade fastapi && \
+pip install --upgrade "uvicorn[standard]" && \
+# Copy vLLM-Serving
+cp -r ./BigDL/python/llm/example/CPU/vLLM-Serving/ ./vLLM-Serving && \
+rm -rf ./BigDL

 ENTRYPOINT ["/bin/bash"]
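
The fastapi and "uvicorn[standard]" packages installed above provide the ASGI stack that the copied vLLM-Serving example serves through. A minimal in-container sanity check, assuming the placeholder module path api_server:app (the example's real entrypoint is not shown in this diff):

# Confirm both serving dependencies import cleanly
python -c "import fastapi, uvicorn; print(fastapi.__version__, uvicorn.__version__)"
# Standard uvicorn invocation; api_server:app is a hypothetical module path
uvicorn api_server:app --host 0.0.0.0 --port 8000

The second file applies the same serving dependencies to the GPU (XPU) image and additionally sets two environment variables used by the Intel GPU stack: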


@@ -5,6 +5,8 @@ ARG https_proxy
 ENV TZ=Asia/Shanghai
 ENV PYTHONUNBUFFERED=1
+ENV USE_XETLA=OFF
+ENV SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 COPY chat.py /llm/chat.py
@@ -38,4 +40,11 @@ RUN curl -fsSL https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-P
 apt-get update && \
 apt-get install -y intel-opencl-icd intel-level-zero-gpu level-zero level-zero-dev && \
 # Install related library of chat.py
-pip install --upgrade colorama
+pip install --upgrade colorama && \
+# Install vllm dependencies
+pip install --upgrade fastapi && \
+pip install --upgrade "uvicorn[standard]" && \
+# Download vLLM-Serving
+git clone https://github.com/intel-analytics/BigDL && \
+cp -r ./BigDL/python/llm/example/GPU/vLLM-Serving/ ./vLLM-Serving && \
+rm -rf ./BigDL
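
Because the XPU image relies on the Level Zero and OpenCL user-space drivers installed above, the host's GPU device nodes have to be passed into the container at run time. A minimal sketch with a hypothetical image tag:

# Expose the Intel GPU device nodes to the container (tag is hypothetical)
docker run -it --rm --device=/dev/dri bigdl-llm-xpu-inference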