# Patch summary: wires the vLLM-Serving example and its Python dependencies
# into two inference images, docker/llm/inference/cpu/docker/Dockerfile and
# docker/llm/inference/xpu/docker/Dockerfile.
#
# CPU Dockerfile, hunk @@ -36,10 +36,15 @@:
#   - Drops the `rm -rf ./BigDL` that ran right after the benchmark directory
#     was copied, so the cloned repository is still present later in the same
#     RUN chain.
#   - Extends the chain after `pip install --upgrade pandas` with:
#       pip install --upgrade fastapi
#       pip install --upgrade "uvicorn[standard]"
#       cp -r ./BigDL/python/llm/example/CPU/vLLM-Serving/ ./vLLM-Serving
#       rm -rf ./BigDL    (cleanup now happens once, after both copies)
#
# XPU Dockerfile, hunk @@ -5,6 +5,8 @@:
#   - Adds ENV USE_XETLA=OFF and
#     ENV SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1.
#     NOTE(review): the runtime effect of these two variables is not visible
#     in this patch; confirm against the Intel GPU runtime documentation.
#
# XPU Dockerfile, hunk @@ -38,4 +40,11 @@:
#   - Extends the chain after `pip install --upgrade colorama` with the same
#     fastapi / "uvicorn[standard]" installs, then clones BigDL, copies
#     ./BigDL/python/llm/example/GPU/vLLM-Serving/ to ./vLLM-Serving and
#     removes the clone.
#
# NOTE(review): both images obtain the example through a full
# `git clone https://github.com/intel-analytics/BigDL`; a shallow clone may be
# enough since only one directory is copied out. TODO confirm before changing.
# NOTE(review): "libary" in the XPU hunk appears to sit on an unchanged context
# line, so it presumably has to stay as spelled for the hunk to match the
# target file.
# NOTE(review): the hunks below appear collapsed onto one physical line in this
# copy; verify the line breaks against the original patch before applying it.
diff --git a/docker/llm/inference/cpu/docker/Dockerfile b/docker/llm/inference/cpu/docker/Dockerfile index c3d9ad37..41f6e102 100644 --- a/docker/llm/inference/cpu/docker/Dockerfile +++ b/docker/llm/inference/cpu/docker/Dockerfile @@ -36,10 +36,15 @@ RUN env DEBIAN_FRONTEND=noninteractive apt-get update && \ # Download all-in-one benchmark git clone https://github.com/intel-analytics/BigDL && \ cp -r ./BigDL/python/llm/dev/benchmark/ ./benchmark && \ - rm -rf ./BigDL && \ # Install all-in-one dependencies apt-get install -y numactl && \ pip install --upgrade omegaconf && \ - pip install --upgrade pandas + pip install --upgrade pandas && \ +# Install vllm dependencies + pip install --upgrade fastapi && \ + pip install --upgrade "uvicorn[standard]" && \ +# Copy vLLM-Serving + cp -r ./BigDL/python/llm/example/CPU/vLLM-Serving/ ./vLLM-Serving && \ + rm -rf ./BigDL ENTRYPOINT ["/bin/bash"] \ No newline at end of file diff --git a/docker/llm/inference/xpu/docker/Dockerfile b/docker/llm/inference/xpu/docker/Dockerfile index 2b705b3e..9cf8e99d 100644 --- a/docker/llm/inference/xpu/docker/Dockerfile +++ b/docker/llm/inference/xpu/docker/Dockerfile @@ -5,6 +5,8 @@ ARG https_proxy ENV TZ=Asia/Shanghai ENV PYTHONUNBUFFERED=1 +ENV USE_XETLA=OFF +ENV SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 COPY chat.py /llm/chat.py @@ -38,4 +40,11 @@ RUN curl -fsSL https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-P apt-get update && \ apt-get install -y intel-opencl-icd intel-level-zero-gpu level-zero level-zero-dev && \ # Install related libary of chat.py - pip install --upgrade colorama + pip install --upgrade colorama && \ + # Install vllm dependencies + pip install --upgrade fastapi && \ + pip install --upgrade "uvicorn[standard]" && \ + # Download vLLM-Serving + git clone https://github.com/intel-analytics/BigDL && \ + cp -r ./BigDL/python/llm/example/GPU/vLLM-Serving/ ./vLLM-Serving && \ + rm -rf ./BigDL