# Stage 1: Build stage to handle file preparation
FROM ubuntu:22.04 AS build

# Copy the files to the build image
COPY ./start-notebook.sh /llm/
COPY ./model_adapter.py.patch /llm/
COPY ./vllm_offline_inference.py /llm/
COPY ./payload-1024.lua /llm/
COPY ./start-vllm-service.sh /llm/
COPY ./benchmark_vllm_throughput.py /llm/
COPY ./start-fastchat-service.sh /llm/

# Stage 2: Final image that only includes necessary runtime artifacts
FROM ubuntu:22.04

# Copy the scripts from the build stage
COPY --from=build /llm /llm/

ARG http_proxy
ARG https_proxy
# Disable pip's cache behavior
ARG PIP_NO_CACHE_DIR=false
ARG DEBIAN_FRONTEND=noninteractive

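# PYTHONUNBUFFERED=1 makes Python write stdout/stderr unbuffered, so service logs show up immediately in `docker logs`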
ENV PYTHONUNBUFFERED=1

RUN apt-get update && apt-get install -y --no-install-recommends \
    # Install basic utilities
    libunwind8-dev vim less \
    # Version control and download tools
    git curl wget \
    # add-apt-repository requires gnupg, gpg-agent, software-properties-common
    gnupg gpg-agent software-properties-common \
    # Install the wrk performance testing tool, NUMA (Non-Uniform Memory Access) support, and the patch tool
    wrk numactl patch && \
    # Install Python 3.11
    # Add the deadsnakes PPA, which provides Python 3.11 packages for Ubuntu 22.04
    add-apt-repository ppa:deadsnakes/ppa -y && \
    # Install Python 3.11 and related packages
    apt-get update && apt-get install -y --no-install-recommends python3.11 python3-pip python3.11-dev python3-wheel python3.11-distutils && \
    # Remove the original /usr/bin/python3 symbolic link
    rm /usr/bin/python3 && \
    # Create a symbolic link pointing to Python 3.11 at /usr/bin/python3
    ln -s /usr/bin/python3.11 /usr/bin/python3 && \
    # Create a symbolic link pointing to /usr/bin/python3 at /usr/bin/python
    ln -s /usr/bin/python3 /usr/bin/python && \
    # Download and install pip; installing FastChat from source requires PEP 660 support (pip >= 21.3)
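    # (get-pip.py is used because the distro python3-pip package targets the system Python 3.10, not the Python 3.11 linked above)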
    curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
    python3 get-pip.py && \
    rm get-pip.py && \
    # Install basic Python utilities
    pip install --upgrade requests argparse urllib3 && \
    # Install JupyterLab and download the ipex-llm-tutorial notebooks
    pip install --upgrade jupyterlab && \
    git clone https://github.com/intel-analytics/ipex-llm-tutorial /llm/ipex-llm-tutorial && \
    chmod +x /llm/start-notebook.sh && \
    # Download the all-in-one benchmark
    git clone https://github.com/intel-analytics/IPEX-LLM && \
    cp -r ./IPEX-LLM/python/llm/dev/benchmark/ /llm/benchmark && \
    # Copy the chat.py script (colorama provides its colored terminal output)
    pip install --upgrade colorama && \
    cp -r ./IPEX-LLM/python/llm/portable-zip/ /llm/portable-zip && \
    # Install all-in-one benchmark dependencies
    pip install --upgrade omegaconf && \
    pip install --upgrade pandas && \
    # Install ipex-llm with the serving extras (quoted so the shell does not glob the brackets)
    pip install --pre --upgrade "ipex-llm[serving]" && \
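    # (--pre opts in to pre-release ipex-llm builds; the [serving] extra adds the serving-related dependencies)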
    # Pin versions to fix Trivy CVE issues (Jinja2 3.1.3 also addresses CVE-2024-22195)
    pip install Jinja2==3.1.3 transformers==4.36.2 gradio==4.19.2 cryptography==42.0.4 && \
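    # The torch stack comes from PyTorch's CPU wheel index, which keeps CUDA libraries out of this CPU-only image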
    pip install torch==2.2.0 torchvision==0.17.0 torchaudio==2.2.0 --index-url https://download.pytorch.org/whl/cpu && \
    pip install intel-extension-for-pytorch==2.2.0 && \
    pip install oneccl_bind_pt==2.2.0 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/cn/ && \
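    # Re-pin transformers in case the torch stack install changed its version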
    pip install transformers==4.36.2 && \
    # Install vllm serving dependencies
    pip install --upgrade fastapi && \
    pip install --upgrade "uvicorn[standard]" && \
    # Add Qwen support
    pip install --upgrade transformers_stream_generator einops && \
    # Fix the Qwen model adapter in fastchat
    patch /usr/local/lib/python3.11/dist-packages/fastchat/model/model_adapter.py < /llm/model_adapter.py.patch && \
    # Copy vLLM-Serving examples
    cp -r ./IPEX-LLM/python/llm/example/CPU/vLLM-Serving/ /llm/vLLM-Serving && \
    rm -rf ./IPEX-LLM && \
    # Pin pydantic to the 1.x series to fix the vllm service
    pip install pydantic==1.10.11 && \
    # Build and install vllm for CPU from source
    apt-get install -y g++ gcc-12 g++-12 libnuma-dev && \
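    # Register gcc-12/g++-12 as the default gcc/g++ (priority 10); the vLLM CPU build expects a recent GCC toolchain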
    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12 && \
    git clone https://github.com/vllm-project/vllm.git && \
    cd ./vllm && \
    git checkout v0.6.6.post1 && \
    # Quote the cmake version spec so the shell does not parse ">=" as a redirection
    pip install "cmake>=3.26" wheel packaging ninja "setuptools-scm>=8" numpy && \
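    # Remove the previously installed intel-extension-for-pytorch so it cannot conflict with the torch stack pinned by vLLM's CPU requirements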
    pip uninstall -y intel-extension-for-pytorch && \
    pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu && \
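    # VLLM_TARGET_DEVICE=cpu selects vLLM's CPU backend; a source build is needed because the prebuilt vLLM wheels target CUDA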
    VLLM_TARGET_DEVICE=cpu python3 setup.py install && \
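    # ray backs vLLM's multi-worker/distributed execution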
    pip install ray && \
    # Clean up unnecessary files to reduce image size
    rm -rf /var/lib/apt/lists/*

WORKDIR /llm/
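
# Example build command (illustrative; the image tag is an assumption, and the proxy build args are optional):
#   docker build \
#     --build-arg http_proxy=$HTTP_PROXY \
#     --build-arg https_proxy=$HTTPS_PROXY \
#     -t ipex-llm-serving-cpu:latest .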