[LLM] Use to build a more slim docker for k8s (#9608)
* Create Dockerfile.k8s
* Update Dockerfile: a more slim standalone image
* Update Dockerfile.k8s
* Update bigdl-qlora-finetuing-entrypoint.sh
* Update qlora_finetuning_cpu.py
* Update alpaca_qlora_finetuning_cpu.py: following this [pr](https://github.com/intel-analytics/BigDL/pull/9551/files#diff-2025188afa54672d21236e6955c7c7f7686bec9239532e41c7983858cc9aaa89), update the LoraConfig
* Update transformers version
* Update Dockerfile
* Update Docker image name
* Fix error
parent 6eca8a8bb5
commit d204125e88
5 changed files with 119 additions and 40 deletions
docker/llm/finetune/qlora/cpu/docker/Dockerfile
@@ -18,57 +18,39 @@ ENV TRANSFORMERS_COMMIT_ID=95fe0f5
 COPY --from=key-getter /root/intel-oneapi-archive-keyring.gpg /usr/share/keyrings/intel-oneapi-archive-keyring.gpg
 RUN echo "deb [signed-by=/usr/share/keyrings/intel-oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main " > /etc/apt/sources.list.d/oneAPI.list
 
 RUN mkdir -p /bigdl/data && mkdir -p /bigdl/model && \
-    # install pytorch 2.1.0
+    # install pytorch 2.1.0
     apt-get update && \
-    apt-get install -y python3-pip python3.9-dev python3-wheel git software-properties-common && \
+    apt-get install -y --no-install-recommends python3-pip python3.9-dev python3-wheel python3.9-distutils git software-properties-common && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/* && \
     pip3 install --upgrade pip && \
     export PIP_DEFAULT_TIMEOUT=100 && \
     pip install --upgrade torch==2.1.0 --index-url https://download.pytorch.org/whl/cpu && \
-    # install CPU bigdl-llm
-    pip3 install --pre --upgrade bigdl-llm[all] -i https://pypi.tuna.tsinghua.edu.cn/simple/ && \
-    # install ipex and oneccl
+    # install CPU bigdl-llm
+    pip3 install --pre --upgrade bigdl-llm[all] && \
+    # install ipex and oneccl
     pip install intel_extension_for_pytorch==2.0.100 && \
     pip install oneccl_bind_pt -f https://developer.intel.com/ipex-whl-stable && \
-    # install huggingface dependencies
-    pip install datasets https://files.pythonhosted.org/packages/9a/06/e4ec2a321e57c03b7e9345d709d554a52c33760e5015fdff0919d9459af0/transformers-4.35.0-py3-none-any.whl && \
+    # install huggingface dependencies
+    pip install datasets transformers==4.35.0 && \
     pip install fire peft==0.5.0 && \
     pip install accelerate==0.23.0 && \
-    # install basic dependencies
-    apt-get install -y curl wget git gnupg gpg-agent software-properties-common libunwind8-dev vim less && \
-    # install python 3.9
-    ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone && \
-    env DEBIAN_FRONTEND=noninteractive apt-get update && \
-    add-apt-repository ppa:deadsnakes/ppa -y && \
-    apt-get install -y python3.9 && \
-    rm /usr/bin/python3 && \
-    ln -s /usr/bin/python3.9 /usr/bin/python3 && \
+    # install basic dependencies
+    apt-get update && apt-get install -y curl wget gpg gpg-agent software-properties-common libunwind8-dev && \
+    # get qlora example code
     ln -s /usr/bin/python3 /usr/bin/python && \
-    apt-get install -y python3-pip python3.9-dev python3-wheel python3.9-distutils && \
-    curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
-    # install OpenSSH for MPI to communicate between containers
-    apt-get install -y --no-install-recommends openssh-client openssh-server && \
-    mkdir -p /var/run/sshd && \
-    # allow OpenSSH to talk to containers without asking for confirmation
-    # by disabling StrictHostKeyChecking.
-    # mpi-operator mounts the .ssh folder from a Secret. For that to work, we need
-    # to disable UserKnownHostsFile to avoid write permissions.
-    # disabling StrictModes avoids directory and files read permission checks.
-    sed -i 's/[ #]\(.*StrictHostKeyChecking \).*/ \1no/g' /etc/ssh/ssh_config && \
-    echo "    UserKnownHostsFile /dev/null" >> /etc/ssh/ssh_config && \
-    sed -i 's/#\(StrictModes \).*/\1no/g' /etc/ssh/sshd_config && \
-    # add bigdl-llm qlora cpu example
     cd /bigdl && \
     git clone https://github.com/intel-analytics/BigDL.git && \
     mv BigDL/python/llm/example/CPU/QLoRA-FineTuning/* . && \
-    rm -r BigDL
+    rm -r BigDL && \
+    chown -R mpiuser /bigdl
 
 # for docker directly run example
 COPY ./start-qlora-finetuning-on-cpu.sh /bigdl/start-qlora-finetuning-on-cpu.sh
 # for k8s
 COPY ./bigdl-qlora-finetuing-entrypoint.sh /bigdl/bigdl-qlora-finetuing-entrypoint.sh
 
-RUN chown -R mpiuser /bigdl
 USER mpiuser
 
 ENTRYPOINT ["/bin/bash"]
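The switch from a pinned wheel URL to `transformers==4.35.0` and the removal of the deadsnakes Python install are easy to sanity-check after a rebuild. A minimal smoke test, assuming the image was built and tagged as in the README section below (the entrypoint is `/bin/bash`, so the `-c` string is executed by bash inside the image):

```bash
docker run --rm intelanalytics/bigdl-llm-finetune-qlora-cpu-standalone:2.5.0-SNAPSHOT \
    -c "python -c 'import torch, transformers, peft, accelerate; print(torch.__version__, transformers.__version__, peft.__version__, accelerate.__version__)'"
# expected: something like "2.1.0+cpu 4.35.0 0.5.0 0.23.0"
```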
docker/llm/finetune/qlora/cpu/docker/Dockerfile.k8s (new file, +73 lines)
@@ -0,0 +1,73 @@
+# USE TO BUILD A MORE SLIM IMAGE FOR K8S
+FROM ubuntu:20.04 as key-getter
+ARG http_proxy
+ARG https_proxy
+
+RUN apt-get update && \
+    apt-get install -y curl gpg && \
+    curl -fsSL https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB | gpg --dearmor | tee /root/intel-oneapi-archive-keyring.gpg
+
+FROM debian:bullseye as builder
+
+ARG http_proxy
+ARG https_proxy
+ENV TZ=Asia/Shanghai
+ARG PIP_NO_CACHE_DIR=false
+ENV TRANSFORMERS_COMMIT_ID=95fe0f5
+
+# add public key
+COPY --from=key-getter /root/intel-oneapi-archive-keyring.gpg /usr/share/keyrings/intel-oneapi-archive-keyring.gpg
+RUN echo "deb [signed-by=/usr/share/keyrings/intel-oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main " > /etc/apt/sources.list.d/oneAPI.list
+
+RUN mkdir -p /bigdl/data && mkdir -p /bigdl/model && \
+    apt-get update && \
+    apt install -y --no-install-recommends openssh-server openssh-client libcap2-bin gnupg2 ca-certificates \
+    python3-pip python3.9-dev python3-wheel python3.9-distutils git software-properties-common && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/* && \
+    mkdir -p /var/run/sshd && \
+    setcap CAP_NET_BIND_SERVICE=+eip /usr/sbin/sshd && \
+    apt remove libcap2-bin -y && \
+    sed -i "s/[ #]\(.*StrictHostKeyChecking \).*/ \1no/g" /etc/ssh/ssh_config && \
+    echo "    UserKnownHostsFile /dev/null" >> /etc/ssh/ssh_config && \
+    sed -i "s/[ #]\(.*Port \).*/ \1$port/g" /etc/ssh/ssh_config && \
+    sed -i "s/#\(StrictModes \).*/\1no/g" /etc/ssh/sshd_config && \
+    sed -i "s/#\(Port \).*/\1$port/g" /etc/ssh/sshd_config && \
+    useradd -m mpiuser && \
+    cp -r /etc/ssh/sshd_config /home/mpiuser/.sshd_config && \
+    echo "Port $port" >> /home/mpiuser/.sshd_config && \
+    # install pytorch 2.1.0
+    pip3 install --upgrade pip && \
+    export PIP_DEFAULT_TIMEOUT=100 && \
+    pip install --upgrade torch==2.1.0 --index-url https://download.pytorch.org/whl/cpu && \
+    # install CPU bigdl-llm
+    pip3 install --pre --upgrade bigdl-llm[all] && \
+    # install ipex and oneccl
+    pip install intel_extension_for_pytorch==2.0.100 && \
+    pip install oneccl_bind_pt -f https://developer.intel.com/ipex-whl-stable && \
+    # install huggingface dependencies
+    pip install datasets transformers==4.35.0 && \
+    pip install fire peft==0.5.0 && \
+    pip install accelerate==0.23.0 && \
+    # install basic dependencies
+    apt-get update && apt-get install -y curl wget gpg gpg-agent software-properties-common libunwind8-dev && \
+    # Install Intel oneAPI keys.
+    apt remove -y gnupg2 ca-certificates && \
+    apt autoremove -y && \
+    apt update && \
+    apt install -y --no-install-recommends dnsutils intel-oneapi-mpi && \
+    rm -rf /var/lib/apt/lists/* && \
+    # get qlora example code
+    ln -s /usr/bin/python3 /usr/bin/python && \
+    cd /bigdl && \
+    git clone https://github.com/intel-analytics/BigDL.git && \
+    mv BigDL/python/llm/example/CPU/QLoRA-FineTuning/* . && \
+    rm -r BigDL && \
+    chown -R mpiuser /bigdl
+
+# for standalone
+COPY ./start-qlora-finetuning-on-cpu.sh /bigdl/start-qlora-finetuning-on-cpu.sh
+
+USER mpiuser
+
+ENTRYPOINT ["/bin/bash"]
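The `setcap`/`.sshd_config` plumbing above exists so the unprivileged `mpiuser` can run an SSH daemon inside each worker pod (`$port` is expected to come from the build environment; it is not declared in the lines above). A sketch of the kind of launch this enables, illustrative rather than taken from the entrypoint script:

```bash
# As mpiuser: run sshd in the foreground with the per-user config the
# Dockerfile prepared; CAP_NET_BIND_SERVICE on /usr/sbin/sshd is what
# allows a non-root user to bind the configured port.
/usr/sbin/sshd -De -f /home/mpiuser/.sshd_config
```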
@@ -7,20 +7,35 @@ The following shows how to fine-tune LLM with Quantization (QLoRA built on BigDL
 You can download directly from Dockerhub like:
 
 ```bash
-docker pull intelanalytics/bigdl-llm-finetune-qlora-cpu:2.4.0-SNAPSHOT
+# For standalone
+docker pull intelanalytics/bigdl-llm-finetune-qlora-cpu-standalone:2.5.0-SNAPSHOT
+
+# For k8s
+docker pull intelanalytics/bigdl-llm-finetune-qlora-cpu-k8s:2.5.0-SNAPSHOT
 ```
 
 Or build the image from source:
 
 ```bash
+# For standalone
 export HTTP_PROXY=your_http_proxy
 export HTTPS_PROXY=your_https_proxy
 
 docker build \
     --build-arg http_proxy=${HTTP_PROXY} \
     --build-arg https_proxy=${HTTPS_PROXY} \
-    -t intelanalytics/bigdl-llm-finetune-qlora-cpu:2.4.0-SNAPSHOT \
+    -t intelanalytics/bigdl-llm-finetune-qlora-cpu-standalone:2.5.0-SNAPSHOT \
     -f ./Dockerfile .
+
+# For k8s
+export HTTP_PROXY=your_http_proxy
+export HTTPS_PROXY=your_https_proxy
+
+docker build \
+    --build-arg http_proxy=${HTTP_PROXY} \
+    --build-arg https_proxy=${HTTPS_PROXY} \
+    -t intelanalytics/bigdl-llm-finetune-qlora-cpu-k8s:2.5.0-SNAPSHOT \
+    -f ./Dockerfile.k8s .
 ```
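Since the point of `Dockerfile.k8s` is a slimmer image, it is worth comparing the two tags once both builds finish; a minimal check, assuming both were just built locally:

```bash
docker images --format 'table {{.Repository}}:{{.Tag}}\t{{.Size}}' | grep bigdl-llm-finetune-qlora-cpu
# drill into which layers dominate a given image
docker history intelanalytics/bigdl-llm-finetune-qlora-cpu-k8s:2.5.0-SNAPSHOT
```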
 
 ### 2. Prepare Base Model, Data and Container
@@ -40,7 +55,7 @@ docker run -itd \
   -e https_proxy=${HTTPS_PROXY} \
   -v $BASE_MODE_PATH:/bigdl/model \
   -v $DATA_PATH:/bigdl/data/english_quotes \
-  intelanalytics/bigdl-llm-finetune-qlora-cpu:2.4.0-SNAPSHOT
+  intelanalytics/bigdl-llm-finetune-qlora-cpu-standalone:2.5.0-SNAPSHOT
 ```
 
 Downloading and mounting the base model and data into the docker container is the standard fine-tuning flow. You can skip this step for a quick start; in that case, the fine-tuning code will automatically download the needed files:
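If you prepare the two mounts up front, something like the following works; a hedged sketch using the Hugging Face CLI, where the model id is only an example and `Abirate/english_quotes` is inferred from the `/bigdl/data/english_quotes` mount path:

```bash
pip install -U huggingface_hub
# example base model; gated models additionally need: huggingface-cli login
huggingface-cli download meta-llama/Llama-2-7b-hf --local-dir ./Llama-2-7b-hf
huggingface-cli download Abirate/english_quotes --repo-type dataset --local-dir ./english_quotes
export BASE_MODE_PATH=$PWD/Llama-2-7b-hf
export DATA_PATH=$PWD/english_quotes
```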
@@ -54,7 +69,7 @@ docker run -itd \
   --name=bigdl-llm-fintune-qlora-cpu \
   -e http_proxy=${HTTP_PROXY} \
   -e https_proxy=${HTTPS_PROXY} \
-  intelanalytics/bigdl-llm-finetune-qlora-cpu:2.4.0-SNAPSHOT
+  intelanalytics/bigdl-llm-finetune-qlora-cpu-standalone:2.5.0-SNAPSHOT
 ```
 
 However, we do recommend handling them manually, because the automatic download can be blocked by Internet access restrictions or Hugging Face authentication depending on your environment, and the manual method also lets you fine-tune in a custom way (with a different base model and dataset).
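Once a container from either `docker run -itd` variant above is running, fine-tuning is started from inside it; a sketch, assuming the container name used above and the start script the Dockerfile copies into `/bigdl`:

```bash
docker exec -it bigdl-llm-fintune-qlora-cpu bash
# then, inside the container:
cd /bigdl && bash start-qlora-finetuning-on-cpu.sh
```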
@@ -93,7 +108,9 @@ TrainOutput(global_step=200, training_loss=1.5072882556915284, metrics={'train_r
 ```
 
 ### 4. Merge the adapter into the original model
 
+Use the [export_merged_model.py](https://github.com/intel-analytics/BigDL/blob/main/python/llm/example/GPU/QLoRA-FineTuning/export_merged_model.py) script to merge.
+
 ```
 python ./export_merged_model.py --repo-id-or-model-path REPO_ID_OR_MODEL_PATH --adapter_path ./outputs/checkpoint-200 --output_path ./outputs/checkpoint-200-merged
 ```
@@ -101,14 +118,18 @@ python ./export_merged_model.py --repo-id-or-model-path REPO_ID_OR_MODEL_PATH --
 Then you can use `./outputs/checkpoint-200-merged` as a normal Hugging Face transformers model for inference.
 
 ### 5. Use BigDL-LLM to verify the fine-tuning effect
 
 Train more steps and try an input sentence like `['quote'] -> [?]` to verify. For example, use `“QLoRA fine-tuning using BigDL-LLM 4bit optimizations on Intel CPU is Efficient and convenient” ->: ` for inference.
+See the BigDL-LLM llama2 example ([link](https://github.com/intel-analytics/BigDL/tree/main/python/llm/example/CPU/HF-Transformers-AutoModels/Model/llama2)) and update it to `LLAMA2_PROMPT_FORMAT = "{prompt}"`.
+
 ```bash
 python ./generate.py --repo-id-or-model-path REPO_ID_OR_MODEL_PATH --prompt "“QLoRA fine-tuning using BigDL-LLM 4bit optimizations on Intel CPU is Efficient and convenient” ->:" --n-predict 20
 ```
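The `LLAMA2_PROMPT_FORMAT` edit mentioned above can also be scripted; a sketch, assuming the variable is defined at the top level of the example's `generate.py`:

```bash
# Rewrite the prompt template in place before running the example.
sed -i 's/^LLAMA2_PROMPT_FORMAT = .*/LLAMA2_PROMPT_FORMAT = "{prompt}"/' ./generate.py
```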
 
 #### Sample Output
 
+Base_model output
+
 ```log
 Inference time: xxx s
 -------------------- Prompt --------------------
@@ -116,7 +137,9 @@ Inference time: xxx s
 -------------------- Output --------------------
 “QLoRA fine-tuning using BigDL-LLM 4bit optimizations on Intel CPU is Efficient and convenient” ->: 💻 Fine-tuning a language model on a powerful device like an Intel CPU
 ```
 
+Merged_model output
+
 ```log
 Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
 Inference time: xxx s
@@ -140,7 +163,7 @@ docker run -itd \
   -e WORKER_COUNT_DOCKER=your_worker_count \
   -v your_downloaded_base_model_path:/bigdl/model \
   -v your_downloaded_data_path:/bigdl/data/alpaca_data_cleaned_archive.json \
-  intelanalytics/bigdl-llm-finetune-qlora-cpu:2.5.0-SNAPSHOT
+  intelanalytics/bigdl-llm-finetune-qlora-cpu-standalone:2.5.0-SNAPSHOT
 ```
 
 Note that `STANDALONE_DOCKER` is set to **TRUE** here.
bigdl-qlora-finetuing-entrypoint.sh
@@ -4,6 +4,7 @@ set -x
 source /opt/intel/oneapi/setvars.sh
 export CCL_WORKER_COUNT=$WORLD_SIZE
+source bigdl-llm-init -t
 cd /bigdl/alpaca-qlora
 if [ "$WORKER_ROLE" = "launcher" ]
 then
   sed "s/:1/ /g" /etc/mpi/hostfile > /home/mpiuser/hostfile
@@ -1,4 +1,4 @@
-imageName: intelanalytics/bigdl-llm-finetune-qlora-cpu:2.5.0-SNAPSHOT
+imageName: intelanalytics/bigdl-llm-finetune-qlora-cpu-k8s:2.5.0-SNAPSHOT
 trainerNum: 2
 microBatchSize: 8
 enableGradientCheckpoint: false # true will save more memory but increase latency
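With `imageName` now pointing at the `-k8s` tag, this values file is consumed at deploy time. A hedged sketch of a deployment, assuming the BigDL kubernetes example is packaged as a Helm chart in a sibling `kubernetes/` directory (release name and chart path illustrative):

```bash
helm install bigdl-qlora-finetuning ./kubernetes   # chart path illustrative
kubectl get pods | grep qlora                      # launcher + trainerNum workers
```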