From 2f14f53b1c92c906d330a39dcfd50c94c39318f5 Mon Sep 17 00:00:00 2001
From: Ziteng Zhang <87107332+Jasonzzt@users.noreply.github.com>
Date: Wed, 18 Oct 2023 10:25:12 +0800
Subject: [PATCH] Rename '/ppml' to '/bigdl' and change default model to Llama-2-7b-chat-hf (#9209)

---
 docker/llm/finetune/lora/cpu/docker/Dockerfile     | 14 +++++++-------
 docker/llm/finetune/lora/cpu/docker/README.md      | 10 +++++-----
 .../cpu/docker/bigdl-lora-finetuing-entrypoint.sh  | 12 ++++++------
 .../templates/bigdl-lora-finetuning-job.yaml       | 12 ++++++------
 .../llm/finetune/lora/cpu/kubernetes/values.yaml   |  2 +-
 5 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/docker/llm/finetune/lora/cpu/docker/Dockerfile b/docker/llm/finetune/lora/cpu/docker/Dockerfile
index 34d3d7fa..78226cde 100644
--- a/docker/llm/finetune/lora/cpu/docker/Dockerfile
+++ b/docker/llm/finetune/lora/cpu/docker/Dockerfile
@@ -12,13 +12,13 @@ FROM mpioperator/intel as builder
 ARG http_proxy
 ARG https_proxy
 ENV PIP_NO_CACHE_DIR=false
-ADD ./requirements.txt /ppml/requirements.txt
+ADD ./requirements.txt /bigdl/requirements.txt
 
 # add public key
 COPY --from=key-getter /root/intel-oneapi-archive-keyring.gpg /usr/share/keyrings/intel-oneapi-archive-keyring.gpg
 RUN echo "deb [signed-by=/usr/share/keyrings/intel-oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main " > /etc/apt/sources.list.d/oneAPI.list
 
-RUN mkdir /ppml/data && mkdir /ppml/model && \
+RUN mkdir /bigdl/data && mkdir /bigdl/model && \
 # install pytorch 2.0.1
     apt-get update && \
     apt-get install -y python3-pip python3.9-dev python3-wheel git software-properties-common && \
@@ -29,12 +29,12 @@ RUN mkdir /ppml/data && mkdir /ppml/model && \
     pip install intel_extension_for_pytorch==2.0.100 && \
     pip install oneccl_bind_pt -f https://developer.intel.com/ipex-whl-stable && \
 # install transformers etc.
-    cd /ppml && \
+    cd /bigdl && \
     git clone https://github.com/huggingface/transformers.git && \
     cd transformers && \
     git reset --hard 057e1d74733f52817dc05b673a340b4e3ebea08c && \
     pip install . && \
-    pip install -r /ppml/requirements.txt && \
+    pip install -r /bigdl/requirements.txt && \
 # install python
     add-apt-repository ppa:deadsnakes/ppa -y && \
     apt-get install -y python3.9 && \
@@ -56,9 +56,9 @@ RUN mkdir /ppml/data && mkdir /ppml/model && \
     echo "    UserKnownHostsFile /dev/null" >> /etc/ssh/ssh_config && \
     sed -i 's/#\(StrictModes \).*/\1no/g' /etc/ssh/sshd_config
 
-ADD ./bigdl-lora-finetuing-entrypoint.sh /ppml/bigdl-lora-finetuing-entrypoint.sh
-ADD ./lora_finetune.py /ppml/lora_finetune.py
+ADD ./bigdl-lora-finetuing-entrypoint.sh /bigdl/bigdl-lora-finetuing-entrypoint.sh
+ADD ./lora_finetune.py /bigdl/lora_finetune.py
 
-RUN chown -R mpiuser /ppml
+RUN chown -R mpiuser /bigdl
 USER mpiuser
 ENTRYPOINT ["/bin/bash"]
\ No newline at end of file
diff --git a/docker/llm/finetune/lora/cpu/docker/README.md b/docker/llm/finetune/lora/cpu/docker/README.md
index 3be1c760..2f3d5b76 100644
--- a/docker/llm/finetune/lora/cpu/docker/README.md
+++ b/docker/llm/finetune/lora/cpu/docker/README.md
@@ -31,8 +31,8 @@ docker run -itd \
  --cpuset-cpus="your_expected_range_of_cpu_numbers" \
  -e STANDALONE_DOCKER=TRUE \
  -e WORKER_COUNT_DOCKER=your_worker_count \
- -v your_downloaded_base_model_path:/ppml/model \
- -v your_downloaded_data_path:/ppml/data/alpaca_data_cleaned_archive.json \
+ -v your_downloaded_base_model_path:/bigdl/model \
+ -v your_downloaded_data_path:/bigdl/data/alpaca_data_cleaned_archive.json \
  intelanalytics/bigdl-llm-finetune-cpu:2.4.0-SNAPSHOT \
  bash
 ```
@@ -50,15 +50,15 @@ docker exec -it bigdl-llm-fintune-lora-cpu bash
 Then, run the script to start finetuning:
 
 ```
-bash /ppml/bigdl-lora-finetuing-entrypoint.sh
+bash /bigdl/bigdl-lora-finetuing-entrypoint.sh
 ```
 
 After minutes, it is expected to get results like:
 
 ```
 Training Alpaca-LoRA model with params:
-base_model: /ppml/model/
-data_path: /ppml/data/alpaca_data_cleaned_archive.json
+base_model: /bigdl/model/
+data_path: /bigdl/data/alpaca_data_cleaned_archive.json
 output_dir: /home/mpiuser/finetuned_model
 batch_size: 128
 micro_batch_size: 8
diff --git a/docker/llm/finetune/lora/cpu/docker/bigdl-lora-finetuing-entrypoint.sh b/docker/llm/finetune/lora/cpu/docker/bigdl-lora-finetuing-entrypoint.sh
index 340f35dc..3bd2305a 100644
--- a/docker/llm/finetune/lora/cpu/docker/bigdl-lora-finetuing-entrypoint.sh
+++ b/docker/llm/finetune/lora/cpu/docker/bigdl-lora-finetuing-entrypoint.sh
@@ -15,9 +15,9 @@ then
      -genv KMP_AFFINITY="granularity=fine,none" \
      -genv KMP_BLOCKTIME=1 \
      -genv TF_ENABLE_ONEDNN_OPTS=1 \
-     python /ppml/lora_finetune.py \
-       --base_model '/ppml/model/'  \
-       --data_path "/ppml/data/alpaca_data_cleaned_archive.json" \
+     python /bigdl/lora_finetune.py \
+       --base_model '/bigdl/model/'  \
+       --data_path "/bigdl/data/alpaca_data_cleaned_archive.json" \
        --output_dir "/home/mpiuser/finetuned_model" \
        --micro_batch_size 8 \
        --bf16 
@@ -29,7 +29,7 @@ else
   if [ "$WORKER_ROLE" = "launcher" ]
   then
     sed "s/:1/ /g" /etc/mpi/hostfile > /home/mpiuser/hostfile
-    export DATA_PATH="/ppml/data/$DATA_SUB_PATH"
+    export DATA_PATH="/bigdl/data/$DATA_SUB_PATH"
     sleep 10
     mpirun \
       -n $WORLD_SIZE \
@@ -40,8 +40,8 @@ else
       -genv KMP_AFFINITY="granularity=fine,none" \
       -genv KMP_BLOCKTIME=1 \
       -genv TF_ENABLE_ONEDNN_OPTS=1 \
-      python /ppml/lora_finetune.py \
-        --base_model '/ppml/model/'  \
+      python /bigdl/lora_finetune.py \
+        --base_model '/bigdl/model/'  \
         --data_path "$DATA_PATH" \
         --output_dir "/home/mpiuser/finetuned_model" \
         --micro_batch_size $MICRO_BATCH_SIZE \
diff --git a/docker/llm/finetune/lora/cpu/kubernetes/templates/bigdl-lora-finetuning-job.yaml b/docker/llm/finetune/lora/cpu/kubernetes/templates/bigdl-lora-finetuning-job.yaml
index 4b425b9c..34d7170a 100644
--- a/docker/llm/finetune/lora/cpu/kubernetes/templates/bigdl-lora-finetuning-job.yaml
+++ b/docker/llm/finetune/lora/cpu/kubernetes/templates/bigdl-lora-finetuning-job.yaml
@@ -23,7 +23,7 @@ spec:
              name: bigdl-ppml-finetuning-launcher
              securityContext:
               runAsUser: 1000
-             command: ['sh' , '-c', 'bash /ppml/bigdl-lora-finetuing-entrypoint.sh']
+             command: ['sh' , '-c', 'bash /bigdl/bigdl-lora-finetuing-entrypoint.sh']
              env:
              - name: WORKER_ROLE
                value: "launcher"
@@ -46,10 +46,10 @@ spec:
              volumeMounts:
              - name: nfs-storage
                subPath: {{ .Values.modelSubPath }}
-               mountPath: /ppml/model
+               mountPath: /bigdl/model
              - name: nfs-storage
                subPath: {{ .Values.dataSubPath }}
-               mountPath: "/ppml/data/{{ .Values.dataSubPath }}"
+               mountPath: "/bigdl/data/{{ .Values.dataSubPath }}"
     Worker:
       replicas: {{ .Values.trainerNum }}
       template:
@@ -59,7 +59,7 @@ spec:
             name: bigdl-ppml-finetuning-worker
             securityContext:
               runAsUser: 1000
-            command: ['sh' , '-c', 'bash /ppml/bigdl-lora-finetuing-entrypoint.sh']
+            command: ['sh' , '-c', 'bash /bigdl/bigdl-lora-finetuing-entrypoint.sh']
             env:
             - name: WORKER_ROLE
               value: "trainer"
@@ -78,10 +78,10 @@ spec:
             volumeMounts:
             - name: nfs-storage
               subPath: {{ .Values.modelSubPath }}
-              mountPath: /ppml/model
+              mountPath: /bigdl/model
             - name: nfs-storage
               subPath: {{ .Values.dataSubPath }}
-              mountPath: "/ppml/data/{{ .Values.dataSubPath }}"
+              mountPath: "/bigdl/data/{{ .Values.dataSubPath }}"
             resources:
               requests:
                 cpu: {{ .Values.cpuPerPod }}
diff --git a/docker/llm/finetune/lora/cpu/kubernetes/values.yaml b/docker/llm/finetune/lora/cpu/kubernetes/values.yaml
index 8c3b9db2..6c0e9ae7 100644
--- a/docker/llm/finetune/lora/cpu/kubernetes/values.yaml
+++ b/docker/llm/finetune/lora/cpu/kubernetes/values.yaml
@@ -4,6 +4,6 @@ microBatchSize: 8
 nfsServerIp: your_nfs_server_ip
 nfsPath: a_nfs_shared_folder_path_on_the_server
 dataSubPath: alpaca_data_cleaned_archive.json # a subpath of the data file under nfs directory
-modelSubPath: llama-7b-hf # a subpath of the model file (dir) under nfs directory
+modelSubPath: Llama-2-7b-chat-hf # a subpath of the model file (dir) under nfs directory
 ompNumThreads: 14
 cpuPerPod: 42