From 2f14f53b1c92c906d330a39dcfd50c94c39318f5 Mon Sep 17 00:00:00 2001 From: Ziteng Zhang <87107332+Jasonzzt@users.noreply.github.com> Date: Wed, 18 Oct 2023 10:25:12 +0800 Subject: [PATCH] changed '/ppml' into '/bigdl' and modified llama-7b (#9209) --- docker/llm/finetune/lora/cpu/docker/Dockerfile | 14 +++++++------- docker/llm/finetune/lora/cpu/docker/README.md | 10 +++++----- .../cpu/docker/bigdl-lora-finetuing-entrypoint.sh | 12 ++++++------ .../templates/bigdl-lora-finetuning-job.yaml | 12 ++++++------ .../llm/finetune/lora/cpu/kubernetes/values.yaml | 2 +- 5 files changed, 25 insertions(+), 25 deletions(-) diff --git a/docker/llm/finetune/lora/cpu/docker/Dockerfile b/docker/llm/finetune/lora/cpu/docker/Dockerfile index 34d3d7fa..78226cde 100644 --- a/docker/llm/finetune/lora/cpu/docker/Dockerfile +++ b/docker/llm/finetune/lora/cpu/docker/Dockerfile @@ -12,13 +12,13 @@ FROM mpioperator/intel as builder ARG http_proxy ARG https_proxy ENV PIP_NO_CACHE_DIR=false -ADD ./requirements.txt /ppml/requirements.txt +ADD ./requirements.txt /bigdl/requirements.txt # add public key COPY --from=key-getter /root/intel-oneapi-archive-keyring.gpg /usr/share/keyrings/intel-oneapi-archive-keyring.gpg RUN echo "deb [signed-by=/usr/share/keyrings/intel-oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main " > /etc/apt/sources.list.d/oneAPI.list -RUN mkdir /ppml/data && mkdir /ppml/model && \ +RUN mkdir /bigdl/data && mkdir /bigdl/model && \ # install pytorch 2.0.1 apt-get update && \ apt-get install -y python3-pip python3.9-dev python3-wheel git software-properties-common && \ @@ -29,12 +29,12 @@ RUN mkdir /ppml/data && mkdir /ppml/model && \ pip install intel_extension_for_pytorch==2.0.100 && \ pip install oneccl_bind_pt -f https://developer.intel.com/ipex-whl-stable && \ # install transformers etc. - cd /ppml && \ + cd /bigdl && \ git clone https://github.com/huggingface/transformers.git && \ cd transformers && \ git reset --hard 057e1d74733f52817dc05b673a340b4e3ebea08c && \ pip install . && \ - pip install -r /ppml/requirements.txt && \ + pip install -r /bigdl/requirements.txt && \ # install python add-apt-repository ppa:deadsnakes/ppa -y && \ apt-get install -y python3.9 && \ @@ -56,9 +56,9 @@ RUN mkdir /ppml/data && mkdir /ppml/model && \ echo " UserKnownHostsFile /dev/null" >> /etc/ssh/ssh_config && \ sed -i 's/#\(StrictModes \).*/\1no/g' /etc/ssh/sshd_config -ADD ./bigdl-lora-finetuing-entrypoint.sh /ppml/bigdl-lora-finetuing-entrypoint.sh -ADD ./lora_finetune.py /ppml/lora_finetune.py +ADD ./bigdl-lora-finetuing-entrypoint.sh /bigdl/bigdl-lora-finetuing-entrypoint.sh +ADD ./lora_finetune.py /bigdl/lora_finetune.py -RUN chown -R mpiuser /ppml +RUN chown -R mpiuser /bigdl USER mpiuser ENTRYPOINT ["/bin/bash"] \ No newline at end of file diff --git a/docker/llm/finetune/lora/cpu/docker/README.md b/docker/llm/finetune/lora/cpu/docker/README.md index 3be1c760..2f3d5b76 100644 --- a/docker/llm/finetune/lora/cpu/docker/README.md +++ b/docker/llm/finetune/lora/cpu/docker/README.md @@ -31,8 +31,8 @@ docker run -itd \ --cpuset-cpus="your_expected_range_of_cpu_numbers" \ -e STANDALONE_DOCKER=TRUE \ -e WORKER_COUNT_DOCKER=your_worker_count \ - -v your_downloaded_base_model_path:/ppml/model \ - -v your_downloaded_data_path:/ppml/data/alpaca_data_cleaned_archive.json \ + -v your_downloaded_base_model_path:/bigdl/model \ + -v your_downloaded_data_path:/bigdl/data/alpaca_data_cleaned_archive.json \ intelanalytics/bigdl-llm-finetune-cpu:2.4.0-SNAPSHOT \ bash ``` @@ -50,15 +50,15 @@ docker exec -it bigdl-llm-fintune-lora-cpu bash Then, run the script to start finetuning: ``` -bash /ppml/bigdl-lora-finetuing-entrypoint.sh +bash /bigdl/bigdl-lora-finetuing-entrypoint.sh ``` After minutes, it is expected to get results like: ``` Training Alpaca-LoRA model with params: -base_model: /ppml/model/ -data_path: /ppml/data/alpaca_data_cleaned_archive.json +base_model: /bigdl/model/ +data_path: /bigdl/data/alpaca_data_cleaned_archive.json output_dir: /home/mpiuser/finetuned_model batch_size: 128 micro_batch_size: 8 diff --git a/docker/llm/finetune/lora/cpu/docker/bigdl-lora-finetuing-entrypoint.sh b/docker/llm/finetune/lora/cpu/docker/bigdl-lora-finetuing-entrypoint.sh index 340f35dc..3bd2305a 100644 --- a/docker/llm/finetune/lora/cpu/docker/bigdl-lora-finetuing-entrypoint.sh +++ b/docker/llm/finetune/lora/cpu/docker/bigdl-lora-finetuing-entrypoint.sh @@ -15,9 +15,9 @@ then -genv KMP_AFFINITY="granularity=fine,none" \ -genv KMP_BLOCKTIME=1 \ -genv TF_ENABLE_ONEDNN_OPTS=1 \ - python /ppml/lora_finetune.py \ - --base_model '/ppml/model/' \ - --data_path "/ppml/data/alpaca_data_cleaned_archive.json" \ + python /bigdl/lora_finetune.py \ + --base_model '/bigdl/model/' \ + --data_path "/bigdl/data/alpaca_data_cleaned_archive.json" \ --output_dir "/home/mpiuser/finetuned_model" \ --micro_batch_size 8 \ --bf16 @@ -29,7 +29,7 @@ else if [ "$WORKER_ROLE" = "launcher" ] then sed "s/:1/ /g" /etc/mpi/hostfile > /home/mpiuser/hostfile - export DATA_PATH="/ppml/data/$DATA_SUB_PATH" + export DATA_PATH="/bigdl/data/$DATA_SUB_PATH" sleep 10 mpirun \ -n $WORLD_SIZE \ @@ -40,8 +40,8 @@ else -genv KMP_AFFINITY="granularity=fine,none" \ -genv KMP_BLOCKTIME=1 \ -genv TF_ENABLE_ONEDNN_OPTS=1 \ - python /ppml/lora_finetune.py \ - --base_model '/ppml/model/' \ + python /bigdl/lora_finetune.py \ + --base_model '/bigdl/model/' \ --data_path "$DATA_PATH" \ --output_dir "/home/mpiuser/finetuned_model" \ --micro_batch_size $MICRO_BATCH_SIZE \ diff --git a/docker/llm/finetune/lora/cpu/kubernetes/templates/bigdl-lora-finetuning-job.yaml b/docker/llm/finetune/lora/cpu/kubernetes/templates/bigdl-lora-finetuning-job.yaml index 4b425b9c..34d7170a 100644 --- a/docker/llm/finetune/lora/cpu/kubernetes/templates/bigdl-lora-finetuning-job.yaml +++ b/docker/llm/finetune/lora/cpu/kubernetes/templates/bigdl-lora-finetuning-job.yaml @@ -23,7 +23,7 @@ spec: name: bigdl-ppml-finetuning-launcher securityContext: runAsUser: 1000 - command: ['sh' , '-c', 'bash /ppml/bigdl-lora-finetuing-entrypoint.sh'] + command: ['sh' , '-c', 'bash /bigdl/bigdl-lora-finetuing-entrypoint.sh'] env: - name: WORKER_ROLE value: "launcher" @@ -46,10 +46,10 @@ spec: volumeMounts: - name: nfs-storage subPath: {{ .Values.modelSubPath }} - mountPath: /ppml/model + mountPath: /bigdl/model - name: nfs-storage subPath: {{ .Values.dataSubPath }} - mountPath: "/ppml/data/{{ .Values.dataSubPath }}" + mountPath: "/bigdl/data/{{ .Values.dataSubPath }}" Worker: replicas: {{ .Values.trainerNum }} template: @@ -59,7 +59,7 @@ spec: name: bigdl-ppml-finetuning-worker securityContext: runAsUser: 1000 - command: ['sh' , '-c', 'bash /ppml/bigdl-lora-finetuing-entrypoint.sh'] + command: ['sh' , '-c', 'bash /bigdl/bigdl-lora-finetuing-entrypoint.sh'] env: - name: WORKER_ROLE value: "trainer" @@ -78,10 +78,10 @@ spec: volumeMounts: - name: nfs-storage subPath: {{ .Values.modelSubPath }} - mountPath: /ppml/model + mountPath: /bigdl/model - name: nfs-storage subPath: {{ .Values.dataSubPath }} - mountPath: "/ppml/data/{{ .Values.dataSubPath }}" + mountPath: "/bigdl/data/{{ .Values.dataSubPath }}" resources: requests: cpu: {{ .Values.cpuPerPod }} diff --git a/docker/llm/finetune/lora/cpu/kubernetes/values.yaml b/docker/llm/finetune/lora/cpu/kubernetes/values.yaml index 8c3b9db2..6c0e9ae7 100644 --- a/docker/llm/finetune/lora/cpu/kubernetes/values.yaml +++ b/docker/llm/finetune/lora/cpu/kubernetes/values.yaml @@ -4,6 +4,6 @@ microBatchSize: 8 nfsServerIp: your_nfs_server_ip nfsPath: a_nfs_shared_folder_path_on_the_server dataSubPath: alpaca_data_cleaned_archive.json # a subpath of the data file under nfs directory -modelSubPath: llama-7b-hf # a subpath of the model file (dir) under nfs directory +modelSubPath: Llama-2-7b-chat-hf # a subpath of the model file (dir) under nfs directory ompNumThreads: 14 cpuPerPod: 42