changed '/ppml' into '/bigdl' and modified llama-7b (#9209)

This commit is contained in:
Ziteng Zhang 2023-10-18 10:25:12 +08:00 committed by GitHub
parent b9194c5786
commit 2f14f53b1c
5 changed files with 25 additions and 25 deletions

View file

@ -12,13 +12,13 @@ FROM mpioperator/intel as builder
ARG http_proxy ARG http_proxy
ARG https_proxy ARG https_proxy
ENV PIP_NO_CACHE_DIR=false ENV PIP_NO_CACHE_DIR=false
ADD ./requirements.txt /ppml/requirements.txt ADD ./requirements.txt /bigdl/requirements.txt
# add public key # add public key
COPY --from=key-getter /root/intel-oneapi-archive-keyring.gpg /usr/share/keyrings/intel-oneapi-archive-keyring.gpg COPY --from=key-getter /root/intel-oneapi-archive-keyring.gpg /usr/share/keyrings/intel-oneapi-archive-keyring.gpg
RUN echo "deb [signed-by=/usr/share/keyrings/intel-oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main " > /etc/apt/sources.list.d/oneAPI.list RUN echo "deb [signed-by=/usr/share/keyrings/intel-oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main " > /etc/apt/sources.list.d/oneAPI.list
RUN mkdir /ppml/data && mkdir /ppml/model && \ RUN mkdir /bigdl/data && mkdir /bigdl/model && \
# install pytorch 2.0.1 # install pytorch 2.0.1
apt-get update && \ apt-get update && \
apt-get install -y python3-pip python3.9-dev python3-wheel git software-properties-common && \ apt-get install -y python3-pip python3.9-dev python3-wheel git software-properties-common && \
@ -29,12 +29,12 @@ RUN mkdir /ppml/data && mkdir /ppml/model && \
pip install intel_extension_for_pytorch==2.0.100 && \ pip install intel_extension_for_pytorch==2.0.100 && \
pip install oneccl_bind_pt -f https://developer.intel.com/ipex-whl-stable && \ pip install oneccl_bind_pt -f https://developer.intel.com/ipex-whl-stable && \
# install transformers etc. # install transformers etc.
cd /ppml && \ cd /bigdl && \
git clone https://github.com/huggingface/transformers.git && \ git clone https://github.com/huggingface/transformers.git && \
cd transformers && \ cd transformers && \
git reset --hard 057e1d74733f52817dc05b673a340b4e3ebea08c && \ git reset --hard 057e1d74733f52817dc05b673a340b4e3ebea08c && \
pip install . && \ pip install . && \
pip install -r /ppml/requirements.txt && \ pip install -r /bigdl/requirements.txt && \
# install python # install python
add-apt-repository ppa:deadsnakes/ppa -y && \ add-apt-repository ppa:deadsnakes/ppa -y && \
apt-get install -y python3.9 && \ apt-get install -y python3.9 && \
@ -56,9 +56,9 @@ RUN mkdir /ppml/data && mkdir /ppml/model && \
echo " UserKnownHostsFile /dev/null" >> /etc/ssh/ssh_config && \ echo " UserKnownHostsFile /dev/null" >> /etc/ssh/ssh_config && \
sed -i 's/#\(StrictModes \).*/\1no/g' /etc/ssh/sshd_config sed -i 's/#\(StrictModes \).*/\1no/g' /etc/ssh/sshd_config
ADD ./bigdl-lora-finetuing-entrypoint.sh /ppml/bigdl-lora-finetuing-entrypoint.sh ADD ./bigdl-lora-finetuing-entrypoint.sh /bigdl/bigdl-lora-finetuing-entrypoint.sh
ADD ./lora_finetune.py /ppml/lora_finetune.py ADD ./lora_finetune.py /bigdl/lora_finetune.py
RUN chown -R mpiuser /ppml RUN chown -R mpiuser /bigdl
USER mpiuser USER mpiuser
ENTRYPOINT ["/bin/bash"] ENTRYPOINT ["/bin/bash"]

View file

@ -31,8 +31,8 @@ docker run -itd \
--cpuset-cpus="your_expected_range_of_cpu_numbers" \ --cpuset-cpus="your_expected_range_of_cpu_numbers" \
-e STANDALONE_DOCKER=TRUE \ -e STANDALONE_DOCKER=TRUE \
-e WORKER_COUNT_DOCKER=your_worker_count \ -e WORKER_COUNT_DOCKER=your_worker_count \
-v your_downloaded_base_model_path:/ppml/model \ -v your_downloaded_base_model_path:/bigdl/model \
-v your_downloaded_data_path:/ppml/data/alpaca_data_cleaned_archive.json \ -v your_downloaded_data_path:/bigdl/data/alpaca_data_cleaned_archive.json \
intelanalytics/bigdl-llm-finetune-cpu:2.4.0-SNAPSHOT \ intelanalytics/bigdl-llm-finetune-cpu:2.4.0-SNAPSHOT \
bash bash
``` ```
@ -50,15 +50,15 @@ docker exec -it bigdl-llm-fintune-lora-cpu bash
Then, run the script to start finetuning: Then, run the script to start finetuning:
``` ```
bash /ppml/bigdl-lora-finetuing-entrypoint.sh bash /bigdl/bigdl-lora-finetuing-entrypoint.sh
``` ```
After a few minutes, you should see output like: After a few minutes, you should see output like:
``` ```
Training Alpaca-LoRA model with params: Training Alpaca-LoRA model with params:
base_model: /ppml/model/ base_model: /bigdl/model/
data_path: /ppml/data/alpaca_data_cleaned_archive.json data_path: /bigdl/data/alpaca_data_cleaned_archive.json
output_dir: /home/mpiuser/finetuned_model output_dir: /home/mpiuser/finetuned_model
batch_size: 128 batch_size: 128
micro_batch_size: 8 micro_batch_size: 8

View file

@ -15,9 +15,9 @@ then
-genv KMP_AFFINITY="granularity=fine,none" \ -genv KMP_AFFINITY="granularity=fine,none" \
-genv KMP_BLOCKTIME=1 \ -genv KMP_BLOCKTIME=1 \
-genv TF_ENABLE_ONEDNN_OPTS=1 \ -genv TF_ENABLE_ONEDNN_OPTS=1 \
python /ppml/lora_finetune.py \ python /bigdl/lora_finetune.py \
--base_model '/ppml/model/' \ --base_model '/bigdl/model/' \
--data_path "/ppml/data/alpaca_data_cleaned_archive.json" \ --data_path "/bigdl/data/alpaca_data_cleaned_archive.json" \
--output_dir "/home/mpiuser/finetuned_model" \ --output_dir "/home/mpiuser/finetuned_model" \
--micro_batch_size 8 \ --micro_batch_size 8 \
--bf16 --bf16
@ -29,7 +29,7 @@ else
if [ "$WORKER_ROLE" = "launcher" ] if [ "$WORKER_ROLE" = "launcher" ]
then then
sed "s/:1/ /g" /etc/mpi/hostfile > /home/mpiuser/hostfile sed "s/:1/ /g" /etc/mpi/hostfile > /home/mpiuser/hostfile
export DATA_PATH="/ppml/data/$DATA_SUB_PATH" export DATA_PATH="/bigdl/data/$DATA_SUB_PATH"
sleep 10 sleep 10
mpirun \ mpirun \
-n $WORLD_SIZE \ -n $WORLD_SIZE \
@ -40,8 +40,8 @@ else
-genv KMP_AFFINITY="granularity=fine,none" \ -genv KMP_AFFINITY="granularity=fine,none" \
-genv KMP_BLOCKTIME=1 \ -genv KMP_BLOCKTIME=1 \
-genv TF_ENABLE_ONEDNN_OPTS=1 \ -genv TF_ENABLE_ONEDNN_OPTS=1 \
python /ppml/lora_finetune.py \ python /bigdl/lora_finetune.py \
--base_model '/ppml/model/' \ --base_model '/bigdl/model/' \
--data_path "$DATA_PATH" \ --data_path "$DATA_PATH" \
--output_dir "/home/mpiuser/finetuned_model" \ --output_dir "/home/mpiuser/finetuned_model" \
--micro_batch_size $MICRO_BATCH_SIZE \ --micro_batch_size $MICRO_BATCH_SIZE \

View file

@ -23,7 +23,7 @@ spec:
name: bigdl-ppml-finetuning-launcher name: bigdl-ppml-finetuning-launcher
securityContext: securityContext:
runAsUser: 1000 runAsUser: 1000
command: ['sh' , '-c', 'bash /ppml/bigdl-lora-finetuing-entrypoint.sh'] command: ['sh' , '-c', 'bash /bigdl/bigdl-lora-finetuing-entrypoint.sh']
env: env:
- name: WORKER_ROLE - name: WORKER_ROLE
value: "launcher" value: "launcher"
@ -46,10 +46,10 @@ spec:
volumeMounts: volumeMounts:
- name: nfs-storage - name: nfs-storage
subPath: {{ .Values.modelSubPath }} subPath: {{ .Values.modelSubPath }}
mountPath: /ppml/model mountPath: /bigdl/model
- name: nfs-storage - name: nfs-storage
subPath: {{ .Values.dataSubPath }} subPath: {{ .Values.dataSubPath }}
mountPath: "/ppml/data/{{ .Values.dataSubPath }}" mountPath: "/bigdl/data/{{ .Values.dataSubPath }}"
Worker: Worker:
replicas: {{ .Values.trainerNum }} replicas: {{ .Values.trainerNum }}
template: template:
@ -59,7 +59,7 @@ spec:
name: bigdl-ppml-finetuning-worker name: bigdl-ppml-finetuning-worker
securityContext: securityContext:
runAsUser: 1000 runAsUser: 1000
command: ['sh' , '-c', 'bash /ppml/bigdl-lora-finetuing-entrypoint.sh'] command: ['sh' , '-c', 'bash /bigdl/bigdl-lora-finetuing-entrypoint.sh']
env: env:
- name: WORKER_ROLE - name: WORKER_ROLE
value: "trainer" value: "trainer"
@ -78,10 +78,10 @@ spec:
volumeMounts: volumeMounts:
- name: nfs-storage - name: nfs-storage
subPath: {{ .Values.modelSubPath }} subPath: {{ .Values.modelSubPath }}
mountPath: /ppml/model mountPath: /bigdl/model
- name: nfs-storage - name: nfs-storage
subPath: {{ .Values.dataSubPath }} subPath: {{ .Values.dataSubPath }}
mountPath: "/ppml/data/{{ .Values.dataSubPath }}" mountPath: "/bigdl/data/{{ .Values.dataSubPath }}"
resources: resources:
requests: requests:
cpu: {{ .Values.cpuPerPod }} cpu: {{ .Values.cpuPerPod }}

View file

@ -4,6 +4,6 @@ microBatchSize: 8
nfsServerIp: your_nfs_server_ip nfsServerIp: your_nfs_server_ip
nfsPath: a_nfs_shared_folder_path_on_the_server nfsPath: a_nfs_shared_folder_path_on_the_server
dataSubPath: alpaca_data_cleaned_archive.json # a subpath of the data file under nfs directory dataSubPath: alpaca_data_cleaned_archive.json # a subpath of the data file under nfs directory
modelSubPath: llama-7b-hf # a subpath of the model file (dir) under nfs directory modelSubPath: Llama-2-7b-chat-hf # a subpath of the model file (dir) under nfs directory
ompNumThreads: 14 ompNumThreads: 14
cpuPerPod: 42 cpuPerPod: 42