changed '/ppml' into '/bigdl' and modified llama-7b (#9209)
parent b9194c5786, commit 2f14f53b1c
5 changed files with 25 additions and 25 deletions
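The path change is mechanical: every occurrence of the `/ppml` prefix becomes `/bigdl` (the llama-7b default is updated separately in values.yaml). A tree-wide sweep along these lines would reproduce the path portion of the change; this is a sketch, not the command recorded in the commit:

```
# Illustrative only: one way to apply a tree-wide path rename like this one.
# The exact command used by the author is not recorded in the diff.
grep -rl --exclude-dir=.git '/ppml' . | xargs sed -i 's|/ppml|/bigdl|g'
```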
@@ -12,13 +12,13 @@ FROM mpioperator/intel as builder
 ARG http_proxy
 ARG https_proxy
 ENV PIP_NO_CACHE_DIR=false
-ADD ./requirements.txt /ppml/requirements.txt
+ADD ./requirements.txt /bigdl/requirements.txt
 
 # add public key
 COPY --from=key-getter /root/intel-oneapi-archive-keyring.gpg /usr/share/keyrings/intel-oneapi-archive-keyring.gpg
 RUN echo "deb [signed-by=/usr/share/keyrings/intel-oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main " > /etc/apt/sources.list.d/oneAPI.list
 
-RUN mkdir /ppml/data && mkdir /ppml/model && \
+RUN mkdir /bigdl/data && mkdir /bigdl/model && \
 # install pytorch 2.0.1
 apt-get update && \
 apt-get install -y python3-pip python3.9-dev python3-wheel git software-properties-common && \
@@ -29,12 +29,12 @@ RUN mkdir /ppml/data && mkdir /ppml/model && \
 pip install intel_extension_for_pytorch==2.0.100 && \
 pip install oneccl_bind_pt -f https://developer.intel.com/ipex-whl-stable && \
 # install transformers etc.
-cd /ppml && \
+cd /bigdl && \
 git clone https://github.com/huggingface/transformers.git && \
 cd transformers && \
 git reset --hard 057e1d74733f52817dc05b673a340b4e3ebea08c && \
 pip install . && \
-pip install -r /ppml/requirements.txt && \
+pip install -r /bigdl/requirements.txt && \
 # install python
 add-apt-repository ppa:deadsnakes/ppa -y && \
 apt-get install -y python3.9 && \
@@ -56,9 +56,9 @@ RUN mkdir /ppml/data && mkdir /ppml/model && \
 echo " UserKnownHostsFile /dev/null" >> /etc/ssh/ssh_config && \
 sed -i 's/#\(StrictModes \).*/\1no/g' /etc/ssh/sshd_config
 
-ADD ./bigdl-lora-finetuing-entrypoint.sh /ppml/bigdl-lora-finetuing-entrypoint.sh
-ADD ./lora_finetune.py /ppml/lora_finetune.py
+ADD ./bigdl-lora-finetuing-entrypoint.sh /bigdl/bigdl-lora-finetuing-entrypoint.sh
+ADD ./lora_finetune.py /bigdl/lora_finetune.py
 
-RUN chown -R mpiuser /ppml
+RUN chown -R mpiuser /bigdl
 USER mpiuser
 ENTRYPOINT ["/bin/bash"]
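For anyone rebuilding the image from the Dockerfile above, a build command like the following sketch should suffice. The tag mirrors the image name used in the README hunk below; the proxy build args are assumptions for proxied environments and can be dropped otherwise:

```
# Sketch: the tag is taken from the image referenced later in this diff;
# the proxy build args are optional assumptions.
docker build \
  --build-arg http_proxy=$HTTP_PROXY \
  --build-arg https_proxy=$HTTPS_PROXY \
  -t intelanalytics/bigdl-llm-finetune-cpu:2.4.0-SNAPSHOT .
```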
@@ -31,8 +31,8 @@ docker run -itd \
 --cpuset-cpus="your_expected_range_of_cpu_numbers" \
 -e STANDALONE_DOCKER=TRUE \
 -e WORKER_COUNT_DOCKER=your_worker_count \
--v your_downloaded_base_model_path:/ppml/model \
--v your_downloaded_data_path:/ppml/data/alpaca_data_cleaned_archive.json \
+-v your_downloaded_base_model_path:/bigdl/model \
+-v your_downloaded_data_path:/bigdl/data/alpaca_data_cleaned_archive.json \
 intelanalytics/bigdl-llm-finetune-cpu:2.4.0-SNAPSHOT \
 bash
 ```
@@ -50,15 +50,15 @@ docker exec -it bigdl-llm-fintune-lora-cpu bash
 Then, run the script to start finetuning:
 
 ```
-bash /ppml/bigdl-lora-finetuing-entrypoint.sh
+bash /bigdl/bigdl-lora-finetuing-entrypoint.sh
 ```
 
 After minutes, it is expected to get results like:
 
 ```
 Training Alpaca-LoRA model with params:
-base_model: /ppml/model/
-data_path: /ppml/data/alpaca_data_cleaned_archive.json
+base_model: /bigdl/model/
+data_path: /bigdl/data/alpaca_data_cleaned_archive.json
 output_dir: /home/mpiuser/finetuned_model
 batch_size: 128
 micro_batch_size: 8
@@ -15,9 +15,9 @@ then
 -genv KMP_AFFINITY="granularity=fine,none" \
 -genv KMP_BLOCKTIME=1 \
 -genv TF_ENABLE_ONEDNN_OPTS=1 \
-python /ppml/lora_finetune.py \
---base_model '/ppml/model/' \
---data_path "/ppml/data/alpaca_data_cleaned_archive.json" \
+python /bigdl/lora_finetune.py \
+--base_model '/bigdl/model/' \
+--data_path "/bigdl/data/alpaca_data_cleaned_archive.json" \
 --output_dir "/home/mpiuser/finetuned_model" \
 --micro_batch_size 8 \
 --bf16
@@ -29,7 +29,7 @@ else
 if [ "$WORKER_ROLE" = "launcher" ]
 then
 sed "s/:1/ /g" /etc/mpi/hostfile > /home/mpiuser/hostfile
-export DATA_PATH="/ppml/data/$DATA_SUB_PATH"
+export DATA_PATH="/bigdl/data/$DATA_SUB_PATH"
 sleep 10
 mpirun \
 -n $WORLD_SIZE \
@@ -40,8 +40,8 @@ else
 -genv KMP_AFFINITY="granularity=fine,none" \
 -genv KMP_BLOCKTIME=1 \
 -genv TF_ENABLE_ONEDNN_OPTS=1 \
-python /ppml/lora_finetune.py \
---base_model '/ppml/model/' \
+python /bigdl/lora_finetune.py \
+--base_model '/bigdl/model/' \
 --data_path "$DATA_PATH" \
 --output_dir "/home/mpiuser/finetuned_model" \
 --micro_batch_size $MICRO_BATCH_SIZE \
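For reference, the launcher branch above only depends on variables that appear in this diff; a minimal invocation could look like the sketch below, where the values are placeholders rather than settings taken from the commit:

```
# Placeholders throughout; only the variable names come from the script above.
export WORKER_ROLE=launcher                            # selects the mpirun branch
export WORLD_SIZE=2                                    # MPI ranks passed to mpirun -n
export MICRO_BATCH_SIZE=8                              # forwarded to --micro_batch_size
export DATA_SUB_PATH=alpaca_data_cleaned_archive.json  # resolved under /bigdl/data/
bash /bigdl/bigdl-lora-finetuing-entrypoint.sh
```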
@@ -23,7 +23,7 @@ spec:
 name: bigdl-ppml-finetuning-launcher
 securityContext:
   runAsUser: 1000
-command: ['sh' , '-c', 'bash /ppml/bigdl-lora-finetuing-entrypoint.sh']
+command: ['sh' , '-c', 'bash /bigdl/bigdl-lora-finetuing-entrypoint.sh']
 env:
 - name: WORKER_ROLE
   value: "launcher"
@@ -46,10 +46,10 @@ spec:
 volumeMounts:
 - name: nfs-storage
   subPath: {{ .Values.modelSubPath }}
-  mountPath: /ppml/model
+  mountPath: /bigdl/model
 - name: nfs-storage
   subPath: {{ .Values.dataSubPath }}
-  mountPath: "/ppml/data/{{ .Values.dataSubPath }}"
+  mountPath: "/bigdl/data/{{ .Values.dataSubPath }}"
 Worker:
   replicas: {{ .Values.trainerNum }}
   template:
@@ -59,7 +59,7 @@ spec:
 name: bigdl-ppml-finetuning-worker
 securityContext:
   runAsUser: 1000
-command: ['sh' , '-c', 'bash /ppml/bigdl-lora-finetuing-entrypoint.sh']
+command: ['sh' , '-c', 'bash /bigdl/bigdl-lora-finetuing-entrypoint.sh']
 env:
 - name: WORKER_ROLE
   value: "trainer"
@@ -78,10 +78,10 @@ spec:
 volumeMounts:
 - name: nfs-storage
   subPath: {{ .Values.modelSubPath }}
-  mountPath: /ppml/model
+  mountPath: /bigdl/model
 - name: nfs-storage
   subPath: {{ .Values.dataSubPath }}
-  mountPath: "/ppml/data/{{ .Values.dataSubPath }}"
+  mountPath: "/bigdl/data/{{ .Values.dataSubPath }}"
 resources:
   requests:
     cpu: {{ .Values.cpuPerPod }}
@@ -4,6 +4,6 @@ microBatchSize: 8
 nfsServerIp: your_nfs_server_ip
 nfsPath: a_nfs_shared_folder_path_on_the_server
 dataSubPath: alpaca_data_cleaned_archive.json # a subpath of the data file under nfs directory
-modelSubPath: llama-7b-hf # a subpath of the model file (dir) under nfs directory
+modelSubPath: Llama-2-7b-chat-hf # a subpath of the model file (dir) under nfs directory
 ompNumThreads: 14
 cpuPerPod: 42
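With those values filled in, the chart is typically deployed with Helm from the chart directory; a minimal sketch follows, in which the release name and chart path are assumptions rather than anything recorded in this commit:

```
# Assumed release name and chart path; adjust to the actual chart location.
helm install bigdl-lora-finetuning . -f values.yaml
kubectl get pods   # the launcher and trainer pods should appear shortly
```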