LLM: add more example usages about alpaca qlora on different hardware (#9458)

2023-11-17 09:56:43 +08:00 · 2023-11-17 09:56:43 +08:00 · 3dac21ac7b
commit 3dac21ac7b
parent 921b263d6a
6 changed files with 161 additions and 9 deletions
--- a/python/llm/example/GPU/QLoRA-FineTuning/alpaca-qlora/README.md
+++ b/python/llm/example/GPU/QLoRA-FineTuning/alpaca-qlora/README.md
@@ -15,6 +15,7 @@ conda activate llm
 pip install --pre --upgrade bigdl-llm[xpu] -f https://developer.intel.com/ipex-whl-stable-xpu
 pip install transformers==4.34.0
 pip install fire datasets peft==0.5.0
+pip install oneccl_bind_pt==2.0.100 -f https://developer.intel.com/ipex-whl-stable-xpu # necessary to run distributed finetuning
 pip install accelerate==0.23.0
 ```

@@ -23,20 +24,41 @@ pip install accelerate==0.23.0
 source /opt/intel/oneapi/setvars.sh
 ```

-### 3. Finetuning LLaMA-2-7B on a single Arc:
+### 3. Finetune

-Example usage:
+Here, we provide example usages on different hardware. Please refer to the appropriate script based on your device:

-```
-python ./alpaca_qlora_finetuning.py \
-    --base_model "meta-llama/Llama-2-7b-hf" \
-    --data_path "yahma/alpaca-cleaned" \
-    --output_dir "./bigdl-qlora-alpaca"
+#### Finetuning LLaMA2-7B on a single Arc A770
+
+```bash
+bash finetune_llama2_7b_arc_1_card.sh
 ```

-**Note**: You could also specify `--base_model` to the local path of the huggingface model checkpoint folder and `--data_path` to the local path of the dataset JSON file.
+#### Finetuning LLaMA2-7B on two Arc A770

-#### Sample Output
+```bash
+bash finetune_llama2_7b_arc_2_card.sh
+```
+
+#### Finetuning LLaMA2-7B on three Intel Data Center GPU Flex 170
+
+```bash
+bash finetune_llama2_7b_flex_170_3_card.sh
+```
+
+#### Finetuning LLaMA2-7B on a single Intel Data Center GPU Max 1100
+
+```bash
+bash finetune_llama2_7b_pvc_1100_1_card.sh
+```
+
+#### Finetuning LLaMA2-7B on four Intel Data Center GPU Max 1100
+
+```bash
+bash finetune_llama2_7b_pvc_1100_4_card.sh
+```
+
+### 4. Sample Output
 ```log
 {'loss': 1.9231, 'learning_rate': 2.9999945367033285e-05, 'epoch': 0.0}                                                                                                                            
 {'loss': 1.8622, 'learning_rate': 2.9999781468531096e-05, 'epoch': 0.01}                                                                                                                           
--- a/python/llm/example/GPU/QLoRA-FineTuning/alpaca-qlora/finetune_llama2_7b_arc_1_card.sh
+++ b/python/llm/example/GPU/QLoRA-FineTuning/alpaca-qlora/finetune_llama2_7b_arc_1_card.sh
@@ -0,0 +1,21 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# You can also set `--base_model` to the local path of the Hugging Face model checkpoint folder and `--data_path` to the local path of the dataset JSON file.
+python ./alpaca_qlora_finetuning.py \
+    --base_model "meta-llama/Llama-2-7b-hf" \
+    --data_path "yahma/alpaca-cleaned" \
+    --output_dir "./bigdl-qlora-alpaca"
--- a/python/llm/example/GPU/QLoRA-FineTuning/alpaca-qlora/finetune_llama2_7b_arc_2_card.sh
+++ b/python/llm/example/GPU/QLoRA-FineTuning/alpaca-qlora/finetune_llama2_7b_arc_2_card.sh
@@ -0,0 +1,28 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+export CCL_ZE_IPC_EXCHANGE=sockets
+export MASTER_ADDR=127.0.0.1
+export OMP_NUM_THREADS=6 # adjust this to 1/4 of total physical cores
+export FI_PROVIDER=tcp
+
+torchrun --standalone \
+         --nnodes=1 \
+         --nproc-per-node 2 \
+         ./alpaca_qlora_finetuning.py \
+         --base_model "meta-llama/Llama-2-7b-hf" \
+         --data_path "yahma/alpaca-cleaned" \
+         --output_dir "./bigdl-qlora-alpaca"
--- a/python/llm/example/GPU/QLoRA-FineTuning/alpaca-qlora/finetune_llama2_7b_flex_170_3_card.sh
+++ b/python/llm/example/GPU/QLoRA-FineTuning/alpaca-qlora/finetune_llama2_7b_flex_170_3_card.sh
@@ -0,0 +1,30 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+export CCL_ZE_IPC_EXCHANGE=sockets
+export MASTER_ADDR=127.0.0.1
+export OMP_NUM_THREADS=16 # adjust this to 1/4 of total physical cores
+
+torchrun --standalone \
+         --nnodes=1 \
+         --nproc-per-node 3 \
+         ./alpaca_qlora_finetuning.py \
+         --base_model "meta-llama/Llama-2-7b-hf" \
+         --data_path "yahma/alpaca-cleaned" \
+         --output_dir "./bigdl-qlora-alpaca" \
+         --gradient_checkpointing False \
+         --micro_batch_size 2 \
+         --batch_size 128
--- a/python/llm/example/GPU/QLoRA-FineTuning/alpaca-qlora/finetune_llama2_7b_pvc_1100_1_card.sh
+++ b/python/llm/example/GPU/QLoRA-FineTuning/alpaca-qlora/finetune_llama2_7b_pvc_1100_1_card.sh
@@ -0,0 +1,23 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# You can also set `--base_model` to the local path of the Hugging Face model checkpoint folder and `--data_path` to the local path of the dataset JSON file.
+python ./alpaca_qlora_finetuning.py \
+    --micro_batch_size 8 \
+    --batch_size 128 \
+    --base_model "meta-llama/Llama-2-7b-hf" \
+    --data_path "yahma/alpaca-cleaned" \
+    --output_dir "./bigdl-qlora-alpaca"
--- a/python/llm/example/GPU/QLoRA-FineTuning/alpaca-qlora/finetune_llama2_7b_pvc_1100_4_card.sh
+++ b/python/llm/example/GPU/QLoRA-FineTuning/alpaca-qlora/finetune_llama2_7b_pvc_1100_4_card.sh
@@ -0,0 +1,28 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+export CCL_ZE_IPC_EXCHANGE=sockets
+export MASTER_ADDR=127.0.0.1
+export OMP_NUM_THREADS=28 # adjust this to 1/4 of total physical cores
+torchrun --standalone \
+         --nnodes=1 \
+         --nproc-per-node 4 \
+         ./alpaca_qlora_finetuning.py \
+         --base_model "meta-llama/Llama-2-7b-hf" \
+         --data_path "yahma/alpaca-cleaned" \
+         --output_dir "./bigdl-qlora-alpaca" \
+         --micro_batch_size 8 \
+         --batch_size 128