LLM: update Alpaca QLoRA finetuning example on GPU (#9492)

This commit is contained in:
parent 566ec85113
commit b7ae572ac3

8 changed files with 139 additions and 30 deletions

@@ -40,6 +40,12 @@ bash finetune_llama2_7b_arc_1_card.sh
bash finetune_llama2_7b_arc_2_card.sh
```

#### Finetuning LLaMA2-7B on a single Intel Data Center GPU Flex 170

```bash
bash finetune_llama2_7b_flex_170_1_card.sh
```

#### Finetuning LLaMA2-7B on three Intel Data Center GPU Flex 170 cards

```bash

@@ -58,6 +64,18 @@ bash finetune_llama2_7b_pvc_1100_1_card.sh
bash finetune_llama2_7b_pvc_1100_4_card.sh
```

#### Finetuning LLaMA2-7B on a single Intel Data Center GPU Max 1550

```bash
bash finetune_llama2_7b_pvc_1550_1_card.sh
```

#### Finetuning LLaMA2-7B on four Intel Data Center GPU Max 1550 cards

```bash
bash finetune_llama2_7b_pvc_1550_4_card.sh
```

### 4. Sample Output
```log
{'loss': 1.9231, 'learning_rate': 2.9999945367033285e-05, 'epoch': 0.0}

@@ -53,6 +53,21 @@ from bigdl.llm.transformers import AutoModelForCausalLM
# import them from bigdl.llm.transformers.qlora to get a BigDL-LLM compatible Peft model
from bigdl.llm.transformers.qlora import get_peft_model, prepare_model_for_kbit_training

def get_int_from_env(env_keys, default):
    """Returns the first positive env value found in the `env_keys` list or the default."""
    for e in env_keys:
        val = int(os.environ.get(e, -1))
        if val >= 0:
            return val
    return default

local_rank = get_int_from_env(["LOCAL_RANK","MPI_LOCALRANKID"], "0")
world_size = get_int_from_env(["WORLD_SIZE","PMI_SIZE"], "1")
port = get_int_from_env(["MASTER_PORT"], 29500)
os.environ["LOCAL_RANK"] = str(local_rank)
os.environ["WORLD_SIZE"] = str(world_size)
os.environ["RANK"] = str(local_rank)
os.environ["MASTER_PORT"] = str(port)

def train(
    # model/data params
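
The comment above notes that `get_peft_model` and `prepare_model_for_kbit_training` are imported from `bigdl.llm.transformers.qlora` rather than from `peft`. A minimal sketch of how these two helpers are typically wired together on an Intel GPU is shown below; the model id, dtype, and LoRA hyper-parameters are illustrative assumptions, not values taken from this commit.

```python
# Minimal sketch (illustrative values, not from this commit): load a 4-bit (NF4)
# base model with BigDL-LLM and wrap it with a LoRA adapter for QLoRA finetuning.
import torch
import intel_extension_for_pytorch as ipex  # noqa: F401  (registers the "xpu" device)
from peft import LoraConfig
from bigdl.llm.transformers import AutoModelForCausalLM
from bigdl.llm.transformers.qlora import get_peft_model, prepare_model_for_kbit_training

model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",   # assumed model id, as in the example scripts
    load_in_low_bit="nf4",        # QLoRA-style 4-bit quantized weights
    optimize_model=False,
    torch_dtype=torch.float16,
)
model = model.to("xpu")

model = prepare_model_for_kbit_training(model)  # mirrors the peft helper: prepare the quantized model for training
lora_config = LoraConfig(
    r=8,                          # placeholder hyper-parameters
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["q_proj", "k_proj", "v_proj"],
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)      # BigDL-LLM compatible Peft model
```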

@@ -14,15 +14,13 @@
# limitations under the License.
#

export CCL_ZE_IPC_EXCHANGE=sockets
export MASTER_ADDR=127.0.0.1
export OMP_NUM_THREADS=6 # adjust this to 1/4 of total physical cores
export FI_PROVIDER=tcp
export CCL_ATL_TRANSPORT=ofi

torchrun --standalone \
         --nnodes=1 \
         --nproc-per-node 2 \
         ./alpaca_qlora_finetuning.py \
         --base_model "meta-llama/Llama-2-7b-hf" \
         --data_path "yahma/alpaca-cleaned" \
         --output_dir "./bigdl-qlora-alpaca"
mpirun -n 2 \
       python -u ./alpaca_qlora_finetuning.py \
       --base_model "meta-llama/Llama-2-7b-hf" \
       --data_path "yahma/alpaca-cleaned" \
       --output_dir "./bigdl-qlora-alpaca" > training.log
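
Each of these launch scripts hard-codes `OMP_NUM_THREADS` with a reminder to set it to a quarter of the total physical cores. As a convenience sketch (not part of this commit), the value could instead be derived automatically:

```bash
# Illustrative helper, not from this commit: count physical cores
# (unique core/socket pairs among online CPUs) and take a quarter of them.
export OMP_NUM_THREADS=$(( $(lscpu -b -p=Core,Socket | grep -v '^#' | sort -u | wc -l) / 4 ))
```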

@@ -0,0 +1,23 @@
#
# Copyright 2016 The BigDL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# You could also set `--base_model` to the local path of the Hugging Face model checkpoint folder and `--data_path` to the local path of the dataset JSON file
python ./alpaca_qlora_finetuning.py \
    --micro_batch_size 2 \
    --batch_size 128 \
    --base_model "meta-llama/Llama-2-7b-hf" \
    --data_path "yahma/alpaca-cleaned" \
    --output_dir "./bigdl-qlora-alpaca"
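
As the comment in this new script notes, `--base_model` and `--data_path` can also point to local copies of the checkpoint and dataset. A hedged example invocation with placeholder local paths (the paths are illustrative, not from this commit):

```bash
# Illustrative only: substitute your own checkpoint folder and dataset JSON file.
python ./alpaca_qlora_finetuning.py \
    --micro_batch_size 2 \
    --batch_size 128 \
    --base_model "/path/to/Llama-2-7b-hf" \
    --data_path "/path/to/alpaca_data_cleaned.json" \
    --output_dir "./bigdl-qlora-alpaca"
```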

@@ -14,17 +14,16 @@
# limitations under the License.
#

export CCL_ZE_IPC_EXCHANGE=sockets
export MASTER_ADDR=127.0.0.1
export OMP_NUM_THREADS=16 # adjust this to 1/4 of total physical cores
export OMP_NUM_THREADS=12 # adjust this to 1/4 of total physical cores
export FI_PROVIDER=tcp
export CCL_ATL_TRANSPORT=ofi

torchrun --standalone \
         --nnodes=1 \
         --nproc-per-node 3 \
         ./alpaca_qlora_finetuning.py \
         --base_model "meta-llama/Llama-2-7b-hf" \
         --data_path "yahma/alpaca-cleaned" \
         --output_dir "./bigdl-qlora-alpaca" \
         --gradient_checkpointing False \
         --micro_batch_size 2 \
         --batch_size 128
mpirun -n 3 \
       python -u ./alpaca_qlora_finetuning.py \
       --base_model "meta-llama/Llama-2-7b-hf" \
       --data_path "yahma/alpaca-cleaned" \
       --output_dir "./bigdl-qlora-alpaca" \
       --gradient_checkpointing False \
       --micro_batch_size 2 \
       --batch_size 128 > training.log

@@ -14,15 +14,15 @@
# limitations under the License.
#

export CCL_ZE_IPC_EXCHANGE=sockets
export MASTER_ADDR=127.0.0.1
export OMP_NUM_THREADS=28 # adjust this to 1/4 of total physical cores
torchrun --standalone \
         --nnodes=1 \
         --nproc-per-node 4 \
         ./alpaca_qlora_finetuning.py \
         --base_model "meta-llama/Llama-2-7b-hf" \
         --data_path "yahma/alpaca-cleaned" \
         --output_dir "./bigdl-qlora-alpaca" \
         --micro_batch_size 8 \
         --batch_size 128
export FI_PROVIDER=tcp
export CCL_ATL_TRANSPORT=ofi

mpirun -n 4 \
       python -u ./alpaca_qlora_finetuning.py \
       --base_model "meta-llama/Llama-2-7b-hf" \
       --data_path "yahma/alpaca-cleaned" \
       --output_dir "./bigdl-qlora-alpaca" \
       --micro_batch_size 8 \
       --batch_size 128 > training.log
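
If the finetuning script follows the upstream alpaca-lora convention of deriving gradient accumulation from these flags (an assumption; the derivation itself is not shown in this diff), the four-process run above would accumulate as follows:

```python
# Hypothetical sanity check, assuming alpaca-lora style accumulation:
# gradient_accumulation_steps = batch_size // micro_batch_size // world_size
batch_size = 128        # --batch_size (global effective batch)
micro_batch_size = 8    # --micro_batch_size (per forward/backward pass)
world_size = 4          # mpirun -n 4
gradient_accumulation_steps = batch_size // micro_batch_size // world_size
print(gradient_accumulation_steps)  # -> 4
```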

@@ -0,0 +1,28 @@
#
# Copyright 2016 The BigDL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

export MASTER_ADDR=127.0.0.1
export OMP_NUM_THREADS=28 # adjust this to 1/4 of total physical cores
export FI_PROVIDER=tcp
export CCL_ATL_TRANSPORT=ofi

mpirun -n 2 \
       python -u ./alpaca_qlora_finetuning.py \
       --base_model "meta-llama/Llama-2-7b-hf" \
       --data_path "yahma/alpaca-cleaned" \
       --output_dir "./bigdl-qlora-alpaca" \
       --micro_batch_size 8 \
       --batch_size 128 > training.log

@@ -0,0 +1,28 @@
#
# Copyright 2016 The BigDL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

export MASTER_ADDR=127.0.0.1
export OMP_NUM_THREADS=28 # adjust this to 1/4 of total physical cores
export FI_PROVIDER=tcp
export CCL_ATL_TRANSPORT=ofi

mpirun -n 8 \
       python -u ./alpaca_qlora_finetuning.py \
       --base_model "meta-llama/Llama-2-7b-hf" \
       --data_path "yahma/alpaca-cleaned" \
       --output_dir "./bigdl-qlora-alpaca" \
       --micro_batch_size 8 \
       --batch_size 128 > training.log