diff --git a/.github/workflows/llm_unit_tests.yml b/.github/workflows/llm_unit_tests.yml index 77797414..1c6dab55 100644 --- a/.github/workflows/llm_unit_tests.yml +++ b/.github/workflows/llm_unit_tests.yml @@ -237,7 +237,7 @@ jobs: shell: bash run: | echo "DATASET_DIR=${ORIGIN_DIR}/../datasets" >> "$GITHUB_ENV" - echo "ABIRATE_ENGLISH_QUOTES_PATH=${ORIGIN_DIR}/../datasets/abirate_english_quotes" >> "$GITHUB_ENV" + echo "YAHMA_ALPACA_CLEANED_PATH=${ORIGIN_DIR}/../datasets/yahma_alpaca_cleaned" >> "$GITHUB_ENV" echo "SPEECH_DATASET_PATH=${ORIGIN_DIR}/../datasets/librispeech_asr_dummy" >> "$GITHUB_ENV" echo "LLAMA2_7B_ORIGIN_PATH=${ORIGIN_DIR}/Llama-2-7b-chat-hf" >> "$GITHUB_ENV" @@ -308,9 +308,9 @@ jobs: if [ ! -d $DATASET_DIR ]; then mkdir -p $DATASET_DIR fi - if [ ! -d $ABIRATE_ENGLISH_QUOTES_PATH ]; then - echo "Directory $ABIRATE_ENGLISH_QUOTES_PATH not found. Downloading from FTP server..." - wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/llm/datasets/abirate_english_quotes -P $DATASET_DIR + if [ ! -d $YAHMA_ALPACA_CLEANED_PATH ]; then + echo "Directory $YAHMA_ALPACA_CLEANED_PATH not found. Downloading from FTP server..." + wget -r -nH --no-verbose --cut-dirs=2 $LLM_FTP_URL/llm/datasets/yahma_alpaca_cleaned -P $DATASET_DIR fi if [ ! -d $SPEECH_DATASET_PATH ]; then echo "Directory $SPEECH_DATASET_PATH not found. Downloading from FTP server..." diff --git a/python/llm/example/GPU/LLM-Finetuning/QLoRA/simple-example/README.md b/python/llm/example/GPU/LLM-Finetuning/QLoRA/simple-example/README.md index 9f79d466..43aa8146 100644 --- a/python/llm/example/GPU/LLM-Finetuning/QLoRA/simple-example/README.md +++ b/python/llm/example/GPU/LLM-Finetuning/QLoRA/simple-example/README.md @@ -8,7 +8,7 @@ To run this example with BigDL-LLM on Intel GPUs, we have some recommended requi ## Example: Finetune llama2-7b using qlora -This example is ported from [bnb-4bit-training](https://colab.research.google.com/drive/1VoYNfYDKcKRQRor98Zbf2-9VQTtGJ24k?usp=sharing). 
The `export_merged_model.py` is ported from [alpaca-lora](https://github.com/tloen/alpaca-lora/blob/main/export_hf_checkpoint.py). +This example is based on [bnb-4bit-training](https://colab.research.google.com/drive/1VoYNfYDKcKRQRor98Zbf2-9VQTtGJ24k?usp=sharing) and utilizes a subset of [yahma/alpaca-cleaned](https://huggingface.co/datasets/yahma/alpaca-cleaned) for training. The `export_merged_model.py` is ported from [alpaca-lora](https://github.com/tloen/alpaca-lora/blob/main/export_hf_checkpoint.py). ### 1. Install @@ -36,19 +36,19 @@ python ./qlora_finetuning.py --repo-id-or-model-path REPO_ID_OR_MODEL_PATH #### Sample Output ```log -{'loss': 1.6134, 'learning_rate': 0.0002, 'epoch': 0.03} -{'loss': 1.3038, 'learning_rate': 0.00017777777777777779, 'epoch': 0.06} -{'loss': 1.2634, 'learning_rate': 0.00015555555555555556, 'epoch': 0.1} -{'loss': 1.2389, 'learning_rate': 0.00013333333333333334, 'epoch': 0.13} -{'loss': 1.0399, 'learning_rate': 0.00011111111111111112, 'epoch': 0.16} -{'loss': 1.0406, 'learning_rate': 8.888888888888889e-05, 'epoch': 0.19} -{'loss': 1.3114, 'learning_rate': 6.666666666666667e-05, 'epoch': 0.22} -{'loss': 0.9876, 'learning_rate': 4.4444444444444447e-05, 'epoch': 0.26} -{'loss': 1.1406, 'learning_rate': 2.2222222222222223e-05, 'epoch': 0.29} -{'loss': 1.1728, 'learning_rate': 0.0, 'epoch': 0.32} -{'train_runtime': 225.8005, 'train_samples_per_second': 3.543, 'train_steps_per_second': 0.886, 'train_loss': 1.211241865158081, 'epoch': 0.32} -100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [03:45<00:00, 1.13s/it] -TrainOutput(global_step=200, training_loss=1.211241865158081, metrics={'train_runtime': 225.8005, 'train_samples_per_second': 3.543, 'train_steps_per_second': 0.886, 'train_loss': 1.211241865158081, 'epoch': 0.32}) +{'loss': 1.7093, 'learning_rate': 2e-05, 'epoch': 0.02} +{'loss': 1.6595, 'learning_rate': 1.7777777777777777e-05, 'epoch': 0.03} +{'loss': 
1.5172, 'learning_rate': 1.555555555555556e-05, 'epoch': 0.05} +{'loss': 1.3666, 'learning_rate': 1.3333333333333333e-05, 'epoch': 0.06} +{'loss': 1.2738, 'learning_rate': 1.1111111111111113e-05, 'epoch': 0.08} +{'loss': 1.2199, 'learning_rate': 8.888888888888888e-06, 'epoch': 0.09} +{'loss': 1.1703, 'learning_rate': 6.666666666666667e-06, 'epoch': 0.11} +{'loss': 1.108, 'learning_rate': 4.444444444444444e-06, 'epoch': 0.12} +{'loss': 1.1199, 'learning_rate': 2.222222222222222e-06, 'epoch': 0.14} +{'loss': 1.0668, 'learning_rate': 0.0, 'epoch': 0.15} +{'train_runtime': 279.3049, 'train_samples_per_second': 2.864, 'train_steps_per_second': 0.716, 'train_loss': 1.321143569946289, 'epoch': 0.15} +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [04:39<00:00, 1.40s/it] +TrainOutput(global_step=200, training_loss=1.321143569946289, metrics={'train_runtime': 279.3049, 'train_samples_per_second': 2.864, 'train_steps_per_second': 0.716, 'train_loss': 1.321143569946289, 'epoch': 0.15}) ``` ### 4. 
Merge the adapter into the original model diff --git a/python/llm/example/GPU/LLM-Finetuning/QLoRA/simple-example/qlora_finetuning.py b/python/llm/example/GPU/LLM-Finetuning/QLoRA/simple-example/qlora_finetuning.py index 1a56e71c..2435e797 100644 --- a/python/llm/example/GPU/LLM-Finetuning/QLoRA/simple-example/qlora_finetuning.py +++ b/python/llm/example/GPU/LLM-Finetuning/QLoRA/simple-example/qlora_finetuning.py @@ -26,22 +26,38 @@ from bigdl.llm.transformers import AutoModelForCausalLM from datasets import load_dataset import argparse +current_dir = os.path.dirname(os.path.realpath(__file__)) +common_util_path = os.path.join(current_dir, '..', '..') +import sys +sys.path.append(common_util_path) +from common.utils import Prompter, get_train_val_data + if __name__ == "__main__": parser = argparse.ArgumentParser(description='Simple example of how to qlora finetune llama2 model using bigdl-llm') parser.add_argument('--repo-id-or-model-path', type=str, default="meta-llama/Llama-2-7b-hf", help='The huggingface repo id for the Llama2 (e.g. 
`meta-llama/Llama-2-7b-hf` and `meta-llama/Llama-2-13b-chat-hf`) to be downloaded' ', or the path to the huggingface checkpoint folder') - parser.add_argument('--dataset', type=str, default="Abirate/english_quotes") + parser.add_argument('--dataset', type=str, default="yahma/alpaca-cleaned") args = parser.parse_args() model_path = args.repo_id_or_model_path dataset_path = args.dataset tokenizer = LlamaTokenizer.from_pretrained(model_path, trust_remote_code=True) - data = load_dataset(dataset_path) - data = data.map(lambda samples: tokenizer(samples["quote"]), batched=True) + if dataset_path.endswith(".json") or dataset_path.endswith(".jsonl"): + data = load_dataset("json", data_files=dataset_path) + else: + data = load_dataset(dataset_path) + + # For illustration purpose, only use part of data to train + data = data["train"].train_test_split(train_size=0.1, shuffle=False) + # Data processing + prompter = Prompter("alpaca") + train_data, _ = get_train_val_data(data, tokenizer, prompter, train_on_inputs=True, + add_eos_token=False, cutoff_len=256, val_set_size=0, seed=42) + bnb_config = BitsAndBytesConfig( load_in_4bit=True, bnb_4bit_use_double_quant=False, @@ -76,7 +92,7 @@ if __name__ == "__main__": tokenizer.padding_side = "left" trainer = transformers.Trainer( model=model, - train_dataset=data["train"], + train_dataset=train_data, args=transformers.TrainingArguments( per_device_train_batch_size=4, gradient_accumulation_steps= 1, @@ -90,7 +106,9 @@ if __name__ == "__main__": optim="adamw_hf", # paged_adamw_8bit is not supported yet # gradient_checkpointing=True, # can further reduce memory but slower ), - data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False), + data_collator=transformers.DataCollatorForSeq2Seq( + tokenizer, pad_to_multiple_of=8, return_tensors="pt", padding=True + ), ) model.config.use_cache = False # silence the warnings. Please re-enable for inference! 
result = trainer.train() diff --git a/python/llm/example/GPU/LLM-Finetuning/QLoRA/trl-example/README.md b/python/llm/example/GPU/LLM-Finetuning/QLoRA/trl-example/README.md index d17ca368..353a8b10 100644 --- a/python/llm/example/GPU/LLM-Finetuning/QLoRA/trl-example/README.md +++ b/python/llm/example/GPU/LLM-Finetuning/QLoRA/trl-example/README.md @@ -8,7 +8,7 @@ To run this example with BigDL-LLM on Intel GPUs, we have some recommended requi ## Example: Finetune llama2-7b using qlora -The `export_merged_model.py` is ported from [alpaca-lora](https://github.com/tloen/alpaca-lora/blob/main/export_hf_checkpoint.py). +This example utilizes a subset of [yahma/alpaca-cleaned](https://huggingface.co/datasets/yahma/alpaca-cleaned) for training. And the `export_merged_model.py` is ported from [alpaca-lora](https://github.com/tloen/alpaca-lora/blob/main/export_hf_checkpoint.py). ### 1. Install @@ -36,14 +36,19 @@ python ./qlora_finetuning.py --repo-id-or-model-path REPO_ID_OR_MODEL_PATH #### Sample Output ```log -{'loss': 1.7386, 'learning_rate': 8.888888888888888e-06, 'epoch': 0.19} -{'loss': 1.9242, 'learning_rate': 6.666666666666667e-06, 'epoch': 0.22} -{'loss': 1.6819, 'learning_rate': 4.444444444444444e-06, 'epoch': 0.26} -{'loss': 1.755, 'learning_rate': 2.222222222222222e-06, 'epoch': 0.29} -{'loss': 1.7455, 'learning_rate': 0.0, 'epoch': 0.32} -{'train_runtime': 172.8523, 'train_samples_per_second': 4.628, 'train_steps_per_second': 1.157, 'train_loss': 1.9101631927490235, 'epoch': 0.32} -100%|████████████████████████████████████████████| 200/200 [02:52<00:00, 1.16it/s] -TrainOutput(global_step=200, training_loss=1.9101631927490235, metrics={'train_runtime': 172.8523, 'train_samples_per_second': 4.628, 'train_steps_per_second': 1.157, 'train_loss': 1.9101631927490235, 'epoch': 0.32}) +{'loss': 3.1898, 'learning_rate': 2e-05, 'epoch': 0.02} +{'loss': 3.1854, 'learning_rate': 1.7777777777777777e-05, 'epoch': 0.03} +{'loss': 3.0359, 'learning_rate': 1.555555555555556e-05, 
'epoch': 0.05} +{'loss': 2.9661, 'learning_rate': 1.3333333333333333e-05, 'epoch': 0.06} +{'loss': 2.7779, 'learning_rate': 1.1111111111111113e-05, 'epoch': 0.08} +{'loss': 2.7795, 'learning_rate': 8.888888888888888e-06, 'epoch': 0.09} +{'loss': 2.5149, 'learning_rate': 6.666666666666667e-06, 'epoch': 0.11} +{'loss': 2.5759, 'learning_rate': 4.444444444444444e-06, 'epoch': 0.12} +{'loss': 2.5976, 'learning_rate': 2.222222222222222e-06, 'epoch': 0.14} +{'loss': 2.5744, 'learning_rate': 0.0, 'epoch': 0.15} +{'train_runtime': 116.1914, 'train_samples_per_second': 6.885, 'train_steps_per_second': 1.721, 'train_loss': 2.819730052947998, 'epoch': 0.15} +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [01:56<00:00, 1.72it/s] +TrainOutput(global_step=200, training_loss=2.819730052947998, metrics={'train_runtime': 116.1914, 'train_samples_per_second': 6.885, 'train_steps_per_second': 1.721, 'train_loss': 2.819730052947998, 'epoch': 0.15}) ``` ### 4. 
Merge the adapter into the original model diff --git a/python/llm/example/GPU/LLM-Finetuning/QLoRA/trl-example/qlora_finetuning.py b/python/llm/example/GPU/LLM-Finetuning/QLoRA/trl-example/qlora_finetuning.py index eb34db48..db1f0656 100644 --- a/python/llm/example/GPU/LLM-Finetuning/QLoRA/trl-example/qlora_finetuning.py +++ b/python/llm/example/GPU/LLM-Finetuning/QLoRA/trl-example/qlora_finetuning.py @@ -27,20 +27,37 @@ from datasets import load_dataset from trl import SFTTrainer import argparse +current_dir = os.path.dirname(os.path.realpath(__file__)) +common_util_path = os.path.join(current_dir, '..', '..') +import sys +sys.path.append(common_util_path) +from common.utils import Prompter, get_train_val_data + if __name__ == "__main__": parser = argparse.ArgumentParser(description='Simple example of how to qlora finetune llama2 model using bigdl-llm and TRL') parser.add_argument('--repo-id-or-model-path', type=str, default="meta-llama/Llama-2-7b-hf", help='The huggingface repo id for the Llama2 (e.g. 
`meta-llama/Llama-2-7b-hf` and `meta-llama/Llama-2-13b-chat-hf`) to be downloaded' ', or the path to the huggingface checkpoint folder') - parser.add_argument('--dataset', type=str, default="Abirate/english_quotes") + parser.add_argument('--dataset', type=str, default="yahma/alpaca-cleaned") args = parser.parse_args() model_path = args.repo_id_or_model_path dataset_path = args.dataset tokenizer = LlamaTokenizer.from_pretrained(model_path, trust_remote_code=True) - data = load_dataset(dataset_path, split="train") + if dataset_path.endswith(".json") or dataset_path.endswith(".jsonl"): + data = load_dataset("json", data_files=dataset_path) + else: + data = load_dataset(dataset_path) + + # For illustration purpose, only use part of data to train + data = data["train"].train_test_split(train_size=0.1, shuffle=False) + + # Data processing + prompter = Prompter("alpaca") + train_data, _ = get_train_val_data(data, tokenizer, prompter, train_on_inputs=True, + add_eos_token=False, cutoff_len=256, val_set_size=0, seed=42) bnb_config = BitsAndBytesConfig( load_in_4bit=True, @@ -73,7 +90,7 @@ if __name__ == "__main__": trainer = SFTTrainer( model=model, - train_dataset=data, + train_dataset=train_data, args=transformers.TrainingArguments( per_device_train_batch_size=4, gradient_accumulation_steps= 1, @@ -87,7 +104,7 @@ if __name__ == "__main__": optim="adamw_hf", # paged_adamw_8bit is not supported yet gradient_checkpointing=True, # can further reduce memory but slower ), - dataset_text_field="quote", + dataset_text_field="instruction", ) model.config.use_cache = False # silence the warnings. Please re-enable for inference! 
result = trainer.train() diff --git a/python/llm/test/run-llm-example-tests-gpu.sh b/python/llm/test/run-llm-example-tests-gpu.sh index 40997ed1..be03a486 100644 --- a/python/llm/test/run-llm-example-tests-gpu.sh +++ b/python/llm/test/run-llm-example-tests-gpu.sh @@ -12,7 +12,7 @@ sed -i 's/max_steps=200/max_steps=2/; s/save_steps=100/save_steps=2/; s/logging_ python ${ANALYTICS_ZOO_ROOT}/python/llm/example/GPU/LLM-Finetuning/QLoRA/simple-example/qlora_finetuning.py \ --repo-id-or-model-path ${LLAMA2_7B_ORIGIN_PATH} \ ---dataset ${ABIRATE_ENGLISH_QUOTES_PATH} +--dataset ${YAHMA_ALPACA_CLEANED_PATH} python ${ANALYTICS_ZOO_ROOT}/python/llm/example/GPU/LLM-Finetuning/QLoRA/simple-example/export_merged_model.py \ --repo-id-or-model-path ${LLAMA2_7B_ORIGIN_PATH} \