diff --git a/python/llm/example/CPU/QLoRA-FineTuning/alpaca-qlora/README.md b/python/llm/example/CPU/QLoRA-FineTuning/alpaca-qlora/README.md
index b920d66d..42e5a959 100644
--- a/python/llm/example/CPU/QLoRA-FineTuning/alpaca-qlora/README.md
+++ b/python/llm/example/CPU/QLoRA-FineTuning/alpaca-qlora/README.md
@@ -96,4 +96,34 @@
 python ./quotes_qlora_finetuning_cpu.py \
     --data_path "./english_quotes" \
     --output_dir "./bigdl-qlora-alpaca" \
     --prompt_template_name "english_quotes"
+```
+
+
+### Guide to QLoRA finetuning with different models
+Make sure you fully understand the whole finetuning process and that you are using the latest version of the model.
+Using [Baichuan-7B](https://huggingface.co/baichuan-inc/Baichuan-7B/tree/main) as an example:
+1. Update the tokenizer first, because the base example is written for the LLaMA model:
+```python
+# use AutoTokenizer instead of the LlamaTokenizer used by the base example
+from transformers import AutoTokenizer
+tokenizer = AutoTokenizer.from_pretrained(base_model)
+```
+2. Some models also require `trust_remote_code=True` when loading the model and tokenizer with `from_pretrained`:
+```python
+tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(base_model, xxxxx, trust_remote_code=True)
+```
+3. Modify `target_modules` according to the model you want to train; you can refer to [this answer](https://stackoverflow.com/questions/76768226/target-modules-for-applying-peft-lora-on-different-models/76779946#76779946), or simply search for the recommended target modules of your model. For Baichuan-7B:
+```python
+lora_target_modules: List[str] = ["W_pack"]
+```
+4. Some models may also need the padding token to be set, e.g. `tokenizer.pad_token_id = tokenizer.eod_id` for Qwen; see the sketch below.
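+A minimal sketch of that padding fix (assuming a Qwen-style tokenizer that exposes `eod_id`; other models may use `eos_token_id` instead):
+```python
+from transformers import AutoTokenizer
+
+tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
+# Qwen's tokenizer has no pad token by default, so reuse the end-of-document id for padding
+if tokenizer.pad_token_id is None:
+    tokenizer.pad_token_id = tokenizer.eod_id
+```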
+5. (Baichuan only) According to this [issue](https://github.com/baichuan-inc/Baichuan2/issues/204#issuecomment-1774372008),
+you need to modify [tokenization_baichuan.py](https://huggingface.co/baichuan-inc/Baichuan-7B/blob/main/tokenization_baichuan.py#L74) to fix the tokenizer issue described there.
+6. Finetune as normal.
+7. Use [export_merged_model.py](https://github.com/intel-analytics/BigDL/blob/main/python/llm/example/GPU/QLoRA-FineTuning/export_merged_model.py) to merge the adapter into the base model. The tokenizer and model loading there also need to be updated for the weights to merge successfully:
+```python
+from transformers import AutoTokenizer  # noqa: F402
+tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
+base_model = AutoModelForCausalLM.from_pretrained(base_model, trust_remote_code=True)
+```
\ No newline at end of file
diff --git a/python/llm/example/CPU/QLoRA-FineTuning/alpaca-qlora/alpaca_qlora_finetuning_cpu.py b/python/llm/example/CPU/QLoRA-FineTuning/alpaca-qlora/alpaca_qlora_finetuning_cpu.py
index 7cfedaf5..dc96c166 100644
--- a/python/llm/example/CPU/QLoRA-FineTuning/alpaca-qlora/alpaca_qlora_finetuning_cpu.py
+++ b/python/llm/example/CPU/QLoRA-FineTuning/alpaca-qlora/alpaca_qlora_finetuning_cpu.py
@@ -51,6 +51,7 @@ from bigdl.llm.transformers import AutoModelForCausalLM
 # import them from bigdl.llm.transformers.qlora to get a BigDL-LLM compatible Peft model
 from bigdl.llm.transformers.qlora import get_peft_model, prepare_model_for_kbit_training
+from bigdl.llm.utils.isa_checker import ISAChecker
 
 
 def get_int_from_env(env_keys, default):
     """Returns the first positive env value found in the `env_keys` list or the default."""
@@ -293,6 +294,9 @@ def train(
     else:
         train_data = data["train"].shuffle().map(generate_and_tokenize_prompt)
         val_data = None
+
+    isa_checker = ISAChecker()
+    bf16_flag = isa_checker.check_avx512()
     args = transformers.TrainingArguments(
         per_device_train_batch_size=micro_batch_size,
         gradient_accumulation_steps=gradient_accumulation_steps,
@@ -303,7 +307,7 @@
         num_train_epochs=num_epochs,
         learning_rate=learning_rate,
         lr_scheduler_type="cosine",
-        bf16=True,  # ensure training more stable
+        bf16=bf16_flag,  # bf16 makes training more stable, but requires AVX-512 support
         logging_steps=1,
         optim="adamw_torch",
         evaluation_strategy="steps" if val_set_size > 0 else "no",
diff --git a/python/llm/example/GPU/QLoRA-FineTuning/export_merged_model.py b/python/llm/example/GPU/QLoRA-FineTuning/export_merged_model.py
index 2df8fee1..97079671 100644
--- a/python/llm/example/GPU/QLoRA-FineTuning/export_merged_model.py
+++ b/python/llm/example/GPU/QLoRA-FineTuning/export_merged_model.py
@@ -58,9 +58,6 @@ if __name__ == "__main__":
         device_map={"": "cpu"},
     )
 
-    first_weight = base_model.model.layers[0].self_attn.q_proj.weight
-    first_weight_old = first_weight.clone()
-
     lora_model = PeftModel.from_pretrained(
         base_model,
         adapter_path,
@@ -68,10 +65,6 @@
         device_map={"": "cpu"},
         torch_dtype=torch.float16,
     )
 
-    lora_weight = lora_model.base_model.model.model.layers[
-        0
-    ].self_attn.q_proj.weight
-
     # merge weights - new merging method from peft
     lora_model = lora_model.merge_and_unload()