LLM: Add cpu qlora support other models guide (#9567)
* use bf16 flag
* add using baichuan model
* update merge
* remove
* update
parent bda404fc8f
commit ed0dc57c6e
3 changed files with 35 additions and 8 deletions
@@ -96,4 +96,34 @@ python ./quotes_qlora_finetuning_cpu.py \
        --data_path "./english_quotes" \
        --output_dir "./bigdl-qlora-alpaca" \
        --prompt_template_name "english_quotes"
```

### Guide to finetuning QLoRA using different models

Make sure you fully understand the entire finetuning process and that you are using the latest version of the model.

Using [Baichuan-7B](https://huggingface.co/baichuan-inc/Baichuan-7B/tree/main) as an example:

1. Update the tokenizer first, since the base example is written for the Llama model; replace the Llama-specific tokenizer with `AutoTokenizer`:

```python
from transformers import AutoTokenizer  # the base example imports LlamaTokenizer; switch to AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(base_model)
```

2. Some models may require adding `trust_remote_code=True` to `from_pretrained` for both the model and the tokenizer:

```python
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(base_model, ..., trust_remote_code=True)  # "..." stands for the other existing arguments, unchanged
```

3. Modify `target_modules` according to the model you want to train; you can refer to [here](https://stackoverflow.com/questions/76768226/target-modules-for-applying-peft-lora-on-different-models/76779946#76779946),
or simply search for the recommended target modules for that model. For Baichuan-7B:

```python
lora_target_modules: List[str] = ["W_pack"]
```
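
For context, `lora_target_modules` is the value the example feeds into its LoRA configuration. Below is a minimal sketch of how it is consumed, assuming the peft-style `LoraConfig` together with the `get_peft_model` helper used by the script, and continuing from a `model` already loaded as in step 2 (the hyperparameter values are illustrative):

```python
from typing import List

from peft import LoraConfig
from bigdl.llm.transformers.qlora import get_peft_model

lora_target_modules: List[str] = ["W_pack"]  # Baichuan packs Q, K and V into a single W_pack projection

config = LoraConfig(
    r=8,                                 # illustrative LoRA rank
    lora_alpha=16,
    target_modules=lora_target_modules,  # the per-model value from this step
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, config)
```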

4. Some models may need a different pad token; for example, Qwen requires setting `tokenizer.pad_token_id = tokenizer.eod_id` (a sketch follows).
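
A hedged sketch of that adjustment for Qwen, assuming its tokenizer was loaded with `trust_remote_code=True` and exposes the Qwen-specific `eod_id` attribute:

```python
# Qwen's tokenizer has no pad token by default, which breaks padding during
# tokenization and collation; reuse its end-of-document id as the padding id.
if tokenizer.pad_token_id is None and hasattr(tokenizer, "eod_id"):
    tokenizer.pad_token_id = tokenizer.eod_id
```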

5. (Only for Baichuan) According to this [issue](https://github.com/baichuan-inc/Baichuan2/issues/204#issuecomment-1774372008),
you need to modify [tokenization_baichuan.py](https://huggingface.co/baichuan-inc/Baichuan-7B/blob/main/tokenization_baichuan.py#L74) to fix a tokenizer initialization error
(with recent `transformers` releases the base class constructor accesses the vocabulary before `self.sp_model` is loaded; the fix discussed in the issue is to load the SentencePiece model before calling `super().__init__()`).

6. Finetune as usual (a sketch of this step follows).
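
A rough sketch of this step: the example script builds a standard `transformers.Trainer` from the tokenized data and the `TrainingArguments` it has already constructed (the collator and exact arguments may differ from the real script):

```python
import transformers

trainer = transformers.Trainer(
    model=model,
    train_dataset=train_data,
    eval_dataset=val_data,
    args=args,  # the TrainingArguments built by the script (bf16 flag, learning rate, epochs, ...)
    data_collator=transformers.DataCollatorForSeq2Seq(
        tokenizer, pad_to_multiple_of=8, return_tensors="pt", padding=True
    ),
)
trainer.train()
model.save_pretrained(output_dir)
```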

7. Use [export_merged_model.py](https://github.com/intel-analytics/BigDL/blob/main/python/llm/example/GPU/QLoRA-FineTuning/export_merged_model.py) to merge the LoRA weights into the base model. The tokenizer and model loading in that script also need the same updates so that the weights merge successfully:

```python
from transformers import AutoTokenizer  # noqa: F402

tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
base_model = AutoModelForCausalLM.from_pretrained(base_model, trust_remote_code=True)
```
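
After those two lines are updated, the rest of the export script is unchanged; below is a condensed, simplified sketch of the merge flow it performs (the actual script does additional bookkeeping, and the output directory here is illustrative):

```python
import torch
from peft import PeftModel

lora_model = PeftModel.from_pretrained(
    base_model,              # the AutoModelForCausalLM loaded above
    adapter_path,            # directory containing the trained QLoRA adapter
    device_map={"": "cpu"},
    torch_dtype=torch.float16,
)

# merge weights - new merging method from peft
merged_model = lora_model.merge_and_unload()
merged_model.save_pretrained("./merged-model")
tokenizer.save_pretrained("./merged-model")
```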

@@ -51,6 +51,7 @@ from bigdl.llm.transformers import AutoModelForCausalLM
# import them from bigdl.llm.transformers.qlora to get a BigDL-LLM compatible Peft model
from bigdl.llm.transformers.qlora import get_peft_model, prepare_model_for_kbit_training
+from bigdl.llm.utils.isa_checker import ISAChecker


def get_int_from_env(env_keys, default):
    """Returns the first positive env value found in the `env_keys` list or the default."""

@@ -293,6 +294,9 @@ def train(
    else:
        train_data = data["train"].shuffle().map(generate_and_tokenize_prompt)
        val_data = None

+    isa_checker = ISAChecker()
+    bf16_flag = isa_checker.check_avx512()
    args = transformers.TrainingArguments(
        per_device_train_batch_size=micro_batch_size,
        gradient_accumulation_steps=gradient_accumulation_steps,

@@ -303,7 +307,7 @@ def train(
        num_train_epochs=num_epochs,
        learning_rate=learning_rate,
        lr_scheduler_type="cosine",
-        bf16=True, # ensure training more stable
+        bf16=bf16_flag, # ensure training more stable
        logging_steps=1,
        optim="adamw_torch",
        evaluation_strategy="steps" if val_set_size > 0 else "no",
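
For reference, the intent of this change in isolation: bf16 is only enabled when `ISAChecker` reports AVX-512 support on the host CPU, so the script falls back to fp32 elsewhere. A standalone sketch (the `output_dir` value is illustrative):

```python
import transformers
from bigdl.llm.utils.isa_checker import ISAChecker

isa_checker = ISAChecker()
bf16_flag = isa_checker.check_avx512()  # True only on CPUs with AVX-512 support

args = transformers.TrainingArguments(
    output_dir="./bigdl-qlora-alpaca",
    bf16=bf16_flag,  # use bf16 only where the hardware supports it
)
```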

@@ -58,9 +58,6 @@ if __name__ == "__main__":
        device_map={"": "cpu"},
    )

-    first_weight = base_model.model.layers[0].self_attn.q_proj.weight
-    first_weight_old = first_weight.clone()
-
    lora_model = PeftModel.from_pretrained(
        base_model,
        adapter_path,

@@ -68,10 +65,6 @@ if __name__ == "__main__":
        torch_dtype=torch.float16,
    )

-    lora_weight = lora_model.base_model.model.model.layers[
-        0
-    ].self_attn.q_proj.weight
-
    # merge weights - new merging method from peft
    lora_model = lora_model.merge_and_unload()