LLM: make finetuning examples more common for other models (#10078)

Authored by binbin Deng on 2024-02-04 16:03:52 +08:00, committed by GitHub
parent 90f004b80b
commit 7e49fbc5dd
8 changed files with 27 additions and 19 deletions
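
Across all eight files the change is the same pattern: replace the Llama-specific LlamaTokenizer with AutoTokenizer and pass trust_remote_code=True when loading the tokenizer and the base model, so the finetuning examples also work with non-Llama checkpoints whose tokenizer or model code ships with the checkpoint. A minimal sketch of the tokenizer side of that pattern (the base_model value is a hypothetical placeholder, not taken from the diff):

# Minimal sketch of the tokenizer-loading pattern this commit adopts.
# `base_model` is a hypothetical placeholder for any HF checkpoint path or Hub id.
from transformers import AutoTokenizer

base_model = "path/to/any/base_model"  # placeholder
# AutoTokenizer resolves the right tokenizer class for the checkpoint;
# trust_remote_code=True also allows checkpoints that ship custom tokenizer code.
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)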

View file

@@ -39,7 +39,7 @@ import transformers
 from datasets import load_dataset
 import accelerate
-from transformers import LlamaTokenizer
+from transformers import AutoTokenizer
 from peft import (
 get_peft_model_state_dict,
 set_peft_model_state_dict,
@@ -161,6 +161,7 @@ def train(
 optimize_model=False,
 torch_dtype=torch.bfloat16,
 modules_to_not_convert=["lm_head"],
+trust_remote_code=True,
 )
 else:
 model = AutoModelForCausalLM.from_pretrained(
@@ -169,13 +170,14 @@ def train(
 optimize_model=False,
 torch_dtype=torch.bfloat16,
 modules_to_not_convert=["lm_head"],
+trust_remote_code=True,
 )
 print(f"Model loaded on rank {os.environ.get('LOCAL_RANK')}")
 model = model.to(f'xpu:{os.environ.get("LOCAL_RANK", 0)}')
 print(f"Model moved to rank {os.environ.get('LOCAL_RANK')}")
-tokenizer = LlamaTokenizer.from_pretrained(base_model)
+tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
 print(f"Tokenizer loaded on rank {os.environ.get('LOCAL_RANK')}")
 tokenizer.pad_token_id = (
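
For context, the hunks above touch the bf16 load path, which goes through an AutoModelForCausalLM wrapper that accepts optimize_model and modules_to_not_convert (stock transformers does not). A hedged sketch of what that call looks like after the change, assuming the BigDL-LLM import path these alpaca examples typically use and showing only the keyword arguments visible in the hunk:

# Hedged sketch of the bf16 branch after this change. The import path below is an
# assumption (the hunk does not show it); base_model is a placeholder.
import torch
from bigdl.llm.transformers import AutoModelForCausalLM  # assumed wrapper import

base_model = "path/to/any/base_model"  # placeholder
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    optimize_model=False,
    torch_dtype=torch.bfloat16,
    modules_to_not_convert=["lm_head"],  # keep the LM head out of low-bit conversion
    trust_remote_code=True,              # the argument added by this commit
)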

View file

@@ -16,7 +16,7 @@
 import os
 import torch
-from transformers import LlamaTokenizer # noqa: F402
+from transformers import AutoTokenizer
 import argparse
 current_dir = os.path.dirname(os.path.realpath(__file__))
@@ -39,6 +39,6 @@ if __name__ == "__main__":
 adapter_path = args.adapter_path
 output_path = args.output_path
-tokenizer = LlamaTokenizer.from_pretrained(base_model)
+tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
 merge_adapter(base_model, tokenizer, adapter_path, output_path)
 print(f'Finish to merge the adapter into the original model and you could find the merged model in {output_path}.')
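
The merge script itself only swaps the tokenizer class; merge_adapter (defined elsewhere in the example) takes the base model id, the tokenizer, the adapter path, and an output path. For readers unfamiliar with that step, a generic sketch of merging a LoRA adapter with the stock peft API, which is roughly what such a merge step does (all paths are hypothetical placeholders; this is not the example's own implementation):

# Generic LoRA-merge sketch using the stock transformers + peft APIs.
# All names/paths below are placeholders; the example's merge_adapter may differ.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_model = "path/to/any/base_model"   # placeholder
adapter_path = "path/to/lora/adapter"   # placeholder
output_path = "path/to/merged/model"    # placeholder

tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
base = AutoModelForCausalLM.from_pretrained(base_model, trust_remote_code=True)
merged = PeftModel.from_pretrained(base, adapter_path).merge_and_unload()
merged.save_pretrained(output_path)
tokenizer.save_pretrained(output_path)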

View file

@@ -39,7 +39,7 @@ import transformers
 from datasets import load_dataset
 import accelerate
-from transformers import LlamaTokenizer
+from transformers import AutoTokenizer
 from peft import (
 get_peft_model_state_dict,
 set_peft_model_state_dict,
@@ -161,6 +161,7 @@ def train(
 optimize_model=False,
 torch_dtype=torch.bfloat16,
 modules_to_not_convert=["lm_head"],
+trust_remote_code=True,
 )
 else:
 # Default 4-bit format for qa-lora is sym_int4
@@ -172,7 +173,8 @@ def train(
 bnb_4bit_compute_dtype=torch.bfloat16
 )
 model = AutoModelForCausalLM.from_pretrained(base_model,
-quantization_config=bnb_config, )
+quantization_config=bnb_config,
+trust_remote_code=True,)
 # below is also supported
 # Load the base model from a directory or the HF Hub to 4-bit format
 # model = AutoModelForCausalLM.from_pretrained(
@@ -187,7 +189,7 @@ def train(
 model = model.to(f'xpu:{os.environ.get("LOCAL_RANK", 0)}')
 print(f"Model moved to rank {os.environ.get('LOCAL_RANK')}")
-tokenizer = LlamaTokenizer.from_pretrained(base_model)
+tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
 print(f"Tokenizer loaded on rank {os.environ.get('LOCAL_RANK')}")
 tokenizer.pad_token_id = (

View file

@@ -16,7 +16,7 @@
 import os
 import torch
-from transformers import LlamaTokenizer # noqa: F402
+from transformers import AutoTokenizer
 import argparse
 current_dir = os.path.dirname(os.path.realpath(__file__))
@@ -39,6 +39,6 @@ if __name__ == "__main__":
 adapter_path = args.adapter_path
 output_path = args.output_path
-tokenizer = LlamaTokenizer.from_pretrained(base_model)
+tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
 merge_adapter(base_model, tokenizer, adapter_path, output_path)
 print(f'Finish to merge the adapter into the original model and you could find the merged model in {output_path}.')

View file

@@ -39,7 +39,7 @@ import transformers
 from datasets import load_dataset
 import accelerate
-from transformers import LlamaTokenizer
+from transformers import AutoTokenizer
 from peft import (
 get_peft_model_state_dict,
 set_peft_model_state_dict,
@@ -161,6 +161,7 @@ def train(
 optimize_model=False,
 torch_dtype=torch.bfloat16,
 modules_to_not_convert=["lm_head"],
+trust_remote_code=True,
 )
 else:
 # According to the QLoRA paper, using "nf4" could yield better model quality than "int4"
@@ -172,7 +173,8 @@ def train(
 bnb_4bit_compute_dtype=torch.bfloat16
 )
 model = AutoModelForCausalLM.from_pretrained(base_model,
-quantization_config=bnb_config, )
+quantization_config=bnb_config,
+trust_remote_code=True)
 # below is also supported
 # Load the base model from a directory or the HF Hub to 4-bit format
 # model = AutoModelForCausalLM.from_pretrained(
@@ -187,7 +189,7 @@ def train(
 model = model.to(f'xpu:{os.environ.get("LOCAL_RANK", 0)}')
 print(f"Model moved to rank {os.environ.get('LOCAL_RANK')}")
-tokenizer = LlamaTokenizer.from_pretrained(base_model)
+tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
 print(f"Tokenizer loaded on rank {os.environ.get('LOCAL_RANK')}")
 tokenizer.pad_token_id = (
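
In the 4-bit branch above, trust_remote_code=True is now threaded through the quantized from_pretrained call as well. A hedged sketch of that pattern using the stock Hugging Face APIs; the example itself goes through its own AutoModelForCausalLM wrapper, and only bnb_4bit_compute_dtype plus the "nf4" comment are visible in the hunk, so the other config fields are assumptions:

# Hedged sketch of the 4-bit load pattern with stock Hugging Face APIs.
# The example's wrapper/config may differ; base_model is a placeholder.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

base_model = "path/to/any/base_model"  # placeholder
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",              # per the QLoRA-paper comment in the hunk
    bnb_4bit_compute_dtype=torch.bfloat16,  # visible in the hunk
)
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    trust_remote_code=True,  # the argument added by this commit
)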

View file

@@ -16,7 +16,7 @@
 import os
 import torch
-from transformers import LlamaTokenizer # noqa: F402
+from transformers import AutoTokenizer
 import argparse
 current_dir = os.path.dirname(os.path.realpath(__file__))
@@ -39,6 +39,6 @@ if __name__ == "__main__":
 adapter_path = args.adapter_path
 output_path = args.output_path
-tokenizer = LlamaTokenizer.from_pretrained(base_model)
+tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
 merge_adapter(base_model, tokenizer, adapter_path, output_path)
 print(f'Finish to merge the adapter into the original model and you could find the merged model in {output_path}.')

View file

@@ -39,7 +39,7 @@ import transformers
 from datasets import load_dataset
 import accelerate
-from transformers import LlamaTokenizer
+from transformers import AutoTokenizer
 from peft import (
 get_peft_model_state_dict,
 set_peft_model_state_dict,
@@ -174,6 +174,7 @@ def train(
 optimize_model=False,
 torch_dtype=torch.bfloat16,
 modules_to_not_convert=["lm_head"],
+trust_remote_code=True,
 )
 else:
 # use bnb_config for qlora/qalora/relora, which use 4bit for base model
@@ -184,7 +185,8 @@ def train(
 bnb_4bit_compute_dtype=torch.bfloat16
 )
 model = AutoModelForCausalLM.from_pretrained(base_model,
-quantization_config=bnb_config, )
+quantization_config=bnb_config,
+trust_remote_code=True)
 # below is also supported
 # Load the base model from a directory or the HF Hub to 4-bit format
 # model = AutoModelForCausalLM.from_pretrained(
@@ -199,7 +201,7 @@ def train(
 model = model.to(f'xpu:{os.environ.get("LOCAL_RANK", 0)}')
 print(f"Model moved to rank {os.environ.get('LOCAL_RANK')}")
-tokenizer = LlamaTokenizer.from_pretrained(base_model)
+tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
 print(f"Tokenizer loaded on rank {os.environ.get('LOCAL_RANK')}")
 tokenizer.pad_token_id = (

View file

@@ -16,7 +16,7 @@
 import os
 import torch
-from transformers import LlamaTokenizer # noqa: F402
+from transformers import AutoTokenizer
 import argparse
 current_dir = os.path.dirname(os.path.realpath(__file__))
@@ -39,6 +39,6 @@ if __name__ == "__main__":
 adapter_path = args.adapter_path
 output_path = args.output_path
-tokenizer = LlamaTokenizer.from_pretrained(base_model)
+tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
 merge_adapter(base_model, tokenizer, adapter_path, output_path)
 print(f'Finish to merge the adapter into the original model and you could find the merged model in {output_path}.')