LLM: make finetuning examples more common for other models (#10078)

parent 90f004b80b
commit 7e49fbc5dd

8 changed files with 27 additions and 19 deletions
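The recurring change across all eight files is to swap the Llama-specific tokenizer class for AutoTokenizer and to pass trust_remote_code=True, so the same finetuning examples also work with model families whose code ships on the Hugging Face Hub. A minimal before/after sketch of just that change; the model id is a placeholder:

from transformers import AutoTokenizer

# Before (Llama-only):
#   from transformers import LlamaTokenizer
#   tokenizer = LlamaTokenizer.from_pretrained(base_model)

# After: AutoTokenizer resolves the right tokenizer class for any supported
# checkpoint, and trust_remote_code=True also covers models whose tokenizer
# code lives on the Hub rather than inside transformers.
base_model = "meta-llama/Llama-2-7b-hf"  # placeholder; any HF model id or local path
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)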
File 1 of 8

@@ -39,7 +39,7 @@ import transformers
 from datasets import load_dataset
 import accelerate
 
-from transformers import LlamaTokenizer
+from transformers import AutoTokenizer
 from peft import (
     get_peft_model_state_dict,
     set_peft_model_state_dict,

@@ -161,6 +161,7 @@ def train(
             optimize_model=False,
             torch_dtype=torch.bfloat16,
             modules_to_not_convert=["lm_head"],
+            trust_remote_code=True,
         )
     else:
         model = AutoModelForCausalLM.from_pretrained(

@@ -169,13 +170,14 @@ def train(
             optimize_model=False,
             torch_dtype=torch.bfloat16,
             modules_to_not_convert=["lm_head"],
+            trust_remote_code=True,
         )
 
     print(f"Model loaded on rank {os.environ.get('LOCAL_RANK')}")
     model = model.to(f'xpu:{os.environ.get("LOCAL_RANK", 0)}')
     print(f"Model moved to rank {os.environ.get('LOCAL_RANK')}")
 
-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     print(f"Tokenizer loaded on rank {os.environ.get('LOCAL_RANK')}")
 
     tokenizer.pad_token_id = (
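In the finetuning scripts, the same trust_remote_code=True flag is threaded through the model load as well. The sketch below restates that call with only the keyword arguments visible in the diff; the import line is an assumption (these examples use a low-bit AutoModelForCausalLM wrapper rather than the plain transformers class), and base_model is a placeholder.

import torch
# Assumed import, not shown in this hunk: the low-bit wrapper used by these examples.
from bigdl.llm.transformers import AutoModelForCausalLM

base_model = "meta-llama/Llama-2-7b-hf"  # placeholder model id or local path

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    optimize_model=False,
    torch_dtype=torch.bfloat16,
    modules_to_not_convert=["lm_head"],  # keep the LM head out of low-bit conversion
    trust_remote_code=True,              # new in this commit; needed for custom modeling code
)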
File 2 of 8

@@ -16,7 +16,7 @@
 import os
 
 import torch
-from transformers import LlamaTokenizer  # noqa: F402
+from transformers import AutoTokenizer
 import argparse
 
 current_dir = os.path.dirname(os.path.realpath(__file__))

@@ -39,6 +39,6 @@ if __name__ == "__main__":
     adapter_path = args.adapter_path
     output_path = args.output_path
 
-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     merge_adapter(base_model, tokenizer, adapter_path, output_path)
     print(f'Finish to merge the adapter into the original model and you could find the merged model in {output_path}.')
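The export scripts take the same tokenizer change before calling merge_adapter. The helper's body is not part of this diff; as a reference point, a merge step of this kind usually follows the generic PEFT pattern sketched below (the function name, paths, and model id are placeholders, not the scripts' actual code).

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel


def merge_adapter_sketch(base_model, tokenizer, adapter_path, output_path):
    # Load the base model, then attach the trained LoRA adapter on top of it.
    base = AutoModelForCausalLM.from_pretrained(
        base_model, torch_dtype=torch.float16, trust_remote_code=True
    )
    peft_model = PeftModel.from_pretrained(base, adapter_path)
    # Fold the adapter weights into the base weights and save a standalone checkpoint.
    merged = peft_model.merge_and_unload()
    merged.save_pretrained(output_path)
    tokenizer.save_pretrained(output_path)


if __name__ == "__main__":
    base_model = "meta-llama/Llama-2-7b-hf"  # placeholder
    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
    merge_adapter_sketch(base_model, tokenizer, "./adapter", "./merged-model")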
File 3 of 8

@@ -39,7 +39,7 @@ import transformers
 from datasets import load_dataset
 import accelerate
 
-from transformers import LlamaTokenizer
+from transformers import AutoTokenizer
 from peft import (
     get_peft_model_state_dict,
     set_peft_model_state_dict,

@@ -161,6 +161,7 @@ def train(
             optimize_model=False,
             torch_dtype=torch.bfloat16,
             modules_to_not_convert=["lm_head"],
+            trust_remote_code=True,
         )
     else:
         # Default 4-bit format for qa-lora is sym_int4

@@ -172,7 +173,8 @@ def train(
             bnb_4bit_compute_dtype=torch.bfloat16
         )
         model = AutoModelForCausalLM.from_pretrained(base_model,
-                                                     quantization_config=bnb_config, )
+                                                     quantization_config=bnb_config,
+                                                     trust_remote_code=True,)
         # below is also supported
         # Load the base model from a directory or the HF Hub to 4-bit format
         # model = AutoModelForCausalLM.from_pretrained(

@@ -187,7 +189,7 @@ def train(
     model = model.to(f'xpu:{os.environ.get("LOCAL_RANK", 0)}')
     print(f"Model moved to rank {os.environ.get('LOCAL_RANK')}")
 
-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     print(f"Tokenizer loaded on rank {os.environ.get('LOCAL_RANK')}")
 
     tokenizer.pad_token_id = (
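In the bitsandbytes branch of this script, the commit adds trust_remote_code=True alongside the existing quantization_config. A minimal sketch of that path, assuming the standard Transformers + bitsandbytes 4-bit setup; only bnb_4bit_compute_dtype is taken from the diff context, the other config fields are assumptions.

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # assumption: 4-bit, as the script's comments describe
    bnb_4bit_compute_dtype=torch.bfloat16,  # matches the value shown in the diff context
)

base_model = "meta-llama/Llama-2-7b-hf"     # placeholder
model = AutoModelForCausalLM.from_pretrained(base_model,
                                             quantization_config=bnb_config,
                                             trust_remote_code=True)  # the argument added by this commit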
File 4 of 8

@@ -16,7 +16,7 @@
 import os
 
 import torch
-from transformers import LlamaTokenizer  # noqa: F402
+from transformers import AutoTokenizer
 import argparse
 
 current_dir = os.path.dirname(os.path.realpath(__file__))

@@ -39,6 +39,6 @@ if __name__ == "__main__":
     adapter_path = args.adapter_path
     output_path = args.output_path
 
-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     merge_adapter(base_model, tokenizer, adapter_path, output_path)
     print(f'Finish to merge the adapter into the original model and you could find the merged model in {output_path}.')
File 5 of 8

@@ -39,7 +39,7 @@ import transformers
 from datasets import load_dataset
 import accelerate
 
-from transformers import LlamaTokenizer
+from transformers import AutoTokenizer
 from peft import (
     get_peft_model_state_dict,
     set_peft_model_state_dict,

@@ -161,6 +161,7 @@ def train(
             optimize_model=False,
             torch_dtype=torch.bfloat16,
             modules_to_not_convert=["lm_head"],
+            trust_remote_code=True,
         )
     else:
         # According to the QLoRA paper, using "nf4" could yield better model quality than "int4"

@@ -172,7 +173,8 @@ def train(
             bnb_4bit_compute_dtype=torch.bfloat16
         )
         model = AutoModelForCausalLM.from_pretrained(base_model,
-                                                     quantization_config=bnb_config, )
+                                                     quantization_config=bnb_config,
+                                                     trust_remote_code=True)
         # below is also supported
         # Load the base model from a directory or the HF Hub to 4-bit format
         # model = AutoModelForCausalLM.from_pretrained(

@@ -187,7 +189,7 @@ def train(
     model = model.to(f'xpu:{os.environ.get("LOCAL_RANK", 0)}')
     print(f"Model moved to rank {os.environ.get('LOCAL_RANK')}")
 
-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     print(f"Tokenizer loaded on rank {os.environ.get('LOCAL_RANK')}")
 
     tokenizer.pad_token_id = (
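The unchanged tokenizer.pad_token_id = ( context line is where the scripts assign a padding id. When pointing these examples at other model families, not every tokenizer defines one; a common fallback, shown purely as an illustration rather than the scripts' exact assignment, is:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf",  # placeholder id
                                          trust_remote_code=True)

# Illustrative fallback: decoder-only checkpoints often ship without a pad token,
# so reuse the end-of-sequence token (or token id 0) for padding.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id if tokenizer.eos_token_id is not None else 0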
File 6 of 8

@@ -16,7 +16,7 @@
 import os
 
 import torch
-from transformers import LlamaTokenizer  # noqa: F402
+from transformers import AutoTokenizer
 import argparse
 
 current_dir = os.path.dirname(os.path.realpath(__file__))

@@ -39,6 +39,6 @@ if __name__ == "__main__":
     adapter_path = args.adapter_path
     output_path = args.output_path
 
-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     merge_adapter(base_model, tokenizer, adapter_path, output_path)
     print(f'Finish to merge the adapter into the original model and you could find the merged model in {output_path}.')
File 7 of 8

@@ -39,7 +39,7 @@ import transformers
 from datasets import load_dataset
 import accelerate
 
-from transformers import LlamaTokenizer
+from transformers import AutoTokenizer
 from peft import (
     get_peft_model_state_dict,
     set_peft_model_state_dict,

@@ -174,6 +174,7 @@ def train(
             optimize_model=False,
             torch_dtype=torch.bfloat16,
             modules_to_not_convert=["lm_head"],
+            trust_remote_code=True,
         )
     else:
         # use bnb_config for qlora/qalora/relora, which use 4bit for base model

@@ -184,7 +185,8 @@ def train(
             bnb_4bit_compute_dtype=torch.bfloat16
         )
         model = AutoModelForCausalLM.from_pretrained(base_model,
-                                                     quantization_config=bnb_config, )
+                                                     quantization_config=bnb_config,
+                                                     trust_remote_code=True)
         # below is also supported
         # Load the base model from a directory or the HF Hub to 4-bit format
         # model = AutoModelForCausalLM.from_pretrained(

@@ -199,7 +201,7 @@ def train(
     model = model.to(f'xpu:{os.environ.get("LOCAL_RANK", 0)}')
     print(f"Model moved to rank {os.environ.get('LOCAL_RANK')}")
 
-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     print(f"Tokenizer loaded on rank {os.environ.get('LOCAL_RANK')}")
 
     tokenizer.pad_token_id = (
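Around these changes, every finetuning script keeps the same per-rank device placement: it reads LOCAL_RANK from the environment (set by the distributed launcher) and moves the model onto the matching XPU. A small sketch of that pattern, assuming a PyTorch build with Intel XPU support is installed; the tiny module stands in for the loaded causal LM.

import os

import torch

# LOCAL_RANK is injected by the distributed launcher (e.g. torchrun or mpirun);
# fall back to 0 for single-process runs.
local_rank = int(os.environ.get("LOCAL_RANK", 0))

model = torch.nn.Linear(8, 8)              # stand-in for the loaded causal LM
model = model.to(f"xpu:{local_rank}")      # requires an XPU-enabled PyTorch build
print(f"Model moved to rank {local_rank}")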
File 8 of 8

@@ -16,7 +16,7 @@
 import os
 
 import torch
-from transformers import LlamaTokenizer  # noqa: F402
+from transformers import AutoTokenizer
 import argparse
 
 current_dir = os.path.dirname(os.path.realpath(__file__))

@@ -39,6 +39,6 @@ if __name__ == "__main__":
     adapter_path = args.adapter_path
     output_path = args.output_path
 
-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     merge_adapter(base_model, tokenizer, adapter_path, output_path)
     print(f'Finish to merge the adapter into the original model and you could find the merged model in {output_path}.')
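Finally, all four finetuning scripts keep importing get_peft_model_state_dict and set_peft_model_state_dict from PEFT, which is what lets them save and restore just the adapter weights (for example when resuming a run). A generic sketch of that usage, assuming a PEFT-wrapped model already exists; the file path is a placeholder.

import torch
from peft import get_peft_model_state_dict, set_peft_model_state_dict


def save_adapter_state(peft_model, path="adapter_state.bin"):
    # Extract only the adapter (LoRA) parameters from the PEFT-wrapped model.
    torch.save(get_peft_model_state_dict(peft_model), path)


def load_adapter_state(peft_model, path="adapter_state.bin"):
    # Restore previously saved adapter parameters into the same architecture.
    adapter_weights = torch.load(path, map_location="cpu")
    set_peft_model_state_dict(peft_model, adapter_weights)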