LLM: make finetuning examples more common for other models (#10078)

parent 90f004b80b
commit 7e49fbc5dd

8 changed files with 27 additions and 19 deletions
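The recurring change across all eight files is to swap the Llama-specific tokenizer class for AutoTokenizer and to pass trust_remote_code=True, so the same finetuning examples also work with model families whose code ships on the Hugging Face Hub. A minimal before/after sketch of just that change; the model id is a placeholder:

from transformers import AutoTokenizer

# Before (Llama-only):
#   from transformers import LlamaTokenizer
#   tokenizer = LlamaTokenizer.from_pretrained(base_model)

# After: AutoTokenizer resolves the right tokenizer class for any supported
# checkpoint, and trust_remote_code=True also covers models whose tokenizer
# code lives on the Hub rather than inside transformers.
base_model = "meta-llama/Llama-2-7b-hf"  # placeholder; any HF model id or local path
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)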
File 1 of 8

@@ -39,7 +39,7 @@ import transformers
 from datasets import load_dataset
 import accelerate
 
-from transformers import LlamaTokenizer
+from transformers import AutoTokenizer
 from peft import (
     get_peft_model_state_dict,
     set_peft_model_state_dict,

@@ -161,6 +161,7 @@ def train(
             optimize_model=False,
             torch_dtype=torch.bfloat16,
             modules_to_not_convert=["lm_head"],
+            trust_remote_code=True,
         )
     else:
         model = AutoModelForCausalLM.from_pretrained(

@@ -169,13 +170,14 @@ def train(
             optimize_model=False,
             torch_dtype=torch.bfloat16,
             modules_to_not_convert=["lm_head"],
+            trust_remote_code=True,
         )
 
     print(f"Model loaded on rank {os.environ.get('LOCAL_RANK')}")
     model = model.to(f'xpu:{os.environ.get("LOCAL_RANK", 0)}')
     print(f"Model moved to rank {os.environ.get('LOCAL_RANK')}")
 
-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     print(f"Tokenizer loaded on rank {os.environ.get('LOCAL_RANK')}")
 
     tokenizer.pad_token_id = (
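In the finetuning scripts, the same trust_remote_code=True flag is threaded through the model load as well. The sketch below restates that call with only the keyword arguments visible in the diff; the import line is an assumption (these examples use a low-bit AutoModelForCausalLM wrapper rather than the plain transformers class), and base_model is a placeholder.

import torch
# Assumed import, not shown in this hunk: the low-bit wrapper used by these examples.
from bigdl.llm.transformers import AutoModelForCausalLM

base_model = "meta-llama/Llama-2-7b-hf"  # placeholder model id or local path

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    optimize_model=False,
    torch_dtype=torch.bfloat16,
    modules_to_not_convert=["lm_head"],  # keep the LM head out of low-bit conversion
    trust_remote_code=True,              # new in this commit; needed for custom modeling code
)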
File 2 of 8

@@ -16,7 +16,7 @@
 import os
 
 import torch
-from transformers import LlamaTokenizer  # noqa: F402
+from transformers import AutoTokenizer
 import argparse
 
 current_dir = os.path.dirname(os.path.realpath(__file__))

@@ -39,6 +39,6 @@ if __name__ == "__main__":
     adapter_path = args.adapter_path
     output_path = args.output_path
 
-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     merge_adapter(base_model, tokenizer, adapter_path, output_path)
     print(f'Finish to merge the adapter into the original model and you could find the merged model in {output_path}.')
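The export scripts take the same tokenizer change before calling merge_adapter. The helper's body is not part of this diff; as a reference point, a merge step of this kind usually follows the generic PEFT pattern sketched below (the function name, paths, and model id are placeholders, not the scripts' actual code).

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel


def merge_adapter_sketch(base_model, tokenizer, adapter_path, output_path):
    # Load the base model, then attach the trained LoRA adapter on top of it.
    base = AutoModelForCausalLM.from_pretrained(
        base_model, torch_dtype=torch.float16, trust_remote_code=True
    )
    peft_model = PeftModel.from_pretrained(base, adapter_path)
    # Fold the adapter weights into the base weights and save a standalone checkpoint.
    merged = peft_model.merge_and_unload()
    merged.save_pretrained(output_path)
    tokenizer.save_pretrained(output_path)


if __name__ == "__main__":
    base_model = "meta-llama/Llama-2-7b-hf"  # placeholder
    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
    merge_adapter_sketch(base_model, tokenizer, "./adapter", "./merged-model")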
File 3 of 8

@@ -39,7 +39,7 @@ import transformers
 from datasets import load_dataset
 import accelerate
 
-from transformers import LlamaTokenizer
+from transformers import AutoTokenizer
 from peft import (
     get_peft_model_state_dict,
     set_peft_model_state_dict,

@@ -161,6 +161,7 @@ def train(
             optimize_model=False,
             torch_dtype=torch.bfloat16,
             modules_to_not_convert=["lm_head"],
+            trust_remote_code=True,
         )
     else:
         # Default 4-bit format for qa-lora is sym_int4

@@ -172,7 +173,8 @@ def train(
             bnb_4bit_compute_dtype=torch.bfloat16
         )
         model = AutoModelForCausalLM.from_pretrained(base_model,
-                                                     quantization_config=bnb_config, )
+                                                     quantization_config=bnb_config,
+                                                     trust_remote_code=True,)
         # below is also supported
         # Load the base model from a directory or the HF Hub to 4-bit format
         # model = AutoModelForCausalLM.from_pretrained(

@@ -187,7 +189,7 @@ def train(
     model = model.to(f'xpu:{os.environ.get("LOCAL_RANK", 0)}')
     print(f"Model moved to rank {os.environ.get('LOCAL_RANK')}")
 
-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     print(f"Tokenizer loaded on rank {os.environ.get('LOCAL_RANK')}")
 
     tokenizer.pad_token_id = (
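In the bitsandbytes branch of this script, the commit adds trust_remote_code=True alongside the existing quantization_config. A minimal sketch of that path, assuming the standard Transformers + bitsandbytes 4-bit setup; only bnb_4bit_compute_dtype is taken from the diff context, the other config fields are assumptions.

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # assumption: 4-bit, as the script's comments describe
    bnb_4bit_compute_dtype=torch.bfloat16,  # matches the value shown in the diff context
)

base_model = "meta-llama/Llama-2-7b-hf"     # placeholder
model = AutoModelForCausalLM.from_pretrained(base_model,
                                             quantization_config=bnb_config,
                                             trust_remote_code=True)  # the argument added by this commit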
File 4 of 8

@@ -16,7 +16,7 @@
 import os
 
 import torch
-from transformers import LlamaTokenizer  # noqa: F402
+from transformers import AutoTokenizer
 import argparse
 
 current_dir = os.path.dirname(os.path.realpath(__file__))

@@ -39,6 +39,6 @@ if __name__ == "__main__":
     adapter_path = args.adapter_path
     output_path = args.output_path
 
-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     merge_adapter(base_model, tokenizer, adapter_path, output_path)
     print(f'Finish to merge the adapter into the original model and you could find the merged model in {output_path}.')
File 5 of 8

@@ -39,7 +39,7 @@ import transformers
 from datasets import load_dataset
 import accelerate
 
-from transformers import LlamaTokenizer
+from transformers import AutoTokenizer
 from peft import (
     get_peft_model_state_dict,
     set_peft_model_state_dict,

@@ -161,6 +161,7 @@ def train(
             optimize_model=False,
             torch_dtype=torch.bfloat16,
             modules_to_not_convert=["lm_head"],
+            trust_remote_code=True,
         )
     else:
         # According to the QLoRA paper, using "nf4" could yield better model quality than "int4"

@@ -172,7 +173,8 @@ def train(
             bnb_4bit_compute_dtype=torch.bfloat16
         )
         model = AutoModelForCausalLM.from_pretrained(base_model,
-                                                     quantization_config=bnb_config, )
+                                                     quantization_config=bnb_config,
+                                                     trust_remote_code=True)
         # below is also supported
         # Load the base model from a directory or the HF Hub to 4-bit format
         # model = AutoModelForCausalLM.from_pretrained(

@@ -187,7 +189,7 @@ def train(
     model = model.to(f'xpu:{os.environ.get("LOCAL_RANK", 0)}')
     print(f"Model moved to rank {os.environ.get('LOCAL_RANK')}")
 
-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     print(f"Tokenizer loaded on rank {os.environ.get('LOCAL_RANK')}")
 
     tokenizer.pad_token_id = (
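The unchanged tokenizer.pad_token_id = ( context line is where the scripts assign a padding id. When pointing these examples at other model families, not every tokenizer defines one; a common fallback, shown purely as an illustration rather than the scripts' exact assignment, is:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf",  # placeholder id
                                          trust_remote_code=True)

# Illustrative fallback: decoder-only checkpoints often ship without a pad token,
# so reuse the end-of-sequence token (or token id 0) for padding.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id if tokenizer.eos_token_id is not None else 0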
File 6 of 8

@@ -16,7 +16,7 @@
 import os
 
 import torch
-from transformers import LlamaTokenizer  # noqa: F402
+from transformers import AutoTokenizer
 import argparse
 
 current_dir = os.path.dirname(os.path.realpath(__file__))

@@ -39,6 +39,6 @@ if __name__ == "__main__":
     adapter_path = args.adapter_path
     output_path = args.output_path
 
-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     merge_adapter(base_model, tokenizer, adapter_path, output_path)
     print(f'Finish to merge the adapter into the original model and you could find the merged model in {output_path}.')
File 7 of 8

@@ -39,7 +39,7 @@ import transformers
 from datasets import load_dataset
 import accelerate
 
-from transformers import LlamaTokenizer
+from transformers import AutoTokenizer
 from peft import (
     get_peft_model_state_dict,
     set_peft_model_state_dict,

@@ -174,6 +174,7 @@ def train(
             optimize_model=False,
             torch_dtype=torch.bfloat16,
             modules_to_not_convert=["lm_head"],
+            trust_remote_code=True,
         )
     else:
         # use bnb_config for qlora/qalora/relora, which use 4bit for base model

@@ -184,7 +185,8 @@ def train(
             bnb_4bit_compute_dtype=torch.bfloat16
         )
         model = AutoModelForCausalLM.from_pretrained(base_model,
-                                                     quantization_config=bnb_config, )
+                                                     quantization_config=bnb_config,
+                                                     trust_remote_code=True)
         # below is also supported
         # Load the base model from a directory or the HF Hub to 4-bit format
         # model = AutoModelForCausalLM.from_pretrained(

@@ -199,7 +201,7 @@ def train(
     model = model.to(f'xpu:{os.environ.get("LOCAL_RANK", 0)}')
     print(f"Model moved to rank {os.environ.get('LOCAL_RANK')}")
 
-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     print(f"Tokenizer loaded on rank {os.environ.get('LOCAL_RANK')}")
 
     tokenizer.pad_token_id = (
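Around these changes, every finetuning script keeps the same per-rank device placement: it reads LOCAL_RANK from the environment (set by the distributed launcher) and moves the model onto the matching XPU. A small sketch of that pattern, assuming a PyTorch build with Intel XPU support is installed; the tiny module stands in for the loaded causal LM.

import os

import torch

# LOCAL_RANK is injected by the distributed launcher (e.g. torchrun or mpirun);
# fall back to 0 for single-process runs.
local_rank = int(os.environ.get("LOCAL_RANK", 0))

model = torch.nn.Linear(8, 8)              # stand-in for the loaded causal LM
model = model.to(f"xpu:{local_rank}")      # requires an XPU-enabled PyTorch build
print(f"Model moved to rank {local_rank}")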
File 8 of 8

@@ -16,7 +16,7 @@
 import os
 
 import torch
-from transformers import LlamaTokenizer  # noqa: F402
+from transformers import AutoTokenizer
 import argparse
 
 current_dir = os.path.dirname(os.path.realpath(__file__))

@@ -39,6 +39,6 @@ if __name__ == "__main__":
     adapter_path = args.adapter_path
     output_path = args.output_path
 
-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     merge_adapter(base_model, tokenizer, adapter_path, output_path)
     print(f'Finish to merge the adapter into the original model and you could find the merged model in {output_path}.')
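Finally, all four finetuning scripts keep importing get_peft_model_state_dict and set_peft_model_state_dict from PEFT, which is what lets them save and restore just the adapter weights (for example when resuming a run). A generic sketch of that usage, assuming a PEFT-wrapped model already exists; the file path is a placeholder.

import torch
from peft import get_peft_model_state_dict, set_peft_model_state_dict


def save_adapter_state(peft_model, path="adapter_state.bin"):
    # Extract only the adapter (LoRA) parameters from the PEFT-wrapped model.
    torch.save(get_peft_model_state_dict(peft_model), path)


def load_adapter_state(peft_model, path="adapter_state.bin"):
    # Restore previously saved adapter parameters into the same architecture.
    adapter_weights = torch.load(path, map_location="cpu")
    set_peft_model_state_dict(peft_model, adapter_weights)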