Fix LoRA tokenizer for Llama and chatglm (#11186)

* Set pad_token to eos_token if it is None; otherwise, keep the pad token provided by the model config.
Qiyuan Gong 2024-06-03 15:35:38 +08:00 committed by GitHub
parent 3eb13ccd8c
commit 15a6205790
4 changed files with 14 additions and 15 deletions
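
All four fine-tuning scripts receive the same treatment: load the tokenizer with AutoTokenizer and fall back to the EOS token only when no pad token is configured. A minimal sketch of the pattern follows; base_model is a placeholder checkpoint name (substitute whatever base_model value the script receives), and trust_remote_code=True mirrors the ChatGLM-style scripts that need a custom tokenizer class.

from transformers import AutoTokenizer

base_model = "meta-llama/Llama-2-7b-hf"  # placeholder; substitute the script's base_model value

tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)

# Llama tokenizers typically ship without a pad token, while a model such as
# ChatGLM may already define one in its config. Fall back to eos_token only
# when nothing is configured; otherwise keep the configured value.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token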


@@ -39,7 +39,7 @@ import transformers
 from datasets import load_dataset
 import accelerate
-from transformers import LlamaTokenizer
+from transformers import AutoTokenizer
 from peft import (
     get_peft_model_state_dict,
     set_peft_model_state_dict,
@@ -198,13 +198,12 @@ def train(
         model = model.to("cpu")
         print(f"Model moved to rank {os.environ.get('LOCAL_RANK')}")
-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model)
     print(f"Tokenizer loaded on rank {os.environ.get('LOCAL_RANK')}")
-    tokenizer.pad_token_id = (
-        0  # unk. we want this to be different from the eos token
-    )
-    tokenizer.padding_side = "left"  # Allow batched inference
+    # For Llama family
+    if tokenizer.pad_token is None:
+        tokenizer.pad_token = tokenizer.eos_token
     print(model)


@@ -180,10 +180,9 @@ def train(
     tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     print(f"Tokenizer loaded on rank {os.environ.get('LOCAL_RANK')}")
-    tokenizer.pad_token_id = (
-        0  # unk. we want this to be different from the eos token
-    )
-    tokenizer.padding_side = "left"  # Allow batched inference
+    # For Llama family
+    if tokenizer.pad_token is None:
+        tokenizer.pad_token = tokenizer.eos_token
     print(model)


@@ -192,11 +192,9 @@ def train(
     tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     print(f"Tokenizer loaded on rank {os.environ.get('LOCAL_RANK')}")
-    tokenizer.pad_token_id = (
-        0  # unk. we want this to be different from the eos token
-    )
-    tokenizer.padding_side = "left"  # Allow batched inference
+    # For Llama family
+    if tokenizer.pad_token is None:
+        tokenizer.pad_token = tokenizer.eos_token
     print(model)
     # Prepare a IPEX-LLM compatible Peft model


@@ -192,6 +192,9 @@ def train(
     tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     print(f"Tokenizer loaded on rank {os.environ.get('LOCAL_RANK')}")
+    # For Llama family
+    if tokenizer.pad_token is None:
+        tokenizer.pad_token = tokenizer.eos_token
     print(model)
     # Prepare a IPEX-LLM compatible Peft model
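
The pad token matters as soon as batches are padded: transformers raises a ValueError when padding=True is requested and no pad token is set. A small self-contained illustration, reusing the placeholder checkpoint name from the sketch above:

from transformers import AutoTokenizer

base_model = "meta-llama/Llama-2-7b-hf"  # placeholder; substitute the script's base_model value
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # same fallback as in the diffs above

# With a pad token in place, a mixed-length batch pads cleanly; without one,
# transformers raises a ValueError asking for a pad token to be set.
batch = tokenizer(
    ["Explain LoRA in one sentence.", "Hi"],
    padding=True,
    return_tensors="pt",
)
print(batch["input_ids"].shape)  # (2, length of the longest sequence in the batch)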