Add padding_token=eos_token for GPU trl QLora example (#12398)

* Avoid the "tokenizer doesn't have a padding token" error.
This commit is contained in:
Qiyuan Gong 2024-11-14 10:51:30 +08:00 committed by GitHub
parent d2cbcb060c
commit 7e50ff113c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -45,6 +45,9 @@ if __name__ == "__main__":
# Resolve CLI arguments: the model checkpoint (hub repo id or local path) and the dataset location.
model_path = args.repo_id_or_model_path
dataset_path = args.dataset
# NOTE(review): trust_remote_code=True executes tokenizer code shipped with the
# checkpoint — acceptable here only because the model source is user-chosen.
tokenizer = LlamaTokenizer.from_pretrained(model_path, trust_remote_code=True)
# Some Llama tokenizers ship without a pad token; fall back to the EOS token so
# that padded batches don't raise "tokenizer doesn't have a padding token".
if tokenizer.pad_token is None:
tokenizer.pad_token = tokenizer.eos_token
# A local .json/.jsonl file is loaded via the "json" dataset builder;
# presumably other paths fall through to a hub dataset load below — TODO confirm (truncated here).
if dataset_path.endswith(".json") or dataset_path.endswith(".jsonl"):
data = load_dataset("json", data_files=dataset_path)