From 7e50ff113cedbee39c06f1926142fc4b8747b2c7 Mon Sep 17 00:00:00 2001
From: Qiyuan Gong
Date: Thu, 14 Nov 2024 10:51:30 +0800
Subject: [PATCH] Add padding_token=eos_token for GPU trl QLora example (#12398)

* Avoid the error raised when the tokenizer doesn't have a padding token.
---
 .../GPU/LLM-Finetuning/QLoRA/trl-example/qlora_finetuning.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/python/llm/example/GPU/LLM-Finetuning/QLoRA/trl-example/qlora_finetuning.py b/python/llm/example/GPU/LLM-Finetuning/QLoRA/trl-example/qlora_finetuning.py
index 2e2551cd..4d1fb72c 100644
--- a/python/llm/example/GPU/LLM-Finetuning/QLoRA/trl-example/qlora_finetuning.py
+++ b/python/llm/example/GPU/LLM-Finetuning/QLoRA/trl-example/qlora_finetuning.py
@@ -45,6 +45,9 @@ if __name__ == "__main__":
     model_path = args.repo_id_or_model_path
     dataset_path = args.dataset
     tokenizer = LlamaTokenizer.from_pretrained(model_path, trust_remote_code=True)
+    # Avoid the error raised when the tokenizer doesn't have a padding token
+    if tokenizer.pad_token is None:
+        tokenizer.pad_token = tokenizer.eos_token
 
     if dataset_path.endswith(".json") or dataset_path.endswith(".jsonl"):
         data = load_dataset("json", data_files=dataset_path)