Add padding_token=eos_token for GPU trl QLora example (#12398)
* Avoid the "tokenizer doesn't have a padding token" error.
This commit is contained in:
parent
d2cbcb060c
commit
7e50ff113c
1 changed file with 3 additions and 0 deletions
|
|
@ -45,6 +45,9 @@ if __name__ == "__main__":
|
||||||
model_path = args.repo_id_or_model_path
|
model_path = args.repo_id_or_model_path
|
||||||
dataset_path = args.dataset
|
dataset_path = args.dataset
|
||||||
tokenizer = LlamaTokenizer.from_pretrained(model_path, trust_remote_code=True)
|
tokenizer = LlamaTokenizer.from_pretrained(model_path, trust_remote_code=True)
|
||||||
|
# Avoid tokenizer doesn't have a padding token
|
||||||
|
if tokenizer.pad_token is None:
|
||||||
|
tokenizer.pad_token = tokenizer.eos_token
|
||||||
|
|
||||||
if dataset_path.endswith(".json") or dataset_path.endswith(".jsonl"):
|
if dataset_path.endswith(".json") or dataset_path.endswith(".jsonl"):
|
||||||
data = load_dataset("json", data_files=dataset_path)
|
data = load_dataset("json", data_files=dataset_path)
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue