update example to reduce peak memory usage (#12274)

This commit is contained in:
Ruonan Wang 2024-10-25 17:09:26 +08:00 committed by GitHub
parent e713296090
commit 854398f6e0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -60,6 +60,7 @@ if __name__ == "__main__":
optimize_model=True,
pipeline=True,
max_output_len=args.max_output_len,
torch_dtype=torch.float16,
attn_implementation="eager")
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)