[NPU] update convert script based on latest usage (#12617)

This commit is contained in:
Ruonan Wang 2024-12-25 19:23:04 -08:00 committed by GitHub
parent ef585d3360
commit d841e1dc0d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -60,7 +60,6 @@ if __name__ == "__main__":
t0 = time.perf_counter() t0 = time.perf_counter()
model = AutoModelForCausalLM.from_pretrained(model_path, model = AutoModelForCausalLM.from_pretrained(model_path,
optimize_model=True, optimize_model=True,
pipeline=True,
load_in_low_bit=args.low_bit, load_in_low_bit=args.low_bit,
max_context_len=args.max_context_len, max_context_len=args.max_context_len,
max_prompt_len=args.max_prompt_len, max_prompt_len=args.max_prompt_len,
@ -69,7 +68,6 @@ if __name__ == "__main__":
attn_implementation="eager", attn_implementation="eager",
transpose_value_cache=not args.disable_transpose_value_cache, transpose_value_cache=not args.disable_transpose_value_cache,
trust_remote_code=True, trust_remote_code=True,
convert_model=True,
save_directory=save_dir) save_directory=save_dir)
t1 = time.perf_counter() t1 = time.perf_counter()