[NPU] Fix load-low-bit benchmark script (#12502)

This commit is contained in:
binbin Deng 2024-12-05 10:01:32 +08:00 committed by GitHub
parent 84f1c4ad57
commit f56a111aa2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -789,6 +789,7 @@ def run_transformer_int4_loadlowbit_npu_win(repo_id,
load_time = end - st
print(">> loading of model costs {}s".format(load_time))
if not hasattr(model, "model_ptr"):
model = BenchmarkWrapper(model)
result = {}
@@ -802,6 +803,7 @@ def run_transformer_int4_loadlowbit_npu_win(repo_id,
# slice the input_ids to ensure the prompt length is required length.
input_ids = tokenizer.encode(input_str, return_tensors="pt")
input_ids = input_ids[:, :in_len]
if repo_id not in MINICPM_IDS:
true_str = tokenizer.batch_decode(input_ids)[0]
input_list = [true_str] * batch_size
input_ids = tokenizer(input_list, return_tensors="pt").input_ids