diff --git a/python/llm/dev/benchmark/all-in-one/run.py b/python/llm/dev/benchmark/all-in-one/run.py index 56d45796..138adc48 100644 --- a/python/llm/dev/benchmark/all-in-one/run.py +++ b/python/llm/dev/benchmark/all-in-one/run.py @@ -127,6 +127,8 @@ def run_transformer_int4(repo_id, # slice the input_ids to ensure the prompt length is required length. input_ids = tokenizer.encode(input_str, return_tensors="pt") input_ids = input_ids[:, :in_len] + true_str = tokenizer.batch_decode(input_ids)[0] + input_ids = tokenizer.encode(true_str, return_tensors="pt") result[in_out] = [] for i in range(num_trials + warm_up): st = time.perf_counter()