From 7952bbc9192f7e5044e9c357d98023ae5e99e498 Mon Sep 17 00:00:00 2001 From: Xin Qiu Date: Fri, 26 Jan 2024 15:48:48 +0800 Subject: [PATCH] add conf batch_size to run_model (#10010) --- python/llm/dev/benchmark/all-in-one/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/llm/dev/benchmark/all-in-one/run.py b/python/llm/dev/benchmark/all-in-one/run.py index bc662f4a..47391489 100644 --- a/python/llm/dev/benchmark/all-in-one/run.py +++ b/python/llm/dev/benchmark/all-in-one/run.py @@ -941,7 +941,7 @@ if __name__ == '__main__': if model_id_input in excludes: in_out_pairs.remove(in_out) run_model(model, api, in_out_pairs, conf['local_model_hub'], conf['warm_up'], conf['num_trials'], conf['num_beams'], - conf['low_bit'], conf['cpu_embedding']) + conf['low_bit'], conf['cpu_embedding'], conf['batch_size']) df = pd.DataFrame(results, columns=['model', '1st token avg latency (ms)', '2+ avg latency (ms/token)', 'encoder time (ms)', 'input/output tokens', 'actual input/output tokens', 'num_beams', 'low_bit', 'cpu_embedding', 'peak mem (GB)'])