LLM: add batch_size to the csv and html (#10080)
* LLM: add batch_size to the csv and html
* small fix
parent 136f042f84
commit c2e562d037
1 changed file with 4 additions and 3 deletions
@@ -96,6 +96,7 @@ def run_model(repo_id, test_api, in_out_pairs, local_model_hub=None, warm_up=1,
                         round(np.mean(result[in_out_pair], axis=0)[1]*1000.0, 2),
                         round(np.mean(result[in_out_pair], axis=0)[2]*1000.0, 2),
                         in_out_pair,
+                        batch_size,
                         f'{int(np.mean(result[in_out_pair], axis=0)[3])}' +
                         f'-{int(np.mean(result[in_out_pair], axis=0)[4])}',
                         num_beams,
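The new `batch_size` field slots into the per-run results row between `in_out_pair` and the actual-token string, so every downstream consumer of `results` (the CSV header and the DataFrame columns changed below) has to add its column at the same position. A minimal sketch of how such a row is assembled; the sample `result` data, `repo_id`, and the surrounding variables are fabricated, only the field order follows the diff:

import numpy as np

# Fabricated trial data; the columns stand in for the averaged metrics the
# script collects (latencies in seconds, then actual token counts).
result = {'32-32': [[0.9, 0.031, 0.012, 32, 32]]}
repo_id, in_out_pair, batch_size, num_beams = 'demo/model', '32-32', 1, 1

row = [repo_id,
       round(np.mean(result[in_out_pair], axis=0)[1]*1000.0, 2),  # averaged latency, s -> ms
       round(np.mean(result[in_out_pair], axis=0)[2]*1000.0, 2),  # averaged latency, s -> ms
       in_out_pair,
       batch_size,                                                # the field this commit adds
       f'{int(np.mean(result[in_out_pair], axis=0)[3])}' +        # actual input tokens ...
       f'-{int(np.mean(result[in_out_pair], axis=0)[4])}',        # ... '-' actual output tokens
       num_beams]
print(row)  # ['demo/model', 31.0, 12.0, '32-32', 1, '32-32', 1]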
@@ -445,8 +446,8 @@ def run_transformer_int4_gpu(repo_id,
                 csv_writer = csv.writer(file)
                 file.seek(0, os.SEEK_END)
                 if file.tell() == 0:
-                    csv_writer.writerow(["","model","1st token avg latency (ms)","2+ avg latency (ms/token)","encoder time (ms)","input/output tokens","actual input/output tokens","num_beams","low_bit","cpu_embedding","peak mem (GB)"])
-                csv_writer.writerow(['', repo_id, first_token_latency, rest_token_latency, encoder_time, input_output_tokens, actual_input_output_tokens, num_beams, low_bit, '', peak_mem])
+                    csv_writer.writerow(["","model","1st token avg latency (ms)","2+ avg latency (ms/token)","encoder time (ms)","input/output tokens", "batch_size", "actual input/output tokens","num_beams","low_bit","cpu_embedding","peak mem (GB)"])
+                csv_writer.writerow(['', repo_id, first_token_latency, rest_token_latency, encoder_time, input_output_tokens, batch_size, actual_input_output_tokens, num_beams, low_bit, '', peak_mem])

             model.to('cpu')
             torch.xpu.synchronize()
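This writer appends to the CSV as runs complete, emitting the header only when the file is still empty, so new runs pick up the extra `batch_size` column without disturbing the append pattern. A standalone sketch of that write-header-once idiom; the file name and row values are hypothetical:

import csv
import os

csv_name = 'gpu-results.csv'          # hypothetical output file
batch_size = 1                        # hypothetical measured values below
row = ['', 'demo/model', 31.0, 12.0, 0.0, '32-32', batch_size,
       '32-32', 1, 'sym_int4', '', 4.2]

with open(csv_name, mode='a', newline='') as file:
    csv_writer = csv.writer(file)
    file.seek(0, os.SEEK_END)          # position at the end of the file
    if file.tell() == 0:               # empty file -> header not written yet
        csv_writer.writerow(['', 'model', '1st token avg latency (ms)',
                             '2+ avg latency (ms/token)', 'encoder time (ms)',
                             'input/output tokens', 'batch_size',
                             'actual input/output tokens', 'num_beams',
                             'low_bit', 'cpu_embedding', 'peak mem (GB)'])
    csv_writer.writerow(row)           # 12 values, matching the 12 headers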
@@ -960,7 +961,7 @@ if __name__ == '__main__':
             run_model(model, api, in_out_pairs, conf['local_model_hub'], conf['warm_up'], conf['num_trials'], conf['num_beams'],
                       conf['low_bit'], conf['cpu_embedding'], conf['batch_size'])
         df = pd.DataFrame(results, columns=['model', '1st token avg latency (ms)', '2+ avg latency (ms/token)', 'encoder time (ms)',
-                                            'input/output tokens', 'actual input/output tokens', 'num_beams', 'low_bit', 'cpu_embedding',
+                                            'input/output tokens', 'batch_size', 'actual input/output tokens', 'num_beams', 'low_bit', 'cpu_embedding',
                                             'peak mem (GB)'])
         df.to_csv(csv_name)
         results = []
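At the end of each API's runs the accumulated `results` rows become a DataFrame, so this `columns` list must stay in lockstep with the row layout built in `run_model` above, with `batch_size` in the same slot. A small sketch with one made-up row in the post-commit field order:

import pandas as pd

# One fabricated row; only the column order comes from the diff.
results = [['demo/model', 31.0, 12.0, 0.0, '32-32', 1, '32-32', 1,
            'sym_int4', False, 4.2]]

df = pd.DataFrame(results, columns=['model', '1st token avg latency (ms)',
                                    '2+ avg latency (ms/token)', 'encoder time (ms)',
                                    'input/output tokens', 'batch_size',
                                    'actual input/output tokens', 'num_beams',
                                    'low_bit', 'cpu_embedding', 'peak mem (GB)'])
df.to_csv('demo-results.csv')   # the index becomes the leading unnamed column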