def is_diffs_within_normal_range(diff1, diff2, threshold=5.0):
    """Return True when no float entry of ``diff1 + diff2`` is below ``-threshold``.

    Args:
        diff1: List of diff values; non-float entries are skipped.
        diff2: List of diff values; non-float entries are skipped.
        threshold: Positive bound; a float diff smaller than ``-threshold``
            makes the result False.

    Returns:
        bool: False if any float diff is below ``-threshold``, True otherwise
        (including when both lists are empty).
    """
    # Only floats are compared, so placeholder entries such as '' never
    # reach the `<` comparison.
    return not any(
        diff < (-threshold)
        for diff in diff1 + diff2
        if isinstance(diff, float)
    )


def add_to_dict(dict, key, value):
    """Append ``value`` to the list stored under ``key``, creating it on first use.

    Args:
        dict: Mapping from key to a list of recorded values; mutated in place.
            (The name shadows the builtin; it is kept unchanged so existing
            keyword callers keep working.)
        key: Key whose list receives the value.
        value: Value to append.
    """
    dict.setdefault(key, []).append(value)


def best_in_dict(dict, key, value):
    """Return the smaller of ``value`` and the minimum recorded under ``key``.

    Args:
        dict: Mapping from key to a list of recorded values. (Name kept for
            backward compatibility although it shadows the builtin.)
        key: Key to look up.
        value: Candidate value to compare against the recorded ones.

    Returns:
        The smallest usable recorded value if it is strictly less than
        ``value``; otherwise ``value`` itself. ``value`` is also returned when
        ``key`` is absent or has no usable recorded values.
    """
    # NaN compares unequal to itself, so `v == v` filters NaN entries (e.g.
    # missing measurements) without extra imports. Left in place, a NaN makes
    # min() order-dependent and can hide every real recorded value.
    recorded = [v for v in dict.get(key, ()) if v == v]
    if not recorded:
        # Unknown key, empty history, or history made only of NaN.
        return value
    best_value = min(recorded)
    return best_value if best_value < value else value
current_csv_model_input_2nd=current_csv_model+'-'+current_csv_input_output_pairs+'-'+'2nd' + add_to_dict(csv_dict, current_csv_model_input_1st, current_csv_row[latency_1st_token]) + add_to_dict(csv_dict, current_csv_model_input_2nd, current_csv_row[latency_2_avg]) + for latest_csv_ind,latest_csv_row in latest_csv.iterrows(): latest_csv_model=latest_csv_row['model'].strip() @@ -78,6 +107,17 @@ def main(): latest_1st_token_latency=latest_csv_row[latency_1st_token] latest_2_avg_latency=latest_csv_row[latency_2_avg] + key1=latest_csv_model+'-'+latest_csv_input_output_pairs+'-'+'1st' + key2=latest_csv_model+'-'+latest_csv_input_output_pairs+'-'+'2nd' + + best_last1_value=best_in_dict(csv_dict, key1, latest_1st_token_latency) + best_last2_value=best_in_dict(csv_dict, key2, latest_2_avg_latency) + + best_last1[latest_csv_ind]=best_last1_value + best_diff1[latest_csv_ind]=round((best_last1_value-latest_1st_token_latency)*100/best_last1_value,2) + best_last2[latest_csv_ind]=best_last2_value + best_diff2[latest_csv_ind]=round((best_last2_value-latest_2_avg_latency)*100/best_last2_value,2) + in_previous_flag=False for previous_csv_ind,previous_csv_row in previous_csv.iterrows(): @@ -106,11 +146,17 @@ def main(): latest_csv.insert(loc=5,column='last2',value=last2) latest_csv.insert(loc=6,column='diff2(%)',value=diff2) - diffs_within_normal_range = is_diffs_within_normal_range(diff1, diff2, highlight_threshold) + latest_csv.insert(loc=7,column='best 1',value=best_last1) + latest_csv.insert(loc=8,column='best diff1(%)',value=best_diff1) + latest_csv.insert(loc=9,column='best 2',value=best_last2) + latest_csv.insert(loc=10,column='best diff2(%)',value=best_diff2) - subset=['diff1(%)','diff2(%)'] + diffs_within_normal_range = is_diffs_within_normal_range(diff1, diff2, threshold=highlight_threshold) + + subset=['diff1(%)','diff2(%)','best diff1(%)','best diff2(%)'] columns={'1st token avg latency (ms)': '{:.2f}', '2+ avg latency (ms/token)': '{:.2f}', 'last1': '{:.2f}', 'diff1(%)': 
'{:.2f}', - 'last2': '{:.2f}', 'diff2(%)': '{:.2f}', 'encoder time (ms)': '{:.2f}', 'peak mem (GB)': '{:.2f}'} + 'last2': '{:.2f}', 'diff2(%)': '{:.2f}', 'encoder time (ms)': '{:.2f}', 'peak mem (GB)': '{:.2f}', + 'best 1': '{:.2f}', 'best diff1(%)': '{:.2f}', 'best 2': '{:.2f}', 'best diff2(%)': '{:.2f}'} with open(daily_html, 'w') as f: f.write(latest_csv.style.format(columns).applymap(lambda val: highlight_vals(val, max=highlight_threshold), subset)