diff --git a/python/llm/test/benchmark/csv_to_html.py b/python/llm/test/benchmark/csv_to_html.py
index b38ffebc..75b48884 100644
--- a/python/llm/test/benchmark/csv_to_html.py
+++ b/python/llm/test/benchmark/csv_to_html.py
@@ -33,6 +33,19 @@ def highlight_vals(val, max=3.0):
def is_diffs_within_normal_range(diff1, diff2, threshold=5.0):
return not any(diff < (-threshold) for diff in diff1 + diff2 if isinstance(diff, float))
+def add_to_dict(data, key, value):
+    """Append value to the list stored under key, creating the list if needed."""
+    if key not in data:
+        data[key] = []
+    data[key].append(value)
+
+def best_in_dict(data, key, value):
+    """Return the smaller of value and the lowest value recorded under key."""
+    if key in data:
+        best_value = min(data[key])
+        return min(best_value, value)
+    return value
+
def main():
parser = argparse.ArgumentParser(description="convert .csv file to .html file")
parser.add_argument("-f", "--folder_path", type=str, dest="folder_path",
@@ -68,9 +81,26 @@ def main():
last2=['']*len(latest_csv.index)
diff2=['']*len(latest_csv.index)
+ best_last1=['']*len(latest_csv.index)
+ best_diff1=['']*len(latest_csv.index)
+ best_last2=['']*len(latest_csv.index)
+ best_diff2=['']*len(latest_csv.index)
+
latency_1st_token='1st token avg latency (ms)'
latency_2_avg='2+ avg latency (ms/token)'
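+    # Collect every recorded latency from all csv files, keyed by "<model>-<input/output tokens>-<1st|2nd>"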
+ csv_dict = {}
+ for csv_file in csv_files:
+ current_csv = pd.read_csv(csv_file, index_col=0)
+        for _, current_csv_row in current_csv.iterrows():
+ current_csv_model=current_csv_row['model'].strip()
+ current_csv_input_output_pairs=current_csv_row['input/output tokens'].strip()
+ current_csv_model_input_1st=current_csv_model+'-'+current_csv_input_output_pairs+'-'+'1st'
+ current_csv_model_input_2nd=current_csv_model+'-'+current_csv_input_output_pairs+'-'+'2nd'
+ add_to_dict(csv_dict, current_csv_model_input_1st, current_csv_row[latency_1st_token])
+ add_to_dict(csv_dict, current_csv_model_input_2nd, current_csv_row[latency_2_avg])
+
for latest_csv_ind,latest_csv_row in latest_csv.iterrows():
latest_csv_model=latest_csv_row['model'].strip()
@@ -78,6 +108,18 @@ def main():
latest_1st_token_latency=latest_csv_row[latency_1st_token]
latest_2_avg_latency=latest_csv_row[latency_2_avg]
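+        # Compare the latest run against the best (lowest) latency recorded for this model and input/output length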
+ key1=latest_csv_model+'-'+latest_csv_input_output_pairs+'-'+'1st'
+ key2=latest_csv_model+'-'+latest_csv_input_output_pairs+'-'+'2nd'
+
+ best_last1_value=best_in_dict(csv_dict, key1, latest_1st_token_latency)
+ best_last2_value=best_in_dict(csv_dict, key2, latest_2_avg_latency)
+
+ best_last1[latest_csv_ind]=best_last1_value
+ best_diff1[latest_csv_ind]=round((best_last1_value-latest_1st_token_latency)*100/best_last1_value,2)
+ best_last2[latest_csv_ind]=best_last2_value
+ best_diff2[latest_csv_ind]=round((best_last2_value-latest_2_avg_latency)*100/best_last2_value,2)
+
in_previous_flag=False
for previous_csv_ind,previous_csv_row in previous_csv.iterrows():
@@ -106,11 +148,18 @@ def main():
latest_csv.insert(loc=5,column='last2',value=last2)
latest_csv.insert(loc=6,column='diff2(%)',value=diff2)
- diffs_within_normal_range = is_diffs_within_normal_range(diff1, diff2, highlight_threshold)
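+    # Insert the best-of-history columns next to the last-run comparison columns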
+ latest_csv.insert(loc=7,column='best 1',value=best_last1)
+ latest_csv.insert(loc=8,column='best diff1(%)',value=best_diff1)
+ latest_csv.insert(loc=9,column='best 2',value=best_last2)
+ latest_csv.insert(loc=10,column='best diff2(%)',value=best_diff2)
- subset=['diff1(%)','diff2(%)']
+ diffs_within_normal_range = is_diffs_within_normal_range(diff1, diff2, threshold=highlight_threshold)
+
+ subset=['diff1(%)','diff2(%)','best diff1(%)','best diff2(%)']
columns={'1st token avg latency (ms)': '{:.2f}', '2+ avg latency (ms/token)': '{:.2f}', 'last1': '{:.2f}', 'diff1(%)': '{:.2f}',
- 'last2': '{:.2f}', 'diff2(%)': '{:.2f}', 'encoder time (ms)': '{:.2f}', 'peak mem (GB)': '{:.2f}'}
+ 'last2': '{:.2f}', 'diff2(%)': '{:.2f}', 'encoder time (ms)': '{:.2f}', 'peak mem (GB)': '{:.2f}',
+ 'best 1': '{:.2f}', 'best diff1(%)': '{:.2f}', 'best 2': '{:.2f}', 'best diff2(%)': '{:.2f}'}
with open(daily_html, 'w') as f:
f.write(latest_csv.style.format(columns).applymap(lambda val: highlight_vals(val, max=highlight_threshold), subset)