From 9a144655603359e80a5efee6bbedd739f7b9e841 Mon Sep 17 00:00:00 2001
From: WeiguangHan <weiguang.han@intel.com>
Date: Wed, 3 Jan 2024 18:18:19 +0800
Subject: [PATCH] LLM: add peak diff (#9789)

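* add peak diff: report the best (lowest) recorded latency per model/input pair and the latest run's diff against it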

* small fix

* revert yml file
---
 python/llm/test/benchmark/csv_to_html.py | 52 ++++++++++++++++++++++--
 1 file changed, 49 insertions(+), 3 deletions(-)

diff --git a/python/llm/test/benchmark/csv_to_html.py b/python/llm/test/benchmark/csv_to_html.py
index b38ffebc..75b48884 100644
--- a/python/llm/test/benchmark/csv_to_html.py
+++ b/python/llm/test/benchmark/csv_to_html.py
@@ -33,6 +33,19 @@ def highlight_vals(val, max=3.0):
 def is_diffs_within_normal_range(diff1, diff2, threshold=5.0):
     return not any(diff < (-threshold) for diff in diff1 + diff2 if isinstance(diff, float))
 
+def add_to_dict(dict, key, value):
+    """Append value to the list stored under key, creating the list on first use."""
+    bucket = dict.setdefault(key, [])
+    bucket.append(value)
+
+def best_in_dict(dict, key, value):
+    """Return the smallest value recorded under key if it beats value, otherwise value."""
+    # NaN != NaN, so `v == v` drops NaN entries that would make min() order-dependent.
+    recorded = [v for v in dict.get(key, ()) if v == v]
+    best_value = min(recorded, default=value)
+    # Strict comparison: on a tie (or when value itself is NaN) the caller's value is returned.
+    return best_value if best_value < value else value
+
 def main():
     parser = argparse.ArgumentParser(description="convert .csv file to .html file")
     parser.add_argument("-f", "--folder_path", type=str, dest="folder_path",
@@ -68,9 +81,25 @@ def main():
         last2=['']*len(latest_csv.index)
         diff2=['']*len(latest_csv.index)
 
+        best_last1=['']*len(latest_csv.index)
+        best_diff1=['']*len(latest_csv.index)
+        best_last2=['']*len(latest_csv.index)
+        best_diff2=['']*len(latest_csv.index)
+
         latency_1st_token='1st token avg latency (ms)'
         latency_2_avg='2+ avg latency (ms/token)'
 
+        csv_dict = {}
+        for csv_file in csv_files:
+            current_csv = pd.read_csv(csv_file, index_col=0)
+            for current_csv_ind,current_csv_row in current_csv.iterrows():
+                current_csv_model=current_csv_row['model'].strip()
+                current_csv_input_output_pairs=current_csv_row['input/output tokens'].strip()
+                current_csv_model_input_1st=current_csv_model+'-'+current_csv_input_output_pairs+'-'+'1st'
+                current_csv_model_input_2nd=current_csv_model+'-'+current_csv_input_output_pairs+'-'+'2nd'
+                add_to_dict(csv_dict, current_csv_model_input_1st, current_csv_row[latency_1st_token])
+                add_to_dict(csv_dict, current_csv_model_input_2nd, current_csv_row[latency_2_avg])
+
         for latest_csv_ind,latest_csv_row in latest_csv.iterrows():
 
             latest_csv_model=latest_csv_row['model'].strip()
@@ -78,6 +107,17 @@ def main():
             latest_1st_token_latency=latest_csv_row[latency_1st_token]
             latest_2_avg_latency=latest_csv_row[latency_2_avg]
 
+            key1=latest_csv_model+'-'+latest_csv_input_output_pairs+'-'+'1st'
+            key2=latest_csv_model+'-'+latest_csv_input_output_pairs+'-'+'2nd'
+
+            best_last1_value=best_in_dict(csv_dict, key1, latest_1st_token_latency)
+            best_last2_value=best_in_dict(csv_dict, key2, latest_2_avg_latency)
+
+            best_last1[latest_csv_ind]=best_last1_value
+            best_diff1[latest_csv_ind]=round((best_last1_value-latest_1st_token_latency)*100/best_last1_value,2)
+            best_last2[latest_csv_ind]=best_last2_value
+            best_diff2[latest_csv_ind]=round((best_last2_value-latest_2_avg_latency)*100/best_last2_value,2)
+
             in_previous_flag=False
 
             for previous_csv_ind,previous_csv_row in previous_csv.iterrows():
@@ -106,11 +146,17 @@ def main():
         latest_csv.insert(loc=5,column='last2',value=last2)
         latest_csv.insert(loc=6,column='diff2(%)',value=diff2)
 
-        diffs_within_normal_range = is_diffs_within_normal_range(diff1, diff2, highlight_threshold)
+        latest_csv.insert(loc=7,column='best 1',value=best_last1)
+        latest_csv.insert(loc=8,column='best diff1(%)',value=best_diff1)
+        latest_csv.insert(loc=9,column='best 2',value=best_last2)
+        latest_csv.insert(loc=10,column='best diff2(%)',value=best_diff2)
 
-        subset=['diff1(%)','diff2(%)']
+        diffs_within_normal_range = is_diffs_within_normal_range(diff1, diff2, threshold=highlight_threshold)
+
+        subset=['diff1(%)','diff2(%)','best diff1(%)','best diff2(%)']
         columns={'1st token avg latency (ms)': '{:.2f}', '2+ avg latency (ms/token)': '{:.2f}', 'last1': '{:.2f}', 'diff1(%)': '{:.2f}',
-                'last2': '{:.2f}', 'diff2(%)': '{:.2f}', 'encoder time (ms)': '{:.2f}', 'peak mem (GB)': '{:.2f}'}
+                'last2': '{:.2f}', 'diff2(%)': '{:.2f}', 'encoder time (ms)': '{:.2f}', 'peak mem (GB)': '{:.2f}',
+                'best 1': '{:.2f}', 'best diff1(%)': '{:.2f}', 'best 2': '{:.2f}', 'best diff2(%)': '{:.2f}'}
 
         with open(daily_html, 'w') as f:
             f.write(latest_csv.style.format(columns).applymap(lambda val: highlight_vals(val, max=highlight_threshold), subset)