LLM: add peak diff (#9789)

* add peak diff

* small fix

* revert yml file
WeiguangHan 2024-01-03 18:18:19 +08:00 committed by GitHub
parent f4eb5da42d
commit 9a14465560


@@ -33,6 +33,19 @@ def highlight_vals(val, max=3.0):
def is_diffs_within_normal_range(diff1, diff2, threshold=5.0):
    return not any(diff < (-threshold) for diff in diff1 + diff2 if isinstance(diff, float))

+def add_to_dict(dict, key, value):
+    if key not in dict:
+        dict[key] = []
+    dict[key].append(value)
+
+def best_in_dict(dict, key, value):
+    if key in dict:
+        best_value=min(dict[key])
+        if best_value < value:
+            return best_value
+        return value
+    return value

def main():
    parser = argparse.ArgumentParser(description="convert .csv file to .html file")
    parser.add_argument("-f", "--folder_path", type=str, dest="folder_path",
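For reference, a minimal standalone sketch of how the two new helpers behave, assuming add_to_dict and best_in_dict are defined as above; the key name and latency values are invented. add_to_dict accumulates every recorded measurement under a key, and best_in_dict returns the smallest recorded value only when it beats the value from the latest run.

# Minimal illustration of the new helpers; the key and all numbers are invented.
history = {}

# Accumulate two historical runs for the same model/input key.
add_to_dict(history, 'llama2-7b-1024-128-1st', 210.5)
add_to_dict(history, 'llama2-7b-1024-128-1st', 198.3)

# Latest run is slower than the best recorded value -> the best (198.3) is returned.
print(best_in_dict(history, 'llama2-7b-1024-128-1st', 205.0))   # 198.3

# Latest run is faster than anything recorded -> the latest value itself is returned.
print(best_in_dict(history, 'llama2-7b-1024-128-1st', 190.0))   # 190.0

# Unknown key -> the latest value is returned unchanged.
print(best_in_dict(history, 'llama2-7b-1024-128-2nd', 33.1))    # 33.1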
@@ -68,9 +81,25 @@ def main():
    last2=['']*len(latest_csv.index)
    diff2=['']*len(latest_csv.index)

+    best_last1=['']*len(latest_csv.index)
+    best_diff1=['']*len(latest_csv.index)
+    best_last2=['']*len(latest_csv.index)
+    best_diff2=['']*len(latest_csv.index)
+
    latency_1st_token='1st token avg latency (ms)'
    latency_2_avg='2+ avg latency (ms/token)'

+    csv_dict = {}
+    for csv_file in csv_files:
+        current_csv = pd.read_csv(csv_file, index_col=0)
+        for current_csv_ind,current_csv_row in current_csv.iterrows():
+            current_csv_model=current_csv_row['model'].strip()
+            current_csv_input_output_pairs=current_csv_row['input/output tokens'].strip()
+            current_csv_model_input_1st=current_csv_model+'-'+current_csv_input_output_pairs+'-'+'1st'
+            current_csv_model_input_2nd=current_csv_model+'-'+current_csv_input_output_pairs+'-'+'2nd'
+            add_to_dict(csv_dict, current_csv_model_input_1st, current_csv_row[latency_1st_token])
+            add_to_dict(csv_dict, current_csv_model_input_2nd, current_csv_row[latency_2_avg])
+
    for latest_csv_ind,latest_csv_row in latest_csv.iterrows():
        latest_csv_model=latest_csv_row['model'].strip()
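To illustrate the accumulation step, here is a sketch with a toy DataFrame standing in for one of the historical CSV files read via pd.read_csv; the model names, token pairs, and latencies are invented, and add_to_dict is assumed to be the helper added above. Each model/token-pair gets two history lists, one for first-token latency and one for rest-token latency.

import pandas as pd

# Toy stand-in for one historical benchmark CSV; all values are invented.
current_csv = pd.DataFrame({
    'model': ['llama2-7b', 'chatglm3-6b'],
    'input/output tokens': ['1024-128', '1024-128'],
    '1st token avg latency (ms)': [201.7, 155.2],
    '2+ avg latency (ms/token)': [31.4, 27.9],
})

csv_dict = {}
for _, row in current_csv.iterrows():
    model = row['model'].strip()
    pair = row['input/output tokens'].strip()
    # One history list per model/token-pair, split into 1st-token and rest-token latency.
    add_to_dict(csv_dict, model + '-' + pair + '-1st', row['1st token avg latency (ms)'])
    add_to_dict(csv_dict, model + '-' + pair + '-2nd', row['2+ avg latency (ms/token)'])

# csv_dict now maps e.g. 'llama2-7b-1024-128-1st' -> [201.7]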
@@ -78,6 +107,17 @@ def main():
        latest_1st_token_latency=latest_csv_row[latency_1st_token]
        latest_2_avg_latency=latest_csv_row[latency_2_avg]

+        key1=latest_csv_model+'-'+latest_csv_input_output_pairs+'-'+'1st'
+        key2=latest_csv_model+'-'+latest_csv_input_output_pairs+'-'+'2nd'
+        best_last1_value=best_in_dict(csv_dict, key1, latest_1st_token_latency)
+        best_last2_value=best_in_dict(csv_dict, key2, latest_2_avg_latency)
+        best_last1[latest_csv_ind]=best_last1_value
+        best_diff1[latest_csv_ind]=round((best_last1_value-latest_1st_token_latency)*100/best_last1_value,2)
+        best_last2[latest_csv_ind]=best_last2_value
+        best_diff2[latest_csv_ind]=round((best_last2_value-latest_2_avg_latency)*100/best_last2_value,2)
+
        in_previous_flag=False
        for previous_csv_ind,previous_csv_row in previous_csv.iterrows():
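The best diff columns are computed as (best - latest) * 100 / best, so a negative value means the latest run is slower than the best recorded run; this is consistent with is_diffs_within_normal_range above, which flags large negative diffs. A small worked example with invented numbers:

# Invented numbers: the best recorded 1st-token latency is 198.3 ms,
# the latest run measured 205.0 ms.
best_last1_value = 198.3
latest_1st_token_latency = 205.0

best_diff1 = round((best_last1_value - latest_1st_token_latency) * 100 / best_last1_value, 2)
print(best_diff1)   # -3.38 -> the latest run is 3.38% slower than the best run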
@@ -106,11 +146,17 @@ def main():
    latest_csv.insert(loc=5,column='last2',value=last2)
    latest_csv.insert(loc=6,column='diff2(%)',value=diff2)
-    diffs_within_normal_range = is_diffs_within_normal_range(diff1, diff2, highlight_threshold)
+    latest_csv.insert(loc=7,column='best 1',value=best_last1)
+    latest_csv.insert(loc=8,column='best diff1(%)',value=best_diff1)
+    latest_csv.insert(loc=9,column='best 2',value=best_last2)
+    latest_csv.insert(loc=10,column='best diff2(%)',value=best_diff2)

-    subset=['diff1(%)','diff2(%)']
+    diffs_within_normal_range = is_diffs_within_normal_range(diff1, diff2, threshold=highlight_threshold)
+    subset=['diff1(%)','diff2(%)','best diff1(%)','best diff2(%)']
    columns={'1st token avg latency (ms)': '{:.2f}', '2+ avg latency (ms/token)': '{:.2f}', 'last1': '{:.2f}', 'diff1(%)': '{:.2f}',
-             'last2': '{:.2f}', 'diff2(%)': '{:.2f}', 'encoder time (ms)': '{:.2f}', 'peak mem (GB)': '{:.2f}'}
+             'last2': '{:.2f}', 'diff2(%)': '{:.2f}', 'encoder time (ms)': '{:.2f}', 'peak mem (GB)': '{:.2f}',
+             'best 1': '{:.2f}', 'best diff1(%)': '{:.2f}', 'best 2': '{:.2f}', 'best diff2(%)': '{:.2f}'}

    with open(daily_html, 'w') as f:
        f.write(latest_csv.style.format(columns).applymap(lambda val: highlight_vals(val, max=highlight_threshold), subset)
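Finally, the four new columns get the same two-decimal formatting as the existing ones, and the two new percentage columns join the highlight subset. Below is a self-contained sketch of the pandas Styler pattern used here, with a toy DataFrame and a stand-in highlight rule, since the body of highlight_vals is not shown in this diff; writing the result out via Styler.to_html is likewise only an assumption about how the HTML ends up on disk.

import pandas as pd

# Toy frame with only the two percentage columns; all values are invented.
df = pd.DataFrame({'diff1(%)': [1.20, -6.51], 'best diff1(%)': [0.00, -7.13]})

def highlight_stand_in(val, max=3.0):
    # Stand-in for highlight_vals: mark regressions beyond the threshold in red.
    if isinstance(val, float) and val < -max:
        return 'background-color: red'
    return ''

styled = (df.style
            .format({'diff1(%)': '{:.2f}', 'best diff1(%)': '{:.2f}'})
            .applymap(lambda v: highlight_stand_in(v, max=5.0),
                      subset=['diff1(%)', 'best diff1(%)']))

with open('daily_report.html', 'w') as f:
    f.write(styled.to_html())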