From 34bb80418986674f8d486e2975867191d91dbf2d Mon Sep 17 00:00:00 2001
From: WeiguangHan
Date: Thu, 21 Dec 2023 09:54:33 +0800
Subject: [PATCH] LLM: check csv and its corresponding yaml file (#9702)

* LLM: check csv and its corresponding yaml file
* run PR arc perf test
* modify the name of some variables
* execute the check results script in right place
* use cp to replace mv command
* resolve some comments
* resolve more comments
* revert the llm_performance_test.yaml file
---
 .github/workflows/llm_performance_tests.yml | 10 +++--
 python/llm/test/benchmark/check_results.py  | 50 ++++++++++++++-------
 python/llm/test/benchmark/concat_csv.py     |  2 -
 3 files changed, 41 insertions(+), 21 deletions(-)

diff --git a/.github/workflows/llm_performance_tests.yml b/.github/workflows/llm_performance_tests.yml
index f38ac1d8..6262a952 100644
--- a/.github/workflows/llm_performance_tests.yml
+++ b/.github/workflows/llm_performance_tests.yml
@@ -79,7 +79,7 @@ jobs:
           source /home/arda/intel/oneapi/setvars.sh
           export USE_XETLA=OFF
           export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
-          mv python/llm/test/benchmark/arc-perf-test.yaml python/llm/dev/benchmark/all-in-one/config.yaml
+          cp python/llm/test/benchmark/arc-perf-test.yaml python/llm/dev/benchmark/all-in-one/config.yaml
           cd python/llm/dev/benchmark/all-in-one
           # hide time info
           sed -i 's/str(end - st)/"xxxxxx"/g' run.py
@@ -88,17 +88,19 @@
           python run.py
           # upgrade transformers for model Mistral-7B-v0.1
           python -m pip install transformers==4.34.0
-          mv ../../../test/benchmark/arc-perf-transformers-434.yaml ./config.yaml
+          cp ../../../test/benchmark/arc-perf-transformers-434.yaml ./config.yaml
           # change csv name
           sed -i 's/test1/test2/g' run.py
           python run.py
           python ../../../test/benchmark/concat_csv.py
-          cp ./*.csv $CSV_SAVE_PATH
           cd ../../../test/benchmark
           python -m pip install pandas==1.5.3
           python csv_to_html.py -f $CSV_SAVE_PATH
           cd ../../dev/benchmark/all-in-one/
-          python ../../../test/benchmark/check_results.py -n 45
+          python ../../../test/benchmark/check_results.py -c test1 -y ../../../test/benchmark/arc-perf-test.yaml
+          python ../../../test/benchmark/check_results.py -c test2 -y ../../../test/benchmark/arc-perf-transformers-434.yaml
+          find . -name "*test*.csv" -delete
+          cp ./*.csv $CSV_SAVE_PATH
           if [ ${{ github.event.schedule}} ]; then
            curl -T ./*.csv ${LLM_FTP_URL}/llm/nightly_perf/gpu/
           fi
diff --git a/python/llm/test/benchmark/check_results.py b/python/llm/test/benchmark/check_results.py
index 1b5f5255..528c41df 100644
--- a/python/llm/test/benchmark/check_results.py
+++ b/python/llm/test/benchmark/check_results.py
@@ -20,26 +20,46 @@ import os
 import sys
 import argparse
 import pandas as pd
+from omegaconf import OmegaConf
 
 def main():
     parser = argparse.ArgumentParser(description="check if the number of lines in html meets expectation")
-    parser.add_argument("-n", "--expected_lines", type=int, dest="expected_lines",
-                        help="the number of expected html lines", default=0)
-    parser.add_argument("-f", "--folder_path", type=str, dest="folder_path",
-                        help="The directory which stores the .csv files", default="./")
+    parser.add_argument("-c", "--csv_file", type=str, dest="csv_name", help="name of csv")
+    parser.add_argument("-y", "--yaml_file", type=str, dest="yaml_name", help="name of yaml")
+    parser.add_argument("-n", "--expected_lines", type=int, dest="expected_lines", help="the number of expected csv lines")
     args = parser.parse_args()
 
-    csv_files = []
-    for file_name in os.listdir(args.folder_path):
-        file_path = os.path.join(args.folder_path, file_name)
-        if os.path.isfile(file_path) and file_name.endswith(".csv"):
-            csv_files.append(file_path)
-    csv_files.sort()
-
-    number_of_expected_lines=args.expected_lines
-    num_rows = len(pd.read_csv(csv_files[0], index_col=0))
-    if num_rows!=number_of_expected_lines:
-        raise ValueError("The number of arc perf test results does not match the expected value. Please check carefully.")
+    csv_file = [file for file in os.listdir() if file.endswith('.csv') and args.csv_name in file][0]
+    csv_dataframe = pd.read_csv(csv_file, index_col=0)
+    actual_test_num = len(csv_dataframe)
+    actual_test_cases = []
+    for index, row in csv_dataframe.iterrows():
+        actual_test_cases.append(row['model'] + ":" + row['input/output tokens'].split('-')[0])
+
+    if args.yaml_name:
+        yaml_name = args.yaml_name
+        conf = OmegaConf.load(yaml_name)
+        all_test_cases = []
+        for model in conf.repo_id:
+            for in_out in conf['in_out_pairs']:
+                model_id_input = model + ':' + in_out.split('-')[0]
+                all_test_cases.append(model_id_input)
+        exclude_test_cases = []
+        if 'exclude' in conf and conf['exclude'] is not None:
+            exclude_test_cases = conf['exclude']
+        expected_test_num = len(all_test_cases) - len(exclude_test_cases)
+        if actual_test_num != expected_test_num:
+            print("---------------The test cases should be tested!------------")
+            for test_case in all_test_cases:
+                if test_case not in actual_test_cases and test_case not in exclude_test_cases:
+                    print(test_case)
+            raise ValueError("The above tests failed. Please check the errors in the log.")
+    elif args.expected_lines:
+        expected_test_num = args.expected_lines
+        if actual_test_num != expected_test_num:
+            raise ValueError("Missing some expected test cases! Please check the yaml file and the given expected_lines manually.")
+    else:
+        raise ValueError("You should provide the value of either yaml_name or expected_lines!")
 
 if __name__ == "__main__":
     sys.exit(main())
\ No newline at end of file
diff --git a/python/llm/test/benchmark/concat_csv.py b/python/llm/test/benchmark/concat_csv.py
index d230c21b..cb2280ce 100644
--- a/python/llm/test/benchmark/concat_csv.py
+++ b/python/llm/test/benchmark/concat_csv.py
@@ -41,8 +41,6 @@ def main():
     merged_csv=csv_files[0].replace("_test1", "")
     merged_df.to_csv(merged_csv)
 
-    os.remove(csv_files[0])
-    os.remove(csv_files[1])
 
 if __name__ == "__main__":
     sys.exit(main())
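
Note on the new check: the rewritten check_results.py derives the expected test set from the
same yaml config that drives the all-in-one benchmark run. For every combination of
conf.repo_id and conf['in_out_pairs'] it builds a "model:input_tokens" key, drops any keys
listed under the optional 'exclude' section, and compares the remaining count against the
rows of the csv whose name contains the -c argument (test1 or test2). A minimal sketch of the
config shape the script assumes is shown below; the model ids, in/out pairs, and exclude
entries are illustrative placeholders, not the contents of the real arc-perf-test.yaml:

    repo_id:
      - 'meta-llama/Llama-2-7b-chat-hf'   # hypothetical entry
      - 'THUDM/chatglm2-6b'               # hypothetical entry
    in_out_pairs:
      - '32-32'
      - '1024-128'
    exclude:                              # optional; may be absent or empty
      - 'THUDM/chatglm2-6b:1024'

With this sketch the script would expect 2 models x 2 input lengths minus 1 excluded case,
i.e. 3 csv rows, for example a row whose 'model' column is 'meta-llama/Llama-2-7b-chat-hf'
and whose 'input/output tokens' column starts with '32'.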