LLM: check csv and its corresponding yaml file (#9702)

* LLM: check csv and its corresponding yaml file

* run PR arc perf test

* rename some variables

* execute the check results script in the right place

* use cp instead of the mv command

* resolve some comments

* resolve more comments

* revert the llm_performance_test.yaml file
WeiguangHan authored 2023-12-21 09:54:33 +08:00, committed by GitHub
parent 4bda975a3e
commit 34bb804189
3 changed files with 41 additions and 21 deletions


@@ -79,7 +79,7 @@ jobs:
 source /home/arda/intel/oneapi/setvars.sh
 export USE_XETLA=OFF
 export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
-mv python/llm/test/benchmark/arc-perf-test.yaml python/llm/dev/benchmark/all-in-one/config.yaml
+cp python/llm/test/benchmark/arc-perf-test.yaml python/llm/dev/benchmark/all-in-one/config.yaml
 cd python/llm/dev/benchmark/all-in-one
 # hide time info
 sed -i 's/str(end - st)/"xxxxxx"/g' run.py
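
The mv-to-cp switch above is the point of the workflow change: the benchmark yaml has to stay under python/llm/test/benchmark so that the new check_results.py step can load it after the runs finish. A minimal Python rendering of the same idea, using only the paths visible in the hunk above:

    import shutil

    # Old behavior: moving the config destroys the source copy that the
    # later check step needs in order to compute the expected test cases.
    # shutil.move("python/llm/test/benchmark/arc-perf-test.yaml",
    #             "python/llm/dev/benchmark/all-in-one/config.yaml")

    # New behavior: copy instead, leaving the yaml in place for the check.
    shutil.copy("python/llm/test/benchmark/arc-perf-test.yaml",
                "python/llm/dev/benchmark/all-in-one/config.yaml")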
@@ -88,17 +88,19 @@ jobs:
 python run.py
 # upgrade transformers for model Mistral-7B-v0.1
 python -m pip install transformers==4.34.0
-mv ../../../test/benchmark/arc-perf-transformers-434.yaml ./config.yaml
+cp ../../../test/benchmark/arc-perf-transformers-434.yaml ./config.yaml
 # change csv name
 sed -i 's/test1/test2/g' run.py
 python run.py
 python ../../../test/benchmark/concat_csv.py
-cp ./*.csv $CSV_SAVE_PATH
 cd ../../../test/benchmark
 python -m pip install pandas==1.5.3
 python csv_to_html.py -f $CSV_SAVE_PATH
 cd ../../dev/benchmark/all-in-one/
-python ../../../test/benchmark/check_results.py -n 45
+python ../../../test/benchmark/check_results.py -c test1 -y ../../../test/benchmark/arc-perf-test.yaml
+python ../../../test/benchmark/check_results.py -c test2 -y ../../../test/benchmark/arc-perf-transformers-434.yaml
+find . -name "*test*.csv" -delete
+cp ./*.csv $CSV_SAVE_PATH
 if [ ${{ github.event.schedule}} ]; then
 curl -T ./*.csv ${LLM_FTP_URL}/llm/nightly_perf/gpu/
 fi
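
The two check_results.py invocations above pair each run's csv tag (-c) with the yaml that drove it (-y). The contents of arc-perf-test.yaml are not part of this diff; judging from the fields the script reads (repo_id, in_out_pairs, exclude), a config of roughly the following shape is assumed here, and the expected test count falls out of it:

    from omegaconf import OmegaConf

    # Hypothetical values; the real arc-perf-test.yaml is not shown in this commit.
    conf = OmegaConf.create({
        "repo_id": ["meta-llama/Llama-2-7b-chat-hf", "mistralai/Mistral-7B-v0.1"],
        "in_out_pairs": ["32-32", "1024-128"],
        "exclude": ["mistralai/Mistral-7B-v0.1:1024"],
    })

    # One expected case per (model, input-length) pair, minus the excludes,
    # mirroring the arithmetic in check_results.py below.
    all_cases = [m + ":" + p.split("-")[0]
                 for m in conf.repo_id for p in conf.in_out_pairs]
    expected_test_num = len(all_cases) - len(conf.exclude)  # 2 * 2 - 1 = 3
    print(expected_test_num)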

python/llm/test/benchmark/check_results.py

@@ -20,26 +20,46 @@ import os
 import sys
 import argparse
 import pandas as pd
+from omegaconf import OmegaConf

 def main():
     parser = argparse.ArgumentParser(description="check if the number of lines in html meets expectation")
-    parser.add_argument("-n", "--expected_lines", type=int, dest="expected_lines",
-                        help="the number of expected html lines", default=0)
-    parser.add_argument("-f", "--folder_path", type=str, dest="folder_path",
-                        help="The directory which stores the .csv files", default="./")
+    parser.add_argument("-c", "--csv_file", type=str, dest="csv_name", help="name of csv")
+    parser.add_argument("-y", "--yaml_file", type=str, dest="yaml_name", help="name of yaml")
+    parser.add_argument("-n", "--expected_lines", type=int, dest="expected_lines", help="the number of expected csv lines")
     args = parser.parse_args()
-    csv_files = []
-    for file_name in os.listdir(args.folder_path):
-        file_path = os.path.join(args.folder_path, file_name)
-        if os.path.isfile(file_path) and file_name.endswith(".csv"):
-            csv_files.append(file_path)
-    csv_files.sort()
-    number_of_expected_lines = args.expected_lines
-    num_rows = len(pd.read_csv(csv_files[0], index_col=0))
-    if num_rows != number_of_expected_lines:
-        raise ValueError("The number of arc perf test results does not match the expected value. Please check carefully.")
+    csv_file = [file for file in os.listdir() if file.endswith('.csv') and args.csv_name in file][0]
+    csv_dataframe = pd.read_csv(csv_file, index_col=0)
+    actual_test_num = len(csv_dataframe)
+    actual_test_cases = []
+    for index, row in csv_dataframe.iterrows():
+        actual_test_cases.append(row['model'] + ":" + row['input/output tokens'].split('-')[0])
+    if args.yaml_name:
+        yaml_name = args.yaml_name
+        conf = OmegaConf.load(yaml_name)
+        all_test_cases = []
+        for model in conf.repo_id:
+            for in_out in conf['in_out_pairs']:
+                model_id_input = model + ':' + in_out.split('-')[0]
+                all_test_cases.append(model_id_input)
+        exclude_test_cases = []
+        if 'exclude' in conf and conf['exclude'] is not None:
+            exclude_test_cases = conf['exclude']
+        expected_test_num = len(all_test_cases) - len(exclude_test_cases)
+        if actual_test_num != expected_test_num:
+            print("---------------The test cases below should have been tested!------------")
+            for test_case in all_test_cases:
+                if test_case not in actual_test_cases and test_case not in exclude_test_cases:
+                    print(test_case)
+            raise ValueError("The above tests failed. Please check the errors in the log.")
+    elif args.expected_lines:
+        expected_test_num = args.expected_lines
+        if actual_test_num != expected_test_num:
+            raise ValueError("Missing some expected test cases! Please check the yaml file and the given expected_lines manually.")
+    else:
+        raise ValueError("You should provide the value of either yaml_name or expected_lines!")

 if __name__ == "__main__":
     sys.exit(main())
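
In the yaml-driven branch above, every expected case that is neither present in the csv nor listed under exclude is printed before the script raises. A toy walkthrough of that comparison, with made-up model names (real entries look like "<repo_id>:<input tokens>"):

    # Made-up data for illustration only.
    actual_test_cases = ["llama2:32", "llama2:1024", "mistral:32"]
    all_test_cases = ["llama2:32", "llama2:1024", "mistral:32", "mistral:1024"]
    exclude_test_cases = []

    missing = [case for case in all_test_cases
               if case not in actual_test_cases and case not in exclude_test_cases]
    if missing:
        for case in missing:
            print(case)  # prints: mistral:1024
        raise ValueError("The above tests failed. Please check the errors in the log.")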

python/llm/test/benchmark/concat_csv.py

@@ -41,8 +41,6 @@ def main():
     merged_csv = csv_files[0].replace("_test1", "")
     merged_df.to_csv(merged_csv)
-    os.remove(csv_files[0])
-    os.remove(csv_files[1])

 if __name__ == "__main__":
     sys.exit(main())
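
The deleted os.remove() calls tie back to the cp change in the workflow: the per-run test1/test2 csvs must outlive the merge so check_results.py can still find them by name, and the workflow now cleans them up afterwards with find . -name "*test*.csv" -delete. A minimal sketch of the merge step as it reads from this hunk (file names are hypothetical; the full script is not shown):

    import pandas as pd

    csv_files = sorted(["arc-perf_test1.csv", "arc-perf_test2.csv"])  # hypothetical names
    merged_df = pd.concat(pd.read_csv(f, index_col=0) for f in csv_files)
    merged_csv = csv_files[0].replace("_test1", "")  # -> "arc-perf.csv"
    merged_df.to_csv(merged_csv)
    # The per-run csvs are intentionally left on disk for check_results.py.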