LLM: check csv and its corresponding yaml file (#9702)

* LLM: check csv and its corresponding yaml file

* run PR arc perf test

* rename some variables

* execute the check results script in the right place

* use cp instead of the mv command

* resolve some comments

* resolve more comments

* revert the llm_performance_test.yaml file
WeiguangHan authored 2023-12-21 09:54:33 +08:00, committed by GitHub
parent 4bda975a3e
commit 34bb804189
3 changed files with 41 additions and 21 deletions


@@ -79,7 +79,7 @@ jobs:
 source /home/arda/intel/oneapi/setvars.sh
 export USE_XETLA=OFF
 export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
-mv python/llm/test/benchmark/arc-perf-test.yaml python/llm/dev/benchmark/all-in-one/config.yaml
+cp python/llm/test/benchmark/arc-perf-test.yaml python/llm/dev/benchmark/all-in-one/config.yaml
 cd python/llm/dev/benchmark/all-in-one
 # hide time info
 sed -i 's/str(end - st)/"xxxxxx"/g' run.py
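
The mv-to-cp switch above is the point of the workflow change: the benchmark yaml has to stay under python/llm/test/benchmark so that the new check_results.py step can load it after the runs finish. A minimal Python rendering of the same idea, using only the paths visible in the hunk above:

    import shutil

    # Old behavior: moving the config destroys the source copy that the
    # later check step needs in order to compute the expected test cases.
    # shutil.move("python/llm/test/benchmark/arc-perf-test.yaml",
    #             "python/llm/dev/benchmark/all-in-one/config.yaml")

    # New behavior: copy instead, leaving the yaml in place for the check.
    shutil.copy("python/llm/test/benchmark/arc-perf-test.yaml",
                "python/llm/dev/benchmark/all-in-one/config.yaml")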
@@ -88,17 +88,19 @@ jobs:
 python run.py
 # upgrade transformers for model Mistral-7B-v0.1
 python -m pip install transformers==4.34.0
-mv ../../../test/benchmark/arc-perf-transformers-434.yaml ./config.yaml
+cp ../../../test/benchmark/arc-perf-transformers-434.yaml ./config.yaml
 # change csv name
 sed -i 's/test1/test2/g' run.py
 python run.py
 python ../../../test/benchmark/concat_csv.py
-cp ./*.csv $CSV_SAVE_PATH
 cd ../../../test/benchmark
 python -m pip install pandas==1.5.3
 python csv_to_html.py -f $CSV_SAVE_PATH
 cd ../../dev/benchmark/all-in-one/
-python ../../../test/benchmark/check_results.py -n 45
+python ../../../test/benchmark/check_results.py -c test1 -y ../../../test/benchmark/arc-perf-test.yaml
+python ../../../test/benchmark/check_results.py -c test2 -y ../../../test/benchmark/arc-perf-transformers-434.yaml
+find . -name "*test*.csv" -delete
+cp ./*.csv $CSV_SAVE_PATH
 if [ ${{ github.event.schedule}} ]; then
 curl -T ./*.csv ${LLM_FTP_URL}/llm/nightly_perf/gpu/
 fi
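
The two check_results.py invocations above pair each run's csv tag (-c) with the yaml that drove it (-y). The contents of arc-perf-test.yaml are not part of this diff; judging from the fields the script reads (repo_id, in_out_pairs, exclude), a config of roughly the following shape is assumed here, and the expected test count falls out of it:

    from omegaconf import OmegaConf

    # Hypothetical values; the real arc-perf-test.yaml is not shown in this commit.
    conf = OmegaConf.create({
        "repo_id": ["meta-llama/Llama-2-7b-chat-hf", "mistralai/Mistral-7B-v0.1"],
        "in_out_pairs": ["32-32", "1024-128"],
        "exclude": ["mistralai/Mistral-7B-v0.1:1024"],
    })

    # One expected case per (model, input-length) pair, minus the excludes,
    # mirroring the arithmetic in check_results.py below.
    all_cases = [m + ":" + p.split("-")[0]
                 for m in conf.repo_id for p in conf.in_out_pairs]
    expected_test_num = len(all_cases) - len(conf.exclude)  # 2 * 2 - 1 = 3
    print(expected_test_num)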

python/llm/test/benchmark/check_results.py

@@ -20,26 +20,46 @@ import os
 import sys
 import argparse
 import pandas as pd
+from omegaconf import OmegaConf

 def main():
     parser = argparse.ArgumentParser(description="check if the number of lines in html meets expectation")
-    parser.add_argument("-n", "--expected_lines", type=int, dest="expected_lines",
-                        help="the number of expected html lines", default=0)
-    parser.add_argument("-f", "--folder_path", type=str, dest="folder_path",
-                        help="The directory which stores the .csv files", default="./")
+    parser.add_argument("-c", "--csv_file", type=str, dest="csv_name", help="name of csv")
+    parser.add_argument("-y", "--yaml_file", type=str, dest="yaml_name", help="name of yaml")
+    parser.add_argument("-n", "--expected_lines", type=int, dest="expected_lines", help="the number of expected csv lines")
     args = parser.parse_args()
-    csv_files = []
-    for file_name in os.listdir(args.folder_path):
-        file_path = os.path.join(args.folder_path, file_name)
-        if os.path.isfile(file_path) and file_name.endswith(".csv"):
-            csv_files.append(file_path)
-    csv_files.sort()
-    number_of_expected_lines = args.expected_lines
-    num_rows = len(pd.read_csv(csv_files[0], index_col=0))
-    if num_rows != number_of_expected_lines:
-        raise ValueError("The number of arc perf test results does not match the expected value. Please check carefully.")
+    csv_file = [file for file in os.listdir() if file.endswith('.csv') and args.csv_name in file][0]
+    csv_dataframe = pd.read_csv(csv_file, index_col=0)
+    actual_test_num = len(csv_dataframe)
+    actual_test_cases = []
+    for index, row in csv_dataframe.iterrows():
+        actual_test_cases.append(row['model'] + ":" + row['input/output tokens'].split('-')[0])
+    if args.yaml_name:
+        yaml_name = args.yaml_name
+        conf = OmegaConf.load(yaml_name)
+        all_test_cases = []
+        for model in conf.repo_id:
+            for in_out in conf['in_out_pairs']:
+                model_id_input = model + ':' + in_out.split('-')[0]
+                all_test_cases.append(model_id_input)
+        exclude_test_cases = []
+        if 'exclude' in conf and conf['exclude'] is not None:
+            exclude_test_cases = conf['exclude']
+        expected_test_num = len(all_test_cases) - len(exclude_test_cases)
+        if actual_test_num != expected_test_num:
+            print("---------------The test cases below should have been tested!------------")
+            for test_case in all_test_cases:
+                if test_case not in actual_test_cases and test_case not in exclude_test_cases:
+                    print(test_case)
+            raise ValueError("The above tests failed. Please check the errors in the log.")
+    elif args.expected_lines:
+        expected_test_num = args.expected_lines
+        if actual_test_num != expected_test_num:
+            raise ValueError("Missing some expected test cases! Please check the yaml file and the given expected_lines manually.")
+    else:
+        raise ValueError("You should provide the value of either yaml_name or expected_lines!")

 if __name__ == "__main__":
     sys.exit(main())
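
In the yaml-driven branch above, every expected case that is neither present in the csv nor listed under exclude is printed before the script raises. A toy walkthrough of that comparison, with made-up model names (real entries look like "<repo_id>:<input tokens>"):

    # Made-up data for illustration only.
    actual_test_cases = ["llama2:32", "llama2:1024", "mistral:32"]
    all_test_cases = ["llama2:32", "llama2:1024", "mistral:32", "mistral:1024"]
    exclude_test_cases = []

    missing = [case for case in all_test_cases
               if case not in actual_test_cases and case not in exclude_test_cases]
    if missing:
        for case in missing:
            print(case)  # prints: mistral:1024
        raise ValueError("The above tests failed. Please check the errors in the log.")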

python/llm/test/benchmark/concat_csv.py

@@ -41,8 +41,6 @@ def main():
     merged_csv = csv_files[0].replace("_test1", "")
     merged_df.to_csv(merged_csv)
-    os.remove(csv_files[0])
-    os.remove(csv_files[1])

 if __name__ == "__main__":
     sys.exit(main())
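
The deleted os.remove() calls tie back to the cp change in the workflow: the per-run test1/test2 csvs must outlive the merge so check_results.py can still find them by name, and the workflow now cleans them up afterwards with find . -name "*test*.csv" -delete. A minimal sketch of the merge step as it reads from this hunk (file names are hypothetical; the full script is not shown):

    import pandas as pd

    csv_files = sorted(["arc-perf_test1.csv", "arc-perf_test2.csv"])  # hypothetical names
    merged_df = pd.concat(pd.read_csv(f, index_col=0) for f in csv_files)
    merged_csv = csv_files[0].replace("_test1", "")  # -> "arc-perf.csv"
    merged_df.to_csv(merged_csv)
    # The per-run csvs are intentionally left on disk for check_results.py.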