diff --git a/.github/workflows/llm_performance_tests.yml b/.github/workflows/llm_performance_tests.yml index 171fb9cf..59667276 100644 --- a/.github/workflows/llm_performance_tests.yml +++ b/.github/workflows/llm_performance_tests.yml @@ -84,7 +84,7 @@ jobs: source /opt/intel/oneapi/setvars.sh bash python/llm/test/run-llm-install-tests.sh - - name: Test on xpu + - name: Test on xpu(transformers==4.31.0) shell: bash run: | date_for_test_version=$(date -d yesterday +%Y-%m-%d) @@ -100,12 +100,25 @@ jobs: # change csv name sed -i 's/{today}/{today}_test1/g' run.py python run.py + + - name: Test on xpu(transformers==4.34.0) + shell: bash + run: | + source /opt/intel/oneapi/setvars.sh + export USE_XETLA=OFF + export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 # upgrade transformers for model Mistral-7B-v0.1 python -m pip install transformers==4.34.0 - cp ../../../test/benchmark/arc-perf-transformers-434.yaml ./config.yaml + cp python/llm/test/benchmark/arc-perf-transformers-434.yaml python/llm/dev/benchmark/all-in-one/config.yaml + cd python/llm/dev/benchmark/all-in-one # change csv name sed -i 's/test1/test2/g' run.py python run.py + + - name: Concat csv and generate html + shell: bash + run: | + cd python/llm/dev/benchmark/all-in-one python ../../../test/benchmark/concat_csv.py for file in *.csv; do if [[ $file != *test* ]]; then @@ -115,7 +128,11 @@ jobs: python -m pip install pandas==1.5.3 cd ../../../test/benchmark python csv_to_html.py -f $CSV_SAVE_PATH - cd ../../dev/benchmark/all-in-one/ + + - name: Check and upload results to ftp + shell: bash + run: | + cd python/llm/dev/benchmark/all-in-one python ../../../test/benchmark/check_results.py -c test1 -y ../../../test/benchmark/arc-perf-test.yaml python ../../../test/benchmark/check_results.py -c test2 -y ../../../test/benchmark/arc-perf-transformers-434.yaml find . -name "*test*.csv" -delete diff --git a/python/llm/dev/benchmark/all-in-one/run.py b/python/llm/dev/benchmark/all-in-one/run.py index 47391489..993a4fe0 100644 --- a/python/llm/dev/benchmark/all-in-one/run.py +++ b/python/llm/dev/benchmark/all-in-one/run.py @@ -21,6 +21,7 @@ import time import gc import traceback import threading +import csv import numpy as np from datetime import date @@ -432,6 +433,21 @@ def run_transformer_int4_gpu(repo_id, thread = threading.Thread(target=run_model_in_thread, args=(model, in_out, tokenizer, result, warm_up, num_beams, input_ids, out_len, actual_in_len, num_trials)) thread.start() thread.join() + + if result[in_out]: + first_token_latency = round(np.mean(result[in_out], axis=0)[0]*1000.0, 2) + rest_token_latency = round(np.mean(result[in_out], axis=0)[1]*1000.0, 2) + encoder_time = round(np.mean(result[in_out], axis=0)[2]*1000.0, 2) + input_output_tokens = in_out + actual_input_output_tokens = f'{int(np.mean(result[in_out], axis=0)[3])}' + f'-{int(np.mean(result[in_out], axis=0)[4])}' + peak_mem = result[in_out][-1][5] + with open(csv_name, mode='a', newline='') as file: + csv_writer = csv.writer(file) + file.seek(0, os.SEEK_END) + if file.tell() == 0: + csv_writer.writerow(["","model","1st token avg latency (ms)","2+ avg latency (ms/token)","encoder time (ms)","input/output tokens","actual input/output tokens","num_beams","low_bit","cpu_embedding","peak mem (GB)"]) + csv_writer.writerow(['', repo_id, first_token_latency, rest_token_latency, encoder_time, input_output_tokens, actual_input_output_tokens, num_beams, low_bit, '', peak_mem]) + model.to('cpu') torch.xpu.synchronize() torch.xpu.empty_cache() @@ -439,7 +455,6 @@ def run_transformer_int4_gpu(repo_id, gc.collect() return result - def run_optimize_model_gpu(repo_id, local_model_hub, in_out_pairs, @@ -933,6 +948,8 @@ if __name__ == '__main__': import pandas as pd for api in conf.test_api: + global csv_name + csv_name = f'{current_dir}/{api}-results-{today}.csv' for model in conf.repo_id: in_out_pairs = conf['in_out_pairs'].copy() if excludes: @@ -943,8 +960,7 @@ if __name__ == '__main__': run_model(model, api, in_out_pairs, conf['local_model_hub'], conf['warm_up'], conf['num_trials'], conf['num_beams'], conf['low_bit'], conf['cpu_embedding'], conf['batch_size']) df = pd.DataFrame(results, columns=['model', '1st token avg latency (ms)', '2+ avg latency (ms/token)', 'encoder time (ms)', - 'input/output tokens', 'actual input/output tokens', 'num_beams', 'low_bit', 'cpu_embedding', + 'input/output tokens', 'actual input/output tokens', 'num_beams', 'low_bit', 'cpu_embedding', 'peak mem (GB)']) - - df.to_csv(f'{current_dir}/{api}-results-{today}.csv') + df.to_csv(csv_name) results = [] diff --git a/python/llm/test/benchmark/concat_csv.py b/python/llm/test/benchmark/concat_csv.py index cb2280ce..f2a712f3 100644 --- a/python/llm/test/benchmark/concat_csv.py +++ b/python/llm/test/benchmark/concat_csv.py @@ -34,12 +34,9 @@ def main(): csv_files.append(file_path) csv_files.sort() - df1 = pd.read_csv(csv_files[0], index_col=0) - df2 = pd.read_csv(csv_files[1], index_col=0) - merged_df = pd.concat([df1, df2], ignore_index=True) + merged_df = pd.concat([pd.read_csv(file, index_col=0) for file in csv_files], ignore_index=True) merged_df.reset_index(drop=True, inplace=True) - - merged_csv=csv_files[0].replace("_test1", "") + merged_csv = csv_files[0].replace("_test1", "").replace("_test2", "") merged_df.to_csv(merged_csv) if __name__ == "__main__":