LLM: ensure the result of daily arc perf test (#10016)

* ensure the result of daily arc perf test

* small fix

* small fix

* small fix

* small fix

* small fix

* small fix

* small fix

* small fix

* small fix

* small fix

* concat more csvs

* small fix

* revert some files
This commit is contained in:
WeiguangHan 2024-01-31 18:26:21 +08:00 committed by GitHub
parent 9724939499
commit d2d3f6b091
3 changed files with 42 additions and 12 deletions

View file

@@ -84,7 +84,7 @@ jobs:
source /opt/intel/oneapi/setvars.sh source /opt/intel/oneapi/setvars.sh
bash python/llm/test/run-llm-install-tests.sh bash python/llm/test/run-llm-install-tests.sh
- name: Test on xpu - name: Test on xpu(transformers==4.31.0)
shell: bash shell: bash
run: | run: |
date_for_test_version=$(date -d yesterday +%Y-%m-%d) date_for_test_version=$(date -d yesterday +%Y-%m-%d)
@@ -100,12 +100,25 @@ jobs:
# change csv name # change csv name
sed -i 's/{today}/{today}_test1/g' run.py sed -i 's/{today}/{today}_test1/g' run.py
python run.py python run.py
- name: Test on xpu(transformers==4.34.0)
shell: bash
run: |
source /opt/intel/oneapi/setvars.sh
export USE_XETLA=OFF
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
# upgrade transformers for model Mistral-7B-v0.1 # upgrade transformers for model Mistral-7B-v0.1
python -m pip install transformers==4.34.0 python -m pip install transformers==4.34.0
cp ../../../test/benchmark/arc-perf-transformers-434.yaml ./config.yaml cp python/llm/test/benchmark/arc-perf-transformers-434.yaml python/llm/dev/benchmark/all-in-one/config.yaml
cd python/llm/dev/benchmark/all-in-one
# change csv name # change csv name
sed -i 's/test1/test2/g' run.py sed -i 's/test1/test2/g' run.py
python run.py python run.py
- name: Concat csv and generate html
shell: bash
run: |
cd python/llm/dev/benchmark/all-in-one
python ../../../test/benchmark/concat_csv.py python ../../../test/benchmark/concat_csv.py
for file in *.csv; do for file in *.csv; do
if [[ $file != *test* ]]; then if [[ $file != *test* ]]; then
@@ -115,7 +128,11 @@ jobs:
python -m pip install pandas==1.5.3 python -m pip install pandas==1.5.3
cd ../../../test/benchmark cd ../../../test/benchmark
python csv_to_html.py -f $CSV_SAVE_PATH python csv_to_html.py -f $CSV_SAVE_PATH
cd ../../dev/benchmark/all-in-one/
- name: Check and upload results to ftp
shell: bash
run: |
cd python/llm/dev/benchmark/all-in-one
python ../../../test/benchmark/check_results.py -c test1 -y ../../../test/benchmark/arc-perf-test.yaml python ../../../test/benchmark/check_results.py -c test1 -y ../../../test/benchmark/arc-perf-test.yaml
python ../../../test/benchmark/check_results.py -c test2 -y ../../../test/benchmark/arc-perf-transformers-434.yaml python ../../../test/benchmark/check_results.py -c test2 -y ../../../test/benchmark/arc-perf-transformers-434.yaml
find . -name "*test*.csv" -delete find . -name "*test*.csv" -delete

View file

@@ -21,6 +21,7 @@ import time
import gc import gc
import traceback import traceback
import threading import threading
import csv
import numpy as np import numpy as np
from datetime import date from datetime import date
@@ -432,6 +433,21 @@ def run_transformer_int4_gpu(repo_id,
thread = threading.Thread(target=run_model_in_thread, args=(model, in_out, tokenizer, result, warm_up, num_beams, input_ids, out_len, actual_in_len, num_trials)) thread = threading.Thread(target=run_model_in_thread, args=(model, in_out, tokenizer, result, warm_up, num_beams, input_ids, out_len, actual_in_len, num_trials))
thread.start() thread.start()
thread.join() thread.join()
if result[in_out]:
first_token_latency = round(np.mean(result[in_out], axis=0)[0]*1000.0, 2)
rest_token_latency = round(np.mean(result[in_out], axis=0)[1]*1000.0, 2)
encoder_time = round(np.mean(result[in_out], axis=0)[2]*1000.0, 2)
input_output_tokens = in_out
actual_input_output_tokens = f'{int(np.mean(result[in_out], axis=0)[3])}' + f'-{int(np.mean(result[in_out], axis=0)[4])}'
peak_mem = result[in_out][-1][5]
with open(csv_name, mode='a', newline='') as file:
csv_writer = csv.writer(file)
file.seek(0, os.SEEK_END)
if file.tell() == 0:
csv_writer.writerow(["","model","1st token avg latency (ms)","2+ avg latency (ms/token)","encoder time (ms)","input/output tokens","actual input/output tokens","num_beams","low_bit","cpu_embedding","peak mem (GB)"])
csv_writer.writerow(['', repo_id, first_token_latency, rest_token_latency, encoder_time, input_output_tokens, actual_input_output_tokens, num_beams, low_bit, '', peak_mem])
model.to('cpu') model.to('cpu')
torch.xpu.synchronize() torch.xpu.synchronize()
torch.xpu.empty_cache() torch.xpu.empty_cache()
@@ -439,7 +455,6 @@ def run_transformer_int4_gpu(repo_id,
gc.collect() gc.collect()
return result return result
def run_optimize_model_gpu(repo_id, def run_optimize_model_gpu(repo_id,
local_model_hub, local_model_hub,
in_out_pairs, in_out_pairs,
@@ -933,6 +948,8 @@ if __name__ == '__main__':
import pandas as pd import pandas as pd
for api in conf.test_api: for api in conf.test_api:
global csv_name
csv_name = f'{current_dir}/{api}-results-{today}.csv'
for model in conf.repo_id: for model in conf.repo_id:
in_out_pairs = conf['in_out_pairs'].copy() in_out_pairs = conf['in_out_pairs'].copy()
if excludes: if excludes:
@@ -943,8 +960,7 @@ if __name__ == '__main__':
run_model(model, api, in_out_pairs, conf['local_model_hub'], conf['warm_up'], conf['num_trials'], conf['num_beams'], run_model(model, api, in_out_pairs, conf['local_model_hub'], conf['warm_up'], conf['num_trials'], conf['num_beams'],
conf['low_bit'], conf['cpu_embedding'], conf['batch_size']) conf['low_bit'], conf['cpu_embedding'], conf['batch_size'])
df = pd.DataFrame(results, columns=['model', '1st token avg latency (ms)', '2+ avg latency (ms/token)', 'encoder time (ms)', df = pd.DataFrame(results, columns=['model', '1st token avg latency (ms)', '2+ avg latency (ms/token)', 'encoder time (ms)',
'input/output tokens', 'actual input/output tokens', 'num_beams', 'low_bit', 'cpu_embedding', 'input/output tokens', 'actual input/output tokens', 'num_beams', 'low_bit', 'cpu_embedding',
'peak mem (GB)']) 'peak mem (GB)'])
df.to_csv(csv_name)
df.to_csv(f'{current_dir}/{api}-results-{today}.csv')
results = [] results = []

View file

@@ -34,12 +34,9 @@ def main():
csv_files.append(file_path) csv_files.append(file_path)
csv_files.sort() csv_files.sort()
df1 = pd.read_csv(csv_files[0], index_col=0) merged_df = pd.concat([pd.read_csv(file, index_col=0) for file in csv_files], ignore_index=True)
df2 = pd.read_csv(csv_files[1], index_col=0)
merged_df = pd.concat([df1, df2], ignore_index=True)
merged_df.reset_index(drop=True, inplace=True) merged_df.reset_index(drop=True, inplace=True)
merged_csv = csv_files[0].replace("_test1", "").replace("_test2", "")
merged_csv=csv_files[0].replace("_test1", "")
merged_df.to_csv(merged_csv) merged_df.to_csv(merged_csv)
if __name__ == "__main__": if __name__ == "__main__":