LLM: ensure the result of daily arc perf test (#10016)
* ensure the result of daily arc perf test
* small fix
* small fix
* small fix
* small fix
* small fix
* small fix
* small fix
* small fix
* small fix
* small fix
* concat more csvs
* small fix
* revert some files
This commit is contained in:
parent
9724939499
commit
d2d3f6b091
3 changed files with 42 additions and 12 deletions
23
.github/workflows/llm_performance_tests.yml
vendored
23
.github/workflows/llm_performance_tests.yml
vendored
|
|
@ -84,7 +84,7 @@ jobs:
|
||||||
source /opt/intel/oneapi/setvars.sh
|
source /opt/intel/oneapi/setvars.sh
|
||||||
bash python/llm/test/run-llm-install-tests.sh
|
bash python/llm/test/run-llm-install-tests.sh
|
||||||
|
|
||||||
- name: Test on xpu
|
- name: Test on xpu(transformers==4.31.0)
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
date_for_test_version=$(date -d yesterday +%Y-%m-%d)
|
date_for_test_version=$(date -d yesterday +%Y-%m-%d)
|
||||||
|
|
@ -100,12 +100,25 @@ jobs:
|
||||||
# change csv name
|
# change csv name
|
||||||
sed -i 's/{today}/{today}_test1/g' run.py
|
sed -i 's/{today}/{today}_test1/g' run.py
|
||||||
python run.py
|
python run.py
|
||||||
|
|
||||||
|
- name: Test on xpu(transformers==4.34.0)
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
source /opt/intel/oneapi/setvars.sh
|
||||||
|
export USE_XETLA=OFF
|
||||||
|
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
|
||||||
# upgrade transformers for model Mistral-7B-v0.1
|
# upgrade transformers for model Mistral-7B-v0.1
|
||||||
python -m pip install transformers==4.34.0
|
python -m pip install transformers==4.34.0
|
||||||
cp ../../../test/benchmark/arc-perf-transformers-434.yaml ./config.yaml
|
cp python/llm/test/benchmark/arc-perf-transformers-434.yaml python/llm/dev/benchmark/all-in-one/config.yaml
|
||||||
|
cd python/llm/dev/benchmark/all-in-one
|
||||||
# change csv name
|
# change csv name
|
||||||
sed -i 's/test1/test2/g' run.py
|
sed -i 's/test1/test2/g' run.py
|
||||||
python run.py
|
python run.py
|
||||||
|
|
||||||
|
- name: Concat csv and generate html
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
cd python/llm/dev/benchmark/all-in-one
|
||||||
python ../../../test/benchmark/concat_csv.py
|
python ../../../test/benchmark/concat_csv.py
|
||||||
for file in *.csv; do
|
for file in *.csv; do
|
||||||
if [[ $file != *test* ]]; then
|
if [[ $file != *test* ]]; then
|
||||||
|
|
@ -115,7 +128,11 @@ jobs:
|
||||||
python -m pip install pandas==1.5.3
|
python -m pip install pandas==1.5.3
|
||||||
cd ../../../test/benchmark
|
cd ../../../test/benchmark
|
||||||
python csv_to_html.py -f $CSV_SAVE_PATH
|
python csv_to_html.py -f $CSV_SAVE_PATH
|
||||||
cd ../../dev/benchmark/all-in-one/
|
|
||||||
|
- name: Check and upload results to ftp
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
cd python/llm/dev/benchmark/all-in-one
|
||||||
python ../../../test/benchmark/check_results.py -c test1 -y ../../../test/benchmark/arc-perf-test.yaml
|
python ../../../test/benchmark/check_results.py -c test1 -y ../../../test/benchmark/arc-perf-test.yaml
|
||||||
python ../../../test/benchmark/check_results.py -c test2 -y ../../../test/benchmark/arc-perf-transformers-434.yaml
|
python ../../../test/benchmark/check_results.py -c test2 -y ../../../test/benchmark/arc-perf-transformers-434.yaml
|
||||||
find . -name "*test*.csv" -delete
|
find . -name "*test*.csv" -delete
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,7 @@ import time
|
||||||
import gc
|
import gc
|
||||||
import traceback
|
import traceback
|
||||||
import threading
|
import threading
|
||||||
|
import csv
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from datetime import date
|
from datetime import date
|
||||||
|
|
@ -432,6 +433,21 @@ def run_transformer_int4_gpu(repo_id,
|
||||||
thread = threading.Thread(target=run_model_in_thread, args=(model, in_out, tokenizer, result, warm_up, num_beams, input_ids, out_len, actual_in_len, num_trials))
|
thread = threading.Thread(target=run_model_in_thread, args=(model, in_out, tokenizer, result, warm_up, num_beams, input_ids, out_len, actual_in_len, num_trials))
|
||||||
thread.start()
|
thread.start()
|
||||||
thread.join()
|
thread.join()
|
||||||
|
|
||||||
|
if result[in_out]:
|
||||||
|
first_token_latency = round(np.mean(result[in_out], axis=0)[0]*1000.0, 2)
|
||||||
|
rest_token_latency = round(np.mean(result[in_out], axis=0)[1]*1000.0, 2)
|
||||||
|
encoder_time = round(np.mean(result[in_out], axis=0)[2]*1000.0, 2)
|
||||||
|
input_output_tokens = in_out
|
||||||
|
actual_input_output_tokens = f'{int(np.mean(result[in_out], axis=0)[3])}' + f'-{int(np.mean(result[in_out], axis=0)[4])}'
|
||||||
|
peak_mem = result[in_out][-1][5]
|
||||||
|
with open(csv_name, mode='a', newline='') as file:
|
||||||
|
csv_writer = csv.writer(file)
|
||||||
|
file.seek(0, os.SEEK_END)
|
||||||
|
if file.tell() == 0:
|
||||||
|
csv_writer.writerow(["","model","1st token avg latency (ms)","2+ avg latency (ms/token)","encoder time (ms)","input/output tokens","actual input/output tokens","num_beams","low_bit","cpu_embedding","peak mem (GB)"])
|
||||||
|
csv_writer.writerow(['', repo_id, first_token_latency, rest_token_latency, encoder_time, input_output_tokens, actual_input_output_tokens, num_beams, low_bit, '', peak_mem])
|
||||||
|
|
||||||
model.to('cpu')
|
model.to('cpu')
|
||||||
torch.xpu.synchronize()
|
torch.xpu.synchronize()
|
||||||
torch.xpu.empty_cache()
|
torch.xpu.empty_cache()
|
||||||
|
|
@ -439,7 +455,6 @@ def run_transformer_int4_gpu(repo_id,
|
||||||
gc.collect()
|
gc.collect()
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
def run_optimize_model_gpu(repo_id,
|
def run_optimize_model_gpu(repo_id,
|
||||||
local_model_hub,
|
local_model_hub,
|
||||||
in_out_pairs,
|
in_out_pairs,
|
||||||
|
|
@ -933,6 +948,8 @@ if __name__ == '__main__':
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
for api in conf.test_api:
|
for api in conf.test_api:
|
||||||
|
global csv_name
|
||||||
|
csv_name = f'{current_dir}/{api}-results-{today}.csv'
|
||||||
for model in conf.repo_id:
|
for model in conf.repo_id:
|
||||||
in_out_pairs = conf['in_out_pairs'].copy()
|
in_out_pairs = conf['in_out_pairs'].copy()
|
||||||
if excludes:
|
if excludes:
|
||||||
|
|
@ -943,8 +960,7 @@ if __name__ == '__main__':
|
||||||
run_model(model, api, in_out_pairs, conf['local_model_hub'], conf['warm_up'], conf['num_trials'], conf['num_beams'],
|
run_model(model, api, in_out_pairs, conf['local_model_hub'], conf['warm_up'], conf['num_trials'], conf['num_beams'],
|
||||||
conf['low_bit'], conf['cpu_embedding'], conf['batch_size'])
|
conf['low_bit'], conf['cpu_embedding'], conf['batch_size'])
|
||||||
df = pd.DataFrame(results, columns=['model', '1st token avg latency (ms)', '2+ avg latency (ms/token)', 'encoder time (ms)',
|
df = pd.DataFrame(results, columns=['model', '1st token avg latency (ms)', '2+ avg latency (ms/token)', 'encoder time (ms)',
|
||||||
'input/output tokens', 'actual input/output tokens', 'num_beams', 'low_bit', 'cpu_embedding',
|
'input/output tokens', 'actual input/output tokens', 'num_beams', 'low_bit', 'cpu_embedding',
|
||||||
'peak mem (GB)'])
|
'peak mem (GB)'])
|
||||||
|
df.to_csv(csv_name)
|
||||||
df.to_csv(f'{current_dir}/{api}-results-{today}.csv')
|
|
||||||
results = []
|
results = []
|
||||||
|
|
|
||||||
|
|
@ -34,12 +34,9 @@ def main():
|
||||||
csv_files.append(file_path)
|
csv_files.append(file_path)
|
||||||
csv_files.sort()
|
csv_files.sort()
|
||||||
|
|
||||||
df1 = pd.read_csv(csv_files[0], index_col=0)
|
merged_df = pd.concat([pd.read_csv(file, index_col=0) for file in csv_files], ignore_index=True)
|
||||||
df2 = pd.read_csv(csv_files[1], index_col=0)
|
|
||||||
merged_df = pd.concat([df1, df2], ignore_index=True)
|
|
||||||
merged_df.reset_index(drop=True, inplace=True)
|
merged_df.reset_index(drop=True, inplace=True)
|
||||||
|
merged_csv = csv_files[0].replace("_test1", "").replace("_test2", "")
|
||||||
merged_csv=csv_files[0].replace("_test1", "")
|
|
||||||
merged_df.to_csv(merged_csv)
|
merged_df.to_csv(merged_csv)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue