Add batch 4 perf test (#11355)
* copy files to this branch * add tasks * comment one model * change the model to test the 4.36 * only test batch-4 * typo * typo * typo * typo * typo * typo * add 4.37-batch4 * change the file name * revert yaml file * no print * add batch4 task * revert --------- Co-authored-by: Yishuo Wang <yishuo.wang@intel.com>
This commit is contained in:
parent
a721c1ae43
commit
b2f62a8561
4 changed files with 125 additions and 3 deletions
46
.github/workflows/llm_performance_tests.yml
vendored
46
.github/workflows/llm_performance_tests.yml
vendored
|
|
@ -120,6 +120,7 @@ jobs:
|
||||||
cd python/llm/dev/benchmark/all-in-one
|
cd python/llm/dev/benchmark/all-in-one
|
||||||
mkdir test_batch1
|
mkdir test_batch1
|
||||||
mkdir test_batch2
|
mkdir test_batch2
|
||||||
|
mkdir test_batch4
|
||||||
# batch_size 1
|
# batch_size 1
|
||||||
# hide time info
|
# hide time info
|
||||||
sed -i 's/str(end - st)/"xxxxxx"/g' run.py
|
sed -i 's/str(end - st)/"xxxxxx"/g' run.py
|
||||||
|
|
@ -135,6 +136,14 @@ jobs:
|
||||||
sed -i 's/batch1/batch2/g' run.py
|
sed -i 's/batch1/batch2/g' run.py
|
||||||
python run.py
|
python run.py
|
||||||
mv *.csv test_batch2
|
mv *.csv test_batch2
|
||||||
|
# batch_size 4
|
||||||
|
cd ../../../../../
|
||||||
|
cp python/llm/test/benchmark/arc-perf-test-batch4.yaml python/llm/dev/benchmark/all-in-one/config.yaml
|
||||||
|
cd python/llm/dev/benchmark/all-in-one
|
||||||
|
# change csv name
|
||||||
|
sed -i 's/batch2/batch4/g' run.py
|
||||||
|
python run.py
|
||||||
|
mv *.csv test_batch4
|
||||||
|
|
||||||
- name: Test on xpu(transformers==4.37.0)
|
- name: Test on xpu(transformers==4.37.0)
|
||||||
shell: bash
|
shell: bash
|
||||||
|
|
@ -148,7 +157,7 @@ jobs:
|
||||||
cp python/llm/test/benchmark/arc-perf-transformers-437.yaml python/llm/dev/benchmark/all-in-one/config.yaml
|
cp python/llm/test/benchmark/arc-perf-transformers-437.yaml python/llm/dev/benchmark/all-in-one/config.yaml
|
||||||
cd python/llm/dev/benchmark/all-in-one
|
cd python/llm/dev/benchmark/all-in-one
|
||||||
# change csv name
|
# change csv name
|
||||||
sed -i 's/test1_batch2/test2_batch1/g' run.py
|
sed -i 's/test1_batch4/test2_batch1/g' run.py
|
||||||
python run.py
|
python run.py
|
||||||
mv *.csv test_batch1
|
mv *.csv test_batch1
|
||||||
# batch_size 2
|
# batch_size 2
|
||||||
|
|
@ -159,6 +168,14 @@ jobs:
|
||||||
sed -i 's/batch1/batch2/g' run.py
|
sed -i 's/batch1/batch2/g' run.py
|
||||||
python run.py
|
python run.py
|
||||||
mv *.csv test_batch2
|
mv *.csv test_batch2
|
||||||
|
# batch_size 4
|
||||||
|
cd ../../../../../
|
||||||
|
cp python/llm/test/benchmark/arc-perf-transformers-437-batch4.yaml python/llm/dev/benchmark/all-in-one/config.yaml
|
||||||
|
cd python/llm/dev/benchmark/all-in-one
|
||||||
|
# change csv name
|
||||||
|
sed -i 's/batch2/batch4/g' run.py
|
||||||
|
python run.py
|
||||||
|
mv *.csv test_batch4
|
||||||
|
|
||||||
- name: Concat csv and generate html
|
- name: Concat csv and generate html
|
||||||
shell: bash
|
shell: bash
|
||||||
|
|
@ -185,6 +202,17 @@ jobs:
|
||||||
done
|
done
|
||||||
cd ../../../../test/benchmark
|
cd ../../../../test/benchmark
|
||||||
python csv_to_html.py -f $CSV_SAVE_PATH/batch_size_2
|
python csv_to_html.py -f $CSV_SAVE_PATH/batch_size_2
|
||||||
|
# batch_size 4
|
||||||
|
cd ../../../../
|
||||||
|
cd python/llm/dev/benchmark/all-in-one/test_batch4
|
||||||
|
python ../../../../test/benchmark/concat_csv.py
|
||||||
|
for file in *.csv; do
|
||||||
|
if [[ $file != *test* ]]; then
|
||||||
|
cp "$file" $CSV_SAVE_PATH/batch_size_4
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
cd ../../../../test/benchmark
|
||||||
|
python csv_to_html.py -f $CSV_SAVE_PATH/batch_size_4
|
||||||
|
|
||||||
- name: Merge and sort csv files of multiple batches and generate html
|
- name: Merge and sort csv files of multiple batches and generate html
|
||||||
shell: bash
|
shell: bash
|
||||||
|
|
@ -204,6 +232,12 @@ jobs:
|
||||||
cp "$file" ../../../../test/benchmark/merged_temp
|
cp "$file" ../../../../test/benchmark/merged_temp
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
cd ../test_batch4
|
||||||
|
for file in *.csv; do
|
||||||
|
if [[ $file != *test* ]]; then
|
||||||
|
cp "$file" ../../../../test/benchmark/merged_temp
|
||||||
|
fi
|
||||||
|
done
|
||||||
cd ../../../../test/benchmark
|
cd ../../../../test/benchmark
|
||||||
python merge_csv_batch.py -f ./merged_temp
|
python merge_csv_batch.py -f ./merged_temp
|
||||||
cd merged_temp
|
cd merged_temp
|
||||||
|
|
@ -244,6 +278,16 @@ jobs:
|
||||||
fi
|
fi
|
||||||
cd ../
|
cd ../
|
||||||
rm -r test_batch2
|
rm -r test_batch2
|
||||||
|
# batch_size 4
|
||||||
|
cd test_batch4
|
||||||
|
python ../../../../test/benchmark/check_results.py -c test1 -y ../../../../test/benchmark/arc-perf-test-batch4.yaml
|
||||||
|
python ../../../../test/benchmark/check_results.py -c test2 -y ../../../../test/benchmark/arc-perf-transformers-437-batch4.yaml
|
||||||
|
find . -name "*test*.csv" -delete
|
||||||
|
if [ ${{ github.event_name }} == "schedule" ] || [ ${{ github.event_name }} == "workflow_dispatch" ]; then
|
||||||
|
curl -T ./*.csv ${LLM_FTP_URL}/llm/nightly_perf/gpu/
|
||||||
|
fi
|
||||||
|
cd ../
|
||||||
|
rm -r test_batch4
|
||||||
|
|
||||||
|
|
||||||
llm-performance-test-on-spr:
|
llm-performance-test-on-spr:
|
||||||
|
|
|
||||||
55
python/llm/test/benchmark/arc-perf-test-batch4.yaml
Normal file
55
python/llm/test/benchmark/arc-perf-test-batch4.yaml
Normal file
|
|
@ -0,0 +1,55 @@
|
||||||
|
repo_id:
|
||||||
|
- 'meta-llama/Llama-2-7b-chat-hf'
|
||||||
|
- 'meta-llama/Llama-2-13b-chat-hf'
|
||||||
|
- 'THUDM/chatglm2-6b'
|
||||||
|
- 'THUDM/chatglm3-6b-4bit'
|
||||||
|
- 'tiiuae/falcon-7b-instruct-with-patch'
|
||||||
|
- 'mosaicml/mpt-7b-chat'
|
||||||
|
- 'redpajama/gptneox-7b-redpajama-bf16'
|
||||||
|
- 'bigcode/starcoder-15.5b-4bit'
|
||||||
|
- 'databricks/dolly-v1-6b'
|
||||||
|
- 'databricks/dolly-v2-7b'
|
||||||
|
- 'databricks/dolly-v2-12b'
|
||||||
|
- 'internlm/internlm-chat-7b'
|
||||||
|
- 'Qwen/Qwen-7B-Chat'
|
||||||
|
- 'BAAI/AquilaChat-7B'
|
||||||
|
- 'baichuan-inc/Baichuan2-7B-Chat'
|
||||||
|
- 'baichuan-inc/Baichuan2-13B-Chat-4bit'
|
||||||
|
- 'bigscience/bloomz-7b1'
|
||||||
|
# - 'fnlp/moss-moon-003-sft-4bit' # moss-moon-003-sft cannot work on transformers 4.34+
|
||||||
|
- 'mistralai/Mistral-7B-v0.1' #mwj: need to check
|
||||||
|
local_model_hub: '/mnt/disk1/models'
|
||||||
|
warm_up: 1
|
||||||
|
num_trials: 3
|
||||||
|
num_beams: 1 # default to greedy search
|
||||||
|
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
|
||||||
|
batch_size: 4 # default to 1
|
||||||
|
in_out_pairs:
|
||||||
|
- '32-32'
|
||||||
|
- '1024-128'
|
||||||
|
- '2048-256'
|
||||||
|
test_api:
|
||||||
|
- "transformer_int4_fp16_gpu" # on Intel GPU
|
||||||
|
cpu_embedding: False # whether to put embedding on CPU (only available now for gpu win related test_api)
|
||||||
|
exclude:
|
||||||
|
- 'meta-llama/Llama-2-13b-chat-hf:2048'
|
||||||
|
- 'tiiuae/falcon-7b-instruct-with-patch:2048'
|
||||||
|
- 'mosaicml/mpt-7b-chat:2048'
|
||||||
|
- 'redpajama/gptneox-7b-redpajama-bf16:2048'
|
||||||
|
- 'bigcode/starcoder-15.5b-4bit:1024'
|
||||||
|
- 'bigcode/starcoder-15.5b-4bit:2048'
|
||||||
|
- 'databricks/dolly-v1-6b:2048'
|
||||||
|
- 'databricks/dolly-v2-7b:2048'
|
||||||
|
- 'databricks/dolly-v2-12b:1024'
|
||||||
|
- 'databricks/dolly-v2-12b:2048'
|
||||||
|
- 'internlm/internlm-chat-7b:2048'
|
||||||
|
- 'Qwen/Qwen-7B-Chat:2048'
|
||||||
|
- 'BAAI/AquilaChat-7B:2048'
|
||||||
|
- 'baichuan-inc/Baichuan2-7B-Chat:2048'
|
||||||
|
- 'baichuan-inc/Baichuan2-13B-Chat-4bit:1024'
|
||||||
|
- 'baichuan-inc/Baichuan2-13B-Chat-4bit:2048'
|
||||||
|
- 'bigscience/bloomz-7b1:1024'
|
||||||
|
- 'bigscience/bloomz-7b1:2048'
|
||||||
|
# - 'fnlp/moss-moon-003-sft-4bit:1024'
|
||||||
|
# - 'fnlp/moss-moon-003-sft-4bit:2048'
|
||||||
|
task: 'continuation' # task can be 'continuation', 'QA' and 'summarize'
|
||||||
|
|
@ -0,0 +1,23 @@
|
||||||
|
# For the models that require transformers 4.37.0
|
||||||
|
repo_id:
|
||||||
|
- 'Qwen/Qwen1.5-7B-Chat'
|
||||||
|
- 'microsoft/phi-2'
|
||||||
|
- 'microsoft/Phi-3-mini-4k-instruct'
|
||||||
|
- 'meta-llama/Meta-Llama-3-8B-Instruct' # mwj: need to test
|
||||||
|
local_model_hub: '/mnt/disk1/models'
|
||||||
|
warm_up: 1
|
||||||
|
num_trials: 3
|
||||||
|
num_beams: 1 # default to greedy search
|
||||||
|
low_bit: 'sym_int4' # default to use 'sym_int4' (i.e. symmetric int4)
|
||||||
|
batch_size: 4 # default to 1
|
||||||
|
in_out_pairs:
|
||||||
|
- '32-32'
|
||||||
|
- '1024-128'
|
||||||
|
- '2048-256'
|
||||||
|
test_api:
|
||||||
|
- "transformer_int4_fp16_gpu" # on Intel GPU
|
||||||
|
cpu_embedding: False # whether to put embedding on CPU (only available now for gpu win related test_api)
|
||||||
|
exclude:
|
||||||
|
- 'Qwen/Qwen1.5-7B-Chat:2048'
|
||||||
|
- 'meta-llama/Meta-Llama-3-8B-Instruct:2048'
|
||||||
|
task: 'continuation' # task can be 'continuation', 'QA' and 'summarize'
|
||||||
|
|
@ -24,13 +24,13 @@ from pathlib import Path
|
||||||
def update_html_in_parent_folder(folder_path):
|
def update_html_in_parent_folder(folder_path):
|
||||||
|
|
||||||
current_folder = Path(folder_path)
|
current_folder = Path(folder_path)
|
||||||
folder_list = [current_folder/'batch_size_1/',current_folder/'batch_size_2/',current_folder/'merged/']
|
folder_list = [current_folder/'batch_size_1/',current_folder/'batch_size_2/',current_folder/'batch_size_4/',current_folder/'merged/']
|
||||||
|
|
||||||
# List all html files under current folder and delete them
|
# List all html files under current folder and delete them
|
||||||
for html_file in current_folder.glob('*.html'):
|
for html_file in current_folder.glob('*.html'):
|
||||||
html_file.unlink()
|
html_file.unlink()
|
||||||
for folder in folder_list:
|
for folder in folder_list:
|
||||||
# Find latest html file under batch1/batch2/merged folders
|
# Find latest html file under batch1/batch2/batch4/merged folders
|
||||||
latest_html_file = max(Path(folder).glob('*.html'), key=os.path.getctime, default=None)
|
latest_html_file = max(Path(folder).glob('*.html'), key=os.path.getctime, default=None)
|
||||||
# Copy the latest html file to parent folder
|
# Copy the latest html file to parent folder
|
||||||
if latest_html_file is not None:
|
if latest_html_file is not None:
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue