Merge multiple batches (#11264)
* add merge steps * move to pr mode * remove build + add merge.py * add tohtml and change cp * change test_batch folder path * change merge_temp path * change to html folder * revert * change place * revert 437 * revert space --------- Co-authored-by: Yishuo Wang <yishuo.wang@intel.com>
This commit is contained in:
parent
4b07712fd8
commit
70b17c87be
2 changed files with 74 additions and 0 deletions
29
.github/workflows/llm_performance_tests.yml
vendored
29
.github/workflows/llm_performance_tests.yml
vendored
|
|
@ -164,6 +164,35 @@ jobs:
|
|||
done
|
||||
cd ../../../../test/benchmark
|
||||
python csv_to_html.py -f $CSV_SAVE_PATH/batch_size_2
|
||||
|
||||
- name: Merge and sort csv files of multiple batches and generate html
  shell: bash
  run: |
    # Gather the per-batch result CSVs into a scratch directory, merge them
    # into a single sorted CSV, publish it under $CSV_SAVE_PATH/merged and
    # render the HTML report from it.
    cd python/llm/test/benchmark
    bench_dir="$PWD"
    # Start from a clean scratch directory: a leftover merged_temp from an
    # aborted run would make a plain `mkdir` fail and could leak stale CSVs
    # into the merge.
    rm -rf merged_temp
    mkdir -p merged_temp
    # go through all the files and go to merged_temp
    for batch_dir in test_batch1 test_batch2; do
      # cd into each batch directory (instead of globbing a longer path) so
      # that $file stays a bare file name for the *test* filter below.
      cd "$bench_dir/../../dev/benchmark/all-in-one/$batch_dir"
      for file in *.csv; do
        # Files with "test" in their name are not copied.
        if [[ $file != *test* ]]; then
          cp "$file" "$bench_dir/merged_temp/"
        fi
      done
    done
    cd "$bench_dir"
    python merge_csv_batch.py -f ./merged_temp
    cd merged_temp
    # Drop the per-batch inputs; only the merged CSV should be published.
    find . -name "*batch*.csv" -delete
    # Make sure the destination is a directory; otherwise `cp` would create
    # a regular file called "merged".
    mkdir -p "$CSV_SAVE_PATH/merged"
    for file in *.csv; do
      cp "$file" "$CSV_SAVE_PATH/merged/"
    done
    cd ..
    python csv_to_html.py -f "$CSV_SAVE_PATH/merged"
    rm -r merged_temp
|
||||
|
||||
- name: Check and upload results to ftp
|
||||
shell: bash
|
||||
|
|
|
|||
45
python/llm/test/benchmark/merge_csv_batch.py
Normal file
45
python/llm/test/benchmark/merge_csv_batch.py
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
#
|
||||
# Copyright 2016 The BigDL Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# Python program to concat CSVs
|
||||
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import pandas as pd
|
||||
|
||||
def main(argv=None):
    """Merge every .csv file in a folder into one sorted .csv file.

    All ``*.csv`` files directly inside ``--folder_path`` are concatenated,
    sorted by ``model``, the input length (the integer before the first
    ``-`` in the ``input/output tokens`` column) and ``batch_size``, and
    written next to the inputs. The output file name is the alphabetically
    first input file name with every ``_batch<N>`` marker removed; if that
    name carries no such marker, the input file itself is overwritten.

    Args:
        argv: Optional list of command-line arguments. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``.

    Returns:
        0 on success, 1 when the folder contains no .csv files.
    """
    # Imported locally so this function does not depend on edits to the
    # module-level import block.
    import re

    parser = argparse.ArgumentParser(description="concat .csv files")
    parser.add_argument("-f", "--folder_path", type=str, dest="folder_path",
                        help="The directory which stores the .csv files", default="./")
    args = parser.parse_args(argv)

    folder = args.folder_path
    csv_files = sorted(
        os.path.join(folder, name)
        for name in os.listdir(folder)
        if name.endswith(".csv") and os.path.isfile(os.path.join(folder, name))
    )
    if not csv_files:
        # pd.concat([]) would raise an opaque "No objects to concatenate".
        print(f"No .csv files found in {folder}", file=sys.stderr)
        return 1

    merged_df = pd.concat(
        [pd.read_csv(path, index_col=0) for path in csv_files],
        ignore_index=True,
    )
    # Temporary numeric key so that e.g. "1024-128" sorts after "32-32".
    merged_df["input_len"] = (
        merged_df["input/output tokens"].str.split("-").str[0].astype(int)
    )
    merged_df = merged_df.sort_values(by=["model", "input_len", "batch_size"])
    merged_df.reset_index(drop=True, inplace=True)

    # Strip the batch marker from the file name only (never from the
    # directory part) and for any batch size, so "_batch16" does not
    # degrade into a stray "6".
    first_dir, first_name = os.path.split(csv_files[0])
    merged_csv = os.path.join(first_dir, re.sub(r"_batch\d+", "", first_name))
    merged_df.drop("input_len", axis=1).to_csv(merged_csv)
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
||||
Loading…
Reference in a new issue