Merge multiple batches (#11264)
* add merge steps
* move to pr mode
* remove build + add merge.py
* add tohtml and change cp
* change test_batch folder path
* change merge_temp path
* change to html folder
* revert
* change place
* revert 437
* revert space

---------

Co-authored-by: Yishuo Wang <yishuo.wang@intel.com>
This commit is contained in:
		
							parent
							
								
									4b07712fd8
								
							
						
					
					
						commit
						70b17c87be
					
				
					 2 changed files with 74 additions and 0 deletions
				
			
		
							
								
								
									
										29
									
								
								.github/workflows/llm_performance_tests.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										29
									
								
								.github/workflows/llm_performance_tests.yml
									
									
									
									
										vendored
									
									
								
							| 
						 | 
				
			
			@ -165,6 +165,35 @@ jobs:
 | 
			
		|||
          cd ../../../../test/benchmark
 | 
			
		||||
          python csv_to_html.py -f $CSV_SAVE_PATH/batch_size_2
 | 
			
		||||
      
 | 
			
		||||
      # Collects the per-batch result CSVs produced under test_batch1 and
      # test_batch2, merges them into a single sorted CSV with
      # merge_csv_batch.py, copies the merged CSV to $CSV_SAVE_PATH/merged and
      # renders it to HTML with csv_to_html.py.
      - name: Merge and sort csv files of multiple batches and generate html
        shell: bash
        run: |
          cd python/llm/test/benchmark
          # Start from a clean scratch directory: a leftover merged_temp from an
          # earlier failed run would otherwise make mkdir fail or leak stale CSVs
          # into this merge.
          rm -rf merged_temp
          mkdir -p merged_temp
          # Copy every result csv (skipping files with "test" in the name) from
          # each batch folder into merged_temp
          cd ../../dev/benchmark/all-in-one/test_batch1
          for file in *.csv; do
              if [[ $file != *test* ]]; then
                  cp "$file" ../../../../test/benchmark/merged_temp
              fi
          done
          cd ../test_batch2
          for file in *.csv; do
              if [[ $file != *test* ]]; then
                  cp "$file" ../../../../test/benchmark/merged_temp
              fi
          done
          cd ../../../../test/benchmark
          python merge_csv_batch.py -f ./merged_temp
          cd merged_temp
          # Drop the per-batch inputs so only the merged csv is left to publish
          find . -name "*batch*.csv" -delete
          # Make sure the destination is a directory; otherwise cp would write
          # each csv over a plain file called "merged".
          mkdir -p "$CSV_SAVE_PATH/merged"
          for file in *.csv; do
              cp "$file" "$CSV_SAVE_PATH/merged"
          done
          cd ..
          python csv_to_html.py -f "$CSV_SAVE_PATH/merged"
          rm -r merged_temp
 | 
			
		||||
 | 
			
		||||
      - name: Check and upload results to ftp
 | 
			
		||||
        shell: bash
 | 
			
		||||
        run: |
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										45
									
								
								python/llm/test/benchmark/merge_csv_batch.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										45
									
								
								python/llm/test/benchmark/merge_csv_batch.py
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,45 @@
 | 
			
		|||
#
 | 
			
		||||
# Copyright 2016 The BigDL Authors.
 | 
			
		||||
#
 | 
			
		||||
# Licensed under the Apache License, Version 2.0 (the "License");
 | 
			
		||||
# you may not use this file except in compliance with the License.
 | 
			
		||||
# You may obtain a copy of the License at
 | 
			
		||||
#
 | 
			
		||||
#     http://www.apache.org/licenses/LICENSE-2.0
 | 
			
		||||
#
 | 
			
		||||
# Unless required by applicable law or agreed to in writing, software
 | 
			
		||||
# distributed under the License is distributed on an "AS IS" BASIS,
 | 
			
		||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
			
		||||
# See the License for the specific language governing permissions and
 | 
			
		||||
# limitations under the License.
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
# Python program to concat CSVs
 | 
			
		||||
 | 
			
		||||
import os
 | 
			
		||||
import sys
 | 
			
		||||
import argparse
 | 
			
		||||
import pandas as pd
 | 
			
		||||
 | 
			
		||||
def main() -> None:
    """Merge every .csv file in a folder into one sorted .csv file.

    Command-line arguments:
        -f / --folder_path: directory that stores the .csv files
            (default "./").

    Every regular file in the folder whose name ends with ".csv" is read with
    its first column as the index, and the rows are concatenated. The result
    is sorted by "model", by the integer before the first "-" in the
    "input/output tokens" column, and by "batch_size". It is written next to
    the inputs, under the name of the alphabetically first input with any
    "_batch<N>" suffix removed.

    Raises:
        ValueError: if the folder contains no .csv files.
    """
    import re  # local import: only needed to strip the batch suffix below

    parser = argparse.ArgumentParser(description="concat .csv files")
    parser.add_argument("-f", "--folder_path", type=str, dest="folder_path",
                        help="The directory which stores the .csv files", default="./")
    args = parser.parse_args()

    csv_files = sorted(
        os.path.join(args.folder_path, file_name)
        for file_name in os.listdir(args.folder_path)
        if file_name.endswith(".csv")
        and os.path.isfile(os.path.join(args.folder_path, file_name))
    )
    if not csv_files:
        # Fail with an actionable message instead of pandas' generic
        # "No objects to concatenate".
        raise ValueError("No .csv files found in {!r}".format(args.folder_path))

    merged_df = pd.concat([pd.read_csv(path, index_col=0) for path in csv_files],
                          ignore_index=True)
    # Temporary numeric key so that e.g. "1024-128" sorts after "32-32".
    merged_df["input_len"] = merged_df["input/output tokens"].apply(lambda x: int(x.split("-")[0]))
    merged_df = merged_df.sort_values(by=["model", "input_len", "batch_size"])
    merged_df.reset_index(drop=True, inplace=True)

    # Strip the batch suffix from the file name only, never from the directory
    # part of the path, so the output always lands beside the inputs.
    folder, first_name = os.path.split(csv_files[0])
    merged_csv = os.path.join(folder, re.sub(r"_batch\d+", "", first_name))
    merged_df.drop("input_len", axis=1).to_csv(merged_csv)


if __name__ == "__main__":
    sys.exit(main())
 | 
			
		||||
		Loading…
	
		Reference in a new issue