Harness eval: Add is_last parameter and fix logical operator in highlight_vals (#10192)
* Add is_last parameter and fix logical operator in highlight_vals * Add script to update HTML files in parent folder * Add running update_html_in_parent_folder.py in summarize step * Add licence info * Remove update_html_in_parent_folder.py in Summarize the results for pull request
This commit is contained in:
		
							parent
							
								
									c7e839e66c
								
							
						
					
					
						commit
						b1a97b71a9
					
				
					 3 changed files with 54 additions and 4 deletions
				
			
		
							
								
								
									
										1
									
								
								.github/workflows/llm-harness-evaluation.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								.github/workflows/llm-harness-evaluation.yml
									
									
									
									
										vendored
									
									
								
							| 
						 | 
				
			
			@ -305,6 +305,7 @@ jobs:
 | 
			
		|||
          pip install pandas==1.5.3
 | 
			
		||||
          python ${{ github.workspace }}/python/llm/dev/benchmark/harness/make_table_and_csv.py ${{ env.NIGHTLY_FOLDER}}/${{ env.OUTPUT_PATH }} ${{ env.NIGHTLY_FOLDER}}
 | 
			
		||||
          python ${{ github.workspace }}/python/llm/dev/benchmark/harness/harness_csv_to_html.py -f ${{ env.NIGHTLY_FOLDER}}
 | 
			
		||||
          python ${{ github.workspace }}/python/llm/dev/benchmark/harness/update_html_in_parent_folder.py -f ${{ env.NIGHTLY_FOLDER }}
 | 
			
		||||
 | 
			
		||||
      - name: Summarize the results for pull request
 | 
			
		||||
        if: github.event_name == 'pull_request'
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -21,13 +21,13 @@ import sys
 | 
			
		|||
import argparse
 | 
			
		||||
import pandas as pd
 | 
			
		||||
 | 
			
		||||
def highlight_vals(val, max=3.0, color1='red', color2='green', color3='yellow', is_last=False):
    """Return the CSS background declaration for a single table cell.

    Args:
        val: Cell value. Only ``float`` values are ever highlighted.
        max: Threshold magnitude. The name shadows the builtin but is kept
            because callers pass it by keyword.
        color1: Background used when ``val <= -max``.
        color2: Background used when ``val > max``.
        color3: Background used for any other non-zero value, but only
            when ``is_last`` is true.
        is_last: Enables the ``color3`` highlight for values inside the
            threshold band.

    Returns:
        A ``'background-color: <color>'`` string, or ``''`` when the cell
        should not be highlighted.
    """
    if not isinstance(val, float):
        return ''
    if val > max:
        return 'background-color: %s' % color2
    if val <= -max:
        return 'background-color: %s' % color1
    if is_last and val != 0.0:
        return 'background-color: %s' % color3
    # Always hand back a string: a float that matched no branch used to fall
    # off the end of the function and implicitly return None.
    return ''
 | 
			
		||||
| 
						 | 
				
			
			@ -194,9 +194,9 @@ def main():
 | 
			
		|||
 | 
			
		||||
        latest_csv.drop('Index', axis=1, inplace=True)
 | 
			
		||||
 | 
			
		||||
        styled_df = latest_csv.style.format(columns).applymap(lambda val: highlight_vals(val, max=3.0, color1='red', color2='green'), subset=subset1)
 | 
			
		||||
        styled_df = latest_csv.style.format(columns).applymap(lambda val: highlight_vals(val, max=3.0, is_last=True), subset=subset1)
 | 
			
		||||
        for task in ['Arc', 'TruthfulQA', 'Winogrande']:
 | 
			
		||||
            styled_df = styled_df.applymap(lambda val: highlight_vals(val, max=highlight_threshold, color1='red', color2='green'), subset=[f'{task}_diff_FP16(%)'])
 | 
			
		||||
            styled_df = styled_df.applymap(lambda val: highlight_vals(val, max=highlight_threshold, is_last=False), subset=[f'{task}_diff_FP16(%)'])
 | 
			
		||||
        
 | 
			
		||||
        # add css style to restrict width and wrap text
 | 
			
		||||
        styled_df.set_table_styles([{
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -0,0 +1,49 @@
 | 
			
		|||
#
 | 
			
		||||
# Copyright 2016 The BigDL Authors.
 | 
			
		||||
#
 | 
			
		||||
# Licensed under the Apache License, Version 2.0 (the "License");
 | 
			
		||||
# you may not use this file except in compliance with the License.
 | 
			
		||||
# You may obtain a copy of the License at
 | 
			
		||||
#
 | 
			
		||||
#     http://www.apache.org/licenses/LICENSE-2.0
 | 
			
		||||
#
 | 
			
		||||
# Unless required by applicable law or agreed to in writing, software
 | 
			
		||||
# distributed under the License is distributed on an "AS IS" BASIS,
 | 
			
		||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
			
		||||
# See the License for the specific language governing permissions and
 | 
			
		||||
# limitations under the License.
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
# Python program to update the HTML file in the parent folder
 | 
			
		||||
 | 
			
		||||
import os
 | 
			
		||||
import shutil
 | 
			
		||||
import argparse
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
 | 
			
		||||
def update_html_in_parent_folder(folder_path):
    """Replace the HTML files in the parent of *folder_path* with the newest one.

    Every ``*.html`` file directly inside the parent folder is deleted. Then
    the ``*.html`` file in *folder_path* with the greatest
    ``os.path.getctime`` value is copied into the parent folder and its file
    name is printed.

    Args:
        folder_path: Folder that holds the generated HTML files.

    Returns:
        None. When *folder_path* contains no HTML file, nothing is copied
        and nothing is printed.
    """
    source_folder = Path(folder_path)
    parent_folder = source_folder.parent

    # Clear out every HTML file currently sitting in the parent folder.
    for stale_html in parent_folder.glob('*.html'):
        stale_html.unlink()

    # default=None keeps max() from raising on an empty folder.
    latest_html_file = max(source_folder.glob('*.html'), key=os.path.getctime, default=None)
    if latest_html_file is None:
        # Nothing to copy. Returning here avoids the AttributeError that
        # reading `.name` on None used to raise.
        return

    shutil.copy(latest_html_file, parent_folder)
    print(latest_html_file.name)
 | 
			
		||||
 | 
			
		||||
def main(argv=None):
    """Parse the command line and update the HTML file in the parent folder.

    Args:
        argv: Optional list of argument strings. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description="Update HTML in parent folder.")
    # required=True: without it a missing -f left args.folder as None, which
    # surfaced later as a TypeError traceback instead of a usage message.
    parser.add_argument("-f", "--folder", type=str, required=True, help="Path to the folder")
    args = parser.parse_args(argv)

    update_html_in_parent_folder(args.folder)


# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
 | 
			
		||||
		Loading…
	
		Reference in a new issue