Fix harness workflow (#9704)
* error when larger than 0.001 * fix env setup * fix typo * fix typo
This commit is contained in:
		
							parent
							
								
									12df70953e
								
							
						
					
					
						commit
						b3647507c0
					
				
					 2 changed files with 7 additions and 7 deletions
				
			
		
							
								
								
									
										12
									
								
								.github/workflows/llm-harness-evaluation.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										12
									
								
								.github/workflows/llm-harness-evaluation.yml
									
									
									
									
										vendored
									
									
								
							| 
						 | 
				
			
			@ -17,20 +17,20 @@ on:
 | 
			
		|||
  workflow_dispatch:
 | 
			
		||||
    inputs:
 | 
			
		||||
      model_name:
 | 
			
		||||
        description: 'A list of models added to the job matrix.'
 | 
			
		||||
        description: 'Model names, separated by comma and must be quoted.'
 | 
			
		||||
        required: true
 | 
			
		||||
        type: string
 | 
			
		||||
      precision:
 | 
			
		||||
        description: 'A list of precisions added to the job matrix'
 | 
			
		||||
        description: 'Precisions, separated by comma and must be quoted.'
 | 
			
		||||
        required: true
 | 
			
		||||
        type: string
 | 
			
		||||
      task:
 | 
			
		||||
        description: 'A list of precisions added to the job matrix'
 | 
			
		||||
        description: 'Tasks, separated by comma and must be quoted.'
 | 
			
		||||
        required: true
 | 
			
		||||
        type: string
 | 
			
		||||
      runs-on:
 | 
			
		||||
        description: 'Labels to filter the runners.'
 | 
			
		||||
        default: 'accuracy'
 | 
			
		||||
        description: 'Labels to filter the runners, separated by comma and must be quoted.'
 | 
			
		||||
        default: "accuracy"
 | 
			
		||||
        required: false
 | 
			
		||||
        type: string
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -166,7 +166,7 @@ jobs:
 | 
			
		|||
          export HF_HOME=${HARNESS_HF_HOME}
 | 
			
		||||
          export HF_DATASETS=$HARNESS_HF_HOME/datasets
 | 
			
		||||
          export HF_DATASETS_CACHE=$HARNESS_HF_HOME/datasets
 | 
			
		||||
          source /opt/intel/oneapi/setvars.sh
 | 
			
		||||
          source $HOME/intel/oneapi/setvars.sh
 | 
			
		||||
          python run_llb.py \
 | 
			
		||||
          --model bigdl-llm \
 | 
			
		||||
          --pretrained ${MODEL_PATH} \
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -42,7 +42,7 @@ def main(res_path, golden_path):
 | 
			
		|||
        task_results = results[task]
 | 
			
		||||
        task_golden = golden_results[task]
 | 
			
		||||
        for m in task_results.keys():
 | 
			
		||||
            if m in task_golden and abs(task_results[m] - task_golden[m]) < 0.001:
 | 
			
		||||
            if m in task_golden and abs(task_results[m] - task_golden[m]) > 0.001:
 | 
			
		||||
                if not m.endswith("_stderr"):
 | 
			
		||||
                    identical = False
 | 
			
		||||
                    logger.error(f"Different on metric '{m}' [golden acc/ current acc]: [{task_golden[m]}/{task_results[m]}]")
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue