Fix harness workflow (#9704)
* error when larger than 0.001 * fix env setup * fix typo * fix typo
This commit is contained in:
parent
12df70953e
commit
b3647507c0
2 changed files with 7 additions and 7 deletions
12
.github/workflows/llm-harness-evaluation.yml
vendored
12
.github/workflows/llm-harness-evaluation.yml
vendored
|
|
@ -17,20 +17,20 @@ on:
|
|||
workflow_dispatch:
|
||||
inputs:
|
||||
model_name:
|
||||
description: 'A list of models added to the job matrix.'
|
||||
description: 'Model names, separated by commas and must be quoted.'
|
||||
required: true
|
||||
type: string
|
||||
precision:
|
||||
description: 'A list of precisions added to the job matrix'
|
||||
description: 'Precisions, separated by commas and must be quoted.'
|
||||
required: true
|
||||
type: string
|
||||
task:
|
||||
description: 'A list of precisions added to the job matrix'
|
||||
description: 'Tasks, separated by commas and must be quoted.'
|
||||
required: true
|
||||
type: string
|
||||
runs-on:
|
||||
description: 'Labels to filter the runners.'
|
||||
default: 'accuracy'
|
||||
description: 'Labels to filter the runners, separated by commas and must be quoted.'
|
||||
default: "accuracy"
|
||||
required: false
|
||||
type: string
|
||||
|
||||
|
|
@ -166,7 +166,7 @@ jobs:
|
|||
export HF_HOME=${HARNESS_HF_HOME}
|
||||
export HF_DATASETS=$HARNESS_HF_HOME/datasets
|
||||
export HF_DATASETS_CACHE=$HARNESS_HF_HOME/datasets
|
||||
source /opt/intel/oneapi/setvars.sh
|
||||
source $HOME/intel/oneapi/setvars.sh
|
||||
python run_llb.py \
|
||||
--model bigdl-llm \
|
||||
--pretrained ${MODEL_PATH} \
|
||||
|
|
|
|||
|
|
@ -42,7 +42,7 @@ def main(res_path, golden_path):
|
|||
task_results = results[task]
|
||||
task_golden = golden_results[task]
|
||||
for m in task_results.keys():
|
||||
if m in task_golden and abs(task_results[m] - task_golden[m]) < 0.001:
|
||||
if m in task_golden and abs(task_results[m] - task_golden[m]) > 0.001:
|
||||
if not m.endswith("_stderr"):
|
||||
identical = False
|
||||
logger.error(f"Different on metric '{m}' [golden acc/ current acc]: [{task_golden[m]}/{task_results[m]}]")
|
||||
|
|
|
|||
Loading…
Reference in a new issue