Fix harness workflow (#9704)
* error when larger than 0.001 * fix env setup * fix typo * fix typo
This commit is contained in:
parent
12df70953e
commit
b3647507c0
2 changed files with 7 additions and 7 deletions
12
.github/workflows/llm-harness-evaluation.yml
vendored
12
.github/workflows/llm-harness-evaluation.yml
vendored
|
|
@ -17,20 +17,20 @@ on:
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
inputs:
|
inputs:
|
||||||
model_name:
|
model_name:
|
||||||
description: 'A list of models added to the job matrix.'
|
description: 'Model names, separated by comma and must be quoted.'
|
||||||
required: true
|
required: true
|
||||||
type: string
|
type: string
|
||||||
precision:
|
precision:
|
||||||
description: 'A list of precisions added to the job matrix'
|
description: 'Precisions, separated by comma and must be quoted.'
|
||||||
required: true
|
required: true
|
||||||
type: string
|
type: string
|
||||||
task:
|
task:
|
||||||
description: 'A list of precisions added to the job matrix'
|
description: 'Tasks, separated by comma and must be quoted.'
|
||||||
required: true
|
required: true
|
||||||
type: string
|
type: string
|
||||||
runs-on:
|
runs-on:
|
||||||
description: 'Labels to filter the runners.'
|
description: 'Labels to filter the runners, separated by comma and must be quoted.'
|
||||||
default: 'accuracy'
|
default: "accuracy"
|
||||||
required: false
|
required: false
|
||||||
type: string
|
type: string
|
||||||
|
|
||||||
|
|
@ -166,7 +166,7 @@ jobs:
|
||||||
export HF_HOME=${HARNESS_HF_HOME}
|
export HF_HOME=${HARNESS_HF_HOME}
|
||||||
export HF_DATASETS=$HARNESS_HF_HOME/datasets
|
export HF_DATASETS=$HARNESS_HF_HOME/datasets
|
||||||
export HF_DATASETS_CACHE=$HARNESS_HF_HOME/datasets
|
export HF_DATASETS_CACHE=$HARNESS_HF_HOME/datasets
|
||||||
source /opt/intel/oneapi/setvars.sh
|
source $HOME/intel/oneapi/setvars.sh
|
||||||
python run_llb.py \
|
python run_llb.py \
|
||||||
--model bigdl-llm \
|
--model bigdl-llm \
|
||||||
--pretrained ${MODEL_PATH} \
|
--pretrained ${MODEL_PATH} \
|
||||||
|
|
|
||||||
|
|
@ -42,7 +42,7 @@ def main(res_path, golden_path):
|
||||||
task_results = results[task]
|
task_results = results[task]
|
||||||
task_golden = golden_results[task]
|
task_golden = golden_results[task]
|
||||||
for m in task_results.keys():
|
for m in task_results.keys():
|
||||||
if m in task_golden and abs(task_results[m] - task_golden[m]) < 0.001:
|
if m in task_golden and abs(task_results[m] - task_golden[m]) > 0.001:
|
||||||
if not m.endswith("_stderr"):
|
if not m.endswith("_stderr"):
|
||||||
identical = False
|
identical = False
|
||||||
logger.error(f"Different on metric '{m}' [golden acc/ current acc]: [{task_golden[m]}/{task_results[m]}]")
|
logger.error(f"Different on metric '{m}' [golden acc/ current acc]: [{task_golden[m]}/{task_results[m]}]")
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue