From b3647507c02c64227895cedb63a40bccf2a41ed6 Mon Sep 17 00:00:00 2001 From: "Chen, Zhentao" Date: Mon, 18 Dec 2023 15:42:10 +0800 Subject: [PATCH] Fix harness workflow (#9704) * error when larger than 0.001 * fix env setup * fix typo * fix typo --- .github/workflows/llm-harness-evaluation.yml | 12 ++++++------ .../benchmark/harness_nightly/accuracy_regression.py | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/llm-harness-evaluation.yml b/.github/workflows/llm-harness-evaluation.yml index b9fb6123..c0b49b45 100644 --- a/.github/workflows/llm-harness-evaluation.yml +++ b/.github/workflows/llm-harness-evaluation.yml @@ -17,20 +17,20 @@ on: workflow_dispatch: inputs: model_name: - description: 'A list of models added to the job matrix.' + description: 'Model names, separated by comma and must be quoted.' required: true type: string precision: - description: 'A list of precisions added to the job matrix' + description: 'Precisions, separated by comma and must be quoted.' required: true type: string task: - description: 'A list of precisions added to the job matrix' + description: 'Tasks, separated by comma and must be quoted.' required: true type: string runs-on: - description: 'Labels to filter the runners.' - default: 'accuracy' + description: 'Labels to filter the runners, separated by comma and must be quoted.' 
+ default: "accuracy" required: false type: string @@ -166,7 +166,7 @@ jobs: export HF_HOME=${HARNESS_HF_HOME} export HF_DATASETS=$HARNESS_HF_HOME/datasets export HF_DATASETS_CACHE=$HARNESS_HF_HOME/datasets - source /opt/intel/oneapi/setvars.sh + source $HOME/intel/oneapi/setvars.sh python run_llb.py \ --model bigdl-llm \ --pretrained ${MODEL_PATH} \ diff --git a/python/llm/test/benchmark/harness_nightly/accuracy_regression.py b/python/llm/test/benchmark/harness_nightly/accuracy_regression.py index fc1da7b7..ed793956 100644 --- a/python/llm/test/benchmark/harness_nightly/accuracy_regression.py +++ b/python/llm/test/benchmark/harness_nightly/accuracy_regression.py @@ -42,7 +42,7 @@ def main(res_path, golden_path): task_results = results[task] task_golden = golden_results[task] for m in task_results.keys(): - if m in task_golden and abs(task_results[m] - task_golden[m]) < 0.001: + if m in task_golden and abs(task_results[m] - task_golden[m]) > 0.001: if not m.endswith("_stderr"): identical = False logger.error(f"Different on metric '{m}' [golden acc/ current acc]: [{task_golden[m]}/{task_results[m]}]")