fix harness runner label of manual trigger (#9754)
* fix runner
* update golden
This commit is contained in:
parent
2d1bf20309
commit
86a69e289c
2 changed files with 19 additions and 14 deletions
2
.github/workflows/llm-harness-evaluation.yml
vendored
2
.github/workflows/llm-harness-evaluation.yml
vendored
|
|
@@ -78,7 +78,7 @@ jobs:
|
|||
MANUAL_MATRIX_MODEL_NAME: ${{format('[ {0} ]', inputs.model_name)}}
|
||||
MANUAL_MATRIX_TASK: ${{format('[ {0} ]', inputs.task)}}
|
||||
MANUAL_MATRIX_PRECISION: ${{format('[ {0} ]', inputs.precision)}}
|
||||
MANUAL_LABELS: ${{format('["self-hosted", "llm", {0}]', inputs.precision)}}
|
||||
MANUAL_LABELS: ${{format('["self-hosted", "llm", {0}]', inputs.runs-on)}}
|
||||
run: |
|
||||
echo "model_name=$MANUAL_MATRIX_MODEL_NAME" >> $GITHUB_ENV
|
||||
echo "precision=$MANUAL_MATRIX_TASK" >> $GITHUB_ENV
|
||||
|
|
|
|||
|
|
@@ -27,25 +27,30 @@
|
|||
"Mistral-7B-v0.1": {"xpu": {
|
||||
"mixed_fp4": {
|
||||
"truthfulqa_mc": {
|
||||
"mc1": 0.27539779681762544,
|
||||
"mc1_stderr": 0.01563813566777552,
|
||||
"mc2": 0.41062756399348693,
|
||||
"mc2_stderr": 0.014067612078490615
|
||||
"mc1": 0.2741738066095471,
|
||||
"mc1_stderr": 0.015616518497219374,
|
||||
"mc2": 0.4090424865843113,
|
||||
"mc2_stderr": 0.014068835265546585
|
||||
},
|
||||
"arc_challenge": {"acc": 0.5674061433447098,"acc_stderr": 0.014478005694182528,"acc_norm": 0.5989761092150171,"acc_norm_stderr": 0.014322255790719867}
|
||||
"arc_challenge": {
|
||||
"acc": 0.5674061433447098,
|
||||
"acc_stderr": 0.014478005694182528,
|
||||
"acc_norm": 0.6023890784982935,
|
||||
"acc_norm_stderr": 0.01430175222327954
|
||||
}
|
||||
},
|
||||
"fp8": {
|
||||
"truthfulqa_mc": {
|
||||
"mc1": 0.2778457772337821,
|
||||
"mc1_stderr": 0.015680929364024643,
|
||||
"mc2": 0.42125519016651203,
|
||||
"mc2_stderr": 0.014145367212406432
|
||||
"mc1": 0.2802937576499388,
|
||||
"mc1_stderr": 0.015723139524608763,
|
||||
"mc2": 0.4253576013662111,
|
||||
"mc2_stderr": 0.014199215617062957
|
||||
},
|
||||
"arc_challenge": {
|
||||
"acc": 0.5639931740614335,
|
||||
"acc_stderr": 0.014491225699230916,
|
||||
"acc_norm": 0.5989761092150171,
|
||||
"acc_norm_stderr": 0.014322255790719867
|
||||
"acc": 0.5622866894197952,
|
||||
"acc_stderr": 0.014497573881108283,
|
||||
"acc_norm": 0.6032423208191127,
|
||||
"acc_norm_stderr": 0.014296513020180646
|
||||
}
|
||||
}
|
||||
}}
|
||||
|
|
|
|||
Loading…
Reference in a new issue