ipex-llm/python/llm/test/benchmark/harness_nightly/golden_results.json
{
  "stablelm-3b-4e1t": {
    "xpu": {
      "mixed_fp4": {
        "truthfulqa_mc": {
          "mc1": 0.24357405140758873,
          "mc1_stderr": 0.015026354824910782,
          "mc2": 0.37399115063281224,
          "mc2_stderr": 0.013684003173581748
        },
        "arc_challenge": {
          "acc": 0.40102389078498296,
          "acc_stderr": 0.014322255790719869,
          "acc_norm": 0.44283276450511944,
          "acc_norm_stderr": 0.014515573873348897
        }
      },
      "fp8": {
        "truthfulqa_mc": {
          "mc1": 0.24479804161566707,
          "mc1_stderr": 0.01505186948671501,
          "mc2": 0.3747170112957169,
          "mc2_stderr": 0.013516983188729865
        },
        "arc_challenge": {
          "acc": 0.41552901023890787,
          "acc_stderr": 0.014401366641216377,
          "acc_norm": 0.46245733788395904,
          "acc_norm_stderr": 0.014570144495075581
        }
      }
    }
  },
  "Mistral-7B-v0.1": {
    "xpu": {
      "mixed_fp4": {
        "truthfulqa_mc": {
          "mc1": 0.2741738066095471,
          "mc1_stderr": 0.015616518497219374,
          "mc2": 0.4090424865843113,
          "mc2_stderr": 0.014068835265546585
        },
        "arc_challenge": {
          "acc": 0.5674061433447098,
          "acc_stderr": 0.014478005694182528,
          "acc_norm": 0.6023890784982935,
          "acc_norm_stderr": 0.01430175222327954
        }
      },
      "fp8": {
        "truthfulqa_mc": {
          "mc1": 0.2802937576499388,
          "mc1_stderr": 0.015723139524608763,
          "mc2": 0.4253576013662111,
          "mc2_stderr": 0.014199215617062957
        },
        "arc_challenge": {
          "acc": 0.5622866894197952,
          "acc_stderr": 0.014497573881108283,
          "acc_norm": 0.6032423208191127,
          "acc_norm_stderr": 0.014296513020180646
        }
      }
    }
  }
}
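The layout of this golden file is model -> device -> precision -> task -> metric. A minimal sketch of how a nightly check could consume it is shown below; the function name, tolerance value, and comparison logic are illustrative assumptions for this sketch, not the actual ipex-llm harness runner API.

# Sketch only: compare a fresh lm-eval result dict against golden_results.json.
# Names (check_against_golden, rel_tol) and the 5% relative tolerance are
# assumptions for illustration, not the real runner's interface.
import json

def check_against_golden(results: dict, golden_path: str, rel_tol: float = 0.05) -> list:
    """Return (model, device, precision, task, metric) tuples whose new value
    deviates from the golden value by more than rel_tol (relative)."""
    with open(golden_path) as f:
        golden = json.load(f)

    regressions = []
    for model, devices in golden.items():
        for device, precisions in devices.items():
            for precision, tasks in precisions.items():
                for task, metrics in tasks.items():
                    for metric, golden_value in metrics.items():
                        new_value = (
                            results.get(model, {})
                            .get(device, {})
                            .get(precision, {})
                            .get(task, {})
                            .get(metric)
                        )
                        if new_value is None:
                            continue  # metric not produced in this run
                        if abs(new_value - golden_value) > rel_tol * abs(golden_value):
                            regressions.append((model, device, precision, task, metric))
    return regressions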