* modify output_path as a directory * schedule nightly at 21 on Friday * add tasks and models for nightly * add accuracy regression * comment out if to test * mixed fp4 * for test * add missing delimiter * remove comma * fixed golden results * add mixed 4 golden result * add more options * add mistral results * get golden result of stable lm * move nightly scripts and results to test folder * add license * add fp8 stable lm golden * run on all available devices * trigger only when ready for review * fix new line * update golden * add mistral
37 lines
1.5 KiB
JSON
37 lines
1.5 KiB
JSON
{
|
|
"stablelm-3b-4e1t": {"xpu": {
|
|
"mixed_fp4": {
|
|
"truthfulqa_mc": {"mc1": 0.24357405140758873,"mc1_stderr": 0.015026354824910782,"mc2": 0.37399115063281224,"mc2_stderr": 0.013684003173581748},
|
|
"arc_challenge": {
|
|
"acc": 0.40102389078498296,
|
|
"acc_stderr": 0.014322255790719869,
|
|
"acc_norm": 0.44283276450511944,
|
|
"acc_norm_stderr": 0.014515573873348897
|
|
}
|
|
},
|
|
"fp8": {
|
|
"truthfulqa_mc": {
|
|
"mc1": 0.24479804161566707,
|
|
"mc1_stderr": 0.01505186948671501,
|
|
"mc2": 0.3747170112957169,
|
|
"mc2_stderr": 0.013516983188729865
|
|
},
|
|
"arc_challenge": {
|
|
"acc": 0.41552901023890787,
|
|
"acc_stderr": 0.014401366641216377,
|
|
"acc_norm": 0.46245733788395904,
|
|
"acc_norm_stderr": 0.014570144495075581
|
|
}
|
|
}
|
|
}},
|
|
"Mistral-7B-v0.1": {"xpu": {
|
|
"mixed_fp4": {
|
|
"truthfulqa_mc": {"mc1": 0.27539779681762544,"mc1_stderr": 0.01563813566777552,"mc2": 0.41062244273774384,"mc2_stderr": 0.014067078150027909},
|
|
"arc_challenge": {"acc": 0.5674061433447098,"acc_stderr": 0.014478005694182528,"acc_norm": 0.5989761092150171,"acc_norm_stderr": 0.014322255790719867}
|
|
},
|
|
"fp8": {
|
|
"truthfulqa_mc": {"mc1": 0.2778457772337821,"mc1_stderr": 0.015680929364024643,"mc2": 0.4212635093545362,"mc2_stderr": 0.01414660694632397},
|
|
"arc_challenge": {"acc": 0.5639931740614335,"acc_stderr": 0.014491225699230916,"acc_norm": 0.5998293515358362,"acc_norm_stderr": 0.014317197787809174}
|
|
}
|
|
}}
|
|
}
|