Harness: remove deprecated files (#10165)
This commit is contained in:
parent
d3591383d5
commit
14ba2c5135
2 changed files with 0 additions and 113 deletions
|
|
@ -1,56 +0,0 @@
|
||||||
#
|
|
||||||
# Copyright 2016 The BigDL Authors.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
import json
|
|
||||||
import sys
|
|
||||||
import logging
|
|
||||||
|
|
||||||
# Configure the root logger so that records at INFO level and above are emitted.
logging.basicConfig(level=logging.INFO)
|
|
||||||
# Module-level logger, named after this module, used for all comparison reports.
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
def main(res_path, golden_path, tolerance=0.001):
    """Compare an accuracy result file against stored golden values.

    The result file is JSON with a top-level ``"results"`` object mapping
    task name -> {metric name -> number}.  The golden file is JSON nested as
    ``[model_name][device][precision][task][metric]``.  ``model_name``,
    ``device`` and ``precision`` are taken from the directory names of
    ``res_path``: of the four path components directly above the file name,
    the first three are used and the fourth is ignored.

    Args:
        res_path: '/'-separated path to the result JSON; it must contain at
            least four directory components above the file name.
        golden_path: Path to the golden-results JSON.
        tolerance: Largest absolute difference between a current metric and
            its golden value that is still treated as equal.

    Raises:
        ValueError: If ``res_path`` has too few components to identify the
            model, device and precision.
        RuntimeError: If a task is missing from the golden results, or any
            metric whose name does not end in ``_stderr`` differs from its
            golden value by more than ``tolerance``.
    """
    print(res_path, golden_path)
    with open(res_path, "r", encoding="utf-8") as f:
        results = json.load(f)['results']
    print(results)

    # The four path components right above the file name select the golden
    # entry.  The last of them is not needed: task names are read from the
    # result file itself.
    path_parts = res_path.split('/')[-5:-1]
    if len(path_parts) != 4:
        raise ValueError(
            f"Cannot derive model/device/precision from result path "
            f"'{res_path}': expected at least 4 directories above the file."
        )
    model_name, device, precision, _ = path_parts

    with open(golden_path, "r", encoding="utf-8") as f:
        golden_results = json.load(f)[model_name][device][precision]
    print(golden_results)

    identical = True
    for task, task_results in results.items():
        if task not in golden_results:
            # A task without a golden entry cannot be verified, so it fails.
            identical = False
            logger.error(f"Task {task} should be updated to golden results.")
            continue

        task_golden = golden_results[task]
        for m, current in task_results.items():
            # Metrics that have no golden counterpart are skipped silently.
            if m not in task_golden:
                continue
            if abs(current - task_golden[m]) <= tolerance:
                continue
            if m.endswith("_stderr"):
                # Differences in *_stderr metrics are reported but never fail.
                logger.warning(f"Diff on {m} [golden acc/ current acc]: [{task_golden[m]}/{task_results[m]}]")
            else:
                identical = False
                logger.error(f"Different on metric '{m}' [golden acc/ current acc]: [{task_golden[m]}/{task_results[m]}]")

    if identical:
        logger.info("Accuracy values are identical to golden results.")
    else:
        raise RuntimeError("Accuracy has changed, please check if any accuracy issue or update golden accuracy value.")
|
|
||||||
|
|
||||||
# Script entry: pass the first two command-line arguments to main() as
# res_path and golden_path.
# NOTE(review): this call also runs when the module is imported; consider an
# `if __name__ == "__main__":` guard if it is ever used as a library.
main(*sys.argv[1:3])
|
|
||||||
|
|
@ -1,57 +0,0 @@
|
||||||
{
|
|
||||||
"stablelm-3b-4e1t": {"xpu": {
|
|
||||||
"mixed_fp4": {
|
|
||||||
"truthfulqa_mc": {"mc1": 0.24357405140758873,"mc1_stderr": 0.015026354824910782,"mc2": 0.37399115063281224,"mc2_stderr": 0.013684003173581748},
|
|
||||||
"arc_challenge": {
|
|
||||||
"acc": 0.40102389078498296,
|
|
||||||
"acc_stderr": 0.014322255790719869,
|
|
||||||
"acc_norm": 0.44283276450511944,
|
|
||||||
"acc_norm_stderr": 0.014515573873348897
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"fp8": {
|
|
||||||
"truthfulqa_mc": {
|
|
||||||
"mc1": 0.24479804161566707,
|
|
||||||
"mc1_stderr": 0.01505186948671501,
|
|
||||||
"mc2": 0.3747170112957169,
|
|
||||||
"mc2_stderr": 0.013516983188729865
|
|
||||||
},
|
|
||||||
"arc_challenge": {
|
|
||||||
"acc": 0.41552901023890787,
|
|
||||||
"acc_stderr": 0.014401366641216377,
|
|
||||||
"acc_norm": 0.46245733788395904,
|
|
||||||
"acc_norm_stderr": 0.014570144495075581
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}},
|
|
||||||
"Mistral-7B-v0.1": {"xpu": {
|
|
||||||
"mixed_fp4": {
|
|
||||||
"truthfulqa_mc": {
|
|
||||||
"mc1": 0.2741738066095471,
|
|
||||||
"mc1_stderr": 0.015616518497219374,
|
|
||||||
"mc2": 0.4090424865843113,
|
|
||||||
"mc2_stderr": 0.014068835265546585
|
|
||||||
},
|
|
||||||
"arc_challenge": {
|
|
||||||
"acc": 0.5674061433447098,
|
|
||||||
"acc_stderr": 0.014478005694182528,
|
|
||||||
"acc_norm": 0.6023890784982935,
|
|
||||||
"acc_norm_stderr": 0.01430175222327954
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"fp8": {
|
|
||||||
"truthfulqa_mc": {
|
|
||||||
"mc1": 0.2802937576499388,
|
|
||||||
"mc1_stderr": 0.015723139524608763,
|
|
||||||
"mc2": 0.4253576013662111,
|
|
||||||
"mc2_stderr": 0.014199215617062957
|
|
||||||
},
|
|
||||||
"arc_challenge": {
|
|
||||||
"acc": 0.5622866894197952,
|
|
||||||
"acc_stderr": 0.014497573881108283,
|
|
||||||
"acc_norm": 0.6032423208191127,
|
|
||||||
"acc_norm_stderr": 0.014296513020180646
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}}
|
|
||||||
}
|
|
||||||
Loading…
Reference in a new issue