Reformat summary table (#9942)

* reformat the table

* refactor the file

* read result.json only
This commit is contained in:
Chen, Zhentao 2024-01-25 23:49:00 +08:00 committed by GitHub
parent 171fb2d185
commit 762adc4f9d

View file

@ -34,23 +34,28 @@ def make_table(result_dict):
"""Generate table of results.""" """Generate table of results."""
md_writer = MarkdownTableWriter() md_writer = MarkdownTableWriter()
latex_writer = LatexTableWriter() latex_writer = LatexTableWriter()
md_writer.headers = ["Model", "Precision", "Task", "Metric", "Value"] md_writer.headers = ["Model", "Precision", "Arc", "Hellaswag", "MMLU", "TruthfulQA","Winogrande", "GSM8K"]
latex_writer.headers = ["Model", "Precision", "Task", "Metric", "Value"] latex_writer.headers = ["Model", "Precision", "Arc", "Hellaswag", "MMLU", "TruthfulQA","Winogrande", "GSM8K"]
tasks = ["arc", "hellaswag", "mmlu", "truthfulqa", "winogrande", "gsm8k"]
values = [] values = []
for model, model_results in result_dict.items(): for model, model_results in result_dict.items():
for precision, prec_results in model_results.items(): for precision, prec_results in model_results.items():
for task, task_results in prec_results.items(): value = [model, precision]
for task in tasks:
results = task_results["results"] task_results = prec_results.get(task, None)
if task_results is None:
value.append("")
else:
m = task_to_metric[task] m = task_to_metric[task]
results = task_results["results"]
if len(results) > 1: if len(results) > 1:
result = results[task] result = results[task]
else: else:
result = list(results.values())[0] result = list(results.values())[0]
value.append("%.2f" % (result[m] * 100))
values.append([model, precision, task, m, "%.2f" % (result[m] * 100)]) values.append(value)
model = "" model = ""
precision = "" precision = ""
@ -63,8 +68,7 @@ def make_table(result_dict):
return md_writer.dumps() return md_writer.dumps()
if __name__ == "__main__": def merge_results(path):
# loop dirs and subdirs in results dir # loop dirs and subdirs in results dir
# for each dir, load json files # for each dir, load json files
merged_results = dict() merged_results = dict()
@ -72,7 +76,7 @@ if __name__ == "__main__":
# skip dirs without files # skip dirs without files
if not filenames: if not filenames:
continue continue
for filename in sorted([f for f in filenames if f.endswith(".json")]): for filename in sorted([f for f in filenames if f.endswith("result.json")]):
path = os.path.join(dirpath, filename) path = os.path.join(dirpath, filename)
model, device, precision, task = dirpath.split('/')[-4:] model, device, precision, task = dirpath.split('/')[-4:]
with open(path, "r") as f: with open(path, "r") as f:
@ -82,4 +86,15 @@ if __name__ == "__main__":
if precision not in merged_results[model]: if precision not in merged_results[model]:
merged_results[model][precision] = dict() merged_results[model][precision] = dict()
merged_results[model][precision][task] = result_dict merged_results[model][precision][task] = result_dict
return merged_results
def main(*args):
    """Merge the per-task results under a directory and print the table.

    Args:
        *args: Positional command-line arguments. ``args[0]`` is the path
            handed to ``merge_results``; any further arguments are ignored.

    Raises:
        IndexError: If called without any argument.
    """
    merged_results = merge_results(args[0])
    print(make_table(merged_results))


if __name__ == "__main__":
    # sys.argv[0] is this script's own path, not a user argument. Forward only
    # the real arguments so that args[0] inside main() is the results path.
    if len(sys.argv) < 2:
        sys.exit("usage: %s <results_dir>" % sys.argv[0])
    main(*sys.argv[1:])