ipex-llm/python/llm/dev/benchmark/perplexity/make_table.py
hxsz1997 cba61a2909 Add html report of ppl (#10218)
* remove include and language option, select the corresponding dataset based on the model name in Run

* change the nightly test time

* change the nightly test time of harness and ppl

* save the ppl result to json file

* generate csv file and print table result

* generate html

* modify the way to get parent folder

* update html in parent folder

* add llm-ppl-summary and llm-ppl-summary-html

* modify echo single result

* remove download fp16.csv

* change model name of PR

* move ppl nightly related files to llm/test folder

* reformat

* separate make_table from make_table_and_csv.py

* separate make_csv from make_table_and_csv.py

* update llm-ppl-html

* remove comment

* add Download fp16.results
2024-02-27 17:37:08 +08:00

101 lines
No EOL
3.2 KiB
Python

#
# Copyright 2016 The BigDL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Usage:
python make_table.py <input_dir>
"""
import logging
from pytablewriter import MarkdownTableWriter, LatexTableWriter
import os
import json
import sys
import csv
import datetime
# Configure the root logger so records at INFO level and above are emitted.
logging.basicConfig(level=logging.INFO)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
def make_table(result_dict):
    """Render merged perplexity results as a Markdown table.

    ``result_dict`` is walked as ``{model: {precision: {language: entry}}}``
    and ``entry["results"]`` is printed with four decimals. A language that
    has no entry yields an empty cell. The model name is written only on the
    first row of each model; the following rows leave that cell blank.

    Returns:
        The Markdown table as a string.
    """
    languages = ["en", "zh"]
    columns = ["Model", "Precision"] + languages

    rows = []
    for model, model_results in result_dict.items():
        per_precision = enumerate(model_results.items())
        for index, (precision, prec_results) in per_precision:
            # Only the first row of a model carries its name.
            row = [model if index == 0 else "", precision]
            for language in languages:
                entry = prec_results.get(language)
                if entry is None:
                    row.append("")
                else:
                    row.append("%.4f" % entry["results"])
            rows.append(row)

    md_writer = MarkdownTableWriter()
    md_writer.headers = list(columns)
    md_writer.value_matrix = rows

    # The LaTeX writer is filled in as well, but its output is not emitted.
    # todo: make latex table look good
    # print(latex_writer.dumps())
    latex_writer = LatexTableWriter()
    latex_writer.headers = list(columns)
    latex_writer.value_matrix = rows

    return md_writer.dumps()
def merge_results(path):
    """Collect every ``*result.json`` found under *path* into a nested dict.

    The last four components of the directory that holds a result file are
    read as ``<model>/<device>/<precision>/<language>``; the device component
    is not kept in the output. If one directory holds several matching
    files, the one that sorts last wins.

    Args:
        path: Root directory that is walked recursively.

    Returns:
        A dict shaped ``{model: {precision: {language: loaded_json}}}``.
        Directories that hold a result file but have fewer than four path
        components are skipped with a warning instead of aborting the run.
    """
    print('Read from', path)
    merged_results = dict()
    for dirpath, dirnames, filenames in os.walk(path):
        # Sorting in place makes os.walk descend in a stable order, so the
        # key order of the result does not depend on the filesystem.
        dirnames.sort()
        result_files = sorted(
            f for f in filenames if f.endswith("result.json"))
        if not result_files:
            continue
        # Normalise the native separator so the layout is also recognised
        # where os.sep is not '/'.
        parts = dirpath.replace(os.sep, '/').split('/')[-4:]
        if len(parts) != 4:
            logging.getLogger(__name__).warning(
                "Skipping %s: expected "
                "<model>/<device>/<precision>/<language>", dirpath)
            continue
        model, _device, precision, language = parts
        for filename in result_files:
            # A separate name keeps the ``path`` argument intact.
            result_path = os.path.join(dirpath, filename)
            with open(result_path, "r", encoding="utf-8") as f:
                result_dict = json.load(f)
            by_precision = merged_results.setdefault(model, dict())
            by_precision.setdefault(precision, dict())[language] = result_dict
    return merged_results
def main(*args):
    """Print the Markdown results table for the directory in ``args[1]``.

    ``args`` is laid out like ``sys.argv``: ``args[0]`` is the script name
    and ``args[1]`` is the input directory.

    Raises:
        ValueError: If no input directory was passed.
    """
    if len(args) <= 1:
        raise ValueError("Input path is required")
    table = make_table(merge_results(args[1]))
    print(table)
if __name__ == "__main__":
    # sys.argv[0] is the script name; main() reads the input directory from
    # the argument that follows it.
    main(*sys.argv)