Fixed dumped logs in harness (#9549)
* Pin transformers to 4.34.0 in the workflow
* Treat output_path as a directory rather than a file
* Add device and task to the output directory's parent path
This commit is contained in:
parent
d85a430a8c
commit
c8e0c2ed48
2 changed files with 10 additions and 9 deletions
2
.github/workflows/llm-harness-evaluation.yml
vendored
2
.github/workflows/llm-harness-evaluation.yml
vendored
|
|
@@ -87,7 +87,7 @@ jobs:
|
||||||
- name: Upgrade packages
|
- name: Upgrade packages
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
pip install --upgrade transformers
|
pip install --upgrade transformers==4.34.0
|
||||||
|
|
||||||
- name: Run harness
|
- name: Run harness
|
||||||
shell: bash
|
shell: bash
|
||||||
|
|
|
||||||
|
|
@@ -86,6 +86,8 @@ def main():
|
||||||
|
|
||||||
success = []
|
success = []
|
||||||
fail = []
|
fail = []
|
||||||
|
model_name = os.path.basename(os.path.realpath(args.pretrained))
|
||||||
|
output_path = args.output_path if args.output_path else "results"
|
||||||
for prec in args.precision:
|
for prec in args.precision:
|
||||||
prec_arg = parse_precision(prec, args.model)
|
prec_arg = parse_precision(prec, args.model)
|
||||||
model_args = f"pretrained={args.pretrained},{prec_arg}"
|
model_args = f"pretrained={args.pretrained},{prec_arg}"
|
||||||
|
|
@@ -94,6 +96,8 @@ def main():
|
||||||
for task in args.tasks:
|
for task in args.tasks:
|
||||||
task_names=task_map.get(task, task).split(',')
|
task_names=task_map.get(task, task).split(',')
|
||||||
num_fewshot = task_to_n_few_shots.get(task, args.num_fewshot)
|
num_fewshot = task_to_n_few_shots.get(task, args.num_fewshot)
|
||||||
|
log_dir = f"{output_path}/{model_name}/{args.device}/{prec}/{task}"
|
||||||
|
os.makedirs(log_dir, exist_ok=True)
|
||||||
try:
|
try:
|
||||||
results = evaluator.simple_evaluate(
|
results = evaluator.simple_evaluate(
|
||||||
model=args.model,
|
model=args.model,
|
||||||
|
|
@@ -109,7 +113,7 @@ def main():
|
||||||
decontamination_ngrams_path=args.decontamination_ngrams_path,
|
decontamination_ngrams_path=args.decontamination_ngrams_path,
|
||||||
check_integrity=args.check_integrity,
|
check_integrity=args.check_integrity,
|
||||||
write_out=args.write_out,
|
write_out=args.write_out,
|
||||||
output_base_path=args.output_base_path,
|
output_base_path=log_dir
|
||||||
)
|
)
|
||||||
if len(results['results']) > 1:
|
if len(results['results']) > 1:
|
||||||
average = {}
|
average = {}
|
||||||
|
|
@@ -117,18 +121,15 @@ def main():
|
||||||
for metric, value in subtask.items():
|
for metric, value in subtask.items():
|
||||||
average[metric] = average.get(metric, []) + [value]
|
average[metric] = average.get(metric, []) + [value]
|
||||||
for k, v in average.items():
|
for k, v in average.items():
|
||||||
average[k] = sum(average[k]) / len(average[k]) if not k.endswith("_stderr") else 0
|
average[k] = sum(v) / len(v) if not k.endswith("_stderr") else 0
|
||||||
results['results'][f"avg_{task}"] = average
|
results['results'][task] = average
|
||||||
results['versions'][f"avg_{task}"] = 1
|
results['versions'][task] = 1
|
||||||
|
|
||||||
dumped = json.dumps(results, indent=2)
|
dumped = json.dumps(results, indent=2)
|
||||||
print(dumped)
|
print(dumped)
|
||||||
|
|
||||||
if args.output_path:
|
if args.output_path:
|
||||||
dirname = os.path.dirname(args.output_path)
|
with open(f"{log_dir}/result.json", "w") as f:
|
||||||
if dirname:
|
|
||||||
os.makedirs(dirname, exist_ok=True)
|
|
||||||
with open(args.output_path, "w") as f:
|
|
||||||
f.write(dumped)
|
f.write(dumped)
|
||||||
success.append(results)
|
success.append(results)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue