LLM: add whisper models into nightly test (#10193)

* LLM: add whisper models into nightly test

* small fix

* small fix

* add more whisper models

* test all cases

* test specific cases

* collect the csv

* store the result

* to html

* small fix

* small test

* test all cases

* modify whisper_csv_to_html
This commit is contained in:
WeiguangHan 2024-03-11 20:00:47 +08:00 committed by GitHub
parent dbcfc5c2fa
commit 17bdb1a60b
4 changed files with 400 additions and 4 deletions

View file

@ -0,0 +1,207 @@
name: LLM Whisper Models Evaluation

# Cancel previous runs in the PR when you push new commits
concurrency:
  group: ${{ github.workflow }}-llm-nightly-test-${{ github.event.pull_request.number || github.run_id }}
  cancel-in-progress: true

permissions:
  contents: read

# Controls when the action will run.
on:
  schedule:
    - cron: "00 13 * * *"  # GMT time, 13:00 GMT == 21:00 China
  pull_request:
    branches: [main]
    paths:
      - ".github/workflows/llm-whisper-evaluation.yml"
  # Allows you to run this workflow manually from the Actions tab.
  # Every input is spliced into a JSON array by the set-matrix job, so each
  # item must carry its own double quotes, e.g. "whisper-tiny","whisper-base".
  workflow_dispatch:
    inputs:
      model_name:
        description: 'Model names, separated by comma and must be quoted.'
        required: true
        type: string
      precision:
        description: 'Precisions, separated by comma and must be quoted.'
        required: true
        type: string
      task:
        description: 'Tasks, separated by comma and must be quoted.'
        required: true
        type: string
      runs-on:
        description: 'Labels to filter the runners, separated by comma and must be quoted.'
        # The inner double quotes are part of the value so that it forms a
        # valid JSON array element, as the description requires.
        default: '"accuracy"'
        required: false
        type: string

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  llm-cpp-build:  # please uncomment it for PR tests
    uses: ./.github/workflows/llm-binary-build.yml

  # Set the testing matrix based on the event (schedule, PR, or manual dispatch)
  set-matrix:
    runs-on: ubuntu-latest
    outputs:
      model_name: ${{ steps.set-matrix.outputs.model_name }}
      precision: ${{ steps.set-matrix.outputs.precision }}
      task: ${{ steps.set-matrix.outputs.task }}
      runner: ${{ steps.set-matrix.outputs.runner }}
    steps:
      # Fixed matrix used by scheduled and pull-request runs.
      - name: set-env
        if: ${{ github.event_name != 'workflow_dispatch' }}
        env:
          MATRIX_MODEL_NAME: '["whisper-tiny", "whisper-small", "whisper-medium", "whisper-base"]'
          MATRIX_TASK: '["librispeech"]'
          MATRIX_PRECISION: '["sym_int4", "fp8_e5m2"]'
          LABELS: '["self-hosted", "llm", "perf"]'
        run: |
          echo "model_name=$MATRIX_MODEL_NAME" >> "$GITHUB_ENV"
          echo "task=$MATRIX_TASK" >> "$GITHUB_ENV"
          echo "precision=$MATRIX_PRECISION" >> "$GITHUB_ENV"
          echo "runner=$LABELS" >> "$GITHUB_ENV"
      # Matrix built from the workflow_dispatch inputs for manual runs.
      - name: set-manual-env
        if: ${{ github.event_name == 'workflow_dispatch' }}
        env:
          MANUAL_MATRIX_MODEL_NAME: "${{ format('[ {0} ]', inputs.model_name) }}"
          MANUAL_MATRIX_TASK: "${{ format('[ {0} ]', inputs.task) }}"
          MANUAL_MATRIX_PRECISION: "${{ format('[ {0} ]', inputs.precision) }}"
          MANUAL_LABELS: "${{ format('[\"self-hosted\", \"llm\", {0}]', inputs.runs-on) }}"
        run: |
          echo "model_name=$MANUAL_MATRIX_MODEL_NAME" >> "$GITHUB_ENV"
          echo "task=$MANUAL_MATRIX_TASK" >> "$GITHUB_ENV"
          echo "precision=$MANUAL_MATRIX_PRECISION" >> "$GITHUB_ENV"
          echo "runner=$MANUAL_LABELS" >> "$GITHUB_ENV"
      # Republish whichever set of values was exported above as job outputs.
      - name: set-matrix
        id: set-matrix
        run: |
          echo "model_name=$model_name" >> "$GITHUB_OUTPUT"
          echo "task=$task" >> "$GITHUB_OUTPUT"
          echo "precision=$precision" >> "$GITHUB_OUTPUT"
          echo "runner=$runner" >> "$GITHUB_OUTPUT"

  llm-whisper-evaluation:
    # if: ${{ github.event.schedule || github.event.inputs.artifact == 'llm-whisper-evaluation' || github.event.inputs.artifact == 'all' }} # please comment it for PR tests
    needs: [llm-cpp-build, set-matrix]  # please uncomment it for PR tests
    # needs: [set-matrix] # please comment it for PR tests
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.9"]
        model_name: ${{ fromJson(needs.set-matrix.outputs.model_name) }}
        # NOTE(review): matrix.task and matrix.device are not referenced by
        # any step below; the run command hard-codes its dataset and device.
        task: ${{ fromJson(needs.set-matrix.outputs.task) }}
        precision: ${{ fromJson(needs.set-matrix.outputs.precision) }}
        device: [xpu]
    runs-on: ${{ fromJson(needs.set-matrix.outputs.runner) }}
    env:
      ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
      ORIGIN_DIR: /mnt/disk1/models
    steps:
      - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744  # actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        shell: bash
        run: |
          python -m pip install --upgrade pip
          python -m pip install --upgrade wheel
          python -m pip install --upgrade pandas
          python -m pip install --upgrade datasets
          python -m pip install --upgrade evaluate
          python -m pip install --upgrade soundfile
          python -m pip install --upgrade librosa
          python -m pip install --upgrade jiwer

      # please uncomment it and comment the "Install BigDL-LLM from Pypi" part for PR tests
      - name: Download llm binary
        uses: ./.github/actions/llm/download-llm-binary

      - name: Run LLM install (all) test
        uses: ./.github/actions/llm/setup-llm-env
        with:
          extra-dependency: "xpu_2.1"

      # - name: Install BigDL-LLM from Pypi
      #   shell: bash
      #   run: |
      #     pip install --pre --upgrade bigdl-llm[xpu] -f https://developer.intel.com/ipex-whl-stable-xpu

      # - name: Test installed xpu version
      #   shell: bash
      #   run: |
      #     source /opt/intel/oneapi/setvars.sh
      #     bash python/llm/test/run-llm-install-tests.sh

      - name: Run whisper evaluation
        shell: bash
        env:
          # Passed through the environment instead of being interpolated into
          # the script, because these values can come from manual inputs.
          MODEL_NAME: ${{ matrix.model_name }}
          PRECISION: ${{ matrix.precision }}
        run: |
          source /opt/intel/oneapi/setvars.sh
          export USE_XETLA=OFF
          export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
          # Keep the trailing slash: run_whisper.py takes the model name from
          # the second-to-last '/'-separated component of --model_path.
          MODEL_PATH="${ORIGIN_DIR}/${MODEL_NAME}/"
          echo "MODEL_PATH=${MODEL_PATH}" >> "$GITHUB_ENV"
          export LIBRISPEECH_DATASET_PATH=/mnt/disk1/datasets/librispeech
          cd python/llm/dev/benchmark/whisper
          python run_whisper.py --model_path "${MODEL_PATH}" --data_type other --device xpu --load_in_low_bit "${PRECISION}" --save_result

      - uses: actions/upload-artifact@v3
        with:
          name: whisper_results
          path: ${{ github.workspace }}/python/llm/dev/benchmark/whisper/results/**

  llm-whisper-summary:
    if: ${{ github.event_name == 'schedule' || github.event_name == 'pull_request' }}
    needs: [set-matrix, llm-whisper-evaluation]
    runs-on: ["self-hosted", "llm", "perf"]
    steps:
      - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744  # actions/checkout@v3
      - name: Set up Python 3.9
        uses: actions/setup-python@v4
        with:
          python-version: "3.9"
      # Exports the dated result sub-folder name and both destination roots
      # so that every later step reads them from the environment.
      - name: Set output path
        shell: bash
        run: |
          DATE=$(date +%Y-%m-%d)
          OUTPUT_PATH="results_$DATE"
          echo "OUTPUT_PATH=$OUTPUT_PATH" >> "$GITHUB_ENV"
          NIGHTLY_FOLDER="/mnt/disk1/whisper_nightly_gpu"
          echo "NIGHTLY_FOLDER=$NIGHTLY_FOLDER" >> "$GITHUB_ENV"
          PR_FOLDER="/mnt/disk1/whisper_pr_gpu"
          echo "PR_FOLDER=$PR_FOLDER" >> "$GITHUB_ENV"

      - name: Download all results for nightly run
        if: github.event_name == 'schedule'
        uses: actions/download-artifact@v3
        with:
          name: whisper_results
          path: ${{ env.NIGHTLY_FOLDER }}/${{ env.OUTPUT_PATH }}

      - name: Download all results for pr run
        if: github.event_name == 'pull_request'
        uses: actions/download-artifact@v3
        with:
          name: whisper_results
          path: ${{ env.PR_FOLDER }}/${{ env.OUTPUT_PATH }}

      - name: Summarize the results for nightly run
        if: github.event_name == 'schedule'
        shell: bash
        run: |
          cp -r /mnt/disk1/datasets/whisper_fp16_results/* "${NIGHTLY_FOLDER}/${OUTPUT_PATH}"
          pip install pandas==1.5.3
          python ${{ github.workspace }}/python/llm/dev/benchmark/whisper/whisper_concat_csv.py -i "${NIGHTLY_FOLDER}/${OUTPUT_PATH}" -o "${NIGHTLY_FOLDER}"
          python ${{ github.workspace }}/python/llm/dev/benchmark/whisper/whisper_csv_to_html.py -f "${NIGHTLY_FOLDER}"

      - name: Summarize the results for pull request
        if: github.event_name == 'pull_request'
        shell: bash
        run: |
          cp -r /mnt/disk1/datasets/whisper_fp16_results/* "${PR_FOLDER}/${OUTPUT_PATH}"
          pip install pandas==1.5.3
          python ${{ github.workspace }}/python/llm/dev/benchmark/whisper/whisper_concat_csv.py -i "${PR_FOLDER}/${OUTPUT_PATH}" -o "${PR_FOLDER}"
          python ${{ github.workspace }}/python/llm/dev/benchmark/whisper/whisper_csv_to_html.py -f "${PR_FOLDER}"

View file

@ -21,12 +21,20 @@ import torch
from evaluate import load from evaluate import load
import time import time
import argparse import argparse
import pandas as pd
import os
import csv
from datetime import date
current_dir = os.path.dirname(os.path.realpath(__file__))
def get_args(): def get_args():
parser = argparse.ArgumentParser(description="Evaluate Whisper performance and accuracy") parser = argparse.ArgumentParser(description="Evaluate Whisper performance and accuracy")
parser.add_argument('--model_path', required=True, help='pretrained model path') parser.add_argument('--model_path', required=True, help='pretrained model path')
parser.add_argument('--data_type', required=True, help='clean, other') parser.add_argument('--data_type', required=True, help='clean, other')
parser.add_argument('--device', required=False, help='cpu, xpu') parser.add_argument('--device', required=False, help='cpu, xpu')
parser.add_argument('--load_in_low_bit', default='sym_int4', help='Specify whether to load data in low bit format (e.g., 4-bit)')
parser.add_argument('--save_result', action='store_true', help='Save the results to a CSV file')
args = parser.parse_args() args = parser.parse_args()
return args return args
@ -40,7 +48,7 @@ if __name__ == '__main__':
processor = WhisperProcessor.from_pretrained(args.model_path) processor = WhisperProcessor.from_pretrained(args.model_path)
forced_decoder_ids = processor.get_decoder_prompt_ids(language='en', task='transcribe') forced_decoder_ids = processor.get_decoder_prompt_ids(language='en', task='transcribe')
model = AutoModelForSpeechSeq2Seq.from_pretrained(args.model_path, load_in_low_bit="sym_int4", optimize_model=True).eval().to(args.device) model = AutoModelForSpeechSeq2Seq.from_pretrained(args.model_path, load_in_low_bit=args.load_in_low_bit, optimize_model=True).eval().to(args.device)
model.config.forced_decoder_ids = None model.config.forced_decoder_ids = None
def map_to_pred(batch): def map_to_pred(batch):
@ -67,6 +75,24 @@ if __name__ == '__main__':
wer = load("./wer") wer = load("./wer")
speech_length = sum(result["length"][1:]) speech_length = sum(result["length"][1:])
prc_time = sum(result["time"][1:]) prc_time = sum(result["time"][1:])
print("Realtime Factor(RTF) is : %.4f" % (prc_time/speech_length))
print("Realtime X(RTX) is : %.2f" % (speech_length/prc_time)) MODEL = args.model_path.split('/')[-2]
print(f'WER is {100 * wer.compute(references=result["reference"], predictions=result["prediction"])}') RTF = prc_time/speech_length
RTX = speech_length/prc_time
WER = 100 * wer.compute(references=result["reference"], predictions=result["prediction"])
today = date.today()
if args.save_result:
csv_name = f'{current_dir}/results/{MODEL}-{args.data_type}-{args.device}-{args.load_in_low_bit}-{today}.csv'
os.makedirs(os.path.dirname(csv_name), exist_ok=True)
with open(csv_name, mode='a', newline='') as file:
csv_writer = csv.writer(file)
file.seek(0, os.SEEK_END)
if file.tell() == 0:
csv_writer.writerow(["models","precision","WER","RTF"])
csv_writer.writerow([MODEL, args.load_in_low_bit, WER, RTF])
print(f'Results saved to {csv_name}')
print("Realtime Factor(RTF) is : %.4f" % RTF)
print("Realtime X(RTX) is : %.2f" % RTX)
print(f'WER is {WER}')

View file

@ -0,0 +1,50 @@
#
# Copyright 2016 The BigDL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Python program to concat CSVs
import os
import sys
import argparse
import pandas as pd
from datetime import date
def main(argv=None):
    """Merge every CSV file of a directory into one dated summary CSV.

    The regular files ending in ``.csv`` directly inside ``--input_path`` are
    read in sorted path order, stacked row-wise and written to
    ``<output_path>/whisper-<today>.csv``.

    Args:
        argv: Optional list of command-line arguments. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``.

    Returns:
        0 on success, 1 when the input directory contains no ``.csv`` file.
    """
    parser = argparse.ArgumentParser(description="concat .csv files")
    parser.add_argument("-i", "--input_path", type=str, dest="input_path",
                        help="The directory which stores the original CSV files", default="./")
    parser.add_argument("-o", "--output_path", type=str, dest="output_path",
                        help="The directory which stores the concated CSV file", default="./")
    args = parser.parse_args(argv)

    csv_files = sorted(
        os.path.join(args.input_path, file_name)
        for file_name in os.listdir(args.input_path)
        if file_name.endswith(".csv")
        and os.path.isfile(os.path.join(args.input_path, file_name))
    )
    if not csv_files:
        # pd.concat raises an opaque ValueError on an empty list; fail with a
        # message that names the directory instead.
        print(f'No .csv files found in {args.input_path}', file=sys.stderr)
        return 1

    # ignore_index=True already yields a fresh 0..n-1 index.
    merged_df = pd.concat([pd.read_csv(path) for path in csv_files], ignore_index=True)

    os.makedirs(args.output_path, exist_ok=True)
    output_file_path = os.path.join(args.output_path, f'whisper-{date.today()}.csv')
    # The index is written as the first column on purpose:
    # whisper_csv_to_html.py reads the file back with index_col=0.
    merged_df.to_csv(output_file_path)
    return 0


if __name__ == "__main__":
    sys.exit(main())

View file

@ -0,0 +1,113 @@
#
# Copyright 2016 The BigDL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Python program to convert CSV to HTML Table
import os
import sys
import argparse
import pandas as pd
def highlight_vals(val, max=3.0, color1='red', color2='green'):
    """Return the CSS background rule used to highlight one table cell.

    Args:
        val: The cell value. Only ``float`` values are ever highlighted.
        max: Magnitude threshold (the name shadows the builtin but is kept
            because callers pass it by keyword).
        color1: Background colour used when ``val <= -max``.
        color2: Background colour used when ``val > max``.

    Returns:
        ``'background-color: <colour>'`` for a float outside the threshold
        band, otherwise the empty string. A string is returned on every path,
        so no branch falls through to an implicit ``None``.
    """
    if isinstance(val, float):
        if val > max:
            return 'background-color: %s' % color2
        if val <= -max:
            return 'background-color: %s' % color1
    return ''
def _find_csv_files(folder_path):
    """Return the paths of the regular .csv files in folder_path, reverse-sorted."""
    return sorted(
        (os.path.join(folder_path, file_name)
         for file_name in os.listdir(folder_path)
         if file_name.endswith(".csv")
         and os.path.isfile(os.path.join(folder_path, file_name))),
        reverse=True,
    )


def _previous_results(previous_csv):
    """Map (model, precision) to the (WER, RTF) pair recorded in previous_csv."""
    lookup = {}
    for _, row in previous_csv.iterrows():
        wer, rtf = row['WER'], row['RTF']
        # Non-positive values cannot serve as the divisor of a relative
        # difference, so such rows are treated as "no previous result".
        if wer > 0.0 and rtf > 0.0:
            lookup[(row['models'].strip(), row['precision'].strip())] = (wer, rtf)
    return lookup


def main(argv=None):
    """Render the newest CSV of a folder as an HTML table.

    The ``.csv`` files in ``--folder_path`` are sorted by path in reverse
    order; the first one is treated as the latest result and the second one,
    if present, as the previous result. When a previous result exists, its WER
    and RTF values and the relative change in percent are added as columns,
    and changes beyond ``--threshold`` are highlighted. The HTML file is
    written next to the latest CSV, with the same base name.

    Args:
        argv: Optional list of command-line arguments. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``.

    Returns:
        0 on success, 1 when the folder contains no ``.csv`` file.
    """
    parser = argparse.ArgumentParser(description="convert .csv file to .html file")
    parser.add_argument("-f", "--folder_path", type=str, dest="folder_path",
                        help="The directory which stores the .csv file", default="/mnt/disk1/whisper_pr_gpu/")
    parser.add_argument("-t", "--threshold", type=float, dest="threshold",
                        help="the threshold of highlight values", default=1.0)
    args = parser.parse_args(argv)

    csv_files = _find_csv_files(args.folder_path)
    if not csv_files:
        print(f'No .csv files found in {args.folder_path}', file=sys.stderr)
        return 1

    latest_csv = pd.read_csv(csv_files[0], index_col=0)
    # splitext only strips the extension, so a dot elsewhere in the path
    # (for example in a directory name) does not truncate the output path.
    daily_html = os.path.splitext(csv_files[0])[0] + ".html"

    if len(csv_files) > 1:
        previous = _previous_results(pd.read_csv(csv_files[1], index_col=0))

        # Built by position, so the result does not depend on the CSV's index
        # labels being exactly 0..n-1.
        last1, diff1, last2, diff2 = [], [], [], []
        for _, row in latest_csv.iterrows():
            key = (row['models'].strip(), row['precision'].strip())
            if key in previous:
                previous_WER, previous_RTF = previous[key]
                last1.append(previous_WER)
                diff1.append(round((previous_WER - row['WER']) * 100 / previous_WER, 2))
                last2.append(previous_RTF)
                diff2.append(round((previous_RTF - row['RTF']) * 100 / previous_RTF, 2))
            else:
                last1.append(pd.NA)
                diff1.append(pd.NA)
                last2.append(pd.NA)
                diff2.append(pd.NA)

        latest_csv.insert(loc=4, column='last1', value=last1)
        latest_csv.insert(loc=5, column='diff1(%)', value=diff1)
        latest_csv.insert(loc=6, column='last2', value=last2)
        latest_csv.insert(loc=7, column='diff2(%)', value=diff2)

        subset1 = ['diff1(%)', 'diff2(%)']
        columns = {'WER': '{:.6f}', 'RTF': '{:.6f}', 'last1': '{:.6f}', 'diff1(%)': '{:.6f}',
                   'last2': '{:.6f}', 'diff2(%)': '{:.6f}'}

        # --threshold drives the highlighting; its default of 1.0 matches the
        # value that used to be hard-coded here.
        styled_df = latest_csv.style.format(columns).applymap(
            lambda val: highlight_vals(val, max=args.threshold, color1='red', color2='green'),
            subset=subset1)
        # Styler.to_html() replaces the deprecated Styler.render().
        html_output = styled_df.set_table_attributes("border=1").to_html()

        with open(daily_html, 'w') as f:
            f.write(html_output)
    else:
        latest_csv.to_html(daily_html)

    return 0


if __name__ == "__main__":
    sys.exit(main())