Merge pull request #10101 from pengyb2001/eval_stat
Modify harness evaluation workflow
This commit is contained in:
commit
3f60e9df89
1 changed file with 21 additions and 27 deletions
48
.github/workflows/llm-harness-evaluation.yml
vendored
48
.github/workflows/llm-harness-evaluation.yml
vendored
|
|
@ -8,7 +8,7 @@ concurrency:
|
|||
# Controls when the action will run.
|
||||
on:
|
||||
schedule:
|
||||
- cron: "00 16 * * *" # GMT time, 16:00 GMT == 00:00 China
|
||||
- cron: "00 13 * * *" # GMT time, 13:00 GMT == 21:00 China
|
||||
pull_request:
|
||||
branches: [main]
|
||||
paths:
|
||||
|
|
@ -52,7 +52,7 @@ jobs:
|
|||
if: ${{github.event_name == 'schedule'}}
|
||||
env:
|
||||
NIGHTLY_MATRIX_MODEL_NAME: '["Llama2-7b-guanaco-dolphin-500", "falcon-7b-instruct-with-patch",
|
||||
"Mistral-7B-v0.1", "mpt-7b-chat", "Baichuan2-7B-Chat-LLaMAfied", "stablelm-3b"]'
|
||||
"Mistral-7B-v0.1", "mpt-7b-chat", "Baichuan2-7B-Chat-LLaMAfied"]'
|
||||
NIGHTLY_MATRIX_TASK: '["arc", "truthfulqa", "winogrande"]'
|
||||
NIGHTLY_MATRIX_PRECISION: '["sym_int4", "fp8"]'
|
||||
NIGHTLY_LABELS: '["self-hosted", "llm", "accuracy-nightly"]'
|
||||
|
|
@ -65,9 +65,9 @@ jobs:
|
|||
- name: set-pr-env
|
||||
if: ${{github.event_name == 'pull_request'}}
|
||||
env:
|
||||
PR_MATRIX_MODEL_NAME: '["stablelm-3b-4e1t", "Mistral-7B-v0.1"]'
|
||||
PR_MATRIX_MODEL_NAME: '["Llama2-7b-guanaco-dolphin-500"]'
|
||||
PR_MATRIX_TASK: '["truthfulqa"]'
|
||||
PR_MATRIX_PRECISION: '["fp8"]'
|
||||
# Precision for the pull-request matrix; forwarded to `--precision` of run_llb.py.
# Spelled "sym_int4" (not "sys_int4"), matching NIGHTLY_MATRIX_PRECISION.
PR_MATRIX_PRECISION: '["sym_int4"]'
|
||||
PR_LABELS: '["self-hosted", "llm", "temp-arc01"]'
|
||||
run: |
|
||||
echo "model_name=$PR_MATRIX_MODEL_NAME" >> $GITHUB_ENV
|
||||
|
|
@ -99,11 +99,6 @@ jobs:
|
|||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
# include:
|
||||
# python-version: "3.9"
|
||||
# model_name: "stablelm-3b-4e1t"
|
||||
# task: "arc"
|
||||
# precision: "sym_int4" #options: sym_int4, fp4, mixed_fp4, sym_int8, fp8, mixed_fp8
|
||||
python-version: ["3.9"]
|
||||
model_name: ${{ fromJson(needs.set-matrix.outputs.model_name) }}
|
||||
task: ${{ fromJson(needs.set-matrix.outputs.task) }}
|
||||
|
|
@ -111,6 +106,8 @@ jobs:
|
|||
device: [xpu]
|
||||
|
||||
runs-on: ${{ fromJson(needs.set-matrix.outputs.runner) }}
|
||||
outputs:
|
||||
output_path: ${{ steps.run_harness.outputs.output_path }}
|
||||
env:
|
||||
ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
|
||||
ORIGIN_DIR: /mnt/disk1/models
|
||||
|
|
@ -176,24 +173,16 @@ jobs:
|
|||
export HF_DATASETS=$HARNESS_HF_HOME/datasets
|
||||
export HF_DATASETS_CACHE=$HARNESS_HF_HOME/datasets
|
||||
source /opt/intel/oneapi/setvars.sh
|
||||
|
||||
python run_llb.py \
|
||||
--model bigdl-llm \
|
||||
--pretrained ${MODEL_PATH} \
|
||||
--precision ${{ matrix.precision }} \
|
||||
--device ${{ matrix.device }} \
|
||||
--tasks ${{ matrix.task }} \
|
||||
--batch_size 1 --no_cache --output_path results
|
||||
--model bigdl-llm \
|
||||
--pretrained ${MODEL_PATH} \
|
||||
--precision ${{ matrix.precision }} \
|
||||
--device ${{ matrix.device }} \
|
||||
--tasks ${{ matrix.task }} \
|
||||
--batch_size 1 --no_cache --output_path results
|
||||
|
||||
|
||||
- name: Compare with golden accuracy
|
||||
shell: bash
|
||||
if: ${{github.event_name == 'schedule'}}
|
||||
working-directory: ${{ github.workspace }}/python/llm
|
||||
run: |
|
||||
python test/benchmark/harness_nightly/accuracy_regression.py \
|
||||
dev/benchmark/harness/results/${{ matrix.model_name }}/${{ matrix.device }}/${{ matrix.precision }}/${{ matrix.task }}/result.json \
|
||||
test/benchmark/harness_nightly/golden_results.json
|
||||
|
||||
- uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: harness_results
|
||||
|
|
@ -222,13 +211,18 @@ jobs:
|
|||
run: |
|
||||
pip install --upgrade pip
|
||||
pip install jsonlines pytablewriter regex
|
||||
|
||||
DATE=$(date +%Y-%m-%d)
|
||||
OUTPUT_PATH="results_$DATE"
|
||||
echo "OUTPUT_PATH=$OUTPUT_PATH" >> $GITHUB_ENV
|
||||
- name: Download all results
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: harness_results
|
||||
path: results
|
||||
path: ${{ env.OUTPUT_PATH }}
|
||||
- name: Summarize the results
|
||||
shell: bash
|
||||
run: |
|
||||
ls results
|
||||
python ${{ github.workspace }}/python/llm/dev/benchmark/harness/make_table_results.py results
|
||||
echo ${{ env.OUTPUT_PATH }}
|
||||
ls ${{ env.OUTPUT_PATH }}
|
||||
python ${{ github.workspace }}/python/llm/dev/benchmark/harness/make_table_results.py ${{ env.OUTPUT_PATH }}
|
||||
Loading…
Reference in a new issue