From bc92dbf7be2abc743464ed6208de825cf605f211 Mon Sep 17 00:00:00 2001 From: pengyb2001 <284261055@qq.com> Date: Tue, 6 Feb 2024 11:20:37 +0800 Subject: [PATCH 1/8] remove stableml;change schedule;change storage method --- .github/workflows/llm-harness-evaluation.yml | 95 +++++++++++++++----- 1 file changed, 72 insertions(+), 23 deletions(-) diff --git a/.github/workflows/llm-harness-evaluation.yml b/.github/workflows/llm-harness-evaluation.yml index 6cc9802c..61448f68 100644 --- a/.github/workflows/llm-harness-evaluation.yml +++ b/.github/workflows/llm-harness-evaluation.yml @@ -8,7 +8,7 @@ concurrency: # Controls when the action will run. on: schedule: - - cron: "00 16 * * *" # GMT time, 16:00 GMT == 00:00 China + - cron: "00 13 * * *" # GMT time, 13:00 GMT == 21:00 China pull_request: branches: [main] paths: @@ -52,7 +52,7 @@ jobs: if: ${{github.event_name == 'schedule'}} env: NIGHTLY_MATRIX_MODEL_NAME: '["Llama2-7b-guanaco-dolphin-500", "falcon-7b-instruct-with-patch", - "Mistral-7B-v0.1", "mpt-7b-chat", "Baichuan2-7B-Chat-LLaMAfied", "stablelm-3b"]' + "Mistral-7B-v0.1", "mpt-7b-chat", "Baichuan2-7B-Chat-LLaMAfied"]' NIGHTLY_MATRIX_TASK: '["arc", "truthfulqa", "winogrande"]' NIGHTLY_MATRIX_PRECISION: '["sym_int4", "fp8"]' NIGHTLY_LABELS: '["self-hosted", "llm", "accuracy-nightly"]' @@ -65,7 +65,7 @@ jobs: - name: set-pr-env if: ${{github.event_name == 'pull_request'}} env: - PR_MATRIX_MODEL_NAME: '["stablelm-3b-4e1t", "Mistral-7B-v0.1"]' + PR_MATRIX_MODEL_NAME: '["Mistral-7B-v0.1", "Llama2-7b-guanaco-dolphin-500"]' PR_MATRIX_TASK: '["truthfulqa"]' PR_MATRIX_PRECISION: '["fp8"]' PR_LABELS: '["self-hosted", "llm", "temp-arc01"]' @@ -111,6 +111,8 @@ jobs: device: [xpu] runs-on: ${{ fromJson(needs.set-matrix.outputs.runner) }} + outputs: + output_path: ${{ steps.run_harness.outputs.output_path }} env: ANALYTICS_ZOO_ROOT: ${{ github.workspace }} ORIGIN_DIR: /mnt/disk1/models @@ -176,34 +178,31 @@ jobs: export HF_DATASETS=$HARNESS_HF_HOME/datasets export 
HF_DATASETS_CACHE=$HARNESS_HF_HOME/datasets source /opt/intel/oneapi/setvars.sh + + DATE=$(date +%Y-%m-%d) + OUTPUT_PATH="results_$DATE" + echo "OUTPUT_PATH=$OUTPUT_PATH" >> $GITHUB_ENV + echo "output_path=$OUTPUT_PATH" >> $GITHUB_OUTPUT + python run_llb.py \ - --model bigdl-llm \ - --pretrained ${MODEL_PATH} \ - --precision ${{ matrix.precision }} \ - --device ${{ matrix.device }} \ - --tasks ${{ matrix.task }} \ - --batch_size 1 --no_cache --output_path results + --model bigdl-llm \ + --pretrained ${MODEL_PATH} \ + --precision ${{ matrix.precision }} \ + --device ${{ matrix.device }} \ + --tasks ${{ matrix.task }} \ + --batch_size 1 --no_cache --output_path $OUTPUT_PATH - - name: Compare with golden accuracy - shell: bash - if: ${{github.event_name == 'schedule'}} - working-directory: ${{ github.workspace }}/python/llm - run: | - python test/benchmark/harness_nightly/accuracy_regression.py \ - dev/benchmark/harness/results/${{ matrix.model_name }}/${{ matrix.device }}/${{ matrix.precision }}/${{ matrix.task }}/result.json \ - test/benchmark/harness_nightly/golden_results.json - - uses: actions/upload-artifact@v3 with: name: harness_results path: - ${{ github.workspace }}/python/llm/dev/benchmark/harness/results/** + ${{ github.workspace }}/python/llm/dev/benchmark/harness/${{ env.OUTPUT_PATH }}/** - name: echo single result shell: bash - working-directory: ${{ github.workspace }}/python/llm/dev/benchmark/harness/results/ + working-directory: ${{ github.workspace }}/python/llm/dev/benchmark/harness/${{ env.OUTPUT_PATH }}/ run: | cat ${{ matrix.model_name }}/${{ matrix.device }}/${{ matrix.precision }}/${{ matrix.task }}/result.json @@ -211,6 +210,8 @@ jobs: if: ${{ always() }} needs: llm-harness-evalution runs-on: ubuntu-latest + env: + OUTPUT_PATH: ${{ needs.llm-harness-evalution.outputs.output_path }} steps: - uses: actions/checkout@v3 - name: Set up Python 3.9 @@ -226,9 +227,57 @@ jobs: uses: actions/download-artifact@v3 with: name: harness_results - path: 
results + path: ${{ env.OUTPUT_PATH }} - name: Summarize the results shell: bash run: | - ls results - python ${{ github.workspace }}/python/llm/dev/benchmark/harness/make_table_results.py results + ls ${{ env.OUTPUT_PATH }} + python ${{ github.workspace }}/python/llm/dev/benchmark/harness/make_table_results.py ${{ env.OUTPUT_PATH }} + + llm-harness-summary-nightly: + if: ${{github.event_name == 'schedule' || github.event_name == 'pull_request'}} + needs: llm-harness-evalution + runs-on: '["self-hosted", "llm", "temp-arc01"]' + env: + OUTPUT_PATH: ${{ needs.llm-harness-evalution.outputs.output_path }} + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.9 + uses: actions/setup-python@v4 + with: + python-version: 3.9 + + - name: Install dependencies + shell: bash + run: | + pip install --upgrade pip + pip install jsonlines pytablewriter regex + + - name: Download all results for nightly run + if: github.event_name == 'schedule' + uses: actions/download-artifact@v3 + with: + name: harness_results + path: /home/arda/harness-action-runners/nightly-accuracy-data/${{ env.OUTPUT_PATH }} + + - name: Download all results for pull request + if: github.event_name == 'pull_request' + uses: actions/download-artifact@v3 + with: + name: harness_results + path: /home/arda/harness-action-runners/pr-accuracy-data/${{ env.OUTPUT_PATH }} + + - name: Summarize the results for nightly run + if: github.event_name == 'schedule' + shell: bash + run: | + ls /home/arda/harness-action-runners/nightly-accuracy-data/${{ env.OUTPUT_PATH }} + python ${{ github.workspace }}/python/llm/dev/benchmark/harness/make_table_results.py /home/arda/harness-action-runners/nightly-accuracy-data/${{ env.OUTPUT_PATH }} + + - name: Summarize the results for pull request + if: github.event_name == 'pull_request' + shell: bash + run: | + ls /home/arda/harness-action-runners/pr-accuracy-data/${{ env.OUTPUT_PATH }} + python ${{ github.workspace }}/python/llm/dev/benchmark/harness/make_table_results.py 
/home/arda/harness-action-runners/pr-accuracy-data/${{ env.OUTPUT_PATH }} + \ No newline at end of file From 5edefe7d8e0afb337c5f55469369a2f6801d825f Mon Sep 17 00:00:00 2001 From: pengyb2001 <284261055@qq.com> Date: Tue, 6 Feb 2024 13:50:38 +0800 Subject: [PATCH 2/8] remove nightly summary job --- .github/workflows/llm-harness-evaluation.yml | 92 +++++++++++--------- 1 file changed, 49 insertions(+), 43 deletions(-) diff --git a/.github/workflows/llm-harness-evaluation.yml b/.github/workflows/llm-harness-evaluation.yml index 61448f68..151b01ba 100644 --- a/.github/workflows/llm-harness-evaluation.yml +++ b/.github/workflows/llm-harness-evaluation.yml @@ -210,8 +210,8 @@ jobs: if: ${{ always() }} needs: llm-harness-evalution runs-on: ubuntu-latest - env: - OUTPUT_PATH: ${{ needs.llm-harness-evalution.outputs.output_path }} + # env: + # OUTPUT_PATH: ${{ needs.llm-harness-evalution.outputs.output_path }} steps: - uses: actions/checkout@v3 - name: Set up Python 3.9 @@ -223,6 +223,10 @@ jobs: run: | pip install --upgrade pip pip install jsonlines pytablewriter regex + + DATE=$(date +%Y-%m-%d) + OUTPUT_PATH="results_$DATE" + echo "OUTPUT_PATH=$OUTPUT_PATH" >> $GITHUB_ENV - name: Download all results uses: actions/download-artifact@v3 with: @@ -231,53 +235,55 @@ jobs: - name: Summarize the results shell: bash run: | + echo ${{ env.OUTPUT_PATH }} ls ${{ env.OUTPUT_PATH }} python ${{ github.workspace }}/python/llm/dev/benchmark/harness/make_table_results.py ${{ env.OUTPUT_PATH }} - llm-harness-summary-nightly: - if: ${{github.event_name == 'schedule' || github.event_name == 'pull_request'}} - needs: llm-harness-evalution - runs-on: '["self-hosted", "llm", "temp-arc01"]' - env: - OUTPUT_PATH: ${{ needs.llm-harness-evalution.outputs.output_path }} - steps: - - uses: actions/checkout@v3 - - name: Set up Python 3.9 - uses: actions/setup-python@v4 - with: - python-version: 3.9 + # TODO: add a nightly summary job + # llm-harness-summary-nightly: + # if: ${{github.event_name == 
'schedule' || github.event_name == 'pull_request'}} + # needs: llm-harness-evalution + # runs-on: '["self-hosted", "llm", "temp-arc01"]' + # env: + # OUTPUT_PATH: ${{ needs.llm-harness-evalution.outputs.output_path }} + # steps: + # - uses: actions/checkout@v3 + # - name: Set up Python 3.9 + # uses: actions/setup-python@v4 + # with: + # python-version: 3.9 - - name: Install dependencies - shell: bash - run: | - pip install --upgrade pip - pip install jsonlines pytablewriter regex + # - name: Install dependencies + # shell: bash + # run: | + # pip install --upgrade pip + # pip install jsonlines pytablewriter regex - - name: Download all results for nightly run - if: github.event_name == 'schedule' - uses: actions/download-artifact@v3 - with: - name: harness_results - path: /home/arda/harness-action-runners/nightly-accuracy-data/${{ env.OUTPUT_PATH }} + # - name: Download all results for nightly run + # if: github.event_name == 'schedule' + # uses: actions/download-artifact@v3 + # with: + # name: harness_results + # path: /home/arda/harness-action-runners/nightly-accuracy-data/${{ env.OUTPUT_PATH }} - - name: Download all results for pull request - if: github.event_name == 'pull_request' - uses: actions/download-artifact@v3 - with: - name: harness_results - path: /home/arda/harness-action-runners/pr-accuracy-data/${{ env.OUTPUT_PATH }} + # - name: Download all results for pull request + # if: github.event_name == 'pull_request' + # uses: actions/download-artifact@v3 + # with: + # name: harness_results + # path: /home/arda/harness-action-runners/pr-accuracy-data/${{ env.OUTPUT_PATH }} - - name: Summarize the results for nightly run - if: github.event_name == 'schedule' - shell: bash - run: | - ls /home/arda/harness-action-runners/nightly-accuracy-data/${{ env.OUTPUT_PATH }} - python ${{ github.workspace }}/python/llm/dev/benchmark/harness/make_table_results.py /home/arda/harness-action-runners/nightly-accuracy-data/${{ env.OUTPUT_PATH }} + # - name: Summarize the 
results for nightly run + # if: github.event_name == 'schedule' + # shell: bash + # run: | + # ls /home/arda/harness-action-runners/nightly-accuracy-data/${{ env.OUTPUT_PATH }} + # python ${{ github.workspace }}/python/llm/dev/benchmark/harness/make_table_results.py /home/arda/harness-action-runners/nightly-accuracy-data/${{ env.OUTPUT_PATH }} - - name: Summarize the results for pull request - if: github.event_name == 'pull_request' - shell: bash - run: | - ls /home/arda/harness-action-runners/pr-accuracy-data/${{ env.OUTPUT_PATH }} - python ${{ github.workspace }}/python/llm/dev/benchmark/harness/make_table_results.py /home/arda/harness-action-runners/pr-accuracy-data/${{ env.OUTPUT_PATH }} + # - name: Summarize the results for pull request + # if: github.event_name == 'pull_request' + # shell: bash + # run: | + # ls /home/arda/harness-action-runners/pr-accuracy-data/${{ env.OUTPUT_PATH }} + # python ${{ github.workspace }}/python/llm/dev/benchmark/harness/make_table_results.py /home/arda/harness-action-runners/pr-accuracy-data/${{ env.OUTPUT_PATH }} \ No newline at end of file From 2c75b5b981562f9b587623f575a58b31bf02c2b8 Mon Sep 17 00:00:00 2001 From: pengyb2001 <284261055@qq.com> Date: Tue, 6 Feb 2024 13:51:57 +0800 Subject: [PATCH 3/8] remove mistral in pr job --- .github/workflows/llm-harness-evaluation.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/llm-harness-evaluation.yml b/.github/workflows/llm-harness-evaluation.yml index 151b01ba..59f588db 100644 --- a/.github/workflows/llm-harness-evaluation.yml +++ b/.github/workflows/llm-harness-evaluation.yml @@ -65,7 +65,7 @@ jobs: - name: set-pr-env if: ${{github.event_name == 'pull_request'}} env: - PR_MATRIX_MODEL_NAME: '["Mistral-7B-v0.1", "Llama2-7b-guanaco-dolphin-500"]' + PR_MATRIX_MODEL_NAME: '["Llama2-7b-guanaco-dolphin-500"]' PR_MATRIX_TASK: '["truthfulqa"]' PR_MATRIX_PRECISION: '["fp8"]' PR_LABELS: '["self-hosted", "llm", "temp-arc01"]' From 
94723bb0b13e11b5fda63cdc7d59b00701453cbd Mon Sep 17 00:00:00 2001 From: pengyb2001 <284261055@qq.com> Date: Tue, 6 Feb 2024 14:09:14 +0800 Subject: [PATCH 4/8] add retry in run llm install part;test arc05 with llama2 --- .github/workflows/llm-harness-evaluation.yml | 22 ++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/.github/workflows/llm-harness-evaluation.yml b/.github/workflows/llm-harness-evaluation.yml index 59f588db..d270c300 100644 --- a/.github/workflows/llm-harness-evaluation.yml +++ b/.github/workflows/llm-harness-evaluation.yml @@ -68,7 +68,7 @@ jobs: PR_MATRIX_MODEL_NAME: '["Llama2-7b-guanaco-dolphin-500"]' PR_MATRIX_TASK: '["truthfulqa"]' PR_MATRIX_PRECISION: '["fp8"]' - PR_LABELS: '["self-hosted", "llm", "temp-arc01"]' + PR_LABELS: '["self-hosted", "llm", "accuracy2", "accuracy-nightly"]' run: | echo "model_name=$PR_MATRIX_MODEL_NAME" >> $GITHUB_ENV echo "precision=$PR_MATRIX_PRECISION" >> $GITHUB_ENV @@ -135,9 +135,23 @@ jobs: uses: ./.github/actions/llm/download-llm-binary - name: Run LLM install (all) test - uses: ./.github/actions/llm/setup-llm-env - with: - extra-dependency: "xpu_2.1" + # uses: ./.github/actions/llm/setup-llm-env + # with: + # extra-dependency: "xpu_2.1" + run: | + retry_count=0 + max_retries=1 + command="bash ./.github/actions/llm/setup-llm-env --extra-dependency xpu_2.1" + until $command; do + exit_code=$? + echo "Attempt $((retry_count+1)) failed with exit code $exit_code. Retrying..." + retry_count=$((retry_count+1)) + if [ "$retry_count" -gt "$max_retries" ]; then + echo "Reached maximum retry attempts. Exiting." 
+ exit $exit_code + fi + sleep 5 + done - name: Install harness working-directory: ${{ github.workspace }}/python/llm/dev/benchmark/harness/ From d11ef0d11740aec7ba4d7d8a1104bbc55da598a6 Mon Sep 17 00:00:00 2001 From: pengyb2001 <284261055@qq.com> Date: Tue, 6 Feb 2024 14:25:26 +0800 Subject: [PATCH 5/8] remove retry in llm install part --- .github/workflows/llm-harness-evaluation.yml | 34 ++++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/.github/workflows/llm-harness-evaluation.yml b/.github/workflows/llm-harness-evaluation.yml index d270c300..6c733198 100644 --- a/.github/workflows/llm-harness-evaluation.yml +++ b/.github/workflows/llm-harness-evaluation.yml @@ -135,23 +135,23 @@ jobs: uses: ./.github/actions/llm/download-llm-binary - name: Run LLM install (all) test - # uses: ./.github/actions/llm/setup-llm-env - # with: - # extra-dependency: "xpu_2.1" - run: | - retry_count=0 - max_retries=1 - command="bash ./.github/actions/llm/setup-llm-env --extra-dependency xpu_2.1" - until $command; do - exit_code=$? - echo "Attempt $((retry_count+1)) failed with exit code $exit_code. Retrying..." - retry_count=$((retry_count+1)) - if [ "$retry_count" -gt "$max_retries" ]; then - echo "Reached maximum retry attempts. Exiting." - exit $exit_code - fi - sleep 5 - done + uses: ./.github/actions/llm/setup-llm-env + with: + extra-dependency: "xpu_2.1" + # run: | + # retry_count=0 + # max_retries=1 + # command="bash ./.github/actions/llm/setup-llm-env --extra-dependency xpu_2.1" + # until $command; do + # exit_code=$? + # echo "Attempt $((retry_count+1)) failed with exit code $exit_code. Retrying..." + # retry_count=$((retry_count+1)) + # if [ "$retry_count" -gt "$max_retries" ]; then + # echo "Reached maximum retry attempts. Exiting." 
+ # exit $exit_code + # fi + # sleep 5 + # done - name: Install harness working-directory: ${{ github.workspace }}/python/llm/dev/benchmark/harness/ From 2c4e610743c4728a5f11b9cf5a46d0672ba4a8a7 Mon Sep 17 00:00:00 2001 From: pengyb2001 <284261055@qq.com> Date: Tue, 6 Feb 2024 20:12:10 +0800 Subject: [PATCH 6/8] remove irrelevant code --- .github/workflows/llm-harness-evaluation.yml | 72 +------------------- 1 file changed, 1 insertion(+), 71 deletions(-) diff --git a/.github/workflows/llm-harness-evaluation.yml b/.github/workflows/llm-harness-evaluation.yml index 6c733198..8513632b 100644 --- a/.github/workflows/llm-harness-evaluation.yml +++ b/.github/workflows/llm-harness-evaluation.yml @@ -99,11 +99,6 @@ jobs: strategy: fail-fast: false matrix: - # include: - # python-version: "3.9" - # model_name: "stablelm-3b-4e1t" - # task: "arc" - # precision: "sym_int4" #options: sym_int4, fp4, mixed_fp4, sym_int8, fp8, mixed_fp8 python-version: ["3.9"] model_name: ${{ fromJson(needs.set-matrix.outputs.model_name) }} task: ${{ fromJson(needs.set-matrix.outputs.task) }} @@ -138,20 +133,6 @@ jobs: uses: ./.github/actions/llm/setup-llm-env with: extra-dependency: "xpu_2.1" - # run: | - # retry_count=0 - # max_retries=1 - # command="bash ./.github/actions/llm/setup-llm-env --extra-dependency xpu_2.1" - # until $command; do - # exit_code=$? - # echo "Attempt $((retry_count+1)) failed with exit code $exit_code. Retrying..." - # retry_count=$((retry_count+1)) - # if [ "$retry_count" -gt "$max_retries" ]; then - # echo "Reached maximum retry attempts. Exiting." 
- # exit $exit_code - # fi - # sleep 5 - # done - name: Install harness working-directory: ${{ github.workspace }}/python/llm/dev/benchmark/harness/ @@ -224,8 +205,6 @@ jobs: if: ${{ always() }} needs: llm-harness-evalution runs-on: ubuntu-latest - # env: - # OUTPUT_PATH: ${{ needs.llm-harness-evalution.outputs.output_path }} steps: - uses: actions/checkout@v3 - name: Set up Python 3.9 @@ -251,53 +230,4 @@ jobs: run: | echo ${{ env.OUTPUT_PATH }} ls ${{ env.OUTPUT_PATH }} - python ${{ github.workspace }}/python/llm/dev/benchmark/harness/make_table_results.py ${{ env.OUTPUT_PATH }} - - # TODO: add a nightly summary job - # llm-harness-summary-nightly: - # if: ${{github.event_name == 'schedule' || github.event_name == 'pull_request'}} - # needs: llm-harness-evalution - # runs-on: '["self-hosted", "llm", "temp-arc01"]' - # env: - # OUTPUT_PATH: ${{ needs.llm-harness-evalution.outputs.output_path }} - # steps: - # - uses: actions/checkout@v3 - # - name: Set up Python 3.9 - # uses: actions/setup-python@v4 - # with: - # python-version: 3.9 - - # - name: Install dependencies - # shell: bash - # run: | - # pip install --upgrade pip - # pip install jsonlines pytablewriter regex - - # - name: Download all results for nightly run - # if: github.event_name == 'schedule' - # uses: actions/download-artifact@v3 - # with: - # name: harness_results - # path: /home/arda/harness-action-runners/nightly-accuracy-data/${{ env.OUTPUT_PATH }} - - # - name: Download all results for pull request - # if: github.event_name == 'pull_request' - # uses: actions/download-artifact@v3 - # with: - # name: harness_results - # path: /home/arda/harness-action-runners/pr-accuracy-data/${{ env.OUTPUT_PATH }} - - # - name: Summarize the results for nightly run - # if: github.event_name == 'schedule' - # shell: bash - # run: | - # ls /home/arda/harness-action-runners/nightly-accuracy-data/${{ env.OUTPUT_PATH }} - # python ${{ github.workspace }}/python/llm/dev/benchmark/harness/make_table_results.py 
/home/arda/harness-action-runners/nightly-accuracy-data/${{ env.OUTPUT_PATH }} - - # - name: Summarize the results for pull request - # if: github.event_name == 'pull_request' - # shell: bash - # run: | - # ls /home/arda/harness-action-runners/pr-accuracy-data/${{ env.OUTPUT_PATH }} - # python ${{ github.workspace }}/python/llm/dev/benchmark/harness/make_table_results.py /home/arda/harness-action-runners/pr-accuracy-data/${{ env.OUTPUT_PATH }} - \ No newline at end of file + python ${{ github.workspace }}/python/llm/dev/benchmark/harness/make_table_results.py ${{ env.OUTPUT_PATH }} \ No newline at end of file From e627727b4bc0b94b21632002783b117e1d460441 Mon Sep 17 00:00:00 2001 From: pengyb2001 <284261055@qq.com> Date: Tue, 6 Feb 2024 21:12:51 +0800 Subject: [PATCH 7/8] change download path --- .github/workflows/llm-harness-evaluation.yml | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/.github/workflows/llm-harness-evaluation.yml b/.github/workflows/llm-harness-evaluation.yml index 8513632b..4b0271d9 100644 --- a/.github/workflows/llm-harness-evaluation.yml +++ b/.github/workflows/llm-harness-evaluation.yml @@ -174,30 +174,25 @@ jobs: export HF_DATASETS_CACHE=$HARNESS_HF_HOME/datasets source /opt/intel/oneapi/setvars.sh - DATE=$(date +%Y-%m-%d) - OUTPUT_PATH="results_$DATE" - echo "OUTPUT_PATH=$OUTPUT_PATH" >> $GITHUB_ENV - echo "output_path=$OUTPUT_PATH" >> $GITHUB_OUTPUT - python run_llb.py \ --model bigdl-llm \ --pretrained ${MODEL_PATH} \ --precision ${{ matrix.precision }} \ --device ${{ matrix.device }} \ --tasks ${{ matrix.task }} \ - --batch_size 1 --no_cache --output_path $OUTPUT_PATH + --batch_size 1 --no_cache --output_path results - uses: actions/upload-artifact@v3 with: name: harness_results path: - ${{ github.workspace }}/python/llm/dev/benchmark/harness/${{ env.OUTPUT_PATH }}/** + ${{ github.workspace }}/python/llm/dev/benchmark/harness/results/** - name: echo single result shell: bash - working-directory: ${{ 
github.workspace }}/python/llm/dev/benchmark/harness/${{ env.OUTPUT_PATH }}/ + working-directory: ${{ github.workspace }}/python/llm/dev/benchmark/harness/results/ run: | cat ${{ matrix.model_name }}/${{ matrix.device }}/${{ matrix.precision }}/${{ matrix.task }}/result.json From f63eba6c5afb2e88876a77399bc565a8f5f904f5 Mon Sep 17 00:00:00 2001 From: pengyb2001 <284261055@qq.com> Date: Tue, 6 Feb 2024 23:35:18 +0800 Subject: [PATCH 8/8] change pr test machine --- .github/workflows/llm-harness-evaluation.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/llm-harness-evaluation.yml b/.github/workflows/llm-harness-evaluation.yml index 4b0271d9..96473621 100644 --- a/.github/workflows/llm-harness-evaluation.yml +++ b/.github/workflows/llm-harness-evaluation.yml @@ -67,8 +67,8 @@ jobs: env: PR_MATRIX_MODEL_NAME: '["Llama2-7b-guanaco-dolphin-500"]' PR_MATRIX_TASK: '["truthfulqa"]' - PR_MATRIX_PRECISION: '["fp8"]' - PR_LABELS: '["self-hosted", "llm", "accuracy2", "accuracy-nightly"]' + PR_MATRIX_PRECISION: '["sym_int4"]' + PR_LABELS: '["self-hosted", "llm", "temp-arc01"]' run: | echo "model_name=$PR_MATRIX_MODEL_NAME" >> $GITHUB_ENV echo "precision=$PR_MATRIX_PRECISION" >> $GITHUB_ENV