# Patch summary (descriptive preamble placed ahead of the first "diff --git" header).
#
# Files touched:
#   * .github/workflows/llm-harness-evaluation.yml (modified, four hunks)
#   * python/llm/dev/benchmark/harness/fp16.csv
#       -> python/llm/test/benchmark/harness/fp16.csv (rename, similarity index 100%)
#   * python/llm/dev/benchmark/harness/harness_csv_to_html.py
#       -> python/llm/test/benchmark/harness/harness_csv_to_html.py (rename, similarity index 100%)
#   * python/llm/dev/benchmark/harness/update_html_in_parent_folder.py
#       -> python/llm/test/benchmark/harness/update_html_in_parent_folder.py (rename, similarity index 100%)
#
# Workflow changes, hunk by hunk:
#   * @@ -71,7:  the third entry of PR_LABELS changes from "accuracy-nightly" to "temp-arc01".
#   * @@ -294,7: the fp16.csv download URL switches from .../dev/benchmark/harness/ to
#                .../test/benchmark/harness/; the -O destination is unchanged.
#   * @@ -304,8: in the nightly summary step, harness_csv_to_html.py and
#                update_html_in_parent_folder.py are invoked from python/llm/test/benchmark/harness.
#   * @@ -314,4: in the pull-request summary step, harness_csv_to_html.py is invoked from
#                python/llm/test/benchmark/harness. Both the old and the new last line carry the
#                "\ No newline at end of file" marker.
#
# make_table_and_csv.py is still invoked from python/llm/dev/benchmark/harness in both summary
# steps, and no rename entry for it appears in the visible part of this patch.
#
# NOTE(review): "temp-arc01" reads like a temporary runner label - confirm it is meant to be merged.
# NOTE(review): the fp16.csv URL fetches from the "main" branch at the path this same patch creates
#               by rename; presumably it resolves only once the rename is on main - verify for runs
#               that happen before that.
# NOTE(review): the patch text below appears joined onto long lines (line breaks presumably lost in
#               extraction); it would need its original line structure before it can be applied.
diff --git a/.github/workflows/llm-harness-evaluation.yml b/.github/workflows/llm-harness-evaluation.yml index 863101f2..5b6b7727 100644 --- a/.github/workflows/llm-harness-evaluation.yml +++ b/.github/workflows/llm-harness-evaluation.yml @@ -71,7 +71,7 @@ jobs: PR_MATRIX_MODEL_NAME: '["Mistral-7B-v0.1"]' PR_MATRIX_TASK: '["arc", "truthfulqa", "winogrande"]' PR_MATRIX_PRECISION: '["fp8"]' - PR_LABELS: '["self-hosted", "llm", "accuracy-nightly"]' + PR_LABELS: '["self-hosted", "llm", "temp-arc01"]' run: | echo "model_name=$PR_MATRIX_MODEL_NAME" >> $GITHUB_ENV @@ -294,7 +294,7 @@ jobs: - name: Download fp16.csv for summary shell: bash run: | - wget https://raw.githubusercontent.com/intel-analytics/BigDL/main/python/llm/dev/benchmark/harness/fp16.csv -O ${{ env.NIGHTLY_FOLDER}}/../fp16.csv + wget https://raw.githubusercontent.com/intel-analytics/BigDL/main/python/llm/test/benchmark/harness/fp16.csv -O ${{ env.NIGHTLY_FOLDER}}/../fp16.csv ls ${{ env.NIGHTLY_FOLDER}}/.. - name: Summarize the results for nightly run @@ -304,8 +304,8 @@ jobs: ls /home/arda/harness-action-runners/nightly-accuracy-data/${{ env.OUTPUT_PATH }} pip install pandas==1.5.3 python ${{ github.workspace }}/python/llm/dev/benchmark/harness/make_table_and_csv.py ${{ env.NIGHTLY_FOLDER}}/${{ env.OUTPUT_PATH }} ${{ env.NIGHTLY_FOLDER}} - python ${{ github.workspace }}/python/llm/dev/benchmark/harness/harness_csv_to_html.py -f ${{ env.NIGHTLY_FOLDER}} - python ${{ github.workspace }}/python/llm/dev/benchmark/harness/update_html_in_parent_folder.py -f ${{ env.NIGHTLY_FOLDER }} + python ${{ github.workspace }}/python/llm/test/benchmark/harness/harness_csv_to_html.py -f ${{ env.NIGHTLY_FOLDER}} + python ${{ github.workspace }}/python/llm/test/benchmark/harness/update_html_in_parent_folder.py -f ${{ env.NIGHTLY_FOLDER }} - name: Summarize the results for pull request if: github.event_name == 'pull_request' @@ -314,4 +314,4 @@ jobs: ls /home/arda/harness-action-runners/pr-accuracy-data/${{ env.OUTPUT_PATH }} 
pip install pandas==1.5.3 python ${{ github.workspace }}/python/llm/dev/benchmark/harness/make_table_and_csv.py ${{ env.PR_FOLDER}}/${{ env.OUTPUT_PATH }} ${{ env.PR_FOLDER}} - python ${{ github.workspace }}/python/llm/dev/benchmark/harness/harness_csv_to_html.py -f ${{ env.PR_FOLDER}} \ No newline at end of file + python ${{ github.workspace }}/python/llm/test/benchmark/harness/harness_csv_to_html.py -f ${{ env.PR_FOLDER}} \ No newline at end of file diff --git a/python/llm/dev/benchmark/harness/fp16.csv b/python/llm/test/benchmark/harness/fp16.csv similarity index 100% rename from python/llm/dev/benchmark/harness/fp16.csv rename to python/llm/test/benchmark/harness/fp16.csv diff --git a/python/llm/dev/benchmark/harness/harness_csv_to_html.py b/python/llm/test/benchmark/harness/harness_csv_to_html.py similarity index 100% rename from python/llm/dev/benchmark/harness/harness_csv_to_html.py rename to python/llm/test/benchmark/harness/harness_csv_to_html.py diff --git a/python/llm/dev/benchmark/harness/update_html_in_parent_folder.py b/python/llm/test/benchmark/harness/update_html_in_parent_folder.py similarity index 100% rename from python/llm/dev/benchmark/harness/update_html_in_parent_folder.py rename to python/llm/test/benchmark/harness/update_html_in_parent_folder.py