# Patch for .github/workflows/llm-harness-evaluation.yml
#
# Summary of the hunks below:
#   * schedule: cron changes from "00 13 * * 5" to "00 16 * * *".
#   * set-matrix: adds a describing comment; the nightly model/task/precision
#     lists and runner labels are replaced; the PR matrix adds Mistral-7B-v0.1
#     and keeps only fp8 precision.
#   * Install dependencies: the script now starts with `set -e`.
#   * setup-llm-env: extra-dependency goes from "xpu_2.0" to "xpu_2.1".
#   * Upgrade packages: transformers is pinned to 4.36 for Mistral-7B-v0.1 and
#     to 4.31 for every other model; datasets stays at 2.14.6.
#   * Run harness: oneAPI setvars.sh is sourced from /opt/intel/oneapi instead
#     of $HOME/intel/oneapi.
#
# NOTE(review): this patch was recovered from a copy whose newlines and
# indentation had been collapsed to single spaces. Line boundaries follow the
# hunk headers' line counts; the leading indentation of every context/added/
# removed line is reconstructed from the usual workflow layout and is an
# assumption -- confirm against the target file (or apply with
# `git apply --ignore-whitespace`) before relying on it.
diff --git a/.github/workflows/llm-harness-evaluation.yml b/.github/workflows/llm-harness-evaluation.yml
index 9bf292c4..6cc9802c 100644
--- a/.github/workflows/llm-harness-evaluation.yml
+++ b/.github/workflows/llm-harness-evaluation.yml
@@ -8,7 +8,7 @@ concurrency:
 # Controls when the action will run.
 on:
   schedule:
-    - cron: "00 13 * * 5" # GMT time, 13:00 GMT == 21:00 China
+    - cron: "00 16 * * *" # GMT time, 16:00 GMT == 00:00 China
   pull_request:
     branches: [main]
     paths:
@@ -39,6 +39,7 @@ on:
 jobs:
   llm-cpp-build:
     uses: ./.github/workflows/llm-binary-build.yml
+  # Set the testing matrix based on the event (schedule, PR, or manual dispatch)
   set-matrix:
     runs-on: ubuntu-latest
     outputs:
@@ -50,10 +51,11 @@ jobs:
       - name: set-nightly-env
         if: ${{github.event_name == 'schedule'}}
         env:
-          NIGHTLY_MATRIX_MODEL_NAME: '["stablelm-3b-4e1t","Mistral-7B-v0.1"]'
-          NIGHTLY_MATRIX_TASK: '["truthfulqa", "arc"]'
-          NIGHTLY_MATRIX_PRECISION: '["mixed_fp4", "fp8"]'
-          NIGHTLY_LABELS: '["self-hosted", "llm", "accuracy"]'
+          NIGHTLY_MATRIX_MODEL_NAME: '["Llama2-7b-guanaco-dolphin-500", "falcon-7b-instruct-with-patch",
+            "Mistral-7B-v0.1", "mpt-7b-chat", "Baichuan2-7B-Chat-LLaMAfied", "stablelm-3b"]'
+          NIGHTLY_MATRIX_TASK: '["arc", "truthfulqa", "winogrande"]'
+          NIGHTLY_MATRIX_PRECISION: '["sym_int4", "fp8"]'
+          NIGHTLY_LABELS: '["self-hosted", "llm", "accuracy-nightly"]'
         run: |
           echo "model_name=$NIGHTLY_MATRIX_MODEL_NAME" >> $GITHUB_ENV
           echo "precision=$NIGHTLY_MATRIX_PRECISION" >> $GITHUB_ENV
@@ -63,9 +65,9 @@ jobs:
       - name: set-pr-env
         if: ${{github.event_name == 'pull_request'}}
         env:
-          PR_MATRIX_MODEL_NAME: '["stablelm-3b-4e1t"]'
+          PR_MATRIX_MODEL_NAME: '["stablelm-3b-4e1t", "Mistral-7B-v0.1"]'
           PR_MATRIX_TASK: '["truthfulqa"]'
-          PR_MATRIX_PRECISION: '["mixed_fp4", "fp8"]'
+          PR_MATRIX_PRECISION: '["fp8"]'
           PR_LABELS: '["self-hosted", "llm", "temp-arc01"]'
         run: |
           echo "model_name=$PR_MATRIX_MODEL_NAME" >> $GITHUB_ENV
@@ -122,6 +124,7 @@ jobs:
       - name: Install dependencies
         shell: bash
         run: |
+          set -e
           python -m pip install --upgrade pip
           python -m pip install --upgrade setuptools==58.0.4
           python -m pip install --upgrade wheel
@@ -132,7 +135,7 @@ jobs:
       - name: Run LLM install (all) test
         uses: ./.github/actions/llm/setup-llm-env
         with:
-          extra-dependency: "xpu_2.0"
+          extra-dependency: "xpu_2.1"
 
       - name: Install harness
         working-directory: ${{ github.workspace }}/python/llm/dev/benchmark/harness/
@@ -154,7 +157,13 @@ jobs:
       - name: Upgrade packages
         shell: bash
         run: |
-          pip install --upgrade transformers==4.34.0 datasets==2.14.6
+          pip install --upgrade datasets==2.14.6
+          if [ "${{ matrix.model_name }}" = "Mistral-7B-v0.1" ]; then
+            pip install --upgrade transformers==4.36
+          else
+            pip install --upgrade transformers==4.31
+          fi
+
 
       - name: Run harness
         shell: bash
@@ -166,7 +175,7 @@ jobs:
           export HF_HOME=${HARNESS_HF_HOME}
           export HF_DATASETS=$HARNESS_HF_HOME/datasets
           export HF_DATASETS_CACHE=$HARNESS_HF_HOME/datasets
-          source $HOME/intel/oneapi/setvars.sh
+          source /opt/intel/oneapi/setvars.sh
           python run_llb.py \
             --model bigdl-llm \
             --pretrained ${MODEL_PATH} \