diff --git a/.github/workflows/llm-harness-evaluation.yml b/.github/workflows/llm-harness-evaluation.yml
index ef8b1dc1..5727f172 100644
--- a/.github/workflows/llm-harness-evaluation.yml
+++ b/.github/workflows/llm-harness-evaluation.yml
@@ -16,14 +16,85 @@ on:
       - ".github/workflows/llm-harness-evaluation.yml"
   # Allows you to run this workflow manually from the Actions tab
   workflow_dispatch:
+    inputs:  # list inputs are spliced verbatim into a JSON array: pass quoted, comma-separated items, e.g. "arc","truthfulqa"
+      model_name:
+        description: 'A list of models added to the job matrix.'
+        required: true
+        type: string
+      precision:
+        description: 'A list of precisions added to the job matrix'
+        required: true
+        type: string
+      task:
+        description: 'A list of tasks added to the job matrix'
+        required: true
+        type: string
+      runs-on:
+        description: 'Labels to filter the runners.'
+        default: '"accuracy"'  # JSON quotes are part of the value: it is appended verbatim to the runner label array
+        required: false
+        type: string
+
 
 # A workflow run is made up of one or more jobs that can run sequentially or in parallel
 jobs:
   llm-cpp-build:
     uses: ./.github/workflows/llm-binary-build.yml
+  set-matrix:  # picks the matrix axes and runner labels for the triggering event and exposes them as JSON-array outputs
+    runs-on: ubuntu-latest
+    outputs:
+      model_name: ${{ steps.set-matrix.outputs.model_name }}
+      precision: ${{ steps.set-matrix.outputs.precision }}
+      task: ${{ steps.set-matrix.outputs.task }}
+      runner: ${{ steps.set-matrix.outputs.runner }}
+    steps:
+      - name: set-nightly-env  # scheduled runs: full model/task matrix
+        if: ${{github.event_name == 'schedule'}}
+        env:
+          NIGHTLY_MATRIX_MODEL_NAME: '["stablelm-3b-4e1t","Mistral-7B-v0.1"]'
+          NIGHTLY_MATRIX_TASK: '["truthfulqa", "arc"]'
+          NIGHTLY_MATRIX_PRECISION: '["mixed_fp4", "fp8"]'
+          NIGHTLY_LABELS: '["self-hosted", "llm", "accuracy"]'
+        run: |
+          echo "model_name=$NIGHTLY_MATRIX_MODEL_NAME" >> $GITHUB_ENV
+          echo "precision=$NIGHTLY_MATRIX_PRECISION" >> $GITHUB_ENV
+          echo "task=$NIGHTLY_MATRIX_TASK" >> $GITHUB_ENV
+          echo "runner=$NIGHTLY_LABELS" >> $GITHUB_ENV
+
+      - name: set-pr-env  # pull requests: reduced matrix
+        if: ${{github.event_name == 'pull_request'}}
+        env:
+          PR_MATRIX_MODEL_NAME: '["stablelm-3b-4e1t"]'
+          PR_MATRIX_TASK: '["truthfulqa"]'
+          PR_MATRIX_PRECISION: '["mixed_fp4", "fp8"]'
+          PR_LABELS: '["self-hosted", "llm", "temp-arc01"]'
+        run: |
+          echo "model_name=$PR_MATRIX_MODEL_NAME" >> $GITHUB_ENV
+          echo "precision=$PR_MATRIX_PRECISION" >> $GITHUB_ENV
+          echo "task=$PR_MATRIX_TASK" >> $GITHUB_ENV
+          echo "runner=$PR_LABELS" >> $GITHUB_ENV
+      - name: set-manual-env  # manual dispatch: wrap each workflow_dispatch input in [ ... ] to form a JSON array
+        if: ${{github.event_name == 'workflow_dispatch'}}
+        env:
+          MANUAL_MATRIX_MODEL_NAME: ${{format('[ {0} ]', inputs.model_name)}}
+          MANUAL_MATRIX_TASK: ${{format('[ {0} ]', inputs.task)}}
+          MANUAL_MATRIX_PRECISION: ${{format('[ {0} ]', inputs.precision)}}
+          MANUAL_LABELS: ${{format('["self-hosted", "llm", {0}]', inputs.runs-on)}}
+        run: |
+          echo "model_name=$MANUAL_MATRIX_MODEL_NAME" >> $GITHUB_ENV
+          echo "precision=$MANUAL_MATRIX_PRECISION" >> $GITHUB_ENV
+          echo "task=$MANUAL_MATRIX_TASK" >> $GITHUB_ENV
+          echo "runner=$MANUAL_LABELS" >> $GITHUB_ENV
+      - name: set-matrix  # republish whichever env values the event-specific step wrote as job outputs
+        id: set-matrix
+        run: |
+          echo "model_name=$model_name" >> $GITHUB_OUTPUT
+          echo "precision=$precision" >> $GITHUB_OUTPUT
+          echo "task=$task" >> $GITHUB_OUTPUT
+          echo "runner=$runner" >> $GITHUB_OUTPUT
   llm-harness-evalution:
     timeout-minutes: 1000
-    needs: llm-cpp-build
+    needs: [llm-cpp-build, set-matrix]
     strategy:
       fail-fast: false
       matrix:
@@ -33,12 +104,12 @@ jobs:
         #   task: "arc"
         #   precision: "sym_int4" #options: sym_int4, fp4, mixed_fp4, sym_int8, fp8, mixed_fp8
         python-version: ["3.9"]
-        model_name: [stablelm-3b-4e1t,Mistral-7B-v0.1]
-        task: [truthfulqa, arc]
-        precision: [mixed_fp4, fp8]
+        model_name: ${{ fromJson(needs.set-matrix.outputs.model_name) }}
+        task: ${{ fromJson(needs.set-matrix.outputs.task) }}
+        precision: ${{ fromJson(needs.set-matrix.outputs.precision) }}
         device: [xpu]
 
-    runs-on: [self-hosted, llm, accuracy]
+    runs-on: ${{ fromJson(needs.set-matrix.outputs.runner) }}
     env:
       ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
       ORIGIN_DIR: /mnt/disk1/models