Add llm-ppl workflow (#10074)

* add llm-ppl workflow * update the DATASET_DIR * test multiple precisions * modify nightly test * match the updated ppl code * add matrix.include * fix the include error * update the include * add more model * update the precision of include * update nightly time and add more models * fix the workflow_dispatch description, change default model of pr and modify the env
2024-02-07 16:29:57 +08:00 · 2024-02-07 16:29:57 +08:00 · 1710ecb990
commit 1710ecb990
parent c1ec3d8921
1 changed files with 205 additions and 0 deletions
--- a/.github/workflows/llm-ppl-evaluation.yml
+++ b/.github/workflows/llm-ppl-evaluation.yml
@ -0,0 +1,205 @@
+name: LLM Perplexity Evalution
+
+# Cancel previous runs in the PR when you push new commits
+concurrency:
+  group: ${{ github.workflow }}-llm-nightly-test-${{ github.event.pull_request.number || github.run_id }}
+  cancel-in-progress: true
+
+# Controls when the action will run.
+on:
+  schedule:
+    - cron: "00 14 * * *" # GMT time, 14:00 GMT == 22:00 China
+  pull_request:
+    branches: [main]
+    paths:
+      - ".github/workflows/llm-ppl-evaluation.yml"
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+    inputs:
+      seq_len:
+        description: 'sequence length, input one number'
+        required: true
+        type: number      
+      model_name:
+        description: 'Model names, seperated by comma and must be quoted.'
+        required: true
+        type: string
+      precision:
+        description: 'Precisions, seperated by comma and must be quoted.'
+        required: true
+        type: string
+      language:
+        description: 'language, can be en, zh, or all'
+        required: true
+        type: choice
+        options:
+          - en
+          - zh
+          - all
+      runs-on:
+        description: 'Labels to filter the runners, seperated by comma and must be quoted.'
+        default: "accuracy"
+        required: false
+        type: string
+
+
+# A workflow run is made up of one or more jobs that can run sequentially or in parallel
+jobs:
+  llm-cpp-build:
+    uses: ./.github/workflows/llm-binary-build.yml
+  set-matrix:
+    runs-on: ubuntu-latest
+    outputs:
+      seq_len: ${{ steps.set-matrix.outputs.seq_len }}
+      model_name: ${{ steps.set-matrix.outputs.model_name }}
+      precision: ${{ steps.set-matrix.outputs.precision }}
+      language: ${{ steps.set-matrix.outputs.language }}
+      runner: ${{ steps.set-matrix.outputs.runner }}
+    steps:
+      - name: set-nightly-env
+        if: ${{github.event_name == 'schedule'}}
+        env:
+          NIGHTLY_MATRIX_SEQ_LEN: '["512"]'
+          NIGHTLY_MATRIX_MODEL_NAME: '["Llama-2-7b-chat-hf", "mpt-7b-chat", 
+                        "falcon-7b-instruct-with-patch", "Mistral-7B-v0.1"]'
+          NIGHTLY_MATRIX_LANGUAGE: '["en"]'
+          NIGHTLY_MATRIX_PRECISION: '["sym_int4 fp8"]'
+          NIGHTLY_LABELS: '["self-hosted", "llm", "accuracy-nightly"]'
+        run: |
+            echo "seq_len=$NIGHTLY_MATRIX_SEQ_LEN" >> $GITHUB_ENV
+            echo "model_name=$NIGHTLY_MATRIX_MODEL_NAME" >> $GITHUB_ENV
+            echo "precision=$NIGHTLY_MATRIX_PRECISION" >> $GITHUB_ENV
+            echo "language=$NIGHTLY_MATRIX_LANGUAGE" >> $GITHUB_ENV
+            echo "runner=$NIGHTLY_LABELS" >> $GITHUB_ENV
+
+      - name: set-pr-env
+        if: ${{github.event_name == 'pull_request'}}
+        env:
+          PR_MATRIX_SEQ_LEN: '["512"]'
+          PR_MATRIX_MODEL_NAME: '["Llama-2-7b-chat-hf"]'
+          PR_MATRIX_LANGUAGE: '["en"]'
+          PR_MATRIX_PRECISION: '["sym_int4"]'
+          PR_LABELS: '["self-hosted", "llm", "temp-arc01"]'
+        run: |
+            echo "seq_len=$PR_MATRIX_SEQ_LEN" >> $GITHUB_ENV
+            echo "model_name=$PR_MATRIX_MODEL_NAME" >> $GITHUB_ENV
+            echo "precision=$PR_MATRIX_PRECISION" >> $GITHUB_ENV
+            echo "language=$PR_MATRIX_LANGUAGE" >> $GITHUB_ENV
+            echo "runner=$PR_LABELS" >> $GITHUB_ENV
+      - name: set-manual-env
+        if: ${{github.event_name == 'workflow_dispatch'}}
+        env:
+          MANUAL_MATRIX_SEQ_LEN: ${{format('[ {0} ]', inputs.seq_len)}}
+          MANUAL_MATRIX_MODEL_NAME: ${{format('[ {0} ]', inputs.model_name)}}
+          MANUAL_MATRIX_LANGUAGE: ${{format('[ {0} ]', inputs.language)}}
+          MANUAL_MATRIX_PRECISION: ${{format('[ {0} ]', inputs.precision)}}
+          MANUAL_LABELS: ${{format('["self-hosted", "llm", {0}]', inputs.runs-on)}}
+        run: |
+            echo "seq_len=$MANUAL_MATRIX_SEQ_LEN" >> $GITHUB_ENV
+            echo "model_name=$MANUAL_MATRIX_MODEL_NAME" >> $GITHUB_ENV
+            echo "precision=$MANUAL_MATRIX_PRECISION" >> $GITHUB_ENV
+            echo "language=$MANUAL_MATRIX_LANGUAGE" >> $GITHUB_ENV
+            echo "runner=$MANUAL_LABELS" >> $GITHUB_ENV
+      - name: set-matrix
+        id: set-matrix
+        run: |
+            echo "seq_len=$seq_len" >> $GITHUB_OUTPUT
+            echo "model_name=$model_name" >> $GITHUB_OUTPUT
+            echo "precision=$precision" >> $GITHUB_OUTPUT
+            echo "language=$language" >> $GITHUB_OUTPUT
+            echo "runner=$runner" >> $GITHUB_OUTPUT
+  llm-ppl-evalution:
+    timeout-minutes: 1000
+    needs: [llm-cpp-build, set-matrix]
+    strategy:
+      fail-fast: false
+      matrix:
+        # include:
+        #   python-version: "3.9"
+        #   model_name: "stablelm-3b-4e1t"
+        #   task: "arc"
+        #   precision: "sym_int4" #options: sym_int4, fp4, mixed_fp4, sym_int8, fp8, mixed_fp8
+        python-version: ["3.9"]
+        model_name: ${{ fromJson(needs.set-matrix.outputs.model_name) }}
+        language: ${{ fromJson(needs.set-matrix.outputs.language) }}
+        precision: ${{ fromJson(needs.set-matrix.outputs.precision) }}
+        seq_len: ${{ fromJson(needs.set-matrix.outputs.seq_len) }}
+        device: [xpu]
+        include:
+          - python-version: "3.9"
+            model_name: "chatglm2-6b"
+            language: "zh"
+            precision: "sym_int4 fp8"
+            seq_len: "512"
+            device: "xpu"
+          - python-version: "3.9"
+            model_name: "chatglm3-6b"
+            language: "zh"
+            precision: "sym_int4 fp8"
+            seq_len: "512"
+            device: "xpu"
+          - python-version: "3.9"
+            model_name: "Baichuan2-7B-Chat"
+            language: "zh"
+            precision: "sym_int4 fp8"
+            seq_len: "512"
+            device: "xpu"
+        
+    runs-on: ${{ fromJson(needs.set-matrix.outputs.runner) }}
+    env:
+      ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
+      ORIGIN_DIR: /mnt/disk1/models
+      DATASET_DIR: /mnt/disk1/datasets/THUDM___long_bench/
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        shell: bash
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install --upgrade setuptools==58.0.4
+          python -m pip install --upgrade wheel
+
+      - name: Download llm binary
+        uses: ./.github/actions/llm/download-llm-binary
+
+      - name: Run LLM install (all) test
+        uses: ./.github/actions/llm/setup-llm-env
+        with:
+          extra-dependency: "xpu_2.1"
+      
+      - name: Download models
+        shell: bash
+        run: |
+          echo "MODEL_PATH=${ORIGIN_DIR}/${{ matrix.model_name }}/" >> "$GITHUB_ENV"
+          MODEL_PATH=${ORIGIN_DIR}/${{ matrix.model_name }}/
+          wget -r -nH -nc --no-verbose --cut-dirs=1 ${LLM_FTP_URL}/llm/${{ matrix.model_name }} -P ${ORIGIN_DIR}
+          
+      - name: Upgrade packages
+        shell: bash
+        run: |
+          pip install --upgrade datasets==2.14.6 
+          if [ "${{ matrix.model_name }}" = "Mistral-7B-v0.1" ]; then
+          pip install --upgrade transformers==4.36
+          else
+          pip install --upgrade transformers==4.31
+          fi
+
+      - name: Run perplexity
+        shell: bash
+        working-directory: ${{ github.workspace }}/python/llm/dev/benchmark/perplexity
+        env:
+          USE_XETLA: OFF
+          SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS: 1
+        run: |
+          source /opt/intel/oneapi/setvars.sh
+          python run.py \
+          --seq_len ${{ matrix.seq_len }} \
+          --model_path ${MODEL_PATH} \
+          --precisions ${{ matrix.precision }} \
+          --device ${{ matrix.device }} \
+          --dataset_path ${DATASET_DIR} \
+          --language ${{ matrix.language }}