Add retry to the "Run LLM install" step; test arc05 with llama2

2024-02-06 14:09:14 +08:00 · 2024-02-06 14:09:14 +08:00 · 94723bb0b1
commit 94723bb0b1
parent 2c75b5b981
1 changed files with 18 additions and 4 deletions
--- a/.github/workflows/llm-harness-evaluation.yml
+++ b/.github/workflows/llm-harness-evaluation.yml
@ -68,7 +68,7 @@ jobs:
          PR_MATRIX_MODEL_NAME: '["Llama2-7b-guanaco-dolphin-500"]'
          PR_MATRIX_TASK: '["truthfulqa"]'
          PR_MATRIX_PRECISION: '["fp8"]'
-          PR_LABELS: '["self-hosted", "llm", "temp-arc01"]'
+          PR_LABELS: '["self-hosted", "llm", "accuracy2", "accuracy-nightly"]'
        run: |
            echo "model_name=$PR_MATRIX_MODEL_NAME" >> $GITHUB_ENV
            echo "precision=$PR_MATRIX_PRECISION" >> $GITHUB_ENV
@ -135,9 +135,23 @@ jobs:
        uses: ./.github/actions/llm/download-llm-binary
      - name: Run LLM install (all) test
-        uses: ./.github/actions/llm/setup-llm-env
+        # uses: ./.github/actions/llm/setup-llm-env
-        with:
+        # with:
-          extra-dependency: "xpu_2.1"
+        #   extra-dependency: "xpu_2.1"
        run: |
          # Run the LLM environment setup and retry it when it fails.
          # max_retries counts the extra attempts made after the first one,
          # so the command runs at most max_retries + 1 times.
          # NOTE(review): the commented-out `uses:` above referenced this same
          # path, which suggests it is a composite-action directory rather than
          # a shell script -- confirm that `bash <path>` really runs the install.
          max_retries=1
          retry_count=0
          command="bash ./.github/actions/llm/setup-llm-env --extra-dependency xpu_2.1"
          # $command is left unquoted on purpose so it word-splits into argv.
          until $command; do
              # Capture the status of the failed attempt before anything resets $?.
              exit_code=$?
              retry_count=$((retry_count+1))
              if [ "$retry_count" -gt "$max_retries" ]; then
                  # Final failure: report it without claiming another retry.
                  echo "Attempt $retry_count failed with exit code $exit_code."
                  echo "Reached maximum retry attempts. Exiting."
                  exit $exit_code
              fi
              echo "Attempt $retry_count failed with exit code $exit_code. Retrying..."
              # Short pause before the next attempt.
              sleep 5
          done
      - name: Install harness
        working-directory: ${{ github.workspace }}/python/llm/dev/benchmark/harness/