name: LLM Performance Test

# Cancel previous runs in the PR when you push new commits
concurrency:
  group: ${{ github.workflow }}-llm-performance-tests-${{ github.event.pull_request.number || github.run_id }}
  cancel-in-progress: true

# Controls when the action will run.
on:
  schedule:
    - cron: "00 13 * * *" # GMT time, 13:00 GMT == 21:00 China
  pull_request:
    branches: [main]
    paths:
      - ".github/workflows/llm_performance_tests.yml"
      - ".github/workflows/llm-binary-build.yml"
      - ".github/actions/llm/setup-llm-env/action.yml"
      - ".github/actions/llm/remove-llm-env/action.yml"
      - ".github/actions/llm/download-llm-binary/action.yml"
      - "python/llm/test/benchmark/**"
  workflow_dispatch:
  workflow_call:

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  llm-cpp-build:
    uses: ./.github/workflows/llm-binary-build.yml

  llm-performance-test:
    if: false # skip the CPU performance test for now; may add it back with a separate runner
    needs: llm-cpp-build
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.9"]
        instruction: ["AVX512"]
    runs-on: [self-hosted, llm, perf]
    env:
      THREAD_NUM: 24
    steps:
      - name: Set environment variables
        shell: bash
        run: |
          echo "LLAMA2_7B_ORIGIN_PATH=${ORIGIN_DIR}/Llama-2-7b-chat-hf" >> "$GITHUB_ENV"
      - uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install --upgrade setuptools==58.0.4
          python -m pip install --upgrade wheel
      - name: Download llm binary
        uses: ./.github/actions/llm/download-llm-binary
      - name: Run LLM install (all) test
        uses: ./.github/actions/llm/setup-llm-env
        env:
          ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
      - name: Download LLMs
        shell: bash
        run: |
          if [ ! -d "$LLAMA2_7B_ORIGIN_PATH" ]; then
            echo "Directory $LLAMA2_7B_ORIGIN_PATH not found. Downloading from FTP server..."
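            # wget flags: -r recurses into the remote directory, -nH drops the hostname
            # directory, and --cut-dirs=1 strips the leading "llm/" path component, so
            # the model folder lands directly under $ORIGIN_DIR (-P)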
            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/Llama-2-7b-chat-hf -P $ORIGIN_DIR
          fi
      - name: Run LLM Performance test
        env:
          ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
        run: bash python/llm/dev/benchmark/run-benchmark-tests.sh
      # - name: Clean up test environment
      #   uses: ./.github/actions/llm/remove-llm-env
      #   env:
      #     ANALYTICS_ZOO_ROOT: ${{ github.workspace }}

  llm-performance-test-on-arc:
    needs: llm-cpp-build
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.9"]
    runs-on: [self-hosted, llm, perf]
    env:
      OMP_NUM_THREADS: 16
      THREAD_NUM: 16
      ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        shell: bash
        # pip install transformers_stream_generator for model internlm-chat-7b-8k
        # pip install tiktoken for model Qwen-7B-Chat-10-12
        run: |
          python -m pip install --upgrade pip
          python -m pip install --upgrade wheel
          python -m pip install --upgrade omegaconf
          python -m pip install --upgrade pandas
          python -m pip install --upgrade einops
          python -m pip install --upgrade transformers_stream_generator
          python -m pip install --upgrade tiktoken
      - name: Download llm binary
        uses: ./.github/actions/llm/download-llm-binary
      - name: Run LLM install (all) test
        uses: ./.github/actions/llm/setup-llm-env
        with:
          extra-dependency: "xpu"
      - name: Test installed xpu version
        shell: bash
        run: |
          source /opt/intel/oneapi/setvars.sh
          bash python/llm/test/run-llm-install-tests.sh
      - name: Test on xpu
        shell: bash
        run: |
          source /opt/intel/oneapi/setvars.sh
          export USE_XETLA=OFF
          export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
          mv python/llm/test/benchmark/arc-perf-test.yaml python/llm/dev/benchmark/all-in-one/config.yaml
          cd python/llm/dev/benchmark/all-in-one
          export http_proxy=${HTTP_PROXY}
          export https_proxy=${HTTPS_PROXY}
          python run.py
          curl -T ./*.csv ${LLM_FTP_URL}/llm/ggml-actions/perf/
          cp ./*.csv /mnt/disk1/nightly_perf/
          cd ../../../test/benchmark
          python csv_to_html.py -f /mnt/disk1/nightly_perf/
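
# To regenerate the HTML report outside CI, the same script can be run by hand
# (a sketch, assuming the same CSV layout; the folder below is a placeholder):
#   cd python/llm/test/benchmark
#   python csv_to_html.py -f /path/to/benchmark/csv/folder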