diff --git a/.github/workflows/llm_performance_tests.yml b/.github/workflows/llm_performance_tests.yml
index 605b0162..f259bd35 100644
--- a/.github/workflows/llm_performance_tests.yml
+++ b/.github/workflows/llm_performance_tests.yml
@@ -63,7 +63,12 @@ on:
         type: boolean
         default: true
       igpu:
-        description: "If trigger performance test on iGPU"
+        description: "If trigger performance test on iGPU (Windows)"
+        required: false
+        type: boolean
+        default: true
+      dgpu:
+        description: "If trigger performance test on dGPU (Windows)"
         required: false
         type: boolean
         default: true
@@ -583,16 +588,39 @@ jobs:
             curl -T ./*.csv ${LLM_FTP_URL}/llm/nightly_perf/core_${{ matrix.platform }}/
           fi
 
-  llm-performance-test-on-igpu:
-    if: ${{ github.event.schedule || ( github.event_name == 'workflow_dispatch' && inputs.igpu ) }} # please comment it for PR tests
+  select-gpu-win-test-platform:
+    if: ${{ github.event.schedule || ( github.event_name == 'workflow_dispatch' && ( inputs.igpu || inputs.dgpu ) ) }} # please comment it for PR tests
     needs: llm-cpp-build
+    runs-on: [self-hosted, Shire]
+    outputs:
+      platform: ${{ steps.select-platform.outputs.platform }}  # JSON array of runner labels, read with fromJSON() by the gpu-win job matrix
+    steps:
+      - name: Select GPU Windows test platform
+        shell: bash
+        id: select-platform
+        run: |
+          if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then  # manual run: honour the igpu/dgpu inputs
+            if [[ "${{ inputs.igpu }}" == "true" && "${{ inputs.dgpu }}" == "true" ]]; then
+              echo 'platform=["perf-igpu", "perf-dgpu"]' >> "$GITHUB_OUTPUT"
+            elif [[ "${{ inputs.igpu }}" == "true" ]]; then
+              echo 'platform=["perf-igpu"]' >> "$GITHUB_OUTPUT"
+            else  # igpu is not "true"; the job-level if only lets a manual run in when igpu or dgpu is set
+              echo 'platform=["perf-dgpu"]' >> "$GITHUB_OUTPUT"
+            fi
+          else  # any other trigger admitted by the job-level if (scheduled run): iGPU only
+            echo 'platform=["perf-igpu"]' >> "$GITHUB_OUTPUT"
+          fi
+
+  # TODO: rename igpu specific tests to gpu-win
+  llm-performance-test-on-gpu-win:
+    if: ${{ github.event.schedule || ( github.event_name == 'workflow_dispatch' && ( inputs.igpu || inputs.dgpu ) ) }} # please comment it for PR tests; keep identical to the select-gpu-win-test-platform condition
+    needs: select-gpu-win-test-platform
     strategy:
       fail-fast:
false matrix: - include: - - os: windows - python-version: "3.11" - runs-on: [self-hosted, "${{ matrix.os }}", llm, perf-igpu] + platform: ${{ fromJSON(needs.select-gpu-win-test-platform.outputs.platform) }} + python-version: ["3.11"] + runs-on: [self-hosted, Windows, llm, "${{ matrix.platform }}"] env: ANALYTICS_ZOO_ROOT: ${{ github.workspace }} steps: @@ -612,10 +640,16 @@ jobs: if: ${{ github.event_name == 'workflow_dispatch' && (inputs.checkout-ref != 'main') }} shell: bash run: | - sed -i 's/"bigdl-core-xe-21==" + CORE_XE_VERSION/"bigdl-core-xe-21"/g' python/llm/setup.py - sed -i 's/"bigdl-core-xe-batch-21==" + CORE_XE_VERSION/"bigdl-core-xe-batch-21"/g' python/llm/setup.py - sed -i 's/"bigdl-core-xe-addons-21==" + CORE_XE_VERSION/"bigdl-core-xe-addons-21"/g' python/llm/setup.py - sed -i 's/"bigdl-core-xe-esimd-21==" + CORE_XE_VERSION/"bigdl-core-xe-esimd-21"/g' python/llm/setup.py + if [ ${{ matrix.platform }} == "perf-igpu" ]; then + sed -i 's/"bigdl-core-xe-21==" + CORE_XE_VERSION/"bigdl-core-xe-21"/g' python/llm/setup.py + sed -i 's/"bigdl-core-xe-batch-21==" + CORE_XE_VERSION/"bigdl-core-xe-batch-21"/g' python/llm/setup.py + sed -i 's/"bigdl-core-xe-addons-21==" + CORE_XE_VERSION/"bigdl-core-xe-addons-21"/g' python/llm/setup.py + fi + if [ ${{ matrix.platform }} == "perf-dgpu" ]; then + sed -i 's/"bigdl-core-xe-23==" + CORE_XE_VERSION/"bigdl-core-xe-23"/g' python/llm/setup.py + sed -i 's/"bigdl-core-xe-batch-23==" + CORE_XE_VERSION/"bigdl-core-xe-batch-23"/g' python/llm/setup.py + sed -i 's/"bigdl-core-xe-addons-23==" + CORE_XE_VERSION/"bigdl-core-xe-addons-23"/g' python/llm/setup.py + fi - name: Install ipex-llm and other related packages (install from source) if: ${{ github.event_name == 'workflow_dispatch' && (inputs.checkout-ref != 'main') }} @@ -634,7 +668,12 @@ jobs: if not exist dist\ipex_llm*.whl (exit /b 1) for %%i in (dist\ipex_llm*.whl) do set whl_name=%%i - pip install --pre --upgrade %whl_name%[xpu] --extra-index-url 
https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ + if "${{ matrix.platform }}"=="perf-igpu" ( + pip install --pre --upgrade %whl_name%[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + pip install --pre --upgrade %whl_name%[xpu_lnl] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/lnl/cn/ + ) if %ERRORLEVEL% neq 0 (exit /b 1) pip list @@ -660,7 +699,12 @@ jobs: pip install --upgrade omegaconf pandas pip install --upgrade tiktoken einops transformers_stream_generator matplotlib - pip install --pre --upgrade ipex-llm[xpu]==%TEST_VERSION% --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ + if "${{ matrix.platform }}"=="perf-igpu" ( + pip install --pre --upgrade ipex-llm[xpu]==%TEST_VERSION% --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + pip install --pre --upgrade ipex-llm[xpu_lnl]==%TEST_VERSION% --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/lnl/cn/ + ) pip show ipex-llm | findstr %TEST_VERSION% if %ERRORLEVEL% neq 0 ( echo "Did not install ipex-llm with excepted version %TEST_VERSION%" @@ -698,6 +742,7 @@ jobs: sed -i "s/date.today()/\"$date_for_test_version\"/g" python/llm/dev/benchmark/all-in-one/run.py - name: Add extra warmup for chatglm3-6b int4+fp32 & MiniCPM int4+fp16 int4+fp32 for more stable results + if: ${{ matrix.platform == 'perf-igpu' }} shell: bash run: | sed -i '/^\s*result = run_transformer_int4_gpu_win(repo_id, local_model_hub, in_out_pairs, warm_up, num_trials, num_beams, low_bit, cpu_embedding, batch_size, streaming)/ i\ @@ -723,8 +768,14 @@ jobs: shell: cmd run: | call conda activate igpu-perf - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + if "${{ matrix.platform }}"=="perf-igpu" ( + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + ) + if "${{ 
matrix.platform }}"=="perf-dgpu" ( + set SYCL_CACHE_PERSISTENT=1 + set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + ) REM for llava set TRANSFORMERS_OFFLINE=1 @@ -750,8 +801,14 @@ jobs: call conda activate igpu-perf pip install transformers==4.36.2 - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + if "${{ matrix.platform }}"=="perf-igpu" ( + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + set SYCL_CACHE_PERSISTENT=1 + set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + ) cd python\llm\dev\benchmark\all-in-one move ..\..\..\test\benchmark\igpu-perf\32-32_int4_fp16_436.yaml config.yaml @@ -775,8 +832,14 @@ jobs: call conda activate igpu-perf pip install transformers==4.38.2 - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + if "${{ matrix.platform }}"=="perf-igpu" ( + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + set SYCL_CACHE_PERSISTENT=1 + set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + ) cd python\llm\dev\benchmark\all-in-one move ..\..\..\test\benchmark\igpu-perf\32-32_int4_fp16_438.yaml config.yaml @@ -801,8 +864,14 @@ jobs: pip install transformers==4.43.1 pip install trl - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + if "${{ matrix.platform }}"=="perf-igpu" ( + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + set SYCL_CACHE_PERSISTENT=1 + set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + ) cd python\llm\dev\benchmark\all-in-one move ..\..\..\test\benchmark\igpu-perf\32-32_int4_fp16_443.yaml config.yaml @@ -829,8 +898,14 @@ jobs: pip install accelerate==0.33.0 pip install trl - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + if "${{ matrix.platform }}"=="perf-igpu" ( + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + set SYCL_CACHE_PERSISTENT=1 + 
set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + ) cd python\llm\dev\benchmark\all-in-one move ..\..\..\test\benchmark\igpu-perf\32-32_int4_fp16_445.yaml config.yaml @@ -876,8 +951,14 @@ jobs: call conda activate igpu-perf pip install transformers==4.37.0 - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + if "${{ matrix.platform }}"=="perf-igpu" ( + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + set SYCL_CACHE_PERSISTENT=1 + set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + ) REM for llava set TRANSFORMERS_OFFLINE=1 @@ -903,8 +984,14 @@ jobs: call conda activate igpu-perf pip install transformers==4.36.2 - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + if "${{ matrix.platform }}"=="perf-igpu" ( + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + set SYCL_CACHE_PERSISTENT=1 + set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + ) cd python\llm\dev\benchmark\all-in-one move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16_436.yaml config.yaml @@ -928,8 +1015,14 @@ jobs: call conda activate igpu-perf pip install transformers==4.38.2 - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + if "${{ matrix.platform }}"=="perf-igpu" ( + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + set SYCL_CACHE_PERSISTENT=1 + set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + ) cd python\llm\dev\benchmark\all-in-one move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16_438.yaml config.yaml @@ -954,8 +1047,14 @@ jobs: pip install transformers==4.43.1 pip install trl - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + if "${{ matrix.platform }}"=="perf-igpu" ( + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + set SYCL_CACHE_PERSISTENT=1 + set 
SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + ) cd python\llm\dev\benchmark\all-in-one move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16_443.yaml config.yaml @@ -982,8 +1081,14 @@ jobs: pip install accelerate==0.33.0 pip install trl - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + if "${{ matrix.platform }}"=="perf-igpu" ( + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + set SYCL_CACHE_PERSISTENT=1 + set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + ) cd python\llm\dev\benchmark\all-in-one move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16_445.yaml config.yaml @@ -1028,8 +1133,14 @@ jobs: call conda activate igpu-perf pip install transformers==4.37.0 - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + if "${{ matrix.platform }}"=="perf-igpu" ( + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + set SYCL_CACHE_PERSISTENT=1 + set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + ) REM for llava set TRANSFORMERS_OFFLINE=1 @@ -1055,8 +1166,14 @@ jobs: call conda activate igpu-perf pip install transformers==4.36.2 - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + if "${{ matrix.platform }}"=="perf-igpu" ( + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + set SYCL_CACHE_PERSISTENT=1 + set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + ) cd python\llm\dev\benchmark\all-in-one move ..\..\..\test\benchmark\igpu-perf\2048-256_int4_fp16_436.yaml config.yaml @@ -1080,8 +1197,14 @@ jobs: call conda activate igpu-perf pip install transformers==4.38.2 - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + if "${{ matrix.platform }}"=="perf-igpu" ( + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + set SYCL_CACHE_PERSISTENT=1 + set 
SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + ) cd python\llm\dev\benchmark\all-in-one move ..\..\..\test\benchmark\igpu-perf\2048-256_int4_fp16_438.yaml config.yaml @@ -1106,8 +1229,14 @@ jobs: pip install transformers==4.43.1 pip install trl - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + if "${{ matrix.platform }}"=="perf-igpu" ( + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + set SYCL_CACHE_PERSISTENT=1 + set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + ) cd python\llm\dev\benchmark\all-in-one move ..\..\..\test\benchmark\igpu-perf\2048-256_int4_fp16_443.yaml config.yaml @@ -1134,8 +1263,14 @@ jobs: pip install accelerate==0.33.0 pip install trl - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + if "${{ matrix.platform }}"=="perf-igpu" ( + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + set SYCL_CACHE_PERSISTENT=1 + set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + ) cd python\llm\dev\benchmark\all-in-one move ..\..\..\test\benchmark\igpu-perf\2048-256_int4_fp16_445.yaml config.yaml @@ -1180,8 +1315,14 @@ jobs: call conda activate igpu-perf pip install transformers==4.37.0 - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + if "${{ matrix.platform }}"=="perf-igpu" ( + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + set SYCL_CACHE_PERSISTENT=1 + set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + ) REM for llava set TRANSFORMERS_OFFLINE=1 @@ -1195,20 +1336,29 @@ jobs: call conda deactivate + # Remove Qwen-VL-Chat on dGPU for 3072-384 tests - name: Prepare igpu perf test for transformers 4.36 (3072-384 int4+fp16) + if: ${{ matrix.platform == 'perf-igpu' }} shell: bash run: | sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" 
python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16_436.yaml - name: Test on igpu for transformers 4.36 (3072-384 int4+fp16) + if: ${{ matrix.platform == 'perf-igpu' }} shell: cmd run: | call conda activate igpu-perf pip install transformers==4.36.2 - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + if "${{ matrix.platform }}"=="perf-igpu" ( + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + set SYCL_CACHE_PERSISTENT=1 + set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + ) cd python\llm\dev\benchmark\all-in-one move ..\..\..\test\benchmark\igpu-perf\3072-384_int4_fp16_436.yaml config.yaml @@ -1223,7 +1373,12 @@ jobs: - name: Prepare igpu perf test for transformers 4.38 (3072-384 int4+fp16) shell: bash run: | - sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py + if [ ${{ matrix.platform }} == "perf-igpu" ]; then + sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py + fi + if [ ${{ matrix.platform }} == "perf-dgpu" ]; then + sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py + fi sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16_438.yaml - name: Test on igpu for transformers 4.38 (3072-384 int4+fp16) @@ -1232,15 +1387,26 @@ jobs: call conda activate igpu-perf pip install transformers==4.38.2 - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + if "${{ matrix.platform }}"=="perf-igpu" ( + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + set SYCL_CACHE_PERSISTENT=1 + set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + ) cd python\llm\dev\benchmark\all-in-one move ..\..\..\test\benchmark\igpu-perf\3072-384_int4_fp16_438.yaml config.yaml set PYTHONIOENCODING=utf-8 python run.py >> %CSV_SAVE_PATH%\3072-384_int4_fp16\log\%LOG_FILE% 2>&1 if %ERRORLEVEL% neq 0 (exit 
/b 1) - python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test3 + if "${{ matrix.platform }}"=="perf-igpu" ( + python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test3 + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test2 + ) if %ERRORLEVEL% neq 0 (exit /b 1) call conda deactivate @@ -1248,7 +1414,12 @@ jobs: - name: Prepare igpu perf test for transformers 4.43 (3072-384 int4+fp16) shell: bash run: | - sed -i 's/{today}_test3/{today}_test4/g' python/llm/dev/benchmark/all-in-one/run.py + if [ ${{ matrix.platform }} == "perf-igpu" ]; then + sed -i 's/{today}_test3/{today}_test4/g' python/llm/dev/benchmark/all-in-one/run.py + fi + if [ ${{ matrix.platform }} == "perf-dgpu" ]; then + sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py + fi sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16_443.yaml - name: Test on igpu for transformers 4.43 (3072-384 int4+fp16) @@ -1258,15 +1429,26 @@ jobs: pip install transformers==4.43.1 pip install trl - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + if "${{ matrix.platform }}"=="perf-igpu" ( + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + set SYCL_CACHE_PERSISTENT=1 + set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + ) cd python\llm\dev\benchmark\all-in-one move ..\..\..\test\benchmark\igpu-perf\3072-384_int4_fp16_443.yaml config.yaml set PYTHONIOENCODING=utf-8 python run.py >> %CSV_SAVE_PATH%\3072-384_int4_fp16\log\%LOG_FILE% 2>&1 if %ERRORLEVEL% neq 0 (exit /b 1) - python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test4 + if "${{ matrix.platform }}"=="perf-igpu" ( + python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py 
--yaml-file config.yaml --suffix test4 + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test3 + ) if %ERRORLEVEL% neq 0 (exit /b 1) pip uninstall trl -y @@ -1275,7 +1457,12 @@ jobs: - name: Prepare igpu perf test for transformers 4.45 (3072-384 int4+fp16) shell: bash run: | - sed -i 's/{today}_test4/{today}_test5/g' python/llm/dev/benchmark/all-in-one/run.py + if [ ${{ matrix.platform }} == "perf-igpu" ]; then + sed -i 's/{today}_test4/{today}_test5/g' python/llm/dev/benchmark/all-in-one/run.py + fi + if [ ${{ matrix.platform }} == "perf-dgpu" ]; then + sed -i 's/{today}_test3/{today}_test4/g' python/llm/dev/benchmark/all-in-one/run.py + fi sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16_445.yaml - name: Test on igpu for transformers 4.45 (3072-384 int4+fp16) @@ -1286,15 +1473,26 @@ jobs: pip install accelerate==0.33.0 pip install trl - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + if "${{ matrix.platform }}"=="perf-igpu" ( + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + set SYCL_CACHE_PERSISTENT=1 + set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + ) cd python\llm\dev\benchmark\all-in-one move ..\..\..\test\benchmark\igpu-perf\3072-384_int4_fp16_445.yaml config.yaml set PYTHONIOENCODING=utf-8 python run.py >> %CSV_SAVE_PATH%\3072-384_int4_fp16\log\%LOG_FILE% 2>&1 if %ERRORLEVEL% neq 0 (exit /b 1) - python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test5 + if "${{ matrix.platform }}"=="perf-igpu" ( + python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test5 + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test4 + ) if %ERRORLEVEL% neq 0 (exit 
/b 1) pip uninstall trl -y @@ -1323,7 +1521,12 @@ jobs: shell: bash run: | sed -i 's/3072-384/4096-512/g' python/llm/dev/benchmark/all-in-one/run.py - sed -i 's/{today}_test5/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py + if [ ${{ matrix.platform }} == "perf-igpu" ]; then + sed -i 's/{today}_test5/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py + fi + if [ ${{ matrix.platform }} == "perf-dgpu" ]; then + sed -i 's/{today}_test4/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py + fi sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16.yaml - name: Test on igpu (4096-512 int4+fp16) @@ -1332,8 +1535,14 @@ jobs: call conda activate igpu-perf pip install transformers==4.37.0 - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + if "${{ matrix.platform }}"=="perf-igpu" ( + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + set SYCL_CACHE_PERSISTENT=1 + set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + ) REM for llava set TRANSFORMERS_OFFLINE=1 @@ -1359,8 +1568,14 @@ jobs: call conda activate igpu-perf pip install transformers==4.38.2 - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + if "${{ matrix.platform }}"=="perf-igpu" ( + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + set SYCL_CACHE_PERSISTENT=1 + set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + ) cd python\llm\dev\benchmark\all-in-one move ..\..\..\test\benchmark\igpu-perf\4096-512_int4_fp16_438.yaml config.yaml @@ -1385,8 +1600,14 @@ jobs: pip install transformers==4.43.1 pip install trl - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + if "${{ matrix.platform }}"=="perf-igpu" ( + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + set SYCL_CACHE_PERSISTENT=1 + set 
SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + ) cd python\llm\dev\benchmark\all-in-one move ..\..\..\test\benchmark\igpu-perf\4096-512_int4_fp16_443.yaml config.yaml @@ -1413,8 +1634,14 @@ jobs: pip install accelerate==0.33.0 pip install trl - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + if "${{ matrix.platform }}"=="perf-igpu" ( + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + set SYCL_CACHE_PERSISTENT=1 + set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + ) cd python\llm\dev\benchmark\all-in-one move ..\..\..\test\benchmark\igpu-perf\4096-512_int4_fp16_445.yaml config.yaml @@ -1447,6 +1674,7 @@ jobs: # load_low_bit 1024-128 int4+fp16 - name: Prepare igpu perf test (load_low_bit 1024-128 int4+fp16) + if: ${{ matrix.platform == 'perf-igpu' }} shell: bash run: | sed -i 's/4096-512/1024-128/g' python/llm/dev/benchmark/all-in-one/run.py @@ -1454,13 +1682,20 @@ jobs: sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit.yaml - name: Test on igpu (load_low_bit 1024-128 int4+fp16) + if: ${{ matrix.platform == 'perf-igpu' }} shell: cmd run: | call conda activate igpu-perf pip install transformers==4.37.0 - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + if "${{ matrix.platform }}"=="perf-igpu" ( + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + set SYCL_CACHE_PERSISTENT=1 + set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + ) REM for llava set TRANSFORMERS_OFFLINE=1 @@ -1475,19 +1710,27 @@ jobs: call conda deactivate - name: Prepare igpu perf test for transformers 4.36 (load_low_bit 1024-128 int4+fp16) + if: ${{ matrix.platform == 'perf-igpu' }} shell: bash run: | sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" 
python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit_436.yaml - name: Test on igpu for transformers 4.36 (load_low_bit 1024-128 int4+fp16) + if: ${{ matrix.platform == 'perf-igpu' }} shell: cmd run: | call conda activate igpu-perf pip install transformers==4.36.2 - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + if "${{ matrix.platform }}"=="perf-igpu" ( + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + set SYCL_CACHE_PERSISTENT=1 + set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + ) cd python\llm\dev\benchmark\all-in-one move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16_loadlowbit_436.yaml config.yaml @@ -1500,19 +1743,27 @@ jobs: call conda deactivate - name: Prepare igpu perf test for transformers 4.38 (load_low_bit 1024-128 int4+fp16) + if: ${{ matrix.platform == 'perf-igpu' }} shell: bash run: | sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit_438.yaml - name: Test on igpu for transformers 4.38 (load_low_bit 1024-128 int4+fp16) + if: ${{ matrix.platform == 'perf-igpu' }} shell: cmd run: | call conda activate igpu-perf pip install transformers==4.38.2 - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + if "${{ matrix.platform }}"=="perf-igpu" ( + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + set SYCL_CACHE_PERSISTENT=1 + set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + ) cd python\llm\dev\benchmark\all-in-one move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16_loadlowbit_438.yaml config.yaml @@ -1525,20 +1776,28 @@ jobs: call conda deactivate - name: Prepare igpu perf test for transformers 4.43 (load_low_bit 1024-128 int4+fp16) + if: ${{ matrix.platform == 'perf-igpu' }} shell: bash run: | sed -i 
's/{today}_test3/{today}_test4/g' python/llm/dev/benchmark/all-in-one/run.py sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit_443.yaml - name: Test on igpu for transformers 4.43 (load_low_bit 1024-128 int4+fp16) + if: ${{ matrix.platform == 'perf-igpu' }} shell: cmd run: | call conda activate igpu-perf pip install transformers==4.43.1 pip install trl - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + if "${{ matrix.platform }}"=="perf-igpu" ( + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + set SYCL_CACHE_PERSISTENT=1 + set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + ) cd python\llm\dev\benchmark\all-in-one move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16_loadlowbit_443.yaml config.yaml @@ -1552,12 +1811,14 @@ jobs: call conda deactivate - name: Prepare igpu perf test for transformers 4.45 (load_low_bit 1024-128 int4+fp16) + if: ${{ matrix.platform == 'perf-igpu' }} shell: bash run: | sed -i 's/{today}_test4/{today}_test5/g' python/llm/dev/benchmark/all-in-one/run.py sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit_445.yaml - name: Test on igpu for transformers 4.45 (load_low_bit 1024-128 int4+fp16) + if: ${{ matrix.platform == 'perf-igpu' }} shell: cmd run: | call conda activate igpu-perf @@ -1565,8 +1826,14 @@ jobs: pip install accelerate==0.33.0 pip install trl - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + if "${{ matrix.platform }}"=="perf-igpu" ( + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + set SYCL_CACHE_PERSISTENT=1 + set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + ) cd python\llm\dev\benchmark\all-in-one move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16_loadlowbit_445.yaml config.yaml @@ -1581,6 +1848,7 @@ jobs: call conda 
deactivate - name: Concat csv and generate html (load_low_bit 1024-128 int4+fp16) + if: ${{ matrix.platform == 'perf-igpu' }} shell: cmd run: | call conda activate html-gen @@ -1599,19 +1867,27 @@ jobs: # 1024-128 - name: Prepare igpu perf test (1024-128) + if: ${{ matrix.platform == 'perf-igpu' }} shell: bash run: | sed -i 's/{today}_test5/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128.yaml - name: Test on igpu (1024-128) + if: ${{ matrix.platform == 'perf-igpu' }} shell: cmd run: | call conda activate igpu-perf pip install transformers==4.37.0 - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + if "${{ matrix.platform }}"=="perf-igpu" ( + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + set SYCL_CACHE_PERSISTENT=1 + set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + ) REM for llava set TRANSFORMERS_OFFLINE=1 @@ -1626,19 +1902,27 @@ jobs: call conda deactivate - name: Prepare igpu perf test for transformers 4.36 (1024-128) + if: ${{ matrix.platform == 'perf-igpu' }} shell: bash run: | sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_436.yaml - name: Test on igpu for transformers 4.36 (1024-128) + if: ${{ matrix.platform == 'perf-igpu' }} shell: cmd run: | call conda activate igpu-perf pip install transformers==4.36.2 - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + if "${{ matrix.platform }}"=="perf-igpu" ( + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + set SYCL_CACHE_PERSISTENT=1 + set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + ) cd python\llm\dev\benchmark\all-in-one move ..\..\..\test\benchmark\igpu-perf\1024-128_436.yaml config.yaml @@ -1651,19 +1935,27 
@@ jobs: call conda deactivate - name: Prepare igpu perf test for transformers 4.38 (1024-128) + if: ${{ matrix.platform == 'perf-igpu' }} shell: bash run: | sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_438.yaml - name: Test on igpu for transformers 4.38 (1024-128) + if: ${{ matrix.platform == 'perf-igpu' }} shell: cmd run: | call conda activate igpu-perf pip install transformers==4.38.2 - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + if "${{ matrix.platform }}"=="perf-igpu" ( + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + set SYCL_CACHE_PERSISTENT=1 + set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + ) cd python\llm\dev\benchmark\all-in-one move ..\..\..\test\benchmark\igpu-perf\1024-128_438.yaml config.yaml @@ -1676,20 +1968,28 @@ jobs: call conda deactivate - name: Prepare igpu perf test for transformers 4.43 (1024-128) + if: ${{ matrix.platform == 'perf-igpu' }} shell: bash run: | sed -i 's/{today}_test3/{today}_test4/g' python/llm/dev/benchmark/all-in-one/run.py sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_443.yaml - name: Test on igpu for transformers 4.43 (1024-128) + if: ${{ matrix.platform == 'perf-igpu' }} shell: cmd run: | call conda activate igpu-perf pip install transformers==4.43.1 pip install trl - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + if "${{ matrix.platform }}"=="perf-igpu" ( + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + set SYCL_CACHE_PERSISTENT=1 + set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + ) cd python\llm\dev\benchmark\all-in-one move ..\..\..\test\benchmark\igpu-perf\1024-128_443.yaml config.yaml @@ -1703,12 +2003,14 @@ jobs: call conda deactivate - name: Prepare igpu 
perf test for transformers 4.45 (1024-128) + if: ${{ matrix.platform == 'perf-igpu' }} shell: bash run: | sed -i 's/{today}_test4/{today}_test5/g' python/llm/dev/benchmark/all-in-one/run.py sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_445.yaml - name: Test on igpu for transformers 4.45 (1024-128) + if: ${{ matrix.platform == 'perf-igpu' }} shell: cmd run: | call conda activate igpu-perf @@ -1716,8 +2018,14 @@ jobs: pip install accelerate==0.33.0 pip install trl - set SYCL_CACHE_PERSISTENT=1 - set BIGDL_LLM_XMX_DISABLED=1 + if "${{ matrix.platform }}"=="perf-igpu" ( + set SYCL_CACHE_PERSISTENT=1 + set BIGDL_LLM_XMX_DISABLED=1 + ) + if "${{ matrix.platform }}"=="perf-dgpu" ( + set SYCL_CACHE_PERSISTENT=1 + set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + ) cd python\llm\dev\benchmark\all-in-one move ..\..\..\test\benchmark\igpu-perf\1024-128_445.yaml config.yaml @@ -1732,6 +2040,7 @@ jobs: call conda deactivate - name: Concat csv and generate html (1024-128) + if: ${{ matrix.platform == 'perf-igpu' }} shell: cmd run: | call conda activate html-gen @@ -1755,14 +2064,18 @@ jobs: run: | cd %CSV_SAVE_PATH% IF "${{ github.event_name }}"=="schedule" ( - for %%f in (*.html) do ( - curl -T "%%f" %FTP_IGPU_NIGHTLY_PERF_PATH% + IF "${{ matrix.platform }}"=="perf-igpu" ( + for %%f in (*.html) do ( + curl -T "%%f" %FTP_IGPU_NIGHTLY_PERF_PATH% + ) ) ) IF "${{ github.event_name }}"=="workflow_dispatch" ( IF "${{ inputs.checkout-ref }}"=="main" ( - for %%f in (*.html) do ( - curl -T "%%f" %FTP_IGPU_NIGHTLY_PERF_PATH% + IF "${{ matrix.platform }}"=="perf-igpu" ( + for %%f in (*.html) do ( + curl -T "%%f" %FTP_IGPU_NIGHTLY_PERF_PATH% + ) ) ) )