diff --git a/.github/workflows/llm_performance_tests.yml b/.github/workflows/llm_performance_tests.yml
index 605b0162..f259bd35 100644
--- a/.github/workflows/llm_performance_tests.yml
+++ b/.github/workflows/llm_performance_tests.yml
@@ -63,7 +63,12 @@ on:
         type: boolean
         default: true
       igpu:
-        description: "If trigger performance test on iGPU"
+        description: "If trigger performance test on iGPU (Windows)" # adds "perf-igpu" to the Windows GPU test matrix
+        required: false
+        type: boolean
+        default: true
+      dgpu:
+        description: "If trigger performance test on dGPU (Windows)" # adds "perf-dgpu" to the Windows GPU test matrix
         required: false
         type: boolean
         default: true
@@ -583,16 +588,39 @@ jobs:
             curl -T ./*.csv ${LLM_FTP_URL}/llm/nightly_perf/core_${{ matrix.platform }}/
           fi
 
-  llm-performance-test-on-igpu:
-    if: ${{ github.event.schedule || ( github.event_name == 'workflow_dispatch' && inputs.igpu ) }} # please comment it for PR tests
+  select-gpu-win-test-platform:
+    if: ${{ github.event.schedule || ( github.event_name == 'workflow_dispatch' && inputs.igpu ) || ( github.event_name == 'workflow_dispatch' && inputs.dgpu ) }} # please comment it for PR tests
     needs: llm-cpp-build
+    runs-on: [self-hosted, Shire]
+    outputs:
+      platform: ${{ steps.select-platform.outputs.platform }} # JSON array of runner labels, e.g. ["perf-igpu", "perf-dgpu"]
+    steps:
+      - name: Select GPU Windows test platform
+        shell: bash
+        id: select-platform
+        run: |
+          if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then # manual run: honour the igpu/dgpu inputs
+            if [ "${{ inputs.igpu }}" == "true" ] && [ "${{ inputs.dgpu }}" == "true" ]; then
+              echo 'platform=["perf-igpu", "perf-dgpu"]' >> "$GITHUB_OUTPUT"
+            elif [ "${{ inputs.igpu }}" == "true" ]; then
+              echo 'platform=["perf-igpu"]' >> "$GITHUB_OUTPUT"
+            else # igpu is not "true"; the job-level `if` only lets a dispatch through when igpu or dgpu is set
+              echo 'platform=["perf-dgpu"]' >> "$GITHUB_OUTPUT"
+            fi
+          else # any other trigger (e.g. schedule) only covers iGPU
+            echo 'platform=["perf-igpu"]' >> "$GITHUB_OUTPUT"
+          fi
+
+  # TODO: rename igpu specific tests to gpu-win
+  llm-performance-test-on-gpu-win:
+    if: ${{ github.event.schedule || ( github.event_name == 'workflow_dispatch' && inputs.igpu ) || ( github.event_name == 'workflow_dispatch' && inputs.dgpu ) }} # please comment it for PR tests
+    needs: select-gpu-win-test-platform
     strategy:
       fail-fast: false
       matrix:
-        include:
-          - os: windows
-            python-version: "3.11"
-    runs-on: [self-hosted, "${{ matrix.os }}", llm, perf-igpu]
+        platform: ${{ fromJSON(needs.select-gpu-win-test-platform.outputs.platform) }} # one matrix entry per runner label emitted by select-gpu-win-test-platform
+        python-version: ["3.11"]
+    runs-on: [self-hosted, Windows, llm, "${{ matrix.platform }}"]
     env:
       ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
     steps:
@@ -612,10 +640,16 @@ jobs:
         if: ${{ github.event_name == 'workflow_dispatch' && (inputs.checkout-ref != 'main') }}
         shell: bash
         run: |
-          sed -i 's/"bigdl-core-xe-21==" + CORE_XE_VERSION/"bigdl-core-xe-21"/g' python/llm/setup.py
-          sed -i 's/"bigdl-core-xe-batch-21==" + CORE_XE_VERSION/"bigdl-core-xe-batch-21"/g' python/llm/setup.py
-          sed -i 's/"bigdl-core-xe-addons-21==" + CORE_XE_VERSION/"bigdl-core-xe-addons-21"/g' python/llm/setup.py
-          sed -i 's/"bigdl-core-xe-esimd-21==" + CORE_XE_VERSION/"bigdl-core-xe-esimd-21"/g' python/llm/setup.py
+          if [ ${{ matrix.platform }} == "perf-igpu" ]; then
+            sed -i 's/"bigdl-core-xe-21==" + CORE_XE_VERSION/"bigdl-core-xe-21"/g' python/llm/setup.py
+            sed -i 's/"bigdl-core-xe-batch-21==" + CORE_XE_VERSION/"bigdl-core-xe-batch-21"/g' python/llm/setup.py
+            sed -i 's/"bigdl-core-xe-addons-21==" + CORE_XE_VERSION/"bigdl-core-xe-addons-21"/g' python/llm/setup.py
+          fi
+          if [ ${{ matrix.platform }} == "perf-dgpu" ]; then
+            sed -i 's/"bigdl-core-xe-23==" + CORE_XE_VERSION/"bigdl-core-xe-23"/g' python/llm/setup.py
+            sed -i 's/"bigdl-core-xe-batch-23==" + CORE_XE_VERSION/"bigdl-core-xe-batch-23"/g' python/llm/setup.py
+            sed -i 's/"bigdl-core-xe-addons-23==" + CORE_XE_VERSION/"bigdl-core-xe-addons-23"/g' python/llm/setup.py
+          fi
 
       - name: Install ipex-llm and other related packages (install from source)
         if: ${{ github.event_name == 'workflow_dispatch' && (inputs.checkout-ref != 'main') }}
@@ -634,7 +668,12 @@ jobs:
           if not exist dist\ipex_llm*.whl (exit /b 1)
           for %%i in (dist\ipex_llm*.whl) do set whl_name=%%i
 
-          pip install --pre --upgrade %whl_name%[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            pip install --pre --upgrade %whl_name%[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            pip install --pre --upgrade %whl_name%[xpu_lnl] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/lnl/cn/
+          )
           if %ERRORLEVEL% neq 0 (exit /b 1)
           pip list
 
@@ -660,7 +699,12 @@ jobs:
           pip install --upgrade omegaconf pandas
           pip install --upgrade tiktoken einops transformers_stream_generator matplotlib
 
-          pip install --pre --upgrade ipex-llm[xpu]==%TEST_VERSION% --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            pip install --pre --upgrade ipex-llm[xpu]==%TEST_VERSION% --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            pip install --pre --upgrade ipex-llm[xpu_lnl]==%TEST_VERSION% --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/lnl/cn/
+          )
           pip show ipex-llm | findstr %TEST_VERSION%
           if %ERRORLEVEL% neq 0 (
             echo "Did not install ipex-llm with excepted version %TEST_VERSION%"
@@ -698,6 +742,7 @@ jobs:
           sed -i "s/date.today()/\"$date_for_test_version\"/g" python/llm/dev/benchmark/all-in-one/run.py
 
       - name: Add extra warmup for chatglm3-6b int4+fp32 & MiniCPM int4+fp16 int4+fp32 for more stable results
+        if: ${{ matrix.platform == 'perf-igpu' }}
         shell: bash
         run: |
           sed -i '/^\s*result = run_transformer_int4_gpu_win(repo_id, local_model_hub, in_out_pairs, warm_up, num_trials, num_beams, low_bit, cpu_embedding, batch_size, streaming)/ i\
@@ -723,8 +768,14 @@ jobs:
         shell: cmd
         run: |
           call conda activate igpu-perf
-          set SYCL_CACHE_PERSISTENT=1
-          set BIGDL_LLM_XMX_DISABLED=1
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set BIGDL_LLM_XMX_DISABLED=1
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          )
           REM for llava
           set TRANSFORMERS_OFFLINE=1
 
@@ -750,8 +801,14 @@ jobs:
           call conda activate igpu-perf
           pip install transformers==4.36.2
 
-          set SYCL_CACHE_PERSISTENT=1
-          set BIGDL_LLM_XMX_DISABLED=1
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set BIGDL_LLM_XMX_DISABLED=1
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          )
 
           cd python\llm\dev\benchmark\all-in-one
           move ..\..\..\test\benchmark\igpu-perf\32-32_int4_fp16_436.yaml config.yaml
@@ -775,8 +832,14 @@ jobs:
           call conda activate igpu-perf
           pip install transformers==4.38.2
 
-          set SYCL_CACHE_PERSISTENT=1
-          set BIGDL_LLM_XMX_DISABLED=1
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set BIGDL_LLM_XMX_DISABLED=1
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          )
 
           cd python\llm\dev\benchmark\all-in-one
           move ..\..\..\test\benchmark\igpu-perf\32-32_int4_fp16_438.yaml config.yaml
@@ -801,8 +864,14 @@ jobs:
           pip install transformers==4.43.1
           pip install trl
 
-          set SYCL_CACHE_PERSISTENT=1
-          set BIGDL_LLM_XMX_DISABLED=1
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set BIGDL_LLM_XMX_DISABLED=1
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          )
 
           cd python\llm\dev\benchmark\all-in-one
           move ..\..\..\test\benchmark\igpu-perf\32-32_int4_fp16_443.yaml config.yaml
@@ -829,8 +898,14 @@ jobs:
           pip install accelerate==0.33.0
           pip install trl
 
-          set SYCL_CACHE_PERSISTENT=1
-          set BIGDL_LLM_XMX_DISABLED=1
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set BIGDL_LLM_XMX_DISABLED=1
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          )
 
           cd python\llm\dev\benchmark\all-in-one
           move ..\..\..\test\benchmark\igpu-perf\32-32_int4_fp16_445.yaml config.yaml
@@ -876,8 +951,14 @@ jobs:
           call conda activate igpu-perf
           pip install transformers==4.37.0
 
-          set SYCL_CACHE_PERSISTENT=1
-          set BIGDL_LLM_XMX_DISABLED=1
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set BIGDL_LLM_XMX_DISABLED=1
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          )
           REM for llava
           set TRANSFORMERS_OFFLINE=1
 
@@ -903,8 +984,14 @@ jobs:
           call conda activate igpu-perf
           pip install transformers==4.36.2
 
-          set SYCL_CACHE_PERSISTENT=1
-          set BIGDL_LLM_XMX_DISABLED=1
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set BIGDL_LLM_XMX_DISABLED=1
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          )
 
           cd python\llm\dev\benchmark\all-in-one
           move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16_436.yaml config.yaml
@@ -928,8 +1015,14 @@ jobs:
           call conda activate igpu-perf
           pip install transformers==4.38.2
 
-          set SYCL_CACHE_PERSISTENT=1
-          set BIGDL_LLM_XMX_DISABLED=1
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set BIGDL_LLM_XMX_DISABLED=1
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          )
 
           cd python\llm\dev\benchmark\all-in-one
           move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16_438.yaml config.yaml
@@ -954,8 +1047,14 @@ jobs:
           pip install transformers==4.43.1
           pip install trl
 
-          set SYCL_CACHE_PERSISTENT=1
-          set BIGDL_LLM_XMX_DISABLED=1
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set BIGDL_LLM_XMX_DISABLED=1
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          )
 
           cd python\llm\dev\benchmark\all-in-one
           move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16_443.yaml config.yaml
@@ -982,8 +1081,14 @@ jobs:
           pip install accelerate==0.33.0
           pip install trl
 
-          set SYCL_CACHE_PERSISTENT=1
-          set BIGDL_LLM_XMX_DISABLED=1
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set BIGDL_LLM_XMX_DISABLED=1
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          )
 
           cd python\llm\dev\benchmark\all-in-one
           move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16_445.yaml config.yaml
@@ -1028,8 +1133,14 @@ jobs:
           call conda activate igpu-perf
           pip install transformers==4.37.0
 
-          set SYCL_CACHE_PERSISTENT=1
-          set BIGDL_LLM_XMX_DISABLED=1
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set BIGDL_LLM_XMX_DISABLED=1
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          )
           REM for llava
           set TRANSFORMERS_OFFLINE=1
 
@@ -1055,8 +1166,14 @@ jobs:
           call conda activate igpu-perf
           pip install transformers==4.36.2
 
-          set SYCL_CACHE_PERSISTENT=1
-          set BIGDL_LLM_XMX_DISABLED=1
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set BIGDL_LLM_XMX_DISABLED=1
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          )
 
           cd python\llm\dev\benchmark\all-in-one
           move ..\..\..\test\benchmark\igpu-perf\2048-256_int4_fp16_436.yaml config.yaml
@@ -1080,8 +1197,14 @@ jobs:
           call conda activate igpu-perf
           pip install transformers==4.38.2
 
-          set SYCL_CACHE_PERSISTENT=1
-          set BIGDL_LLM_XMX_DISABLED=1
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set BIGDL_LLM_XMX_DISABLED=1
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          )
 
           cd python\llm\dev\benchmark\all-in-one
           move ..\..\..\test\benchmark\igpu-perf\2048-256_int4_fp16_438.yaml config.yaml
@@ -1106,8 +1229,14 @@ jobs:
           pip install transformers==4.43.1
           pip install trl
 
-          set SYCL_CACHE_PERSISTENT=1
-          set BIGDL_LLM_XMX_DISABLED=1
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set BIGDL_LLM_XMX_DISABLED=1
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          )
 
           cd python\llm\dev\benchmark\all-in-one
           move ..\..\..\test\benchmark\igpu-perf\2048-256_int4_fp16_443.yaml config.yaml
@@ -1134,8 +1263,14 @@ jobs:
           pip install accelerate==0.33.0
           pip install trl
 
-          set SYCL_CACHE_PERSISTENT=1
-          set BIGDL_LLM_XMX_DISABLED=1
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set BIGDL_LLM_XMX_DISABLED=1
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          )
 
           cd python\llm\dev\benchmark\all-in-one
           move ..\..\..\test\benchmark\igpu-perf\2048-256_int4_fp16_445.yaml config.yaml
@@ -1180,8 +1315,14 @@ jobs:
           call conda activate igpu-perf
           pip install transformers==4.37.0
 
-          set SYCL_CACHE_PERSISTENT=1
-          set BIGDL_LLM_XMX_DISABLED=1
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set BIGDL_LLM_XMX_DISABLED=1
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          )
           REM for llava
           set TRANSFORMERS_OFFLINE=1
 
@@ -1195,20 +1336,29 @@ jobs:
 
           call conda deactivate
 
+      # Remove Qwen-VL-Chat on dGPU for 3072-384 tests: the transformers 4.36 prepare/test steps below run on perf-igpu only
       - name: Prepare igpu perf test for transformers 4.36 (3072-384 int4+fp16)
+        if: ${{ matrix.platform == 'perf-igpu' }}
         shell: bash
         run: |
           sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py
           sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16_436.yaml
 
       - name: Test on igpu for transformers 4.36 (3072-384 int4+fp16)
+        if: ${{ matrix.platform == 'perf-igpu' }}
         shell: cmd
         run: |
           call conda activate igpu-perf
           pip install transformers==4.36.2
 
-          set SYCL_CACHE_PERSISTENT=1
-          set BIGDL_LLM_XMX_DISABLED=1
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set BIGDL_LLM_XMX_DISABLED=1
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          )
 
           cd python\llm\dev\benchmark\all-in-one
           move ..\..\..\test\benchmark\igpu-perf\3072-384_int4_fp16_436.yaml config.yaml
@@ -1223,7 +1373,12 @@ jobs:
       - name: Prepare igpu perf test for transformers 4.38 (3072-384 int4+fp16)
         shell: bash
         run: |
-          sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py
+          # the 3072-384 transformers 4.36 steps are gated to perf-igpu, so the dGPU suffix is one step behind iGPU
+          case "${{ matrix.platform }}" in
+            perf-igpu) suffix_from=test2 suffix_to=test3 ;;
+            perf-dgpu) suffix_from=test1 suffix_to=test2 ;;
+          esac
+          sed -i "s/{today}_${suffix_from}/{today}_${suffix_to}/g" python/llm/dev/benchmark/all-in-one/run.py
           sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16_438.yaml
 
       - name: Test on igpu for transformers 4.38 (3072-384 int4+fp16)
@@ -1232,15 +1387,26 @@ jobs:
           call conda activate igpu-perf
           pip install transformers==4.38.2
 
-          set SYCL_CACHE_PERSISTENT=1
-          set BIGDL_LLM_XMX_DISABLED=1
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set BIGDL_LLM_XMX_DISABLED=1
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          )
 
           cd python\llm\dev\benchmark\all-in-one
           move ..\..\..\test\benchmark\igpu-perf\3072-384_int4_fp16_438.yaml config.yaml
           set PYTHONIOENCODING=utf-8
           python run.py >> %CSV_SAVE_PATH%\3072-384_int4_fp16\log\%LOG_FILE% 2>&1
           if %ERRORLEVEL% neq 0 (exit /b 1)
-          python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test3
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test3
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test2
+          )
           if %ERRORLEVEL% neq 0 (exit /b 1)
 
           call conda deactivate
@@ -1248,7 +1414,12 @@ jobs:
       - name: Prepare igpu perf test for transformers 4.43 (3072-384 int4+fp16)
         shell: bash
         run: |
-          sed -i 's/{today}_test3/{today}_test4/g' python/llm/dev/benchmark/all-in-one/run.py
+          # the 3072-384 transformers 4.36 steps are gated to perf-igpu, so the dGPU suffix is one step behind iGPU
+          case "${{ matrix.platform }}" in
+            perf-igpu) suffix_from=test3 suffix_to=test4 ;;
+            perf-dgpu) suffix_from=test2 suffix_to=test3 ;;
+          esac
+          sed -i "s/{today}_${suffix_from}/{today}_${suffix_to}/g" python/llm/dev/benchmark/all-in-one/run.py
           sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16_443.yaml
 
       - name: Test on igpu for transformers 4.43 (3072-384 int4+fp16)
@@ -1258,15 +1429,26 @@ jobs:
           pip install transformers==4.43.1
           pip install trl
 
-          set SYCL_CACHE_PERSISTENT=1
-          set BIGDL_LLM_XMX_DISABLED=1
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set BIGDL_LLM_XMX_DISABLED=1
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          )
 
           cd python\llm\dev\benchmark\all-in-one
           move ..\..\..\test\benchmark\igpu-perf\3072-384_int4_fp16_443.yaml config.yaml
           set PYTHONIOENCODING=utf-8
           python run.py >> %CSV_SAVE_PATH%\3072-384_int4_fp16\log\%LOG_FILE% 2>&1
           if %ERRORLEVEL% neq 0 (exit /b 1)
-          python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test4
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test4
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test3
+          )
           if %ERRORLEVEL% neq 0 (exit /b 1)
 
           pip uninstall trl -y
@@ -1275,7 +1457,12 @@ jobs:
       - name: Prepare igpu perf test for transformers 4.45 (3072-384 int4+fp16)
         shell: bash
         run: |
-          sed -i 's/{today}_test4/{today}_test5/g' python/llm/dev/benchmark/all-in-one/run.py
+          # the 3072-384 transformers 4.36 steps are gated to perf-igpu, so the dGPU suffix is one step behind iGPU
+          case "${{ matrix.platform }}" in
+            perf-igpu) suffix_from=test4 suffix_to=test5 ;;
+            perf-dgpu) suffix_from=test3 suffix_to=test4 ;;
+          esac
+          sed -i "s/{today}_${suffix_from}/{today}_${suffix_to}/g" python/llm/dev/benchmark/all-in-one/run.py
           sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16_445.yaml
 
       - name: Test on igpu for transformers 4.45 (3072-384 int4+fp16)
@@ -1286,15 +1473,26 @@ jobs:
           pip install accelerate==0.33.0
           pip install trl
 
-          set SYCL_CACHE_PERSISTENT=1
-          set BIGDL_LLM_XMX_DISABLED=1
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set BIGDL_LLM_XMX_DISABLED=1
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          )
 
           cd python\llm\dev\benchmark\all-in-one
           move ..\..\..\test\benchmark\igpu-perf\3072-384_int4_fp16_445.yaml config.yaml
           set PYTHONIOENCODING=utf-8
           python run.py >> %CSV_SAVE_PATH%\3072-384_int4_fp16\log\%LOG_FILE% 2>&1
           if %ERRORLEVEL% neq 0 (exit /b 1)
-          python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test5
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test5
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test4
+          )
           if %ERRORLEVEL% neq 0 (exit /b 1)
 
           pip uninstall trl -y
@@ -1323,7 +1521,12 @@ jobs:
         shell: bash
         run: |
           sed -i 's/3072-384/4096-512/g' python/llm/dev/benchmark/all-in-one/run.py
-          sed -i 's/{today}_test5/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py
+          if [ ${{ matrix.platform }} == "perf-igpu" ]; then
+            sed -i 's/{today}_test5/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py
+          fi
+          if [ ${{ matrix.platform }} == "perf-dgpu" ]; then
+            sed -i 's/{today}_test4/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py
+          fi
           sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16.yaml
       
       - name: Test on igpu (4096-512 int4+fp16)
@@ -1332,8 +1535,14 @@ jobs:
           call conda activate igpu-perf
           pip install transformers==4.37.0
 
-          set SYCL_CACHE_PERSISTENT=1
-          set BIGDL_LLM_XMX_DISABLED=1
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set BIGDL_LLM_XMX_DISABLED=1
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          )
           REM for llava
           set TRANSFORMERS_OFFLINE=1
 
@@ -1359,8 +1568,14 @@ jobs:
           call conda activate igpu-perf
           pip install transformers==4.38.2
 
-          set SYCL_CACHE_PERSISTENT=1
-          set BIGDL_LLM_XMX_DISABLED=1
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set BIGDL_LLM_XMX_DISABLED=1
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          )
 
           cd python\llm\dev\benchmark\all-in-one
           move ..\..\..\test\benchmark\igpu-perf\4096-512_int4_fp16_438.yaml config.yaml
@@ -1385,8 +1600,14 @@ jobs:
           pip install transformers==4.43.1
           pip install trl
 
-          set SYCL_CACHE_PERSISTENT=1
-          set BIGDL_LLM_XMX_DISABLED=1
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set BIGDL_LLM_XMX_DISABLED=1
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          )
 
           cd python\llm\dev\benchmark\all-in-one
           move ..\..\..\test\benchmark\igpu-perf\4096-512_int4_fp16_443.yaml config.yaml
@@ -1413,8 +1634,14 @@ jobs:
           pip install accelerate==0.33.0
           pip install trl
 
-          set SYCL_CACHE_PERSISTENT=1
-          set BIGDL_LLM_XMX_DISABLED=1
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set BIGDL_LLM_XMX_DISABLED=1
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          )
 
           cd python\llm\dev\benchmark\all-in-one
           move ..\..\..\test\benchmark\igpu-perf\4096-512_int4_fp16_445.yaml config.yaml
@@ -1447,6 +1674,7 @@ jobs:
 
       # load_low_bit 1024-128 int4+fp16 
       - name: Prepare igpu perf test (load_low_bit 1024-128 int4+fp16)
+        if: ${{ matrix.platform == 'perf-igpu' }}
         shell: bash
         run: |
           sed -i 's/4096-512/1024-128/g' python/llm/dev/benchmark/all-in-one/run.py
@@ -1454,13 +1682,20 @@ jobs:
           sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit.yaml
 
       - name: Test on igpu (load_low_bit 1024-128 int4+fp16)
+        if: ${{ matrix.platform == 'perf-igpu' }}
         shell: cmd
         run: |
           call conda activate igpu-perf
           pip install transformers==4.37.0
 
-          set SYCL_CACHE_PERSISTENT=1
-          set BIGDL_LLM_XMX_DISABLED=1
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set BIGDL_LLM_XMX_DISABLED=1
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          )
           REM for llava
           set TRANSFORMERS_OFFLINE=1
 
@@ -1475,19 +1710,27 @@ jobs:
           call conda deactivate
 
       - name: Prepare igpu perf test for transformers 4.36 (load_low_bit 1024-128 int4+fp16)
+        if: ${{ matrix.platform == 'perf-igpu' }} # load_low_bit steps are skipped on the perf-dgpu matrix entry
         shell: bash
         run: |
           sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py
           sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit_436.yaml
 
       - name: Test on igpu for transformers 4.36 (load_low_bit 1024-128 int4+fp16)
+        if: ${{ matrix.platform == 'perf-igpu' }}
         shell: cmd
         run: |
           call conda activate igpu-perf
           pip install transformers==4.36.2
 
-          set SYCL_CACHE_PERSISTENT=1
-          set BIGDL_LLM_XMX_DISABLED=1
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set BIGDL_LLM_XMX_DISABLED=1
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          )
 
           cd python\llm\dev\benchmark\all-in-one
           move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16_loadlowbit_436.yaml config.yaml
@@ -1500,19 +1743,27 @@ jobs:
           call conda deactivate
 
       - name: Prepare igpu perf test for transformers 4.38 (load_low_bit 1024-128 int4+fp16)
+        if: ${{ matrix.platform == 'perf-igpu' }} # load_low_bit steps are skipped on the perf-dgpu matrix entry
         shell: bash
         run: |
           sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py
           sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit_438.yaml
 
       - name: Test on igpu for transformers 4.38 (load_low_bit 1024-128 int4+fp16)
+        if: ${{ matrix.platform == 'perf-igpu' }}
         shell: cmd
         run: |
           call conda activate igpu-perf
           pip install transformers==4.38.2
 
-          set SYCL_CACHE_PERSISTENT=1
-          set BIGDL_LLM_XMX_DISABLED=1
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set BIGDL_LLM_XMX_DISABLED=1
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          )
 
           cd python\llm\dev\benchmark\all-in-one
           move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16_loadlowbit_438.yaml config.yaml
@@ -1525,20 +1776,28 @@ jobs:
           call conda deactivate
 
       - name: Prepare igpu perf test for transformers 4.43 (load_low_bit 1024-128 int4+fp16)
+        if: ${{ matrix.platform == 'perf-igpu' }}  # run only on the perf-igpu matrix entry
         shell: bash
         run: |
           sed -i 's/{today}_test3/{today}_test4/g' python/llm/dev/benchmark/all-in-one/run.py
           sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit_443.yaml
 
       - name: Test on igpu for transformers 4.43 (load_low_bit 1024-128 int4+fp16)
+        if: ${{ matrix.platform == 'perf-igpu' }}
         shell: cmd
         run: |
           call conda activate igpu-perf
           pip install transformers==4.43.1
           pip install trl
 
-          set SYCL_CACHE_PERSISTENT=1
-          set BIGDL_LLM_XMX_DISABLED=1
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set BIGDL_LLM_XMX_DISABLED=1
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          )
 
           cd python\llm\dev\benchmark\all-in-one
           move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16_loadlowbit_443.yaml config.yaml
@@ -1552,12 +1811,14 @@ jobs:
           call conda deactivate
 
       - name: Prepare igpu perf test for transformers 4.45 (load_low_bit 1024-128 int4+fp16)
+        if: ${{ matrix.platform == 'perf-igpu' }}  # run only on the perf-igpu matrix entry
         shell: bash
         run: |
           sed -i 's/{today}_test4/{today}_test5/g' python/llm/dev/benchmark/all-in-one/run.py
           sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit_445.yaml
 
       - name: Test on igpu for transformers 4.45 (load_low_bit 1024-128 int4+fp16)
+        if: ${{ matrix.platform == 'perf-igpu' }}
         shell: cmd
         run: |
           call conda activate igpu-perf
@@ -1565,8 +1826,14 @@ jobs:
           pip install accelerate==0.33.0
           pip install trl
 
-          set SYCL_CACHE_PERSISTENT=1
-          set BIGDL_LLM_XMX_DISABLED=1
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set BIGDL_LLM_XMX_DISABLED=1
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          )
 
           cd python\llm\dev\benchmark\all-in-one
           move ..\..\..\test\benchmark\igpu-perf\1024-128_int4_fp16_loadlowbit_445.yaml config.yaml
@@ -1581,6 +1848,7 @@ jobs:
           call conda deactivate
 
       - name: Concat csv and generate html (load_low_bit 1024-128 int4+fp16)
+        if: ${{ matrix.platform == 'perf-igpu' }}
         shell: cmd
         run: |
           call conda activate html-gen
@@ -1599,19 +1867,27 @@ jobs:
 
       # 1024-128
       - name: Prepare igpu perf test (1024-128)
+        if: ${{ matrix.platform == 'perf-igpu' }}  # run only on the perf-igpu matrix entry
         shell: bash
         run: |
           sed -i 's/{today}_test5/{today}_test1/g' python/llm/dev/benchmark/all-in-one/run.py
           sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128.yaml
 
       - name: Test on igpu (1024-128)
+        if: ${{ matrix.platform == 'perf-igpu' }}
         shell: cmd
         run: |
           call conda activate igpu-perf
           pip install transformers==4.37.0
 
-          set SYCL_CACHE_PERSISTENT=1
-          set BIGDL_LLM_XMX_DISABLED=1
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set BIGDL_LLM_XMX_DISABLED=1
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          )
           REM for llava
           set TRANSFORMERS_OFFLINE=1
 
@@ -1626,19 +1902,27 @@ jobs:
           call conda deactivate
 
       - name: Prepare igpu perf test for transformers 4.36 (1024-128)
+        if: ${{ matrix.platform == 'perf-igpu' }}  # run only on the perf-igpu matrix entry
         shell: bash
         run: |
           sed -i 's/{today}_test1/{today}_test2/g' python/llm/dev/benchmark/all-in-one/run.py
           sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_436.yaml
 
       - name: Test on igpu for transformers 4.36 (1024-128)
+        if: ${{ matrix.platform == 'perf-igpu' }}
         shell: cmd
         run: |
           call conda activate igpu-perf
           pip install transformers==4.36.2
 
-          set SYCL_CACHE_PERSISTENT=1
-          set BIGDL_LLM_XMX_DISABLED=1
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set BIGDL_LLM_XMX_DISABLED=1
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          )
 
           cd python\llm\dev\benchmark\all-in-one
           move ..\..\..\test\benchmark\igpu-perf\1024-128_436.yaml config.yaml
@@ -1651,19 +1935,27 @@ jobs:
           call conda deactivate
 
       - name: Prepare igpu perf test for transformers 4.38 (1024-128)
+        if: ${{ matrix.platform == 'perf-igpu' }}  # run only on the perf-igpu matrix entry
         shell: bash
         run: |
           sed -i 's/{today}_test2/{today}_test3/g' python/llm/dev/benchmark/all-in-one/run.py
           sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_438.yaml
 
       - name: Test on igpu for transformers 4.38 (1024-128)
+        if: ${{ matrix.platform == 'perf-igpu' }}
         shell: cmd
         run: |
           call conda activate igpu-perf
           pip install transformers==4.38.2
 
-          set SYCL_CACHE_PERSISTENT=1
-          set BIGDL_LLM_XMX_DISABLED=1
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set BIGDL_LLM_XMX_DISABLED=1
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          )
 
           cd python\llm\dev\benchmark\all-in-one
           move ..\..\..\test\benchmark\igpu-perf\1024-128_438.yaml config.yaml
@@ -1676,20 +1968,28 @@ jobs:
           call conda deactivate
 
       - name: Prepare igpu perf test for transformers 4.43 (1024-128)
+        if: ${{ matrix.platform == 'perf-igpu' }}  # run only on the perf-igpu matrix entry
         shell: bash
         run: |
           sed -i 's/{today}_test3/{today}_test4/g' python/llm/dev/benchmark/all-in-one/run.py
           sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_443.yaml
 
       - name: Test on igpu for transformers 4.43 (1024-128)
+        if: ${{ matrix.platform == 'perf-igpu' }}
         shell: cmd
         run: |
           call conda activate igpu-perf
           pip install transformers==4.43.1
           pip install trl
 
-          set SYCL_CACHE_PERSISTENT=1
-          set BIGDL_LLM_XMX_DISABLED=1
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set BIGDL_LLM_XMX_DISABLED=1
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          )
 
           cd python\llm\dev\benchmark\all-in-one
           move ..\..\..\test\benchmark\igpu-perf\1024-128_443.yaml config.yaml
@@ -1703,12 +2003,14 @@ jobs:
           call conda deactivate
 
       - name: Prepare igpu perf test for transformers 4.45 (1024-128)
+        if: ${{ matrix.platform == 'perf-igpu' }}  # run only on the perf-igpu matrix entry
         shell: bash
         run: |
           sed -i 's/{today}_test4/{today}_test5/g' python/llm/dev/benchmark/all-in-one/run.py
           sed -i "s/path to your local model hub/$MODEL_HUB_DIR/g" python/llm/test/benchmark/igpu-perf/1024-128_445.yaml
 
       - name: Test on igpu for transformers 4.45 (1024-128)
+        if: ${{ matrix.platform == 'perf-igpu' }}
         shell: cmd
         run: |
           call conda activate igpu-perf
@@ -1716,8 +2018,14 @@ jobs:
           pip install accelerate==0.33.0
           pip install trl
 
-          set SYCL_CACHE_PERSISTENT=1
-          set BIGDL_LLM_XMX_DISABLED=1
+          if "${{ matrix.platform }}"=="perf-igpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set BIGDL_LLM_XMX_DISABLED=1
+          )
+          if "${{ matrix.platform }}"=="perf-dgpu" (
+            set SYCL_CACHE_PERSISTENT=1
+            set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+          )
 
           cd python\llm\dev\benchmark\all-in-one
           move ..\..\..\test\benchmark\igpu-perf\1024-128_445.yaml config.yaml
@@ -1732,6 +2040,7 @@ jobs:
           call conda deactivate
 
       - name: Concat csv and generate html (1024-128)
+        if: ${{ matrix.platform == 'perf-igpu' }}
         shell: cmd
         run: |
           call conda activate html-gen
@@ -1755,14 +2064,18 @@ jobs:
         run: |
           cd %CSV_SAVE_PATH%
           IF "${{ github.event_name }}"=="schedule" (
-            for %%f in (*.html) do (
-                curl -T "%%f" %FTP_IGPU_NIGHTLY_PERF_PATH%
+            IF "${{ matrix.platform }}"=="perf-igpu" (
+              for %%f in (*.html) do (
+                  curl -T "%%f" %FTP_IGPU_NIGHTLY_PERF_PATH%
+              )
             )
           )
           IF "${{ github.event_name }}"=="workflow_dispatch" (
             IF "${{ inputs.checkout-ref }}"=="main" (
-              for %%f in (*.html) do (
-                  curl -T "%%f" %FTP_IGPU_NIGHTLY_PERF_PATH%
+              IF "${{ matrix.platform }}"=="perf-igpu" (
+                for %%f in (*.html) do (
+                    curl -T "%%f" %FTP_IGPU_NIGHTLY_PERF_PATH%
+                )
               )
             )
           )