diff --git a/.github/workflows/llm-binary-build.yml b/.github/workflows/llm-binary-build.yml index 4a46500f..9e6366d8 100644 --- a/.github/workflows/llm-binary-build.yml +++ b/.github/workflows/llm-binary-build.yml @@ -1,5 +1,10 @@ name: LLM Binary Build +# Cancel previous runs in the PR when you push new commits +concurrency: + group: ${{ github.workflow }}-llm-binary-build-${{ github.event.pull_request.number || github.run_id }} + cancel-in-progress: true + # Controls when the action will run. on: # Triggers the workflow on push or pull request events but only for the main branch @@ -17,8 +22,21 @@ on: # A workflow run is made up of one or more jobs that can run sequentially or in parallel jobs: + check-linux-avx2-artifact: + runs-on: ubuntu-latest + outputs: + if-exists: ${{steps.check_artifact.outputs.exists}} + steps: + - name: Check if built + id: check_artifact + uses: xSAVIKx/artifact-exists-action@v0 + with: + name: linux-avx2 + linux-build-avx2: runs-on: [self-hosted, AVX2, centos7] + needs: check-linux-avx2-artifact + if: needs.check-linux-avx2-artifact.outputs.if-exists == 'false' steps: - name: Set access token run: | @@ -64,9 +82,22 @@ jobs: shell: bash run: | make clean + + check-linux-avx512-artifact: + runs-on: ubuntu-latest + outputs: + if-exists: ${{steps.check_artifact.outputs.exists}} + steps: + - name: Check if built + id: check_artifact + uses: xSAVIKx/artifact-exists-action@v0 + with: + name: linux-avx512 linux-build-avx512: runs-on: [self-hosted, AVX512, centos7] + needs: check-linux-avx512-artifact + if: needs.check-linux-avx512-artifact.outputs.if-exists == 'false' steps: - name: Set access token run: | @@ -113,8 +144,21 @@ jobs: run: | make clean + check-windows-avx2-artifact: + runs-on: ubuntu-latest + outputs: + if-exists: ${{steps.check_artifact.outputs.exists}} + steps: + - name: Check if built + id: check_artifact + uses: xSAVIKx/artifact-exists-action@v0 + with: + name: windows-avx2 + windows-build-avx2: runs-on: [self-hosted, 
Windows] + needs: check-windows-avx2-artifact + if: needs.check-windows-avx2-artifact.outputs.if-exists == 'false' steps: - name: Set access token run: | @@ -142,8 +186,21 @@ jobs: path: | build/Release + check-windows-avx2-vnni-artifact: + runs-on: ubuntu-latest + outputs: + if-exists: ${{steps.check_artifact.outputs.exists}} + steps: + - name: Check if built + id: check_artifact + uses: xSAVIKx/artifact-exists-action@v0 + with: + name: windows-avx2-vnni + windows-build-avx2-vnni: runs-on: [self-hosted, Windows] + needs: check-windows-avx2-vnni-artifact + if: needs.check-windows-avx2-vnni-artifact.outputs.if-exists == 'false' steps: - name: Set access token run: | diff --git a/.github/workflows/llm-nightly-test.yml b/.github/workflows/llm-nightly-test.yml index e3120d4d..433905a9 100644 --- a/.github/workflows/llm-nightly-test.yml +++ b/.github/workflows/llm-nightly-test.yml @@ -1,5 +1,10 @@ name: LLM Nightly Tests +# Cancel previous runs in the PR when you push new commits +concurrency: + group: ${{ github.workflow }}-llm-nightly-test-${{ github.event.pull_request.number || github.run_id }} + cancel-in-progress: true + # Controls when the action will run. 
on: schedule: @@ -75,83 +80,7 @@ jobs: env: ANALYTICS_ZOO_ROOT: ${{ github.workspace }} - llm-nightly-inference-test-avx512: - runs-on: [ self-hosted, llm, AVX512, ubuntu-20.04-lts ] - needs: llm-cpp-build - strategy: - fail-fast: false - matrix: - python-version: ["3.9"] - env: - INT4_CKPT_DIR: ./llm/ggml-actions/stable - LLAMA_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_llama_7b_q4_0.bin - GPTNEOX_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_redpajama_7b_q4_0.bin - BLOOM_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_bloom_7b_q4_0.bin - STARCODER_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_santacoder_1b_q4_0.bin - - LLM_DIR: ./llm - ORIGINAL_CHATGLM2_6B_PATH: ./llm/chatglm2-6b/ - - THREAD_NUM: 24 - steps: - - uses: actions/checkout@v2 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install --upgrade setuptools==58.0.4 - python -m pip install --upgrade wheel - - - name: Download llm binary - uses: ./.github/actions/llm/download-llm-binary - - - name: Install BigDL-LLM - uses: ./.github/actions/llm/setup-llm-env - env: - ANALYTICS_ZOO_ROOT: ${{ github.workspace }} - - - name: Download ckpt & original models - run: | - if [ ! -d $LLAMA_INT4_CKPT_PATH ]; then - echo "Directory $LLAMA_INT4_CKPT_PATH not found. Downloading from FTP server..." - wget --no-verbose $LLM_FTP_URL/${LLAMA_INT4_CKPT_PATH:2} -P $INT4_CKPT_DIR - fi - if [ ! -d $GPTNEOX_INT4_CKPT_PATH ]; then - echo "Directory $GPTNEOX_INT4_CKPT_PATH not found. Downloading from FTP server..." - wget --no-verbose $LLM_FTP_URL/${GPTNEOX_INT4_CKPT_PATH:2} -P $INT4_CKPT_DIR - fi - if [ ! -d $BLOOM_INT4_CKPT_PATH ]; then - echo "Directory $BLOOM_INT4_CKPT_PATH not found. Downloading from FTP server..." - wget --no-verbose $LLM_FTP_URL/${BLOOM_INT4_CKPT_PATH:2} -P $INT4_CKPT_DIR - fi - if [ ! 
-d $STARCODER_INT4_CKPT_PATH ]; then - echo "Directory $STARCODER_INT4_CKPT_PATH not found. Downloading from FTP server..." - wget --no-verbose $LLM_FTP_URL/${STARCODER_INT4_CKPT_PATH:2} -P $INT4_CKPT_DIR - fi - if [ ! -d $ORIGINAL_CHATGLM2_6B_PATH ]; then - echo "Directory $ORIGINAL_CHATGLM2_6B_PATH not found. Downloading from FTP server..." - wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/${ORIGINAL_CHATGLM2_6B_PATH:2} -P $LLM_DIR - fi - - - name: Run LLM cli test - uses: ./.github/actions/llm/cli-test - env: - ANALYTICS_ZOO_ROOT: ${{ github.workspace }} - - - name: Run LLM inference test - uses: ./.github/actions/llm/inference-test - env: - ANALYTICS_ZOO_ROOT: ${{ github.workspace }} - - - name: Run LLM langchain test - uses: ./.github/actions/llm/langchain-test - env: - ANALYTICS_ZOO_ROOT: ${{ github.workspace }} - - - name: Clean up test environment - uses: ./.github/actions/llm/remove-llm-env - env: - ANALYTICS_ZOO_ROOT: ${{ github.workspace }} + llm-inference-test-on-linux: + uses: ./.github/workflows/llm_unit_tests_linux.yml + llm-inference-test-on-windows: + uses: ./.github/workflows/llm_unit_tests_windows.yml \ No newline at end of file diff --git a/.github/workflows/llm_unit_tests_linux.yml b/.github/workflows/llm_unit_tests_linux.yml index 5f940e3b..99dea304 100644 --- a/.github/workflows/llm_unit_tests_linux.yml +++ b/.github/workflows/llm_unit_tests_linux.yml @@ -2,7 +2,7 @@ name: LLM Unit Tests on Linux # Cancel previous runs in the PR when you push new commits concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }} + group: ${{ github.workflow }}-llm-linux-unittest-${{ github.event.pull_request.number || github.run_id }} cancel-in-progress: true # Controls when the action will run. 
@@ -48,13 +48,14 @@ env: jobs: llm-cpp-build: uses: ./.github/workflows/llm-binary-build.yml - llm-unit-test-linux-avx512: - runs-on: [ self-hosted, llm, AVX512, ubuntu-20.04-lts ] + llm-unit-test-linux: needs: llm-cpp-build strategy: fail-fast: false matrix: python-version: ["3.9"] + instruction: ["AVX512"] + runs-on: [ self-hosted, llm, "${{matrix.instruction}}", ubuntu-20.04-lts ] env: THREAD_NUM: 24 steps: diff --git a/.github/workflows/llm_unit_tests_windows.yml b/.github/workflows/llm_unit_tests_windows.yml index 89a22f7e..6688e886 100644 --- a/.github/workflows/llm_unit_tests_windows.yml +++ b/.github/workflows/llm_unit_tests_windows.yml @@ -2,7 +2,7 @@ name: LLM Unit Tests on Windows # Cancel previous runs in the PR when you push new commits concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }} + group: ${{ github.workflow }}-llm-win-unittest-${{ github.event.pull_request.number || github.run_id }} cancel-in-progress: true # Controls when the action will run.