diff --git a/.github/workflows/llm-nightly-test-windows.yml b/.github/workflows/llm-nightly-test-windows.yml
new file mode 100644
index 00000000..e2fb93cf
--- /dev/null
+++ b/.github/workflows/llm-nightly-test-windows.yml
@@ -0,0 +1,126 @@
+name: LLM Nightly Tests on Windows
+
+# Cancel previous runs in the PR when you push new commits
+concurrency:
+  group: ${{ github.workflow }}-llm-nightly-test-win-${{ github.event.pull_request.number || github.run_id }}
+  cancel-in-progress: true
+
+# Controls when the action will run.
+on:
+  # Triggers the workflow on push or pull request events but only for the main branch
+  push:
+    branches: [main]
+    paths:
+      - ".github/workflows/llm-nightly-test-windows.yml"
+  pull_request:
+    branches: [main]
+    paths:
+      - ".github/workflows/llm-nightly-test-windows.yml"
+  workflow_dispatch:
+  workflow_call:
+
+jobs:
+  llm-cpp-build:
+    uses: ./.github/workflows/llm-binary-build.yml
+  llm-nightly-test-windows:
+    runs-on: ["self-hosted", "Windows"]
+    needs: llm-cpp-build
+    steps:
+      - name: Set model directories
+        shell: bash
+        run: |
+          echo "ORIGIN_DIR=$(pwd)/../llm/origin-models" >> "$GITHUB_ENV"
+          echo "INT4_CKPT_DIR=$(pwd)/../llm/converted-models" >> "$GITHUB_ENV"
+      - name: Create model directories
+        shell: bash
+        run: |
+          if [ ! -d "$ORIGIN_DIR" ]; then
+            mkdir -p "$ORIGIN_DIR"
+          fi
+          if [ ! -d "$INT4_CKPT_DIR" ]; then
+            mkdir -p "$INT4_CKPT_DIR"
+          fi
+      - name: Set environment variables
+        shell: bash
+        run: |
+          echo "LLAMA_ORIGIN_PATH=${ORIGIN_DIR}/gpt4all-7b-hf" >> "$GITHUB_ENV"
+          echo "GPTNEOX_ORIGIN_PATH=${ORIGIN_DIR}/gptneox-7b-redpajama-bf16" >> "$GITHUB_ENV"
+          echo "BLOOM_ORIGIN_PATH=${ORIGIN_DIR}/bloomz-7b1" >> "$GITHUB_ENV"
+          echo "STARCODER_ORIGIN_PATH=${ORIGIN_DIR}/gpt_bigcode-santacoder" >> "$GITHUB_ENV"
+
+          echo "LLAMA_INT4_CKPT_PATH=${INT4_CKPT_DIR}/bigdl_llm_llama_q4_0.bin" >> "$GITHUB_ENV"
+          echo "GPTNEOX_INT4_CKPT_PATH=${INT4_CKPT_DIR}/bigdl_llm_gptneox_q4_0.bin" >> "$GITHUB_ENV"
+          echo "BLOOM_INT4_CKPT_PATH=${INT4_CKPT_DIR}/bigdl_llm_bloom_q4_0.bin" >> "$GITHUB_ENV"
+          echo "STARCODER_INT4_CKPT_PATH=${INT4_CKPT_DIR}/bigdl_llm_starcoder_q4_0.bin" >> "$GITHUB_ENV"
+
+      - uses: actions/checkout@v3
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.9"
+      - name: Install dependencies
+        shell: bash
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install --upgrade setuptools==58.0.4
+          python -m pip install --upgrade wheel
+      - name: Download llm binary
+        uses: ./.github/actions/llm/download-llm-binary
+      - name: Install BigDL-LLM
+        shell: bash
+        run: |
+          pip install requests
+          bash python/llm/dev/release_default_windows.sh default false
+          whl_name=$(ls python/llm/dist)
+          pip install -i https://pypi.python.org/simple --force-reinstall "python/llm/dist/${whl_name}[all]"
+          pip install pytest
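+      # Each block below pulls a model from the FTP mirror only when ORIGIN_DIR does
+      # not already hold a local copy, so reruns on the same self-hosted runner reuse the cache.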
+      - name: Download origin models
+        shell: bash
+        run: |
+          if [ ! -d $LLAMA_ORIGIN_PATH ]; then
+            echo "Directory $LLAMA_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/gpt4all-7b-hf -P $ORIGIN_DIR
+          fi
+          if [ ! -d $GPTNEOX_ORIGIN_PATH ]; then
+            echo "Directory $GPTNEOX_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/gptneox-7b-redpajama-bf16 -P $ORIGIN_DIR
+          fi
+          if [ ! -d $BLOOM_ORIGIN_PATH ]; then
+            echo "Directory $BLOOM_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/bloomz-7b1 -P $ORIGIN_DIR
+          fi
+          if [ ! -d $STARCODER_ORIGIN_PATH ]; then
+            echo "Directory $STARCODER_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/gpt_bigcode-santacoder -P $ORIGIN_DIR
+          fi
+
+          # if [ ! -f $LLAMA_INT4_CKPT_PATH ]; then
+          #   echo "File $LLAMA_INT4_CKPT_PATH not found. Downloading from FTP server..."
+          #   wget --no-verbose $LLM_FTP_URL/${LLAMA_INT4_CKPT_PATH:2} -P $INT4_CKPT_DIR
+          # fi
+          # if [ ! -f $GPTNEOX_INT4_CKPT_PATH ]; then
+          #   echo "File $GPTNEOX_INT4_CKPT_PATH not found. Downloading from FTP server..."
+          #   wget --no-verbose $LLM_FTP_URL/${GPTNEOX_INT4_CKPT_PATH:2} -P $INT4_CKPT_DIR
+          # fi
+          # if [ ! -f $BLOOM_INT4_CKPT_PATH ]; then
+          #   echo "File $BLOOM_INT4_CKPT_PATH not found. Downloading from FTP server..."
+          #   wget --no-verbose $LLM_FTP_URL/${BLOOM_INT4_CKPT_PATH:2} -P $INT4_CKPT_DIR
+          # fi
+          # if [ ! -f $STARCODER_INT4_CKPT_PATH ]; then
+          #   echo "File $STARCODER_INT4_CKPT_PATH not found. Downloading from FTP server..."
+          #   wget --no-verbose $LLM_FTP_URL/${STARCODER_INT4_CKPT_PATH:2} -P $INT4_CKPT_DIR
+          # fi
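+          # Note on the disabled block: ${VAR:2} is bash substring expansion and drops
+          # the first two characters of $VAR. That yields a valid path under $LLM_FTP_URL
+          # only when the checkpoint path starts with "./"; here INT4_CKPT_DIR is absolute,
+          # which is presumably why the block is still commented out.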
+      - name: Test converting models
+        shell: bash
+        run: |
+          echo "Running the convert models tests..."
+          python -m pytest -s python/llm/test/convert/test_convert_model.py
+      - name: Test model inference
+        shell: bash
+        run: |
+          echo "Running the inference models tests..."
+          python -m pytest -s python/llm/test/inference/test_call_models.py
+      - name: Clean up environment
+        shell: bash
+        run: |
+          pip uninstall bigdl-llm -y
\ No newline at end of file
diff --git a/.github/workflows/llm-nightly-test.yml b/.github/workflows/llm-nightly-test.yml
index d2674b73..c39fad75 100644
--- a/.github/workflows/llm-nightly-test.yml
+++ b/.github/workflows/llm-nightly-test.yml
@@ -69,7 +69,7 @@ jobs:
 
     - name: Upload ckpt to ftp
       run: |
-        apt-get update && apt install tnftp
+        apt-get update && apt install -y tnftp
         tnftp -u ${LLM_FTP_URL}/${INT4_CKPT_DIR:1}/bigdl_llm_llama_7b_q4_0.bin $LLAMA_INT4_CKPT_PATH
         tnftp -u ${LLM_FTP_URL}/${INT4_CKPT_DIR:1}/bigdl_llm_redpajama_7b_q4_0.bin $GPTNEOX_INT4_CKPT_PATH
         tnftp -u ${LLM_FTP_URL}/${INT4_CKPT_DIR:1}/bigdl_llm_bloom_7b_q4_0.bin $BLOOM_INT4_CKPT_PATH
diff --git a/.github/workflows/llm_unit_tests_linux.yml b/.github/workflows/llm_unit_tests_linux.yml
index 53f62c09..6b0e2ab3 100644
--- a/.github/workflows/llm_unit_tests_linux.yml
+++ b/.github/workflows/llm_unit_tests_linux.yml
@@ -5,33 +5,33 @@ concurrency:
   group: ${{ github.workflow }}-llm-linux-unittest-${{ github.event.pull_request.number || github.run_id }}
   cancel-in-progress: true
 
-# Controls when the action will run. 
+# Controls when the action will run.
 on:
   # Triggers the workflow on push or pull request events but only for the main branch
   push:
-    branches: [ main ]
+    branches: [main]
     paths:
-      - 'python/llm/**'
-      - '.github/workflows/llm_unit_tests_linux.yml'
-      - '.github/workflows/llm-binary-build.yml'
-      - '.github/actions/llm/setup-llm-env/action.yml'
-      - '.github/actions/llm/remove-llm-env/action.yml'
-      - '.github/actions/llm/cli-test/action.yml'
-      - '.github/actions/llm/inference-test/action.yml'
-      - '.github/actions/llm/langchain-test/action.yml'
-      - '.github/actions/llm/download-llm-binary/action.yml'
+      - "python/llm/**"
+      - ".github/workflows/llm_unit_tests_linux.yml"
+      - ".github/workflows/llm-binary-build.yml"
+      - ".github/actions/llm/setup-llm-env/action.yml"
+      - ".github/actions/llm/remove-llm-env/action.yml"
+      - ".github/actions/llm/cli-test/action.yml"
+      - ".github/actions/llm/inference-test/action.yml"
+      - ".github/actions/llm/langchain-test/action.yml"
+      - ".github/actions/llm/download-llm-binary/action.yml"
   pull_request:
-    branches: [ main ]
+    branches: [main]
     paths:
-      - 'python/llm/**'
-      - '.github/workflows/llm_unit_tests_linux.yml'
-      - '.github/workflows/llm-binary-build.yml'
-      - '.github/actions/llm/setup-llm-env/action.yml'
-      - '.github/actions/llm/remove-llm-env/action.yml'
-      - '.github/actions/llm/cli-test/action.yml'
-      - '.github/actions/llm/inference-test/action.yml'
-      - '.github/actions/llm/langchain-test/action.yml'
-      - '.github/actions/llm/download-llm-binary/action.yml'
+      - "python/llm/**"
+      - ".github/workflows/llm_unit_tests_linux.yml"
+      - ".github/workflows/llm-binary-build.yml"
+      - ".github/actions/llm/setup-llm-env/action.yml"
+      - ".github/actions/llm/remove-llm-env/action.yml"
+      - ".github/actions/llm/cli-test/action.yml"
+      - ".github/actions/llm/inference-test/action.yml"
+      - ".github/actions/llm/langchain-test/action.yml"
+      - ".github/actions/llm/download-llm-binary/action.yml"
   workflow_dispatch:
   workflow_call:
 
@@ -49,7 +49,6 @@ env:
   ORIGINAL_WHISPER_TINY_PATH: ./llm/whisper-tiny/
   SPEECH_DATASET_PATH: ./llm/datasets/librispeech_asr_dummy
 
-
 # A workflow run is made up of one or more jobs that can run sequentially or in parallel
 jobs:
   llm-cpp-build:
@@ -61,7 +60,7 @@ jobs:
       matrix:
         python-version: ["3.9"]
         instruction: ["AVX512"]
-    runs-on: [ self-hosted, llm,"${{matrix.instruction}}", ubuntu-20.04-lts ]
+    runs-on: [self-hosted, llm, "${{matrix.instruction}}", ubuntu-20.04-lts]
     env:
       THREAD_NUM: 24
     steps:
diff --git a/.github/workflows/llm_unit_tests_windows.yml b/.github/workflows/llm_unit_tests_windows.yml
index e453d33e..bddee69d 100644
--- a/.github/workflows/llm_unit_tests_windows.yml
+++ b/.github/workflows/llm_unit_tests_windows.yml
@@ -5,19 +5,19 @@ concurrency:
   group: ${{ github.workflow }}-llm-win-unittest-${{ github.event.pull_request.number || github.run_id }}
   cancel-in-progress: true
 
-# Controls when the action will run. 
+# Controls when the action will run.
 on:
   # Triggers the workflow on push or pull request events but only for the main branch
   push:
-    branches: [ main ]
+    branches: [main]
     paths:
-      - 'python/llm/**'
-      - '.github/workflows/llm_unit_tests_windows.yml'
+      - "python/llm/**"
+      - ".github/workflows/llm_unit_tests_windows.yml"
   pull_request:
-    branches: [ main ]
+    branches: [main]
     paths:
-      - 'python/llm/**'
-      - '.github/workflows/llm_unit_tests_windows.yml'
+      - "python/llm/**"
+      - ".github/workflows/llm_unit_tests_windows.yml"
   workflow_dispatch:
   workflow_call:
 
@@ -54,7 +54,7 @@ jobs:
         pip install -i https://pypi.python.org/simple python/llm/dist/bigdl_llm*.whl
       env:
         ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
-      
+
       - name: Run LLM install (all) test
         shell: bash
         run: |
diff --git a/python/llm/test/convert/test_convert_model.py b/python/llm/test/convert/test_convert_model.py
index aeb4fa80..9959f2bc 100644
--- a/python/llm/test/convert/test_convert_model.py
+++ b/python/llm/test/convert/test_convert_model.py
@@ -68,8 +68,10 @@ class TestConvertModel(TestCase):
         model = AutoModelForCausalLM.from_pretrained(llama_model_path, load_in_4bit=True)
         tempdir = tempfile.mkdtemp(dir=output_dir)
         model.save_pretrained(tempdir)
-        model = AutoModelForCausalLM.from_pretrained(tempdir)
+        model = AutoModelForCausalLM.load_low_bit(tempdir)
         assert model is not None
+        import shutil
+        shutil.rmtree(tempdir)
 
     def test_transformer_convert_llama_q5(self):
         model = AutoModelForCausalLM.from_pretrained(llama_model_path,
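
Note on the test change above: once a model has been loaded with load_in_4bit=True and saved, the checkpoint on disk holds low-bit weights, so it is reloaded with load_low_bit() rather than from_pretrained(). A minimal sketch of the round trip the updated test exercises, assuming bigdl-llm's transformers-style wrapper and placeholder paths (llama_model_path, output_dir):

    import shutil
    import tempfile

    from bigdl.llm.transformers import AutoModelForCausalLM

    # Load the original checkpoint, quantizing the weights to int4 on the fly.
    model = AutoModelForCausalLM.from_pretrained(llama_model_path, load_in_4bit=True)

    # Persist the quantized weights, then reload them without re-quantizing.
    tempdir = tempfile.mkdtemp(dir=output_dir)
    model.save_pretrained(tempdir)
    reloaded = AutoModelForCausalLM.load_low_bit(tempdir)
    assert reloaded is not None

    shutil.rmtree(tempdir)  # remove the temporary checkpoint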