diff --git a/.github/workflows/llm-nightly-test.yml b/.github/workflows/llm-nightly-test.yml
index 7dbef3b9..cca96aa6 100644
--- a/.github/workflows/llm-nightly-test.yml
+++ b/.github/workflows/llm-nightly-test.yml
@@ -14,7 +14,7 @@ on:
 # A workflow run is made up of one or more jobs that can run sequentially or in parallel
 jobs:
   llm-nightly-convert-test:
-    runs-on: [ self-hosted, Gondolin, ubuntu-20.04-lts ]
+    runs-on: [ self-hosted, llm, AVX512, ubuntu-20.04-lts ]
     strategy:
       fail-fast: false
       matrix:
@@ -41,34 +41,12 @@ jobs:
           python -m pip install --upgrade pip
           python -m pip install --upgrade setuptools==58.0.4
           python -m pip install --upgrade wheel
-
-      - name: Download original models
-        run: |
-          llm_ftp_user=${RUNNER_REPOSITORY_URL:35}
-          llm_ftp_user=$(echo $llm_ftp_user | tr '[:upper:]' '[:lower:]')
-          llm_ftp_url=${FTP_URI:0:6}${llm_ftp_user}${FTP_URI:9}:8821
-          if [ ! -d $LLAMA_ORIGIN_PATH ]; then
-            echo "Directory $LLAMA_ORIGIN_PATH not found. Downloading from FTP server..."
-            wget -r -nH --no-verbose --cut-dirs=1 $llm_ftp_url/llm/llama-7b-hf -P $ORIGIN_DIR
-          fi
-          if [ ! -d $GPTNEOX_ORIGIN_PATH ]; then
-            echo "Directory $GPTNEOX_ORIGIN_PATH not found. Downloading from FTP server..."
-            wget -r -nH --no-verbose --cut-dirs=1 $llm_ftp_url/llm/gptneox-7b-redpajama-bf16 -P $ORIGIN_DIR
-          fi
-          if [ ! -d $BLOOM_ORIGIN_PATH ]; then
-            echo "Directory $BLOOM_ORIGIN_PATH not found. Downloading from FTP server..."
-            wget -r -nH --no-verbose --cut-dirs=1 $llm_ftp_url/llm/bloomz-7b1 -P $ORIGIN_DIR
-          fi
-          if [ ! -d $STARCODER_ORIGIN_PATH ]; then
-            echo "Directory $STARCODER_ORIGIN_PATH not found. Downloading from FTP server..."
-            wget -r -nH --no-verbose --cut-dirs=1 $llm_ftp_url/llm/gpt_bigcode-santacoder -P $ORIGIN_DIR
-          fi
-      - name: Run LLM convert test
+      - name: Install BigDL-LLM
         shell: bash
         run: |
           $CONDA_HOME/bin/conda env remove -y -n bigdl-init-llm || true
-          $CONDA_HOME/bin/conda create -n bigdl-init-llm -y python==3.9 setuptools==58.0.4 -c ${GONDOLIN_CONDA_CHANNEL} --override-channels
+          $CONDA_HOME/bin/conda create -n bigdl-init-llm -y python==3.9 setuptools==58.0.4
           source $CONDA_HOME/bin/activate bigdl-init-llm
           $CONDA_HOME/bin/conda info
           pip install requests
@@ -76,19 +54,58 @@ jobs:
           whl_name=$(ls python/llm/dist)
           pip install -i https://pypi.python.org/simple "python/llm/dist/${whl_name}[all]"
           pip install pytest
-          bash python/llm/test/run-llm-convert-tests.sh
           source $CONDA_HOME/bin/deactivate
-          $CONDA_HOME/bin/conda remove -n bigdl-init-llm --all
+        env:
+          ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
+
+      - name: Download original models & convert
+        shell: bash
+        run: |
+          source $CONDA_HOME/bin/activate bigdl-init-llm
+
+          if [ ! -d $LLAMA_ORIGIN_PATH ]; then
+            echo "Directory $LLAMA_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/llama-7b-hf -P $ORIGIN_DIR
+          fi
+          bash python/llm/test/run-llm-convert-tests.sh llama
+          rm -rf $LLAMA_ORIGIN_PATH
+
+          if [ ! -d $GPTNEOX_ORIGIN_PATH ]; then
+            echo "Directory $GPTNEOX_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/gptneox-7b-redpajama-bf16 -P $ORIGIN_DIR
+          fi
+          bash python/llm/test/run-llm-convert-tests.sh gptneox
+          rm -rf $GPTNEOX_ORIGIN_PATH
+
+          if [ ! -d $BLOOM_ORIGIN_PATH ]; then
+            echo "Directory $BLOOM_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/bloomz-7b1 -P $ORIGIN_DIR
+          fi
+          bash python/llm/test/run-llm-convert-tests.sh bloom
+          rm -rf $BLOOM_ORIGIN_PATH
+
+          if [ ! -d $STARCODER_ORIGIN_PATH ]; then
+            echo "Directory $STARCODER_ORIGIN_PATH not found. Downloading from FTP server..."
+            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/llm/gpt_bigcode-santacoder -P $ORIGIN_DIR
+          fi
+          bash python/llm/test/run-llm-convert-tests.sh starcoder
+          rm -rf $STARCODER_ORIGIN_PATH
+
+          source $CONDA_HOME/bin/deactivate
         env:
           ANALYTICS_ZOO_ROOT: ${{ github.workspace }}

       - name: Upload ckpt to ftp
         run: |
           apt-get update && apt install tnftp
-          llm_ftp_user=${RUNNER_REPOSITORY_URL:35}
-          llm_ftp_user=$(echo $llm_ftp_user | tr '[:upper:]' '[:lower:]')
-          llm_ftp_url=${FTP_URI:0:6}${llm_ftp_user}${FTP_URI:9}:8821
-          tnftp -u ${llm_ftp_url}/${INT4_CKPT_DIR:1}/bigdl_llm_llama_7b_q4_0.bin $LLAMA_INT4_CKPT_PATH
-          tnftp -u ${llm_ftp_url}/${INT4_CKPT_DIR:1}/bigdl_llm_redpajama_7b_q4_0.bin $GPTNEOX_INT4_CKPT_PATH
-          tnftp -u ${llm_ftp_url}/${INT4_CKPT_DIR:1}/bigdl_llm_bloom_7b_q4_0.bin $BLOOM_INT4_CKPT_PATH
-          tnftp -u ${llm_ftp_url}/${INT4_CKPT_DIR:1}/bigdl_llm_santacoder_1b_q4_0.bin $STARCODER_INT4_CKPT_PATH
+          tnftp -u ${LLM_FTP_URL}/${INT4_CKPT_DIR:1}/bigdl_llm_llama_7b_q4_0.bin $LLAMA_INT4_CKPT_PATH
+          tnftp -u ${LLM_FTP_URL}/${INT4_CKPT_DIR:1}/bigdl_llm_redpajama_7b_q4_0.bin $GPTNEOX_INT4_CKPT_PATH
+          tnftp -u ${LLM_FTP_URL}/${INT4_CKPT_DIR:1}/bigdl_llm_bloom_7b_q4_0.bin $BLOOM_INT4_CKPT_PATH
+          tnftp -u ${LLM_FTP_URL}/${INT4_CKPT_DIR:1}/bigdl_llm_santacoder_1b_q4_0.bin $STARCODER_INT4_CKPT_PATH
+
+      - name: Clean up test environment
+        shell: bash
+        run: |
+          $CONDA_HOME/bin/conda env remove -y -n bigdl-init-llm || true
+        env:
+          ANALYTICS_ZOO_ROOT: ${{ github.workspace }}
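The "Download original models & convert" step above repeats the same download, convert, delete sequence once per model family so that only one original model occupies the runner's disk at a time. A minimal bash sketch of that pattern as a loop; the `MODEL_DIRS` mapping and the assumption that each `*_ORIGIN_PATH` equals `$ORIGIN_DIR/<ftp dir name>` (which matches the `wget -P` target above) are illustrative, not part of the patch:

```bash
#!/bin/bash
# Hypothetical loop-based equivalent of the per-model convert step above.
declare -A MODEL_DIRS=(
  [llama]="llama-7b-hf"
  [gptneox]="gptneox-7b-redpajama-bf16"
  [bloom]="bloomz-7b1"
  [starcoder]="gpt_bigcode-santacoder"
)
for family in llama gptneox bloom starcoder; do
  origin="$ORIGIN_DIR/${MODEL_DIRS[$family]}"
  if [ ! -d "$origin" ]; then
    echo "Directory $origin not found. Downloading from FTP server..."
    wget -r -nH --no-verbose --cut-dirs=1 "$LLM_FTP_URL/llm/${MODEL_DIRS[$family]}" -P "$ORIGIN_DIR"
  fi
  bash python/llm/test/run-llm-convert-tests.sh "$family"
  rm -rf "$origin"  # free disk space before the next model is downloaded
done
```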
diff --git a/.github/workflows/llm_unit_tests_linux.yml b/.github/workflows/llm_unit_tests_linux.yml
index 6a127993..02f2b535 100644
--- a/.github/workflows/llm_unit_tests_linux.yml
+++ b/.github/workflows/llm_unit_tests_linux.yml
@@ -21,8 +21,8 @@ on:
 # A workflow run is made up of one or more jobs that can run sequentially or in parallel
 jobs:
-  llm-unit-test-linux:
-    runs-on: [ self-hosted, Gondolin, ubuntu-20.04-lts ]
+  llm-unit-test-linux-avx512:
+    runs-on: [ self-hosted, llm, AVX512, ubuntu-20.04-lts ]
     strategy:
       fail-fast: false
       matrix:
@@ -33,6 +33,7 @@ jobs:
       GPTNEOX_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_redpajama_7b_q4_0.bin
       BLOOM_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_bloom_7b_q4_0.bin
       STARCODER_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_santacoder_1b_q4_0.bin
+      THREAD_NUM: 6
     steps:
       - uses: actions/checkout@v2
       - name: Set up Python ${{ matrix.python-version }}
@@ -49,7 +50,7 @@
         shell: bash
         run: |
           $CONDA_HOME/bin/conda env remove -y -n bigdl-init-llm || true
-          $CONDA_HOME/bin/conda create -n bigdl-init-llm -y python==3.9 setuptools==58.0.4 -c ${GONDOLIN_CONDA_CHANNEL} --override-channels
+          $CONDA_HOME/bin/conda create -n bigdl-init-llm -y python==3.9 setuptools==58.0.4
           source $CONDA_HOME/bin/activate bigdl-init-llm
           $CONDA_HOME/bin/conda info
           pip install requests
@@ -64,7 +65,7 @@
         shell: bash
         run: |
           $CONDA_HOME/bin/conda env remove -y -n bigdl-init-llm || true
-          $CONDA_HOME/bin/conda create -n bigdl-init-llm -y python==3.9 setuptools==58.0.4 -c ${GONDOLIN_CONDA_CHANNEL} --override-channels
+          $CONDA_HOME/bin/conda create -n bigdl-init-llm -y python==3.9 setuptools==58.0.4
           source $CONDA_HOME/bin/activate bigdl-init-llm
           $CONDA_HOME/bin/conda info
           pip install requests
@@ -79,24 +80,21 @@ jobs:
       - name: Download ckpt models
         run: |
-          llm_ftp_user=${RUNNER_REPOSITORY_URL:35}
-          llm_ftp_user=$(echo $llm_ftp_user | tr '[:upper:]' '[:lower:]')
-          llm_ftp_url=${FTP_URI:0:6}${llm_ftp_user}${FTP_URI:9}:8821
           if [ ! -d $LLAMA_INT4_CKPT_PATH ]; then
             echo "Directory $LLAMA_INT4_CKPT_PATH not found. Downloading from FTP server..."
-            wget --no-verbose $llm_ftp_url/${LLAMA_INT4_CKPT_PATH:1} -P $INT4_CKPT_DIR
+            wget --no-verbose $LLM_FTP_URL/${LLAMA_INT4_CKPT_PATH:1} -P $INT4_CKPT_DIR
           fi
           if [ ! -d $GPTNEOX_INT4_CKPT_PATH ]; then
             echo "Directory $GPTNEOX_INT4_CKPT_PATH not found. Downloading from FTP server..."
-            wget --no-verbose $llm_ftp_url/${GPTNEOX_INT4_CKPT_PATH:1} -P $INT4_CKPT_DIR
+            wget --no-verbose $LLM_FTP_URL/${GPTNEOX_INT4_CKPT_PATH:1} -P $INT4_CKPT_DIR
           fi
           if [ ! -d $BLOOM_INT4_CKPT_PATH ]; then
             echo "Directory $BLOOM_INT4_CKPT_PATH not found. Downloading from FTP server..."
-            wget --no-verbose $llm_ftp_url/${BLOOM_INT4_CKPT_PATH:1} -P $INT4_CKPT_DIR
+            wget --no-verbose $LLM_FTP_URL/${BLOOM_INT4_CKPT_PATH:1} -P $INT4_CKPT_DIR
           fi
           if [ ! -d $STARCODER_INT4_CKPT_PATH ]; then
             echo "Directory $STARCODER_INT4_CKPT_PATH not found. Downloading from FTP server..."
-            wget --no-verbose $llm_ftp_url/${STARCODER_INT4_CKPT_PATH:1} -P $INT4_CKPT_DIR
+            wget --no-verbose $LLM_FTP_URL/${STARCODER_INT4_CKPT_PATH:1} -P $INT4_CKPT_DIR
           fi

       - name: Run LLM inference test
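Both jobs now target runners labeled `AVX512` rather than the generic `Gondolin` pool. As a sketch (not part of the patch), a candidate self-hosted host can be checked for AVX-512 support before it is given that label; `avx512f` is the foundation CPUID flag exposed in `/proc/cpuinfo` on Linux:

```bash
# Quick sanity check on a prospective self-hosted runner (Linux only).
if grep -q '\bavx512f\b' /proc/cpuinfo; then
  echo "AVX-512 available: safe to apply the AVX512 runner label"
else
  echo "AVX-512 not available on this host" >&2
  exit 1
fi
```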
diff --git a/python/llm/test/inference/test_call_models.py b/python/llm/test/inference/test_call_models.py
index 40613132..04f53214 100644
--- a/python/llm/test/inference/test_call_models.py
+++ b/python/llm/test/inference/test_call_models.py
@@ -29,18 +29,23 @@ class Test_Models_Basics(TestCase):
         self.bloom_model_path = os.environ.get('BLOOM_INT4_CKPT_PATH')
         self.gptneox_model_path = os.environ.get('GPTNEOX_INT4_CKPT_PATH')
         self.starcoder_model_path = os.environ.get('STARCODER_INT4_CKPT_PATH')
+        thread_num = os.environ.get('THREAD_NUM')
+        if thread_num is not None:
+            self.n_threads = int(thread_num)
+        else:
+            self.n_threads = 2

     def test_llama_completion_success(self):
-        llm = Llama(self.llama_model_path)
+        llm = Llama(self.llama_model_path, n_threads=self.n_threads)
         output = llm("What is the capital of France?", max_tokens=32, stream=False)
         # assert "Paris" in output['choices'][0]['text']

     def test_llama_completion_with_stream_success(self):
-        llm = Llama(self.llama_model_path)
+        llm = Llama(self.llama_model_path, n_threads=self.n_threads)
         output = llm("What is the capital of France?", max_tokens=32, stream=True)

     def test_bloom_completion_success(self):
-        llm = Bloom(self.bloom_model_path)
+        llm = Bloom(self.bloom_model_path, n_threads=self.n_threads)
         output = llm("What is the capital of France?", max_tokens=32, stream=False)
         # avx = get_avx_flags()
         # if avx == "_avx512":
@@ -48,25 +53,25 @@ class Test_Models_Basics(TestCase):
         #     assert "Paris" in output['choices'][0]['text']

     def test_bloom_completion_with_stream_success(self):
-        llm = Bloom(self.bloom_model_path)
+        llm = Bloom(self.bloom_model_path, n_threads=self.n_threads)
         output = llm("What is the capital of France?", max_tokens=32, stream=True)

     def test_gptneox_completion_success(self):
-        llm = Gptneox(self.gptneox_model_path)
+        llm = Gptneox(self.gptneox_model_path, n_threads=self.n_threads)
         output = llm("Q: What is the capital of France? A:", max_tokens=32, stream=False)
         # assert "Paris" in output['choices'][0]['text']

     def test_gptneox_completion_with_stream_success(self):
-        llm = Gptneox(self.gptneox_model_path)
+        llm = Gptneox(self.gptneox_model_path, n_threads=self.n_threads)
         output = llm("Q: What is the capital of France? A:", max_tokens=32, stream=True)

     def test_starcoder_completion_success(self):
-        llm = Starcoder(self.starcoder_model_path)
+        llm = Starcoder(self.starcoder_model_path, n_threads=self.n_threads)
         output = llm("def print_hello_world(", max_tokens=32, stream=False)
         # assert "Paris" in output['choices'][0]['text']

     def test_starcoder_completion_with_stream_success(self):
-        llm = Starcoder(self.starcoder_model_path)
+        llm = Starcoder(self.starcoder_model_path, n_threads=self.n_threads)
         output = llm("def print_hello_world(", max_tokens=32, stream=True)
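The tests now read `THREAD_NUM` once in `setUp` and fall back to 2 threads when it is unset, so the same file works on CI (where the workflow exports `THREAD_NUM: 6`) and on a developer machine. A hypothetical local run that mirrors the CI configuration; the checkpoint paths below are the CI values from the workflow env and would point at wherever the int4 checkpoints live locally:

```bash
# Mirror the CI thread count and checkpoint locations, then run the tests.
export THREAD_NUM=6
export LLAMA_INT4_CKPT_PATH=./llm/ggml-actions/stable/bigdl_llm_llama_7b_q4_0.bin
export GPTNEOX_INT4_CKPT_PATH=./llm/ggml-actions/stable/bigdl_llm_redpajama_7b_q4_0.bin
export BLOOM_INT4_CKPT_PATH=./llm/ggml-actions/stable/bigdl_llm_bloom_7b_q4_0.bin
export STARCODER_INT4_CKPT_PATH=./llm/ggml-actions/stable/bigdl_llm_santacoder_1b_q4_0.bin
python -m pytest -s python/llm/test/inference/test_call_models.py
```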
A:", max_tokens=32, stream=False) # assert "Paris" in output['choices'][0]['text'] def test_gptneox_completion_with_stream_success(self): - llm = Gptneox(self.gptneox_model_path) + llm = Gptneox(self.gptneox_model_path, n_threads=self.n_threads) output = llm("Q: What is the capital of France? A:", max_tokens=32, stream=True) def test_starcoder_completion_success(self): - llm = Starcoder(self.starcoder_model_path) + llm = Starcoder(self.starcoder_model_path, n_threads=self.n_threads) output = llm("def print_hello_world(", max_tokens=32, stream=False) # assert "Paris" in output['choices'][0]['text'] def test_starcoder_completion_with_stream_success(self): - llm = Starcoder(self.starcoder_model_path) + llm = Starcoder(self.starcoder_model_path, n_threads=self.n_threads) output = llm("def print_hello_world(", max_tokens=32, stream=True) diff --git a/python/llm/test/langchain/test_langchain.py b/python/llm/test/langchain/test_langchain.py index 3b72e100..1d221225 100644 --- a/python/llm/test/langchain/test_langchain.py +++ b/python/llm/test/langchain/test_langchain.py @@ -27,7 +27,12 @@ class Test_Models_Basics(TestCase): self.llama_model_path = os.environ.get('LLAMA_INT4_CKPT_PATH') self.bloom_model_path = os.environ.get('BLOOM_INT4_CKPT_PATH') self.gptneox_model_path = os.environ.get('GPTNEOX_INT4_CKPT_PATH') - + thread_num = os.environ.get('THREAD_NUM') + if thread_num is not None: + self.n_threads = int(thread_num) + else: + self.n_threads = 2 + def test_langchain_llm_embedding_llama(self): bigdl_embeddings = BigdlLLMEmbeddings( @@ -49,7 +54,7 @@ class Test_Models_Basics(TestCase): llm = BigdlLLM( model_path=self.llama_model_path, max_tokens=32, - n_threads=2) + n_threads=self.n_threads) question = "What is AI?" result = llm(question) @@ -58,7 +63,7 @@ class Test_Models_Basics(TestCase): model_path=self.gptneox_model_path, model_family="gptneox", max_tokens=32, - n_threads=2) + n_threads=self.n_threads) question = "What is AI?" result = llm(question) @@ -67,7 +72,7 @@ class Test_Models_Basics(TestCase): model_path=self.bloom_model_path, model_family="bloom", max_tokens=32, - n_threads=2) + n_threads=self.n_threads) question = "What is AI?" result = llm(question) diff --git a/python/llm/test/run-llm-convert-tests.sh b/python/llm/test/run-llm-convert-tests.sh index 42ffe148..3396435c 100644 --- a/python/llm/test/run-llm-convert-tests.sh +++ b/python/llm/test/run-llm-convert-tests.sh @@ -9,7 +9,16 @@ set -e echo "# Start testing convert" start=$(date "+%s") -python -m pytest -s ${LLM_CONVERT_TEST_DIR} +# separate convert process to save disk space +if [[ $1 == "llama" ]]; then + python -m pytest -s ${LLM_CONVERT_TEST_DIR}/test_convert_model.py -k "test_convert_llama" +elif [[ $1 == "gptneox" ]]; then + python -m pytest -s ${LLM_CONVERT_TEST_DIR}/test_convert_model.py -k "test_convert_gptneox" +elif [[ $1 == "bloom" ]]; then + python -m pytest -s ${LLM_CONVERT_TEST_DIR}/test_convert_model.py -k "test_convert_bloom" +elif [[ $1 == "starcoder" ]]; then + python -m pytest -s ${LLM_CONVERT_TEST_DIR}/test_convert_model.py -k "test_convert_starcoder" +fi now=$(date "+%s") time=$((now-start))