diff --git a/python/llm/dev/benchmark/all-in-one/run-deepspeed-arc.sh b/python/llm/dev/benchmark/all-in-one/run-deepspeed-arc.sh
index 42354272..9a24eedc 100644
--- a/python/llm/dev/benchmark/all-in-one/run-deepspeed-arc.sh
+++ b/python/llm/dev/benchmark/all-in-one/run-deepspeed-arc.sh
@@ -14,5 +14,5 @@ if grep -q "Core" /proc/cpuinfo; then
   export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=2
 fi
 export TORCH_LLM_ALLREDUCE=0 # Different from PVC
-
+export BIGDL_IMPORT_IPEX=0
 mpirun -np $NUM_GPUS --prepend-rank python run.py
diff --git a/python/llm/dev/benchmark/all-in-one/run-deepspeed-pvc.sh b/python/llm/dev/benchmark/all-in-one/run-deepspeed-pvc.sh
index 16d14831..a0998477 100644
--- a/python/llm/dev/benchmark/all-in-one/run-deepspeed-pvc.sh
+++ b/python/llm/dev/benchmark/all-in-one/run-deepspeed-pvc.sh
@@ -13,4 +13,5 @@ source $basekit_root/ccl/latest/env/vars.sh --force
 export OMP_NUM_THREADS=$((56/$NUM_GPUS))
 export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=2
 export TORCH_LLM_ALLREDUCE=1
+export BIGDL_IMPORT_IPEX=0
 mpirun -np $NUM_GPUS --prepend-rank python run.py
diff --git a/python/llm/example/GPU/Deepspeed-AutoTP-FastAPI/start-deepspeed-autotp-ipex-llm-serving.sh b/python/llm/example/GPU/Deepspeed-AutoTP-FastAPI/start-deepspeed-autotp-ipex-llm-serving.sh
index c3d3bd85..9e7e818c 100644
--- a/python/llm/example/GPU/Deepspeed-AutoTP-FastAPI/start-deepspeed-autotp-ipex-llm-serving.sh
+++ b/python/llm/example/GPU/Deepspeed-AutoTP-FastAPI/start-deepspeed-autotp-ipex-llm-serving.sh
@@ -31,6 +31,6 @@
 export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=2
 export TORCH_LLM_ALLREDUCE=0
 export WORLD_SIZE=2
-
+export BIGDL_IMPORT_IPEX=0
 mpirun -np $NUM_GPUS --prepend-rank \
   python serving.py --repo-id-or-model-path YOUR_REPO_ID_OR_MODEL_PATH --low-bit 'fp8' --port 8000 --max-num-seqs 8 --max-num-batched-tokens 8192
diff --git a/python/llm/example/GPU/Deepspeed-AutoTP/run_llama2_70b_pvc_1550_1_card.sh b/python/llm/example/GPU/Deepspeed-AutoTP/run_llama2_70b_pvc_1550_1_card.sh
index 4e968541..37e53545 100644
--- a/python/llm/example/GPU/Deepspeed-AutoTP/run_llama2_70b_pvc_1550_1_card.sh
+++ b/python/llm/example/GPU/Deepspeed-AutoTP/run_llama2_70b_pvc_1550_1_card.sh
@@ -29,5 +29,6 @@ source $basekit_root/ccl/latest/env/vars.sh --force
 export OMP_NUM_THREADS=$((56/$NUM_GPUS))
 export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=2
 export TORCH_LLM_ALLREDUCE=1
+export BIGDL_IMPORT_IPEX=0
 mpirun -np $NUM_GPUS --prepend-rank \
   python deepspeed_autotp.py --repo-id-or-model-path 'meta-llama/Llama-2-70b-chat-hf' --low-bit 'sym_int4'
diff --git a/python/llm/example/GPU/Deepspeed-AutoTP/run_mistral_7b_instruct_flex_2_card.sh b/python/llm/example/GPU/Deepspeed-AutoTP/run_mistral_7b_instruct_flex_2_card.sh
index ed471962..177ed5d0 100644
--- a/python/llm/example/GPU/Deepspeed-AutoTP/run_mistral_7b_instruct_flex_2_card.sh
+++ b/python/llm/example/GPU/Deepspeed-AutoTP/run_mistral_7b_instruct_flex_2_card.sh
@@ -28,6 +28,6 @@ NUM_GPUS=2 # number of used GPU
 export USE_XETLA=OFF
 export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=2
 export TORCH_LLM_ALLREDUCE=0 # Different from PVC
-
+export BIGDL_IMPORT_IPEX=0
 mpirun -np $NUM_GPUS --prepend-rank \
   python deepspeed_autotp.py --repo-id-or-model-path 'mistralai/Mistral-7B-Instruct-v0.1' --low-bit 'sym_int4'
diff --git a/python/llm/example/GPU/Deepspeed-AutoTP/run_qwen_14b_arc_2_card.sh b/python/llm/example/GPU/Deepspeed-AutoTP/run_qwen_14b_arc_2_card.sh
index 0b45569b..6686d3ee 100644
--- a/python/llm/example/GPU/Deepspeed-AutoTP/run_qwen_14b_arc_2_card.sh
+++ b/python/llm/example/GPU/Deepspeed-AutoTP/run_qwen_14b_arc_2_card.sh
@@ -33,6 +33,6 @@ if grep -q "Core" /proc/cpuinfo; then
   export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=2
 fi
 export TORCH_LLM_ALLREDUCE=0 # Different from PVC
-
+export BIGDL_IMPORT_IPEX=0
 mpirun -np $NUM_GPUS --prepend-rank \
   python deepspeed_autotp.py --repo-id-or-model-path 'Qwen/Qwen1.5-14B-Chat' --low-bit 'sym_int4'
diff --git a/python/llm/example/GPU/Deepspeed-AutoTP/run_vicuna_33b_arc_2_card.sh b/python/llm/example/GPU/Deepspeed-AutoTP/run_vicuna_33b_arc_2_card.sh
index 1e23668f..7cf50a5e 100644
--- a/python/llm/example/GPU/Deepspeed-AutoTP/run_vicuna_33b_arc_2_card.sh
+++ b/python/llm/example/GPU/Deepspeed-AutoTP/run_vicuna_33b_arc_2_card.sh
@@ -30,6 +30,6 @@ if grep -q "Core" /proc/cpuinfo; then
   export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=2
 fi
 export TORCH_LLM_ALLREDUCE=0 # Different from PVC
-
+export BIGDL_IMPORT_IPEX=0
 mpirun -np $NUM_GPUS --prepend-rank \
   python deepspeed_autotp.py --repo-id-or-model-path 'lmsys/vicuna-33b-v1.3' --low-bit 'sym_int4'