diff --git a/python/llm/dev/benchmark/all-in-one/run.py b/python/llm/dev/benchmark/all-in-one/run.py
index 0e0f7b79..8d57bc88 100644
--- a/python/llm/dev/benchmark/all-in-one/run.py
+++ b/python/llm/dev/benchmark/all-in-one/run.py
@@ -1444,7 +1444,7 @@ def run_deepspeed_optimize_model_gpu(repo_id,
                                                  torch_dtype=torch.float16, trust_remote_code=True, use_cache=True).eval()
     tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
     model = deepspeed.init_inference(model, mp_size=world_size,
-                                     dtype=torch.float16, replace_method="auto",)
+                                     dtype=torch.bfloat16, replace_method="auto",)
     end = time.perf_counter()
     load_time = end - st
     print(">> loading of model costs {}s".format(load_time))
diff --git a/python/llm/example/GPU/Deepspeed-AutoTP/README.md b/python/llm/example/GPU/Deepspeed-AutoTP/README.md
index 22ec3e5a..948bf8c5 100644
--- a/python/llm/example/GPU/Deepspeed-AutoTP/README.md
+++ b/python/llm/example/GPU/Deepspeed-AutoTP/README.md
@@ -17,8 +17,8 @@ pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-exte
 pip install oneccl_bind_pt==2.1.100 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
 # configures OneAPI environment variables
 source /opt/intel/oneapi/setvars.sh
-pip install git+https://github.com/microsoft/DeepSpeed.git@4fc181b0
-pip install git+https://github.com/intel/intel-extension-for-deepspeed.git@ec33277
+pip install git+https://github.com/microsoft/DeepSpeed.git@ed8aed5
+pip install git+https://github.com/intel/intel-extension-for-deepspeed.git@0eb734b
 pip install mpi4py
 conda install -c conda-forge -y gperftools=2.10 # to enable tcmalloc
 ```
diff --git a/python/llm/example/GPU/Deepspeed-AutoTP/deepspeed_autotp.py b/python/llm/example/GPU/Deepspeed-AutoTP/deepspeed_autotp.py
index b2fa7aaf..f16a670f 100644
--- a/python/llm/example/GPU/Deepspeed-AutoTP/deepspeed_autotp.py
+++ b/python/llm/example/GPU/Deepspeed-AutoTP/deepspeed_autotp.py
@@ -76,7 +76,7 @@ if __name__ == '__main__':
     model = deepspeed.init_inference(
         model,
         mp_size=world_size,
-        dtype=torch.float16,
+        dtype=torch.bfloat16,
         replace_method="auto",
     )
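
Taken together, the hunks pin DeepSpeed and intel-extension-for-deepspeed to newer commits and switch the AutoTP inference dtype from `torch.float16` to `torch.bfloat16`. Below is a minimal sketch of the resulting call pattern after this patch; the model id and the `WORLD_SIZE` environment lookup are illustrative assumptions, not part of the diff:

```python
import os

import torch
import deepspeed
from transformers import AutoModelForCausalLM, AutoTokenizer

# WORLD_SIZE is normally set by the mpirun/deepspeed launcher (assumption:
# defaulting to 1 here so the sketch also runs standalone).
world_size = int(os.environ.get("WORLD_SIZE", "1"))

model_path = "meta-llama/Llama-2-7b-chat-hf"  # illustrative model id

# Load the model on CPU first; weights are still loaded in float16.
model = AutoModelForCausalLM.from_pretrained(model_path,
                                             torch_dtype=torch.float16,
                                             trust_remote_code=True,
                                             use_cache=True).eval()
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# Shard the model across ranks with DeepSpeed AutoTP; after this patch the
# tensor-parallel inference dtype is bfloat16 rather than float16.
model = deepspeed.init_inference(model,
                                 mp_size=world_size,
                                 dtype=torch.bfloat16,
                                 replace_method="auto")
```

A common reason for such a switch is that bfloat16 shares float32's exponent range, trading mantissa precision for resistance to the overflow that float16 can hit during inference.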