LLM: upgrade deepspeed in AutoTP on GPU (#10647)

This commit is contained in:
binbin Deng 2024-04-07 14:05:19 +08:00 committed by GitHub
parent 56dfcb2ade
commit d9a1153b4e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 4 additions and 4 deletions

View file

@@ -1444,7 +1444,7 @@ def run_deepspeed_optimize_model_gpu(repo_id,
     torch_dtype=torch.float16, trust_remote_code=True, use_cache=True).eval()
 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 model = deepspeed.init_inference(model, mp_size=world_size,
-                                 dtype=torch.float16, replace_method="auto",)
+                                 dtype=torch.bfloat16, replace_method="auto",)
 end = time.perf_counter()
 load_time = end - st
 print(">> loading of model costs {}s".format(load_time))

View file

@@ -17,8 +17,8 @@ pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-exte
 pip install oneccl_bind_pt==2.1.100 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
 # configures OneAPI environment variables
 source /opt/intel/oneapi/setvars.sh
-pip install git+https://github.com/microsoft/DeepSpeed.git@4fc181b0
-pip install git+https://github.com/intel/intel-extension-for-deepspeed.git@ec33277
+pip install git+https://github.com/microsoft/DeepSpeed.git@ed8aed5
+pip install git+https://github.com/intel/intel-extension-for-deepspeed.git@0eb734b
 pip install mpi4py
 conda install -c conda-forge -y gperftools=2.10 # to enable tcmalloc
 ```

View file

@@ -76,7 +76,7 @@ if __name__ == '__main__':
 model = deepspeed.init_inference(
     model,
     mp_size=world_size,
-    dtype=torch.float16,
+    dtype=torch.bfloat16,
     replace_method="auto",
 )