diff --git a/python/llm/dev/benchmark/all-in-one/run.py b/python/llm/dev/benchmark/all-in-one/run.py
index 0e0f7b79..8d57bc88 100644
--- a/python/llm/dev/benchmark/all-in-one/run.py
+++ b/python/llm/dev/benchmark/all-in-one/run.py
@@ -1444,7 +1444,7 @@ def run_deepspeed_optimize_model_gpu(repo_id,
                                                  torch_dtype=torch.float16, trust_remote_code=True, use_cache=True).eval()
     tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
     model = deepspeed.init_inference(model, mp_size=world_size,
-                                     dtype=torch.float16, replace_method="auto",)
+                                     dtype=torch.bfloat16, replace_method="auto",)
     end = time.perf_counter()
     load_time = end - st
     print(">> loading of model costs {}s".format(load_time))
diff --git a/python/llm/example/GPU/Deepspeed-AutoTP/README.md b/python/llm/example/GPU/Deepspeed-AutoTP/README.md
index 22ec3e5a..948bf8c5 100644
--- a/python/llm/example/GPU/Deepspeed-AutoTP/README.md
+++ b/python/llm/example/GPU/Deepspeed-AutoTP/README.md
@@ -17,8 +17,8 @@ pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-exte
 pip install oneccl_bind_pt==2.1.100 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
 # configures OneAPI environment variables
 source /opt/intel/oneapi/setvars.sh
-pip install git+https://github.com/microsoft/DeepSpeed.git@4fc181b0
-pip install git+https://github.com/intel/intel-extension-for-deepspeed.git@ec33277
+pip install git+https://github.com/microsoft/DeepSpeed.git@ed8aed5
+pip install git+https://github.com/intel/intel-extension-for-deepspeed.git@0eb734b
 pip install mpi4py
 conda install -c conda-forge -y gperftools=2.10 # to enable tcmalloc
 ```
diff --git a/python/llm/example/GPU/Deepspeed-AutoTP/deepspeed_autotp.py b/python/llm/example/GPU/Deepspeed-AutoTP/deepspeed_autotp.py
index b2fa7aaf..f16a670f 100644
--- a/python/llm/example/GPU/Deepspeed-AutoTP/deepspeed_autotp.py
+++ b/python/llm/example/GPU/Deepspeed-AutoTP/deepspeed_autotp.py
@@ -76,7 +76,7 @@ if __name__ == '__main__':
     model = deepspeed.init_inference(
         model,
         mp_size=world_size,
-        dtype=torch.float16,
+        dtype=torch.bfloat16,
         replace_method="auto",
     )
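
Taken together, the hunks pin DeepSpeed and intel-extension-for-deepspeed to newer commits and switch the AutoTP inference dtype from `torch.float16` to `torch.bfloat16`. Below is a minimal sketch of the resulting call pattern after this patch; the model id and the `WORLD_SIZE` environment lookup are illustrative assumptions, not part of the diff:

```python
import os

import torch
import deepspeed
from transformers import AutoModelForCausalLM, AutoTokenizer

# WORLD_SIZE is normally set by the mpirun/deepspeed launcher (assumption:
# defaulting to 1 here so the sketch also runs standalone).
world_size = int(os.environ.get("WORLD_SIZE", "1"))

model_path = "meta-llama/Llama-2-7b-chat-hf"  # illustrative model id

# Load the model on CPU first; weights are still loaded in float16.
model = AutoModelForCausalLM.from_pretrained(model_path,
                                             torch_dtype=torch.float16,
                                             trust_remote_code=True,
                                             use_cache=True).eval()
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# Shard the model across ranks with DeepSpeed AutoTP; after this patch the
# tensor-parallel inference dtype is bfloat16 rather than float16.
model = deepspeed.init_inference(model,
                                 mp_size=world_size,
                                 dtype=torch.bfloat16,
                                 replace_method="auto")
```

A common reason for such a switch is that bfloat16 shares float32's exponent range, trading mantissa precision for resistance to the overflow that float16 can hit during inference.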