LLM: upgrade deepspeed in AutoTP on GPU (#10647)

This commit is contained in:
binbin Deng 2024-04-07 14:05:19 +08:00 committed by GitHub
parent 56dfcb2ade
commit d9a1153b4e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 4 additions and 4 deletions

View file

@@ -1444,7 +1444,7 @@ def run_deepspeed_optimize_model_gpu(repo_id,
     torch_dtype=torch.float16, trust_remote_code=True, use_cache=True).eval()
 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 model = deepspeed.init_inference(model, mp_size=world_size,
-                                 dtype=torch.float16, replace_method="auto",)
+                                 dtype=torch.bfloat16, replace_method="auto",)
 end = time.perf_counter()
 load_time = end - st
 print(">> loading of model costs {}s".format(load_time))

View file

@@ -17,8 +17,8 @@ pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-exte
 pip install oneccl_bind_pt==2.1.100 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
 # configures OneAPI environment variables
 source /opt/intel/oneapi/setvars.sh
-pip install git+https://github.com/microsoft/DeepSpeed.git@4fc181b0
-pip install git+https://github.com/intel/intel-extension-for-deepspeed.git@ec33277
+pip install git+https://github.com/microsoft/DeepSpeed.git@ed8aed5
+pip install git+https://github.com/intel/intel-extension-for-deepspeed.git@0eb734b
 pip install mpi4py
 conda install -c conda-forge -y gperftools=2.10 # to enable tcmalloc
 ```

View file

@@ -76,7 +76,7 @@ if __name__ == '__main__':
 model = deepspeed.init_inference(
     model,
     mp_size=world_size,
-    dtype=torch.float16,
+    dtype=torch.bfloat16,
     replace_method="auto",
 )