* [LLM] Support CPU Deepspeed distributed inference * Update run_deepspeed.py * Rename * fix style * add new codes * refine * remove annotated codes * refine * Update README.md * refine doc and example code
		
			
				
	
	
		
			18 lines
		
	
	
	
		
			603 B
		
	
	
	
		
			Bash
		
	
	
	
	
	
			
		
		
	
	
			18 lines
		
	
	
	
		
			603 B
		
	
	
	
		
			Bash
		
	
	
	
	
	
#!/bin/bash
# Launcher for CPU DeepSpeed distributed inference on a 2-socket SPR machine:
# one DeepSpeed instance per socket, cores 0-95 split between the two ranks.

# BigDL-Nano environment init (performance-related env tuning).
source bigdl-nano-init
unset OMP_NUM_THREADS # deepspeed will set it for each instance automatically

# Bring Intel oneCCL (collective communication library) into the environment.
source /opt/intel/oneccl/env/setvars.sh

export WORLD_SIZE=2 # run 1 instance per SPR socket, thus 2 instances on 2 sockets, 96 cores
export MASTER_ADDR=127.0.0.1          # single-node run: rendezvous on localhost
export CCL_ZE_IPC_EXCHANGE=sockets
export DS_ACCELERATOR="cpu"           # force DeepSpeed onto the CPU accelerator backend
export CCL_WORKER_AFFINITY=auto
unset KMP_AFFINITY # deepspeed will set it for each instance automatically
export FI_PROVIDER=tcp                # libfabric provider for oneCCL's OFI transport
export CCL_ATL_TRANSPORT=ofi
export CCL_PROCESS_LAUNCHER=none      # processes are launched by deepspeed, not by CCL

# Launch, pinning each rank to its own contiguous core range within 0-95.
deepspeed \
  --bind_cores_to_rank \
  --bind_core_list 0-95 \
  run.py