Add run-hbm.sh & add user guide for spr and hbm (#10357)
* add run-hbm.sh * add spr and hbm guide * only support quad mode * only support quad mode * update special cases * update special cases
This commit is contained in:
		
							parent
							
								
									5d996a5caf
								
							
						
					
					
						commit
						5809a3f5fe
					
				
					 2 changed files with 74 additions and 0 deletions
				
			
		| 
						 | 
					@ -104,6 +104,42 @@ Please refer to [here](https://bigdl.readthedocs.io/en/latest/doc/LLM/Overview/i
 | 
				
			||||||
 | 
					
 | 
				
			||||||
         ./run-max-gpu.sh
 | 
					         ./run-max-gpu.sh
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   .. tab:: Intel SPR
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      For Intel SPR machine, we recommend:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      .. code-block:: bash
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					         ./run-spr.sh
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      The script uses a default numactl strategy. If you want to customize it, please use ``lscpu`` or ``numactl -H`` to check how CPU indexes are assigned to NUMA nodes, and make sure the run command is bound to only one socket.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   .. tab:: Intel HBM
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      For Intel HBM machine, we recommend:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      .. code-block:: bash
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					         ./run-hbm.sh
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      The script uses a default numactl strategy. If you want to customize it, please use ``numactl -H`` to check how the indexes of the HBM nodes and CPUs are assigned.
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      For example:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      .. code-block:: bash
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					         node   0   1   2   3
 | 
				
			||||||
 | 
					            0:  10  21  13  23
 | 
				
			||||||
 | 
					            1:  21  10  23  13
 | 
				
			||||||
 | 
					            2:  13  23  10  23
 | 
				
			||||||
 | 
					            3:  23  13  23  10
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      Here, the HBM node is the node whose distance from the checked node is 13; for example, node 2 is node 0's HBM node.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      Also make sure the run command is bound to only one socket.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
## Result
 | 
					## Result
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										38
									
								
								python/llm/dev/benchmark/all-in-one/run-hbm.sh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								python/llm/dev/benchmark/all-in-one/run-hbm.sh
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,38 @@
 | 
				
			||||||
 | 
					#!/bin/bash
					#
					# Launch run.py (located next to this script) under numactl, choosing the
					# OpenMP thread count, CPU range and preferred memory node from the CPU
					# topology reported by lscpu. Only Quad mode is supported for now.
					#
					# Usage: ./run-hbm.sh

					# NOTE(review): presumably sets up the BigDL-LLM runtime environment; it is
					# looked up on PATH by `source` - confirm it is installed before running.
					source bigdl-llm-init

					# Print the value of one lscpu field with all whitespace removed.
					# Arguments: $1 - exact field label without the trailing colon,
					#                 e.g. "Socket(s)"
					# Outputs:   the field value on stdout (nothing if the field is absent)
					lscpu_field() {
					  # LC_ALL=C keeps the field labels in English regardless of user locale.
					  # The label is trimmed before comparing because newer lscpu versions
					  # indent fields hierarchically.
					  LC_ALL=C lscpu | awk -F ':' -v key="$1" '
					    {
					      label = $1
					      gsub(/^[[:space:]]+|[[:space:]]+$/, "", label)
					      if (label == key) {
					        gsub(/[[:space:]]/, "", $2)
					        print $2
					        exit
					      }
					    }'
					}

					sockets_num=$(lscpu_field "Socket(s)")
					cores_per_socket=$(lscpu_field "Core(s) per socket")
					numa_nodes=$(lscpu_field "NUMA node(s)")

					# Refuse to continue with missing, non-numeric or zero topology values;
					# otherwise the arithmetic below fails and numactl gets a bogus CPU range.
					for topology_value in "${sockets_num}" "${cores_per_socket}" "${numa_nodes}"; do
					  case "${topology_value}" in
					    '' | *[!0-9]* | 0)
					      echo "Error: could not read CPU topology from lscpu (sockets='${sockets_num}', cores per socket='${cores_per_socket}', NUMA nodes='${numa_nodes}')." >&2
					      exit 1
					      ;;
					  esac
					done

					# Multiply by 2 to avoid a fractional result in HBM flat mode, where the
					# NUMA node count is doubled (HBM nodes are counted as well). The branches
					# that are not in flat mode divide by 2 again below.
					cores_per_numa=$((sockets_num * cores_per_socket * 2 / numa_nodes))

					# Memory node passed to `numactl -p`; only HBM flat Quad mode overrides it.
					preferred_node=0

					# Only support Quad-mode now
					case "${numa_nodes}" in
					  4)
					    # HBM flat Quad-mode. Confirm that there are 2 HBM memory nodes and
					    # 2 DRAM memory nodes through "numactl -H". Node 2 is expected to be
					    # the HBM node that belongs to node 0.
					    echo "HBM Quad mode"
					    omp_threads=${cores_per_numa}
					    preferred_node=2
					    ;;
					  2)
					    # SPR, HBM-only or HBM-cache Quad-mode. Confirm that there are 2 DRAM
					    # memory nodes through "numactl -H".
					    echo "Warning: SPR Quad mode, hbm usage is default off, please check if HBM can be on."
					    omp_threads=$((cores_per_numa / 2))
					    ;;
					  1)
					    # General Test mode
					    echo "General Test mode"
					    omp_threads=$((cores_per_numa / 2))
					    ;;
					  *)
					    echo "Warning: The number of nodes in this machine is ${numa_nodes}. Node 0 will be used for run. "
					    omp_threads=${cores_per_numa}
					    ;;
					esac

					# Integer division can round down to 0 on very small topologies, which
					# would produce the invalid CPU range "0--1".
					if ((omp_threads < 1)); then
					  echo "Error: computed OMP_NUM_THREADS is ${omp_threads}; cannot build a CPU range." >&2
					  exit 1
					fi

					export OMP_NUM_THREADS=${omp_threads}
					echo "OMP_NUM_THREADS: ${OMP_NUM_THREADS}"
					last_cpu_index=$((OMP_NUM_THREADS - 1))

					# Pin to CPUs 0..last_cpu_index and prefer allocations from preferred_node.
					# The script directory is quoted so paths containing spaces work.
					numactl -C "0-${last_cpu_index}" -p "${preferred_node}" python "$(dirname "$0")/run.py"
 | 
				
			||||||
		Loading…
	
		Reference in a new issue