From 5809a3f5fecbe464ea1cb004177c8380a9e422f9 Mon Sep 17 00:00:00 2001 From: Lilac09 <74996885+Zhengjin-Wang@users.noreply.github.com> Date: Tue, 12 Mar 2024 16:15:27 +0800 Subject: [PATCH] Add run-hbm.sh & add user guide for spr and hbm (#10357) * add run-hbm.sh * add spr and hbm guide * only support quad mode * only support quad mode * update special cases * update special cases --- .../LLM/Quickstart/benchmark_quickstart.md | 36 ++++++++++++++++++ .../llm/dev/benchmark/all-in-one/run-hbm.sh | 38 +++++++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 python/llm/dev/benchmark/all-in-one/run-hbm.sh diff --git a/docs/readthedocs/source/doc/LLM/Quickstart/benchmark_quickstart.md b/docs/readthedocs/source/doc/LLM/Quickstart/benchmark_quickstart.md index 0dc3bc48..138f421e 100644 --- a/docs/readthedocs/source/doc/LLM/Quickstart/benchmark_quickstart.md +++ b/docs/readthedocs/source/doc/LLM/Quickstart/benchmark_quickstart.md @@ -104,6 +104,42 @@ Please refer to [here](https://bigdl.readthedocs.io/en/latest/doc/LLM/Overview/i ./run-max-gpu.sh + .. tab:: Intel SPR + + For Intel SPR machine, we recommend: + + .. code-block:: bash + + ./run-spr.sh + + The script uses a default numactl strategy. If you want to customize it, please use ``lscpu`` or ``numactl -H`` to check how cpu indexes are assigned to numa nodes, and make sure the run command is bound to only one socket. + + .. tab:: Intel HBM + + For Intel HBM machine, we recommend: + + .. code-block:: bash + + ./run-hbm.sh + + The script uses a default numactl strategy. If you want to customize it, please use ``numactl -H`` to check how the indexes of hbm nodes and cpus are assigned. + + For example: + + + .. code-block:: bash + + node 0 1 2 3 + 0: 10 21 13 23 + 1: 21 10 23 13 + 2: 13 23 10 23 + 3: 23 13 23 10 + + + Here the hbm node of a given node is the node whose distance from it is 13, e.g. node 2 is node 0's hbm node. + + And make sure the run command is bound to only one socket.
#!/bin/bash
# run-hbm.sh (python/llm/dev/benchmark/all-in-one/run-hbm.sh)
#
# Launch the all-in-one benchmark (run.py, located next to this script) under
# numactl, choosing the thread count and the preferred memory node from the
# CPU topology reported by lscpu.
#
# Only Quad mode is supported. The number of NUMA nodes selects the strategy:
#   4 nodes -> HBM flat Quad mode (2 HBM + 2 DRAM memory nodes); memory is
#              preferred from node 2.
#   2 nodes -> SPR, HBM-only or HBM-cache Quad mode (2 DRAM memory nodes);
#              memory is preferred from node 0.
#   1 node  -> general test mode; memory is preferred from node 0.
#   other   -> a warning is printed and node 0 is used.
# Confirm the actual node layout of the machine with "numactl -H".
#
# Exit status: 1 when the topology cannot be parsed, otherwise the status of
# the numactl/python command.

source bigdl-llm-init

#######################################
# Print the value of the first lscpu field whose label contains the given
# text, with surrounding whitespace removed.
# Arguments: $1 - label text to look for, e.g. "Socket(s)"
# Outputs:   the field value on stdout (nothing if the field is absent)
#######################################
lscpu_field() {
  # LC_ALL=C keeps the field labels in English regardless of the user locale.
  LC_ALL=C lscpu | awk -F ':' -v label="$1" '
    index($1, label) {
      gsub(/^[ \t]+|[ \t]+$/, "", $2)
      print $2
      exit
    }'
}

#######################################
# Succeed only when the argument is a positive decimal integer.
# Arguments: $1 - value to check
#######################################
is_positive_int() {
  [[ "$1" =~ ^[1-9][0-9]*$ ]]
}

#######################################
# Compute the (doubled) number of cores per NUMA node.
# The product is multiplied by 2 so that HBM flat mode, where every NUMA
# domain is counted twice (DRAM node + HBM node), does not yield a fractional
# result; the other modes divide by 2 again later.
# Arguments: $1 - sockets, $2 - cores per socket, $3 - NUMA nodes
# Outputs:   the integer result on stdout
# Returns:   1 if any argument is not a positive integer
#######################################
compute_cores_per_numa() {
  local sockets=$1 cores_per_socket=$2 numa_nodes=$3 value
  for value in "$sockets" "$cores_per_socket" "$numa_nodes"; do
    is_positive_int "$value" || return 1
  done
  echo $(( sockets * cores_per_socket * 2 / numa_nodes ))
}

#######################################
# Choose the run strategy for a given topology.
# Globals:   plan_banner (written)   - message describing the chosen mode
#            plan_threads (written)  - value for OMP_NUM_THREADS (at least 1)
#            plan_mem_node (written) - NUMA node passed to "numactl -p"
# Arguments: $1 - NUMA node count, $2 - result of compute_cores_per_numa
#######################################
select_run_plan() {
  local numa_nodes=$1 cores_per_numa=$2
  case "$numa_nodes" in
    4)
      # HBM flat Quad mode: confirm that there are 2 HBM memory nodes and
      # 2 DRAM memory nodes through "numactl -H".
      plan_banner="HBM Quad mode"
      plan_threads=$cores_per_numa
      plan_mem_node=2
      ;;
    2)
      # SPR, HBM-only or HBM-cache Quad mode: confirm that there are 2 DRAM
      # memory nodes through "numactl -H".
      plan_banner="Warning: SPR Quad mode, hbm usage is default off, please check if HBM can be on."
      plan_threads=$(( cores_per_numa / 2 ))
      plan_mem_node=0
      ;;
    1)
      plan_banner="General Test mode"
      plan_threads=$(( cores_per_numa / 2 ))
      plan_mem_node=0
      ;;
    *)
      plan_banner="Warning: The number of nodes in this machine is ${numa_nodes}. Node 0 will be used for run."
      plan_threads=$cores_per_numa
      plan_mem_node=0
      ;;
  esac
  # Never ask for zero threads: that would produce the invalid CPU range 0--1.
  if (( plan_threads < 1 )); then
    plan_threads=1
  fi
}

#######################################
# Detect the topology, export OMP_NUM_THREADS and start run.py under numactl.
# Returns: 1 if the topology cannot be parsed, else the numactl exit status.
#######################################
main() {
  local sockets_num cores_per_socket numa_nodes cores_per_numa last_cpu_index
  sockets_num=$(lscpu_field "Socket(s)")
  cores_per_socket=$(lscpu_field "Core(s) per socket")
  numa_nodes=$(lscpu_field "NUMA node(s)")

  if ! cores_per_numa=$(compute_cores_per_numa \
      "$sockets_num" "$cores_per_socket" "$numa_nodes"); then
    echo "Error: cannot parse CPU topology from lscpu (sockets='${sockets_num}', cores per socket='${cores_per_socket}', NUMA nodes='${numa_nodes}')." >&2
    return 1
  fi

  select_run_plan "$numa_nodes" "$cores_per_numa"
  echo "$plan_banner"
  export OMP_NUM_THREADS=$plan_threads
  echo "OMP_NUM_THREADS: ${OMP_NUM_THREADS}"
  last_cpu_index=$(( OMP_NUM_THREADS - 1 ))
  # Quote the script path so a checkout directory containing spaces works.
  numactl -C "0-${last_cpu_index}" -p "$plan_mem_node" \
    python "$(dirname -- "$0")/run.py"
}

main "$@"