From 08ad40b2516a2e297324447ebc5b434e6a296a67 Mon Sep 17 00:00:00 2001 From: Shaojun Liu <61072813+liu-shaojun@users.noreply.github.com> Date: Tue, 7 May 2024 12:55:14 +0800 Subject: [PATCH] improve ipex-llm-init for Linux (#10928) * refine ipex-llm-init * install libtcmalloc.so for Max * update based on comment * remove unneeded code --- python/llm/scripts/ipex-llm-init | 173 ++++++++++++++++--------------- 1 file changed, 88 insertions(+), 85 deletions(-) diff --git a/python/llm/scripts/ipex-llm-init b/python/llm/scripts/ipex-llm-init index 7e4d2608..dd38f757 100644 --- a/python/llm/scripts/ipex-llm-init +++ b/python/llm/scripts/ipex-llm-init @@ -38,29 +38,42 @@ function enable_gpu { function disable_gpu { ENABLE_GPU=0 + unset_gpu_envs +} + +function unset_gpu_envs { + unset USE_XETLA + unset ENABLE_SDP_FUSION + unset SYCL_CACHE_PERSISTENT + unset BIGDL_LLM_XMX_DISABLED + unset SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS } function display-var { + echo "+++++ Env Variables +++++" echo "Internal:" echo " ENABLE_IOMP = ${ENABLE_IOMP}" + echo " ENABLE_GPU = ${ENABLE_GPU}" echo " ENABLE_JEMALLOC = ${ENABLE_JEMALLOC}" echo " ENABLE_TCMALLOC = ${ENABLE_TCMALLOC}" - echo " ENABLE_GPU = ${ENABLE_GPU}" echo " LIB_DIR = ${LIB_DIR}" echo " BIN_DIR = ${BIN_DIR}" echo " LLM_DIR = ${LLM_DIR}" echo "" echo "Exported:" - echo " LD_PRELOAD = ${LD_PRELOAD}" - echo " OMP_NUM_THREADS = ${OMP_NUM_THREADS}" - echo " MALLOC_CONF = ${MALLOC_CONF}" - echo " USE_XETLA = ${USE_XETLA}" - echo " ENABLE_SDP_FUSION = ${ENABLE_SDP_FUSION}" + echo " LD_PRELOAD = ${LD_PRELOAD}" + echo " OMP_NUM_THREADS = ${OMP_NUM_THREADS}" + echo " MALLOC_CONF = ${MALLOC_CONF}" + echo " USE_XETLA = ${USE_XETLA}" + echo " ENABLE_SDP_FUSION = ${ENABLE_SDP_FUSION}" + echo " SYCL_CACHE_PERSISTENT = ${SYCL_CACHE_PERSISTENT}" + echo " BIGDL_LLM_XMX_DISABLED = ${BIGDL_LLM_XMX_DISABLED}" echo " SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS = ${SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS}" + echo "+++++++++++++++++++++++++" } function display-help { - echo "Usage: source ipex-llm-init [-o] [--option]" + echo "Usage: source ipex-llm-init [--option]" echo "" echo "ipex-llm-init is a tool to automatically configure and run the subcommand under" echo "environment variables for accelerating IPEX-LLM." @@ -73,6 +86,7 @@ function display-help { echo " -c, --disable-allocator Use the system default allocator" echo " -g, --gpu Enable OneAPI and other settings for GPU support" echo " -d, --debug Print all internal and exported variables (for debug)" + echo " --device Specify the device type (Max, Flex, Arc, iGPU)" } function display-error { @@ -88,75 +102,50 @@ disable_tcmalloc LD_PRELOAD="" OPTIND=1 +DEVICE="" -while getopts "hojtcgd:-:" opt; do - case ${opt} in - - ) - case "${OPTARG}" in - help) - display-help - return 0 - ;; - gomp) - disable_iomp - ;; - jemalloc) - enable_jemalloc - ;; - tcmalloc) - enable_tcmalloc - ;; - disable-allocator) - disable_jemalloc - disable_tcmalloc - ;; - gpu) - enable_gpu - ;; - debug) - display-var - return 0 - ;; - *) - display-error $OPTARG - return 1 - ;; - esac - ;; - - h ) +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) display-help return 0 ;; - o ) + -o|--gomp) disable_iomp + shift ;; - j ) + -j|--jemalloc) enable_jemalloc + shift ;; - t ) + -t|--tcmalloc) enable_tcmalloc + shift ;; - c ) + -c|--disable-allocator) disable_jemalloc disable_tcmalloc + shift ;; - g ) + -g|--gpu) enable_gpu + shift ;; - d ) + -d|--debug) display-var return 0 ;; - \? ) - display-error $OPTARG + --device) + DEVICE="$2" + shift 2 + ;; + *) + display-error "$1" return 1 ;; esac done -shift $((OPTIND -1)) - # Find ipex-llm-init dir if [ ! -z $BASH_SOURCE ]; then # using bash @@ -175,7 +164,6 @@ else fi LIB_DIR=$(dirname ${BIN_DIR})/lib -LLM_DIR=$(dirname $(python3 -c "import ipex_llm; print(ipex_llm.__file__)")) if [ "${ENABLE_IOMP}" -eq 1 ]; then file="${LIB_DIR}/libiomp5.so" @@ -188,6 +176,48 @@ else unset OMP_NUM_THREADS fi +if [ "${ENABLE_GPU}" -eq 1 ]; then + file="/opt/intel/oneapi/setvars.sh" + if [ -f "${file}" ]; then + echo "found oneapi in ${file}" + # set +xv + source "${file}" --force + # set -xv + case "${DEVICE}" in + Max|MAX) + conda install -c conda-forge -y gperftools=2.10 + export LD_PRELOAD=$(echo ${LD_PRELOAD} ${CONDA_PREFIX}/lib/libtcmalloc.so) + export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + export SYCL_CACHE_PERSISTENT=1 + export ENABLE_SDP_FUSION=1 + ;; + Flex|FLEX|Arc|ARC) + export USE_XETLA=OFF + export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 + export SYCL_CACHE_PERSISTENT=1 + ;; + iGPU|IGPU|MTL) + export SYCL_CACHE_PERSISTENT=1 + export BIGDL_LLM_XMX_DISABLED=1 + ;; + *) + echo "Error: Invalid device type specified for GPU." + echo "" + display-help + return 1 + ;; + esac + else + echo "Error: ${file} not found" + return 1 + fi +else + unset_gpu_envs +fi + + +LLM_DIR=$(dirname $(python3 -c "import ipex_llm; print(ipex_llm.__file__)")) + if [ "${ENABLE_JEMALLOC}" -eq 1 ]; then file="${LLM_DIR}/libs/libjemalloc.so" if [ -f ${file} ]; then @@ -200,6 +230,9 @@ else fi if [ "${ENABLE_TCMALLOC}" -eq 1 ]; then + if [ "${DEVICE}" = "Arc" ] || [ "${DEVICE}" = "ARC" ]; then + echo "Warning: We do not recommend enabling tcmalloc on ARC, as this will cause segmentation fault" + fi file="${LLM_DIR}/libs/libtcmalloc.so" if [ -f ${file} ]; then echo "found tcmalloc in ${file}" @@ -207,37 +240,7 @@ if [ "${ENABLE_TCMALLOC}" -eq 1 ]; then fi fi -if [ "${ENABLE_GPU}" -eq 1 ]; then - for file in {"~","/opt"}"/intel/oneapi/setvars.sh"; do - if [ -f ${file} ]; then - echo "found oneapi in ${file}" - source ${file} - export USE_XETLA=OFF - export ENABLE_SDP_FUSION=1 - export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 - break - fi - done -else - unset USE_XETLA - unset ENABLE_SDP_FUSION - unset SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS -fi - export LD_PRELOAD=${LD_PRELOAD} -echo "+++++ Env Variables +++++" -echo "LD_PRELOAD = ${LD_PRELOAD}" -if [ "${ENABLE_IOMP}" -eq 1 ]; then - echo "OMP_NUM_THREADS = ${OMP_NUM_THREADS}" -fi -if [ "${ENABLE_JEMALLOC}" -eq 1 ]; then - echo "MALLOC_CONF = ${MALLOC_CONF}" -fi -if [ "${ENABLE_GPU}" -eq 1 ]; then - echo "USE_XETLA = ${USE_XETLA}" - echo "ENABLE_SDP_FUSION = ${ENABLE_SDP_FUSION}" - echo "SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS = ${SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS}" -fi -echo "+++++++++++++++++++++++++" +display-var echo "Complete."