diff --git a/.github/workflows/llm-binary-build.yml b/.github/workflows/llm-binary-build.yml
index 9df8b36a..91dc9606 100644
--- a/.github/workflows/llm-binary-build.yml
+++ b/.github/workflows/llm-binary-build.yml
@@ -41,10 +41,18 @@ jobs:
       - name: Set access token
         run: |
           echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV"
+      - name: Update Git
+        shell: bash
+        run: |
+          yum -y remove git
+          yum -y remove git-*
+          yum -y install https://packages.endpointdev.com/rhel/7/os/x86_64/endpoint-repo.x86_64.rpm || true
+          yum -y install git
       - uses: actions/checkout@v3
         with:
           repository: "intel-analytics/llm.cpp"
           token: ${{ env.github_access_token }}
+          submodules: 'recursive'
       - name: Install Build Environment
         shell: bash
         run: |
@@ -72,6 +80,33 @@ jobs:
           mv build/main-starcoder release/main-starcoder_avx2
           mv build/quantize-starcoder release/quantize-starcoder
           mv build/libstarcoder.so release/libstarcoder_avx2.so
+      - name: Download cmake
+        shell: bash
+        run: |
+          export http_proxy=${HTTP_PROXY}
+          export https_proxy=${HTTPS_PROXY}
+          cd ..
+          if [ -f "cmake-3.27.1-linux-x86_64.tar.gz" ]; then
+            actual_sha256=$(sha256sum "cmake-3.27.1-linux-x86_64.tar.gz" | awk '{print $1}')
+            if [ "$actual_sha256" != "9fef63e1cf87cab1153f9433045df2e43c336e462518b0f5e52d2cc91d762cff" ]; then
+              wget https://github.com/Kitware/CMake/releases/download/v3.27.1/cmake-3.27.1-linux-x86_64.tar.gz
+            fi
+          else
+            wget https://github.com/Kitware/CMake/releases/download/v3.27.1/cmake-3.27.1-linux-x86_64.tar.gz
+          fi
+          tar zxvf cmake-3.27.1-linux-x86_64.tar.gz
+      - name: Build Chatglm
+        shell: bash
+        run: |
+          cmake_path="$(pwd)/../cmake-3.27.1-linux-x86_64/bin/cmake"
+          cd src/chatglm
+          scl enable devtoolset-11 "$cmake_path -B build"
+          scl enable devtoolset-11 "$cmake_path --build build --config Release"
+      - name: Move Chatglm binaries
+        shell: bash
+        run: |
+          mv src/chatglm/build/main release/main-chatglm_vnni
+
       - name: Archive build files
         uses: actions/upload-artifact@v3
         with:
@@ -102,10 +137,18 @@ jobs:
       - name: Set access token
         run: |
           echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV"
+      - name: Update Git
+        shell: bash
+        run: |
+          yum -y remove git
+          yum -y remove git-*
+          yum -y install https://packages.endpointdev.com/rhel/7/os/x86_64/endpoint-repo.x86_64.rpm || true
+          yum -y install git
       - uses: actions/checkout@v3
         with:
           repository: "intel-analytics/llm.cpp"
           token: ${{ env.github_access_token }}
+          submodules: 'recursive'
       - name: Install Build Environment
         shell: bash
         run: |
@@ -133,6 +176,28 @@ jobs:
           mv build/main-starcoder release/main-starcoder_avx512
           mv build/quantize-starcoder release/quantize-starcoder_avx512
           mv build/libstarcoder.so release/libstarcoder_avx512.so
+      # - name: Download cmake
+      #   shell: bash
+      #   run: |
+      #     export http_proxy=${HTTP_PROXY}
+      #     export https_proxy=${HTTPS_PROXY}
+      #     cd ..
+      #     if [ -f "cmake-3.27.1-linux-x86_64.tar.gz" ]; then
+      #       actual_sha256=$(sha256sum "cmake-3.27.1-linux-x86_64.tar.gz" | awk '{print $1}')
+      #       if [ "$actual_sha256" != "9fef63e1cf87cab1153f9433045df2e43c336e462518b0f5e52d2cc91d762cff" ]; then
+      #         wget https://github.com/Kitware/CMake/releases/download/v3.27.1/cmake-3.27.1-linux-x86_64.tar.gz
+      #       fi
+      #     else
+      #       wget https://github.com/Kitware/CMake/releases/download/v3.27.1/cmake-3.27.1-linux-x86_64.tar.gz
+      #     fi
+      #     tar zxvf cmake-3.27.1-linux-x86_64.tar.gz
+      # - name: Build Chatglm
+      #   shell: bash
+      #   run: |
+      #     cmake_path="$(pwd)/../cmake-3.27.1-linux-x86_64/bin/cmake"
+      #     cd src/chatglm
+      #     scl enable devtoolset-11 "$cmake_path -B build"
+      #     scl enable devtoolset-11 "$cmake_path --build build --config Release"
       - name: Archive build files
         uses: actions/upload-artifact@v3
         with:
@@ -168,6 +233,7 @@ jobs:
         with:
           repository: "intel-analytics/llm.cpp"
           token: ${{ env.github_access_token }}
+          submodules: 'recursive'
       - name: Add msbuild to PATH
         uses: microsoft/setup-msbuild@v1.1
         with:
@@ -210,6 +276,7 @@ jobs:
         with:
           repository: "intel-analytics/llm.cpp"
           token: ${{ env.github_access_token }}
+          submodules: 'recursive'
       - name: Add msbuild to PATH
         uses: microsoft/setup-msbuild@v1.1
         with:
@@ -241,7 +308,16 @@ jobs:
           # mv build/Release/main-starcoder.exe release/main-starcoder_vnni.exe
           mv build/Release/quantize-starcoder.exe release/quantize-starcoder_vnni.exe
           mv build/Release/starcoder.dll release/libstarcoder_vnni.dll
-
+      - name: Build Chatglm
+        shell: powershell
+        run: |
+          cd src/chatglm
+          cmake -DAVXVNNI=ON -B build
+          cmake --build build --config Release
+      - name: Move Chatglm binaries
+        shell: powershell
+        run: |
+          mv src/chatglm/build/Release/main.exe release/main-chatglm_vnni.exe
       - name: Archive build files
         uses: actions/upload-artifact@v3
         with:
diff --git a/.github/workflows/llm_unit_tests_linux.yml b/.github/workflows/llm_unit_tests_linux.yml
index e4a99514..91ffbe09 100644
--- a/.github/workflows/llm_unit_tests_linux.yml
+++ b/.github/workflows/llm_unit_tests_linux.yml
@@ -41,6 +41,7 @@ env:
   GPTNEOX_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_redpajama_7b_q4_0.bin
   BLOOM_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_bloom_7b_q4_0.bin
   STARCODER_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_santacoder_1b_q4_0.bin
+  CHATGLM_INT4_CKPT_PATH: ./llm/ggml-actions/stable/chatglm2-6b-q4_0.bin
   LLM_DIR: ./llm
 
   ORIGINAL_CHATGLM2_6B_PATH: ./llm/chatglm2-6b/
@@ -101,6 +102,10 @@ jobs:
             echo "Directory $STARCODER_INT4_CKPT_PATH not found. Downloading from FTP server..."
             wget --no-verbose $LLM_FTP_URL/${STARCODER_INT4_CKPT_PATH:2} -P $INT4_CKPT_DIR
           fi
+          if [ ! -f $CHATGLM_INT4_CKPT_PATH ]; then
+            echo "File $CHATGLM_INT4_CKPT_PATH not found. Downloading from FTP server..."
+            wget --no-verbose $LLM_FTP_URL/${CHATGLM_INT4_CKPT_PATH:2} -P $INT4_CKPT_DIR
+          fi
           if [ ! -d $ORIGINAL_CHATGLM2_6B_PATH ]; then
             echo "Directory $ORIGINAL_CHATGLM2_6B_PATH not found. Downloading from FTP server..."
             wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/${ORIGINAL_CHATGLM2_6B_PATH:2} -P $LLM_DIR
diff --git a/python/llm/setup.py b/python/llm/setup.py
index 99bc2f4c..536ae5c4 100644
--- a/python/llm/setup.py
+++ b/python/llm/setup.py
@@ -76,6 +76,8 @@ windows_binarys = [
     "quantize-gptneox_vnni.exe",
     "quantize-bloom_vnni.exe",
     "quantize-starcoder_vnni.exe",
+
+    "main-chatglm_vnni.exe",
 ]
 linux_binarys = [
     "libllama_avx2.so",
@@ -97,7 +99,9 @@ linux_binarys = [
     "main-starcoder_avx512",
     "quantize-starcoder",
     "libstarcoder_avx2.so",
-    "main-starcoder_avx2"
+    "main-starcoder_avx2",
+
+    "main-chatglm_vnni",
 ]
 
 
@@ -208,6 +212,8 @@ def setup_package():
     # copy built files for github workflow
     for built_file in glob.glob(os.path.join(github_artifact_dir, '*')):
         print(f'Copy workflow built file: {built_file}')
+        if change_permission:
+            os.chmod(built_file, 0o775)
         shutil.copy(built_file, libs_dir)
 
     lib_urls = obtain_lib_urls()
diff --git a/python/llm/src/bigdl/llm/cli/llm-cli b/python/llm/src/bigdl/llm/cli/llm-cli
index 6fd86cae..2e753642 100755
--- a/python/llm/src/bigdl/llm/cli/llm-cli
+++ b/python/llm/src/bigdl/llm/cli/llm-cli
@@ -28,7 +28,7 @@ function display_help {
   echo ""
   echo "options:"
   echo "  -h, --help           show this help message"
-  echo "  -x, --model_family {llama,bloom,gptneox}"
+  echo "  -x, --model_family {llama,bloom,gptneox,starcoder,chatglm}"
   echo "                       family name of model"
   echo "  -t N, --threads N    number of threads to use during computation (default: 8)"
   echo "  -n N, --n_predict N  number of tokens to predict (default: 128, -1 = infinity)"
@@ -59,6 +59,12 @@ function starcoder {
   eval "$command"
 }
 
+function chatglm {
+  command="$lib_dir/main-chatglm_vnni -t $threads -n $n_predict ${filteredArguments[*]}"
+  echo "$command"
+  eval "$command"
+}
+
 # Remove model_family/x parameter
 filteredArguments=()
 while [[ $# -gt 0 ]]; do
@@ -99,6 +105,8 @@ elif [[ "$model_family" == "gptneox" ]]; then
   gptneox
 elif [[ "$model_family" == "starcoder" ]]; then
   starcoder
+elif [[ "$model_family" == "chatglm" ]]; then
+  chatglm
 else
   echo "Invalid model_family: $model_family"
   display_help
diff --git a/python/llm/src/bigdl/llm/cli/llm-cli.ps1 b/python/llm/src/bigdl/llm/cli/llm-cli.ps1
index 40160e10..c30138c2 100755
--- a/python/llm/src/bigdl/llm/cli/llm-cli.ps1
+++ b/python/llm/src/bigdl/llm/cli/llm-cli.ps1
@@ -14,7 +14,7 @@ function Display-Help
   Write-Host ""
   Write-Host "options:"
   Write-Host "  -h, --help           show this help message"
-  Write-Host "  -x, --model_family {llama,bloom,gptneox}"
+  Write-Host "  -x, --model_family {llama,bloom,gptneox,starcoder,chatglm}"
   Write-Host "                       family name of model"
   Write-Host "  -t N, --threads N    number of threads to use during computation (default: 8)"
   Write-Host "  -n N, --n_predict N  number of tokens to predict (default: 128, -1 = infinity)"
@@ -53,6 +53,15 @@ function starcoder
   Invoke-Expression $command
 }
 
+function chatglm
+{
+  $exec_file = "main-chatglm_vnni.exe"
+  $command = "$lib_dir/$exec_file -t $threads -n $n_predict $filteredArguments"
+  Write-Host "$command"
+  Invoke-Expression $command
+}
+
+
 # Remove model_family/x parameter
 $filteredArguments = @()
 for ($i = 0; $i -lt $args.Length; $i++) {
@@ -95,6 +104,9 @@ switch ($model_family)
   "starcoder" {
     starcoder
   }
+  "chatglm" {
+    chatglm
+  }
   default {
     Write-Host "Invalid model_family: $model_family"
     Display-Help