[LLM] Add chatglm support for llm-cli (#8641)

* add chatglm build * add llm-cli support * update git * install cmake * add ut for chatglm * add files to setup * fix bug causing a permission error when sf lacks a file
2023-08-01 14:30:17 +09:00 · 2023-08-01 14:30:17 +09:00 · cdfbe652ca
commit cdfbe652ca
parent d6cbfc6d2c
5 changed files with 111 additions and 4 deletions
--- a/.github/workflows/llm-binary-build.yml
+++ b/.github/workflows/llm-binary-build.yml
@ -41,10 +41,18 @@ jobs:
      - name: Set access token
        run: |
          echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV"
+      - name: Update Git
+        shell: bash
+        run: |
+          yum -y remove git
+          yum -y remove 'git-*'  # quoted so the shell hands the glob to yum unexpanded
+          yum -y install https://packages.endpointdev.com/rhel/7/os/x86_64/endpoint-repo.x86_64.rpm || true  # best-effort: a failure here is ignored
+          yum -y install git
      - uses: actions/checkout@v3
        with:
          repository: "intel-analytics/llm.cpp"
          token: ${{ env.github_access_token }}
+          submodules: 'recursive'
      - name: Install Build Environment
        shell: bash
        run: |
@ -72,6 +80,33 @@ jobs:
          mv build/main-starcoder release/main-starcoder_avx2
          mv build/quantize-starcoder release/quantize-starcoder
          mv build/libstarcoder.so release/libstarcoder_avx2.so
+      - name: Download cmake
+        shell: bash
+        run: |
+          export http_proxy=${HTTP_PROXY}
+          export https_proxy=${HTTPS_PROXY}
+          cd ..  # the tarball lives one level above the checkout and is reused when already present
+          if [ -f "cmake-3.27.1-linux-x86_64.tar.gz" ]; then
+            actual_sha256=$(sha256sum "cmake-3.27.1-linux-x86_64.tar.gz" | awk '{print $1}')
+            if [ "$actual_sha256" != "9fef63e1cf87cab1153f9433045df2e43c336e462518b0f5e52d2cc91d762cff" ]; then  # bad copy: overwrite it (plain wget would save to "<name>.1")
+              wget -O cmake-3.27.1-linux-x86_64.tar.gz https://github.com/Kitware/CMake/releases/download/v3.27.1/cmake-3.27.1-linux-x86_64.tar.gz
+            fi
+          else
+            wget https://github.com/Kitware/CMake/releases/download/v3.27.1/cmake-3.27.1-linux-x86_64.tar.gz
+          fi
+          tar zxvf cmake-3.27.1-linux-x86_64.tar.gz
+      - name: Build Chatglm
+        shell: bash
+        run: |
+          cmake_bin="${PWD}/../cmake-3.27.1-linux-x86_64/bin/cmake"  # absolute path, resolved before changing directory
+          cd src/chatglm
+          scl enable devtoolset-11 "${cmake_bin} -B build"
+          scl enable devtoolset-11 "${cmake_bin} --build build --config Release"
+      - name: Move Chatglm binaries
+        shell: bash
+        run: |
+          mv src/chatglm/build/main release/main-chatglm_vnni
+
      - name: Archive build files
        uses: actions/upload-artifact@v3
        with:
@ -102,10 +137,18 @@ jobs:
      - name: Set access token
        run: |
          echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV"
+      - name: Update Git
+        shell: bash
+        run: |
+          yum -y remove git
+          yum -y remove 'git-*'  # quoted so the shell hands the glob to yum unexpanded
+          yum -y install https://packages.endpointdev.com/rhel/7/os/x86_64/endpoint-repo.x86_64.rpm || true  # best-effort: a failure here is ignored
+          yum -y install git
      - uses: actions/checkout@v3
        with:
          repository: "intel-analytics/llm.cpp"
          token: ${{ env.github_access_token }}
+          submodules: 'recursive'
      - name: Install Build Environment
        shell: bash
        run: |
@ -133,6 +176,28 @@ jobs:
          mv build/main-starcoder release/main-starcoder_avx512
          mv build/quantize-starcoder release/quantize-starcoder_avx512
          mv build/libstarcoder.so release/libstarcoder_avx512.so
+      # - name: Download cmake
+      #   shell: bash
+      #   run: |
+      #     export http_proxy=${HTTP_PROXY}
+      #     export https_proxy=${HTTPS_PROXY}
+      #     cd ..
+      #     if [ -f "cmake-3.27.1-linux-x86_64.tar.gz" ]; then
+      #       actual_sha256=$(sha256sum "cmake-3.27.1-linux-x86_64.tar.gz" | awk '{print $1}')
+      #       if [ "$actual_sha256" != "9fef63e1cf87cab1153f9433045df2e43c336e462518b0f5e52d2cc91d762cff" ]; then
+      #         wget https://github.com/Kitware/CMake/releases/download/v3.27.1/cmake-3.27.1-linux-x86_64.tar.gz
+      #       fi
+      #     else
+      #       wget https://github.com/Kitware/CMake/releases/download/v3.27.1/cmake-3.27.1-linux-x86_64.tar.gz
+      #     fi
+      #     tar zxvf cmake-3.27.1-linux-x86_64.tar.gz
+      # - name: Build Chatglm
+      #   shell: bash
+      #   run: |
+      #     cmake_path="$(pwd)/../cmake-3.27.1-linux-x86_64/bin/cmake"
+      #     cd src/chatglm
+      #     scl enable devtoolset-11 "$cmake_path -B build"
+      #     scl enable devtoolset-11 "$cmake_path --build build --config Release"
      - name: Archive build files
        uses: actions/upload-artifact@v3
        with:
@ -168,6 +233,7 @@ jobs:
        with:
          repository: "intel-analytics/llm.cpp"
          token: ${{ env.github_access_token }}
+          submodules: 'recursive'
      - name: Add msbuild to PATH
        uses: microsoft/setup-msbuild@v1.1
        with:
@ -210,6 +276,7 @@ jobs:
        with:
          repository: "intel-analytics/llm.cpp"
          token: ${{ env.github_access_token }}
+          submodules: 'recursive'
      - name: Add msbuild to PATH
        uses: microsoft/setup-msbuild@v1.1
        with:
@ -241,7 +308,16 @@ jobs:
          # mv build/Release/main-starcoder.exe release/main-starcoder_vnni.exe
          mv build/Release/quantize-starcoder.exe release/quantize-starcoder_vnni.exe
          mv build/Release/starcoder.dll release/libstarcoder_vnni.dll
-
+      - name: Build Chatglm
+        shell: powershell
+        run: |
+          Set-Location src/chatglm
+          cmake -B build -DAVXVNNI=ON  # configure into ./build with the AVXVNNI option on
+          cmake --build build --config Release
+      - name: Move Chatglm binaries
+        shell: powershell
+        run: |
+          mv src/chatglm/build/Release/main.exe release/main-chatglm_vnni.exe
      - name: Archive build files
        uses: actions/upload-artifact@v3
        with:
--- a/.github/workflows/llm_unit_tests_linux.yml
+++ b/.github/workflows/llm_unit_tests_linux.yml
@ -41,6 +41,7 @@ env:
  GPTNEOX_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_redpajama_7b_q4_0.bin
  BLOOM_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_bloom_7b_q4_0.bin
  STARCODER_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_santacoder_1b_q4_0.bin
+  CHATGLM_INT4_CKPT_PATH: ./llm/ggml-actions/stable/chatglm2-6b-q4_0.bin

  LLM_DIR: ./llm
  ORIGINAL_CHATGLM2_6B_PATH: ./llm/chatglm2-6b/
@ -101,6 +102,10 @@ jobs:
            echo "Directory $STARCODER_INT4_CKPT_PATH not found. Downloading from FTP server..."
            wget --no-verbose $LLM_FTP_URL/${STARCODER_INT4_CKPT_PATH:2} -P $INT4_CKPT_DIR
          fi
+          if [ ! -f "$CHATGLM_INT4_CKPT_PATH" ]; then  # the checkpoint is a single .bin file, so test with -f (-d is never true for it)
+            echo "File $CHATGLM_INT4_CKPT_PATH not found. Downloading from FTP server..."
+            wget --no-verbose $LLM_FTP_URL/${CHATGLM_INT4_CKPT_PATH:2} -P $INT4_CKPT_DIR
+          fi
          if [ ! -d $ORIGINAL_CHATGLM2_6B_PATH ]; then
            echo "Directory $ORIGINAL_CHATGLM2_6B_PATH not found. Downloading from FTP server..."
            wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/${ORIGINAL_CHATGLM2_6B_PATH:2} -P $LLM_DIR
--- a/python/llm/setup.py
+++ b/python/llm/setup.py
@ -76,6 +76,8 @@ windows_binarys = [
    "quantize-gptneox_vnni.exe",
    "quantize-bloom_vnni.exe",
    "quantize-starcoder_vnni.exe",
+    
+    "main-chatglm_vnni.exe",
 ]
 linux_binarys = [
    "libllama_avx2.so",
@ -97,7 +99,9 @@ linux_binarys = [
    "main-starcoder_avx512",
    "quantize-starcoder",
    "libstarcoder_avx2.so",
-    "main-starcoder_avx2"
+    "main-starcoder_avx2",
+    
+    "main-chatglm_vnni",
 ]


@ -208,6 +212,8 @@ def setup_package():
    # copy built files for github workflow
    for built_file in glob.glob(os.path.join(github_artifact_dir, '*')):
        print(f'Copy workflow built file: {built_file}')
+        if change_permission:
+            os.chmod(built_file, 0o775)
        shutil.copy(built_file, libs_dir)

    lib_urls = obtain_lib_urls()
--- a/python/llm/src/bigdl/llm/cli/llm-cli
+++ b/python/llm/src/bigdl/llm/cli/llm-cli
@ -28,7 +28,7 @@ function display_help {
  echo ""
  echo "options:"
  echo "  -h, --help           show this help message"
-  echo "  -x, --model_family {llama,bloom,gptneox}"
+  echo "  -x, --model_family {llama,bloom,gptneox,starcoder,chatglm}"
  echo "                       family name of model"
  echo "  -t N, --threads N    number of threads to use during computation (default: 8)"
  echo "  -n N, --n_predict N  number of tokens to predict (default: 128, -1 = infinity)"
@ -59,6 +59,12 @@ function starcoder {
  eval "$command"
 }

+function chatglm {
+  command="$lib_dir/main-chatglm_vnni -t $threads -n $n_predict ${filteredArguments[*]}"
+  echo "$command"
+  eval "$command"
+}
+
 # Remove model_family/x parameter
 filteredArguments=()
 while [[ $# -gt 0 ]]; do
@ -99,6 +105,8 @@ elif [[ "$model_family" == "gptneox" ]]; then
  gptneox
 elif [[ "$model_family" == "starcoder" ]]; then
  starcoder
+elif [[ "$model_family" == "chatglm" ]]; then
+  chatglm
 else
  echo "Invalid model_family: $model_family"
  display_help
--- a/python/llm/src/bigdl/llm/cli/llm-cli.ps1
+++ b/python/llm/src/bigdl/llm/cli/llm-cli.ps1
@ -14,7 +14,7 @@ function Display-Help
    Write-Host ""
    Write-Host "options:"
    Write-Host "  -h, --help           show this help message"
-    Write-Host "  -x, --model_family {llama,bloom,gptneox}"
+    Write-Host "  -x, --model_family {llama,bloom,gptneox,starcoder,chatglm}"
    Write-Host "                       family name of model"
    Write-Host "  -t N, --threads N    number of threads to use during computation (default: 8)"
    Write-Host "  -n N, --n_predict N  number of tokens to predict (default: 128, -1 = infinity)"
@ -53,6 +53,15 @@ function starcoder
    Invoke-Expression $command
 }

+function chatglm
+{
+    # Assemble the chatglm command line, echo it, then execute it.
+    $cmdline = "$lib_dir/main-chatglm_vnni.exe -t $threads -n $n_predict $filteredArguments"
+    Write-Host $cmdline
+    Invoke-Expression $cmdline
+}
+
+
 # Remove model_family/x parameter
 $filteredArguments = @()
 for ($i = 0; $i -lt $args.Length; $i++) {
@ -95,6 +104,9 @@ switch ($model_family)
    "starcoder" {
        starcoder
    }
+    "chatglm" {
+        chatglm
+    }
    default {
        Write-Host "Invalid model_family: $model_family"
        Display-Help