[LLM] Add chatglm support for llm-cli (#8641)

* add chatglm build
* add llm-cli support
* update git
* install cmake
* add ut for chatglm
* add files to setup
fix bug causing a permission error when the shared folder lacks the file
This commit is contained in:
xingyuan li 2023-08-01 14:30:17 +09:00 committed by GitHub
parent d6cbfc6d2c
commit cdfbe652ca
5 changed files with 111 additions and 4 deletions

View file

@ -41,10 +41,18 @@ jobs:
- name: Set access token - name: Set access token
run: | run: |
echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV" echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV"
- name: Update Git
shell: bash
run: |
# The stock git on this RHEL/CentOS 7 runner is presumably too old for
# actions/checkout with submodules: 'recursive' -- TODO confirm; swap it
# for the newer build from the Endpoint RHEL 7 package repository.
yum -y remove git
yum -y remove git-*
# '|| true': do not fail the job if the repo RPM is already installed.
yum -y install https://packages.endpointdev.com/rhel/7/os/x86_64/endpoint-repo.x86_64.rpm || true
yum -y install git
- uses: actions/checkout@v3 - uses: actions/checkout@v3
with: with:
repository: "intel-analytics/llm.cpp" repository: "intel-analytics/llm.cpp"
token: ${{ env.github_access_token }} token: ${{ env.github_access_token }}
submodules: 'recursive'
- name: Install Build Environment - name: Install Build Environment
shell: bash shell: bash
run: | run: |
@ -72,6 +80,33 @@ jobs:
mv build/main-starcoder release/main-starcoder_avx2 mv build/main-starcoder release/main-starcoder_avx2
mv build/quantize-starcoder release/quantize-starcoder mv build/quantize-starcoder release/quantize-starcoder
mv build/libstarcoder.so release/libstarcoder_avx2.so mv build/libstarcoder.so release/libstarcoder_avx2.so
- name: Download cmake
shell: bash
run: |
export http_proxy=${HTTP_PROXY}
export https_proxy=${HTTPS_PROXY}
cd ..
# Expected SHA-256 of the CMake 3.27.1 Linux x86_64 release tarball.
cmake_sha256="9fef63e1cf87cab1153f9433045df2e43c336e462518b0f5e52d2cc91d762cff"
cmake_tgz="cmake-3.27.1-linux-x86_64.tar.gz"
# Re-download when the archive is missing or fails the checksum. Use
# 'wget -O' so the corrupt file is overwritten in place: a plain wget
# saves the fresh copy as "$cmake_tgz.1" and tar would still unpack
# the old, corrupt archive.
if [ ! -f "$cmake_tgz" ] || [ "$(sha256sum "$cmake_tgz" | awk '{print $1}')" != "$cmake_sha256" ]; then
wget -O "$cmake_tgz" "https://github.com/Kitware/CMake/releases/download/v3.27.1/$cmake_tgz"
fi
tar zxvf "$cmake_tgz"
- name: Build Chatglm
shell: bash
run: |
# Use the CMake unpacked next to the workspace by the previous step
# (presumably the runner's system cmake is too old -- TODO confirm),
# and build with the devtoolset-11 toolchain via scl.
cmake_path="$(pwd)/../cmake-3.27.1-linux-x86_64/bin/cmake"
cd src/chatglm
scl enable devtoolset-11 "$cmake_path -B build"
scl enable devtoolset-11 "$cmake_path --build build --config Release"
- name: Move Chatglm binaries
shell: bash
run: |
# Rename to the main-chatglm_vnni artifact name that setup.py's
# linux_binarys list expects.
mv src/chatglm/build/main release/main-chatglm_vnni
- name: Archive build files - name: Archive build files
uses: actions/upload-artifact@v3 uses: actions/upload-artifact@v3
with: with:
@ -102,10 +137,18 @@ jobs:
- name: Set access token - name: Set access token
run: | run: |
echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV" echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV"
- name: Update Git
shell: bash
run: |
# The stock git on this RHEL/CentOS 7 runner is presumably too old for
# actions/checkout with submodules: 'recursive' -- TODO confirm; swap it
# for the newer build from the Endpoint RHEL 7 package repository.
yum -y remove git
yum -y remove git-*
# '|| true': do not fail the job if the repo RPM is already installed.
yum -y install https://packages.endpointdev.com/rhel/7/os/x86_64/endpoint-repo.x86_64.rpm || true
yum -y install git
- uses: actions/checkout@v3 - uses: actions/checkout@v3
with: with:
repository: "intel-analytics/llm.cpp" repository: "intel-analytics/llm.cpp"
token: ${{ env.github_access_token }} token: ${{ env.github_access_token }}
submodules: 'recursive'
- name: Install Build Environment - name: Install Build Environment
shell: bash shell: bash
run: | run: |
@ -133,6 +176,28 @@ jobs:
mv build/main-starcoder release/main-starcoder_avx512 mv build/main-starcoder release/main-starcoder_avx512
mv build/quantize-starcoder release/quantize-starcoder_avx512 mv build/quantize-starcoder release/quantize-starcoder_avx512
mv build/libstarcoder.so release/libstarcoder_avx512.so mv build/libstarcoder.so release/libstarcoder_avx512.so
# - name: Download cmake
# shell: bash
# run: |
# export http_proxy=${HTTP_PROXY}
# export https_proxy=${HTTPS_PROXY}
# cd ..
# if [ -f "cmake-3.27.1-linux-x86_64.tar.gz" ]; then
# actual_sha256=$(sha256sum "cmake-3.27.1-linux-x86_64.tar.gz" | awk '{print $1}')
# if [ "$actual_sha256" != "9fef63e1cf87cab1153f9433045df2e43c336e462518b0f5e52d2cc91d762cff" ]; then
# wget https://github.com/Kitware/CMake/releases/download/v3.27.1/cmake-3.27.1-linux-x86_64.tar.gz
# fi
# else
# wget https://github.com/Kitware/CMake/releases/download/v3.27.1/cmake-3.27.1-linux-x86_64.tar.gz
# fi
# tar zxvf cmake-3.27.1-linux-x86_64.tar.gz
# - name: Build Chatglm
# shell: bash
# run: |
# cmake_path="$(pwd)/../cmake-3.27.1-linux-x86_64/bin/cmake"
# cd src/chatglm
# scl enable devtoolset-11 "$cmake_path -B build"
# scl enable devtoolset-11 "$cmake_path --build build --config Release"
- name: Archive build files - name: Archive build files
uses: actions/upload-artifact@v3 uses: actions/upload-artifact@v3
with: with:
@ -168,6 +233,7 @@ jobs:
with: with:
repository: "intel-analytics/llm.cpp" repository: "intel-analytics/llm.cpp"
token: ${{ env.github_access_token }} token: ${{ env.github_access_token }}
submodules: 'recursive'
- name: Add msbuild to PATH - name: Add msbuild to PATH
uses: microsoft/setup-msbuild@v1.1 uses: microsoft/setup-msbuild@v1.1
with: with:
@ -210,6 +276,7 @@ jobs:
with: with:
repository: "intel-analytics/llm.cpp" repository: "intel-analytics/llm.cpp"
token: ${{ env.github_access_token }} token: ${{ env.github_access_token }}
submodules: 'recursive'
- name: Add msbuild to PATH - name: Add msbuild to PATH
uses: microsoft/setup-msbuild@v1.1 uses: microsoft/setup-msbuild@v1.1
with: with:
@ -241,7 +308,16 @@ jobs:
# mv build/Release/main-starcoder.exe release/main-starcoder_vnni.exe # mv build/Release/main-starcoder.exe release/main-starcoder_vnni.exe
mv build/Release/quantize-starcoder.exe release/quantize-starcoder_vnni.exe mv build/Release/quantize-starcoder.exe release/quantize-starcoder_vnni.exe
mv build/Release/starcoder.dll release/libstarcoder_vnni.dll mv build/Release/starcoder.dll release/libstarcoder_vnni.dll
- name: Build Chatglm
shell: powershell
run: |
cd src/chatglm
# -DAVXVNNI=ON builds the AVX-VNNI kernels, matching the *_vnni
# artifact naming used below and in setup.py's windows_binarys list.
cmake -DAVXVNNI=ON -B build
cmake --build build --config Release
- name: Move Chatglm binaries
shell: powershell
run: |
# Rename to the main-chatglm_vnni.exe artifact name that packaging expects.
mv src/chatglm/build/Release/main.exe release/main-chatglm_vnni.exe
- name: Archive build files - name: Archive build files
uses: actions/upload-artifact@v3 uses: actions/upload-artifact@v3
with: with:

View file

@ -41,6 +41,7 @@ env:
GPTNEOX_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_redpajama_7b_q4_0.bin GPTNEOX_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_redpajama_7b_q4_0.bin
BLOOM_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_bloom_7b_q4_0.bin BLOOM_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_bloom_7b_q4_0.bin
STARCODER_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_santacoder_1b_q4_0.bin STARCODER_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_santacoder_1b_q4_0.bin
CHATGLM_INT4_CKPT_PATH: ./llm/ggml-actions/stable/chatglm2-6b-q4_0.bin
LLM_DIR: ./llm LLM_DIR: ./llm
ORIGINAL_CHATGLM2_6B_PATH: ./llm/chatglm2-6b/ ORIGINAL_CHATGLM2_6B_PATH: ./llm/chatglm2-6b/
@ -101,6 +102,10 @@ jobs:
echo "Directory $STARCODER_INT4_CKPT_PATH not found. Downloading from FTP server..." echo "Directory $STARCODER_INT4_CKPT_PATH not found. Downloading from FTP server..."
wget --no-verbose $LLM_FTP_URL/${STARCODER_INT4_CKPT_PATH:2} -P $INT4_CKPT_DIR wget --no-verbose $LLM_FTP_URL/${STARCODER_INT4_CKPT_PATH:2} -P $INT4_CKPT_DIR
fi fi
# The checkpoint is a single .bin file (see CHATGLM_INT4_CKPT_PATH), so
# test with -f: the original '-d' (directory) test is never true for a
# file, which forced a re-download on every workflow run.
if [ ! -f "$CHATGLM_INT4_CKPT_PATH" ]; then
echo "File $CHATGLM_INT4_CKPT_PATH not found. Downloading from FTP server..."
wget --no-verbose $LLM_FTP_URL/${CHATGLM_INT4_CKPT_PATH:2} -P $INT4_CKPT_DIR
fi
if [ ! -d $ORIGINAL_CHATGLM2_6B_PATH ]; then if [ ! -d $ORIGINAL_CHATGLM2_6B_PATH ]; then
echo "Directory $ORIGINAL_CHATGLM2_6B_PATH not found. Downloading from FTP server..." echo "Directory $ORIGINAL_CHATGLM2_6B_PATH not found. Downloading from FTP server..."
wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/${ORIGINAL_CHATGLM2_6B_PATH:2} -P $LLM_DIR wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/${ORIGINAL_CHATGLM2_6B_PATH:2} -P $LLM_DIR

View file

@ -76,6 +76,8 @@ windows_binarys = [
"quantize-gptneox_vnni.exe", "quantize-gptneox_vnni.exe",
"quantize-bloom_vnni.exe", "quantize-bloom_vnni.exe",
"quantize-starcoder_vnni.exe", "quantize-starcoder_vnni.exe",
"main-chatglm_vnni.exe",
] ]
linux_binarys = [ linux_binarys = [
"libllama_avx2.so", "libllama_avx2.so",
@ -97,7 +99,9 @@ linux_binarys = [
"main-starcoder_avx512", "main-starcoder_avx512",
"quantize-starcoder", "quantize-starcoder",
"libstarcoder_avx2.so", "libstarcoder_avx2.so",
"main-starcoder_avx2" "main-starcoder_avx2",
"main-chatglm_vnni",
] ]
@ -208,6 +212,8 @@ def setup_package():
# copy built files for github workflow # copy built files for github workflow
for built_file in glob.glob(os.path.join(github_artifact_dir, '*')): for built_file in glob.glob(os.path.join(github_artifact_dir, '*')):
print(f'Copy workflow built file: {built_file}') print(f'Copy workflow built file: {built_file}')
if change_permission:
os.chmod(built_file, 0o775)
shutil.copy(built_file, libs_dir) shutil.copy(built_file, libs_dir)
lib_urls = obtain_lib_urls() lib_urls = obtain_lib_urls()

View file

@ -28,7 +28,7 @@ function display_help {
echo "" echo ""
echo "options:" echo "options:"
echo " -h, --help show this help message" echo " -h, --help show this help message"
echo " -x, --model_family {llama,bloom,gptneox}" echo " -x, --model_family {llama,bloom,gptneox,starcoder,chatglm}"
echo " family name of model" echo " family name of model"
echo " -t N, --threads N number of threads to use during computation (default: 8)" echo " -t N, --threads N number of threads to use during computation (default: 8)"
echo " -n N, --n_predict N number of tokens to predict (default: 128, -1 = infinity)" echo " -n N, --n_predict N number of tokens to predict (default: 128, -1 = infinity)"
@ -59,6 +59,12 @@ function starcoder {
eval "$command" eval "$command"
} }
# Run the chatglm model through the VNNI-optimized binary, forwarding the
# thread count, prediction length and any remaining CLI arguments.
# Build argv as an array so paths and prompts containing spaces survive
# intact: the previous string + eval form re-split (and glob-expanded)
# every argument on whitespace.
function chatglm {
local -a cmd=("$lib_dir/main-chatglm_vnni" -t "$threads" -n "$n_predict" "${filteredArguments[@]}")
# Echo the command line for the user, then execute it.
echo "${cmd[*]}"
"${cmd[@]}"
}
# Remove model_family/x parameter # Remove model_family/x parameter
filteredArguments=() filteredArguments=()
while [[ $# -gt 0 ]]; do while [[ $# -gt 0 ]]; do
@ -99,6 +105,8 @@ elif [[ "$model_family" == "gptneox" ]]; then
gptneox gptneox
elif [[ "$model_family" == "starcoder" ]]; then elif [[ "$model_family" == "starcoder" ]]; then
starcoder starcoder
elif [[ "$model_family" == "chatglm" ]]; then
chatglm
else else
echo "Invalid model_family: $model_family" echo "Invalid model_family: $model_family"
display_help display_help

View file

@ -14,7 +14,7 @@ function Display-Help
Write-Host "" Write-Host ""
Write-Host "options:" Write-Host "options:"
Write-Host " -h, --help show this help message" Write-Host " -h, --help show this help message"
Write-Host " -x, --model_family {llama,bloom,gptneox}" Write-Host " -x, --model_family {llama,bloom,gptneox,starcoder,chatglm}"
Write-Host " family name of model" Write-Host " family name of model"
Write-Host " -t N, --threads N number of threads to use during computation (default: 8)" Write-Host " -t N, --threads N number of threads to use during computation (default: 8)"
Write-Host " -n N, --n_predict N number of tokens to predict (default: 128, -1 = infinity)" Write-Host " -n N, --n_predict N number of tokens to predict (default: 128, -1 = infinity)"
@ -53,6 +53,15 @@ function starcoder
Invoke-Expression $command Invoke-Expression $command
} }
# Run the chatglm model through the VNNI-optimized Windows binary,
# forwarding the thread count, prediction length and any remaining
# user-supplied arguments. Echoes the full command line before running it.
function chatglm
{
$exec_file = "main-chatglm_vnni.exe"
# NOTE(review): $filteredArguments expands space-joined here, so arguments
# containing spaces would be re-split by Invoke-Expression -- confirm
# whether quoted prompts need to pass through intact.
$command = "$lib_dir/$exec_file -t $threads -n $n_predict $filteredArguments"
Write-Host "$command"
Invoke-Expression $command
}
# Remove model_family/x parameter # Remove model_family/x parameter
$filteredArguments = @() $filteredArguments = @()
for ($i = 0; $i -lt $args.Length; $i++) { for ($i = 0; $i -lt $args.Length; $i++) {
@ -95,6 +104,9 @@ switch ($model_family)
"starcoder" { "starcoder" {
starcoder starcoder
} }
"chatglm" {
chatglm
}
default { default {
Write-Host "Invalid model_family: $model_family" Write-Host "Invalid model_family: $model_family"
Display-Help Display-Help