[LLM] Add chatglm support for llm-cli (#8641)
* add chatglm build
* add llm-cli support
* update git
* install cmake
* add ut for chatglm
* add files to setup
* fix bug causing a permission error when sf lacks a file
This commit is contained in:
parent
d6cbfc6d2c
commit
cdfbe652ca
5 changed files with 111 additions and 4 deletions
78
.github/workflows/llm-binary-build.yml
vendored
78
.github/workflows/llm-binary-build.yml
vendored
|
|
@ -41,10 +41,18 @@ jobs:
|
||||||
- name: Set access token
|
- name: Set access token
|
||||||
run: |
|
run: |
|
||||||
echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV"
|
echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV"
|
||||||
|
- name: Update Git
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
yum -y remove git
|
||||||
|
yum -y remove git-*
|
||||||
|
yum -y install https://packages.endpointdev.com/rhel/7/os/x86_64/endpoint-repo.x86_64.rpm || true
|
||||||
|
yum -y install git
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v3
|
||||||
with:
|
with:
|
||||||
repository: "intel-analytics/llm.cpp"
|
repository: "intel-analytics/llm.cpp"
|
||||||
token: ${{ env.github_access_token }}
|
token: ${{ env.github_access_token }}
|
||||||
|
submodules: 'recursive'
|
||||||
- name: Install Build Environment
|
- name: Install Build Environment
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
|
|
@ -72,6 +80,33 @@ jobs:
|
||||||
mv build/main-starcoder release/main-starcoder_avx2
|
mv build/main-starcoder release/main-starcoder_avx2
|
||||||
mv build/quantize-starcoder release/quantize-starcoder
|
mv build/quantize-starcoder release/quantize-starcoder
|
||||||
mv build/libstarcoder.so release/libstarcoder_avx2.so
|
mv build/libstarcoder.so release/libstarcoder_avx2.so
|
||||||
|
- name: Download cmake
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
export http_proxy=${HTTP_PROXY}
|
||||||
|
export https_proxy=${HTTPS_PROXY}
|
||||||
|
cd ..
|
||||||
|
if [ -f "cmake-3.27.1-linux-x86_64.tar.gz" ]; then
|
||||||
|
actual_sha256=$(sha256sum "cmake-3.27.1-linux-x86_64.tar.gz" | awk '{print $1}')
|
||||||
|
if [ "$actual_sha256" != "9fef63e1cf87cab1153f9433045df2e43c336e462518b0f5e52d2cc91d762cff" ]; then
|
||||||
|
wget https://github.com/Kitware/CMake/releases/download/v3.27.1/cmake-3.27.1-linux-x86_64.tar.gz
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
wget https://github.com/Kitware/CMake/releases/download/v3.27.1/cmake-3.27.1-linux-x86_64.tar.gz
|
||||||
|
fi
|
||||||
|
tar zxvf cmake-3.27.1-linux-x86_64.tar.gz
|
||||||
|
- name: Build Chatglm
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
cmake_path="$(pwd)/../cmake-3.27.1-linux-x86_64/bin/cmake"
|
||||||
|
cd src/chatglm
|
||||||
|
scl enable devtoolset-11 "$cmake_path -B build"
|
||||||
|
scl enable devtoolset-11 "$cmake_path --build build --config Release"
|
||||||
|
- name: Move Chatglm binaries
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
mv src/chatglm/build/main release/main-chatglm_vnni
|
||||||
|
|
||||||
- name: Archive build files
|
- name: Archive build files
|
||||||
uses: actions/upload-artifact@v3
|
uses: actions/upload-artifact@v3
|
||||||
with:
|
with:
|
||||||
|
|
@ -102,10 +137,18 @@ jobs:
|
||||||
- name: Set access token
|
- name: Set access token
|
||||||
run: |
|
run: |
|
||||||
echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV"
|
echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV"
|
||||||
|
- name: Update Git
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
yum -y remove git
|
||||||
|
yum -y remove git-*
|
||||||
|
yum -y install https://packages.endpointdev.com/rhel/7/os/x86_64/endpoint-repo.x86_64.rpm || true
|
||||||
|
yum -y install git
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v3
|
||||||
with:
|
with:
|
||||||
repository: "intel-analytics/llm.cpp"
|
repository: "intel-analytics/llm.cpp"
|
||||||
token: ${{ env.github_access_token }}
|
token: ${{ env.github_access_token }}
|
||||||
|
submodules: 'recursive'
|
||||||
- name: Install Build Environment
|
- name: Install Build Environment
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
|
|
@ -133,6 +176,28 @@ jobs:
|
||||||
mv build/main-starcoder release/main-starcoder_avx512
|
mv build/main-starcoder release/main-starcoder_avx512
|
||||||
mv build/quantize-starcoder release/quantize-starcoder_avx512
|
mv build/quantize-starcoder release/quantize-starcoder_avx512
|
||||||
mv build/libstarcoder.so release/libstarcoder_avx512.so
|
mv build/libstarcoder.so release/libstarcoder_avx512.so
|
||||||
|
# - name: Download cmake
|
||||||
|
# shell: bash
|
||||||
|
# run: |
|
||||||
|
# export http_proxy=${HTTP_PROXY}
|
||||||
|
# export https_proxy=${HTTPS_PROXY}
|
||||||
|
# cd ..
|
||||||
|
# if [ -f "cmake-3.27.1-linux-x86_64.tar.gz" ]; then
|
||||||
|
# actual_sha256=$(sha256sum "cmake-3.27.1-linux-x86_64.tar.gz" | awk '{print $1}')
|
||||||
|
# if [ "$actual_sha256" != "9fef63e1cf87cab1153f9433045df2e43c336e462518b0f5e52d2cc91d762cff" ]; then
|
||||||
|
# wget https://github.com/Kitware/CMake/releases/download/v3.27.1/cmake-3.27.1-linux-x86_64.tar.gz
|
||||||
|
# fi
|
||||||
|
# else
|
||||||
|
# wget https://github.com/Kitware/CMake/releases/download/v3.27.1/cmake-3.27.1-linux-x86_64.tar.gz
|
||||||
|
# fi
|
||||||
|
# tar zxvf cmake-3.27.1-linux-x86_64.tar.gz
|
||||||
|
# - name: Build Chatglm
|
||||||
|
# shell: bash
|
||||||
|
# run: |
|
||||||
|
# cmake_path="$(pwd)/../cmake-3.27.1-linux-x86_64/bin/cmake"
|
||||||
|
# cd src/chatglm
|
||||||
|
# scl enable devtoolset-11 "$cmake_path -B build"
|
||||||
|
# scl enable devtoolset-11 "$cmake_path --build build --config Release"
|
||||||
- name: Archive build files
|
- name: Archive build files
|
||||||
uses: actions/upload-artifact@v3
|
uses: actions/upload-artifact@v3
|
||||||
with:
|
with:
|
||||||
|
|
@ -168,6 +233,7 @@ jobs:
|
||||||
with:
|
with:
|
||||||
repository: "intel-analytics/llm.cpp"
|
repository: "intel-analytics/llm.cpp"
|
||||||
token: ${{ env.github_access_token }}
|
token: ${{ env.github_access_token }}
|
||||||
|
submodules: 'recursive'
|
||||||
- name: Add msbuild to PATH
|
- name: Add msbuild to PATH
|
||||||
uses: microsoft/setup-msbuild@v1.1
|
uses: microsoft/setup-msbuild@v1.1
|
||||||
with:
|
with:
|
||||||
|
|
@ -210,6 +276,7 @@ jobs:
|
||||||
with:
|
with:
|
||||||
repository: "intel-analytics/llm.cpp"
|
repository: "intel-analytics/llm.cpp"
|
||||||
token: ${{ env.github_access_token }}
|
token: ${{ env.github_access_token }}
|
||||||
|
submodules: 'recursive'
|
||||||
- name: Add msbuild to PATH
|
- name: Add msbuild to PATH
|
||||||
uses: microsoft/setup-msbuild@v1.1
|
uses: microsoft/setup-msbuild@v1.1
|
||||||
with:
|
with:
|
||||||
|
|
@ -241,7 +308,16 @@ jobs:
|
||||||
# mv build/Release/main-starcoder.exe release/main-starcoder_vnni.exe
|
# mv build/Release/main-starcoder.exe release/main-starcoder_vnni.exe
|
||||||
mv build/Release/quantize-starcoder.exe release/quantize-starcoder_vnni.exe
|
mv build/Release/quantize-starcoder.exe release/quantize-starcoder_vnni.exe
|
||||||
mv build/Release/starcoder.dll release/libstarcoder_vnni.dll
|
mv build/Release/starcoder.dll release/libstarcoder_vnni.dll
|
||||||
|
- name: Build Chatglm
|
||||||
|
shell: powershell
|
||||||
|
run: |
|
||||||
|
cd src/chatglm
|
||||||
|
cmake -DAVXVNNI=ON -B build
|
||||||
|
cmake --build build --config Release
|
||||||
|
- name: Move Chatglm binaries
|
||||||
|
shell: powershell
|
||||||
|
run: |
|
||||||
|
mv src/chatglm/build/Release/main.exe release/main-chatglm_vnni.exe
|
||||||
- name: Archive build files
|
- name: Archive build files
|
||||||
uses: actions/upload-artifact@v3
|
uses: actions/upload-artifact@v3
|
||||||
with:
|
with:
|
||||||
|
|
|
||||||
5
.github/workflows/llm_unit_tests_linux.yml
vendored
5
.github/workflows/llm_unit_tests_linux.yml
vendored
|
|
@ -41,6 +41,7 @@ env:
|
||||||
GPTNEOX_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_redpajama_7b_q4_0.bin
|
GPTNEOX_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_redpajama_7b_q4_0.bin
|
||||||
BLOOM_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_bloom_7b_q4_0.bin
|
BLOOM_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_bloom_7b_q4_0.bin
|
||||||
STARCODER_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_santacoder_1b_q4_0.bin
|
STARCODER_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_santacoder_1b_q4_0.bin
|
||||||
|
CHATGLM_INT4_CKPT_PATH: ./llm/ggml-actions/stable/chatglm2-6b-q4_0.bin
|
||||||
|
|
||||||
LLM_DIR: ./llm
|
LLM_DIR: ./llm
|
||||||
ORIGINAL_CHATGLM2_6B_PATH: ./llm/chatglm2-6b/
|
ORIGINAL_CHATGLM2_6B_PATH: ./llm/chatglm2-6b/
|
||||||
|
|
@ -101,6 +102,10 @@ jobs:
|
||||||
echo "Directory $STARCODER_INT4_CKPT_PATH not found. Downloading from FTP server..."
|
echo "Directory $STARCODER_INT4_CKPT_PATH not found. Downloading from FTP server..."
|
||||||
wget --no-verbose $LLM_FTP_URL/${STARCODER_INT4_CKPT_PATH:2} -P $INT4_CKPT_DIR
|
wget --no-verbose $LLM_FTP_URL/${STARCODER_INT4_CKPT_PATH:2} -P $INT4_CKPT_DIR
|
||||||
fi
|
fi
|
||||||
|
if [ ! -d $CHATGLM_INT4_CKPT_PATH ]; then
|
||||||
|
echo "Directory $CHATGLM_INT4_CKPT_PATH not found. Downloading from FTP server..."
|
||||||
|
wget --no-verbose $LLM_FTP_URL/${CHATGLM_INT4_CKPT_PATH:2} -P $INT4_CKPT_DIR
|
||||||
|
fi
|
||||||
if [ ! -d $ORIGINAL_CHATGLM2_6B_PATH ]; then
|
if [ ! -d $ORIGINAL_CHATGLM2_6B_PATH ]; then
|
||||||
echo "Directory $ORIGINAL_CHATGLM2_6B_PATH not found. Downloading from FTP server..."
|
echo "Directory $ORIGINAL_CHATGLM2_6B_PATH not found. Downloading from FTP server..."
|
||||||
wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/${ORIGINAL_CHATGLM2_6B_PATH:2} -P $LLM_DIR
|
wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/${ORIGINAL_CHATGLM2_6B_PATH:2} -P $LLM_DIR
|
||||||
|
|
|
||||||
|
|
@ -76,6 +76,8 @@ windows_binarys = [
|
||||||
"quantize-gptneox_vnni.exe",
|
"quantize-gptneox_vnni.exe",
|
||||||
"quantize-bloom_vnni.exe",
|
"quantize-bloom_vnni.exe",
|
||||||
"quantize-starcoder_vnni.exe",
|
"quantize-starcoder_vnni.exe",
|
||||||
|
|
||||||
|
"main-chatglm_vnni.exe",
|
||||||
]
|
]
|
||||||
linux_binarys = [
|
linux_binarys = [
|
||||||
"libllama_avx2.so",
|
"libllama_avx2.so",
|
||||||
|
|
@ -97,7 +99,9 @@ linux_binarys = [
|
||||||
"main-starcoder_avx512",
|
"main-starcoder_avx512",
|
||||||
"quantize-starcoder",
|
"quantize-starcoder",
|
||||||
"libstarcoder_avx2.so",
|
"libstarcoder_avx2.so",
|
||||||
"main-starcoder_avx2"
|
"main-starcoder_avx2",
|
||||||
|
|
||||||
|
"main-chatglm_vnni",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -208,6 +212,8 @@ def setup_package():
|
||||||
# copy built files for github workflow
|
# copy built files for github workflow
|
||||||
for built_file in glob.glob(os.path.join(github_artifact_dir, '*')):
|
for built_file in glob.glob(os.path.join(github_artifact_dir, '*')):
|
||||||
print(f'Copy workflow built file: {built_file}')
|
print(f'Copy workflow built file: {built_file}')
|
||||||
|
if change_permission:
|
||||||
|
os.chmod(built_file, 0o775)
|
||||||
shutil.copy(built_file, libs_dir)
|
shutil.copy(built_file, libs_dir)
|
||||||
|
|
||||||
lib_urls = obtain_lib_urls()
|
lib_urls = obtain_lib_urls()
|
||||||
|
|
|
||||||
|
|
@ -28,7 +28,7 @@ function display_help {
|
||||||
echo ""
|
echo ""
|
||||||
echo "options:"
|
echo "options:"
|
||||||
echo " -h, --help show this help message"
|
echo " -h, --help show this help message"
|
||||||
echo " -x, --model_family {llama,bloom,gptneox}"
|
echo " -x, --model_family {llama,bloom,gptneox,starcoder,chatglm}"
|
||||||
echo " family name of model"
|
echo " family name of model"
|
||||||
echo " -t N, --threads N number of threads to use during computation (default: 8)"
|
echo " -t N, --threads N number of threads to use during computation (default: 8)"
|
||||||
echo " -n N, --n_predict N number of tokens to predict (default: 128, -1 = infinity)"
|
echo " -n N, --n_predict N number of tokens to predict (default: 128, -1 = infinity)"
|
||||||
|
|
@ -59,6 +59,12 @@ function starcoder {
|
||||||
eval "$command"
|
eval "$command"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function chatglm {
|
||||||
|
command="$lib_dir/main-chatglm_vnni -t $threads -n $n_predict ${filteredArguments[*]}"
|
||||||
|
echo "$command"
|
||||||
|
eval "$command"
|
||||||
|
}
|
||||||
|
|
||||||
# Remove model_family/x parameter
|
# Remove model_family/x parameter
|
||||||
filteredArguments=()
|
filteredArguments=()
|
||||||
while [[ $# -gt 0 ]]; do
|
while [[ $# -gt 0 ]]; do
|
||||||
|
|
@ -99,6 +105,8 @@ elif [[ "$model_family" == "gptneox" ]]; then
|
||||||
gptneox
|
gptneox
|
||||||
elif [[ "$model_family" == "starcoder" ]]; then
|
elif [[ "$model_family" == "starcoder" ]]; then
|
||||||
starcoder
|
starcoder
|
||||||
|
elif [[ "$model_family" == "chatglm" ]]; then
|
||||||
|
chatglm
|
||||||
else
|
else
|
||||||
echo "Invalid model_family: $model_family"
|
echo "Invalid model_family: $model_family"
|
||||||
display_help
|
display_help
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,7 @@ function Display-Help
|
||||||
Write-Host ""
|
Write-Host ""
|
||||||
Write-Host "options:"
|
Write-Host "options:"
|
||||||
Write-Host " -h, --help show this help message"
|
Write-Host " -h, --help show this help message"
|
||||||
Write-Host " -x, --model_family {llama,bloom,gptneox}"
|
Write-Host " -x, --model_family {llama,bloom,gptneox,starcoder,chatglm}"
|
||||||
Write-Host " family name of model"
|
Write-Host " family name of model"
|
||||||
Write-Host " -t N, --threads N number of threads to use during computation (default: 8)"
|
Write-Host " -t N, --threads N number of threads to use during computation (default: 8)"
|
||||||
Write-Host " -n N, --n_predict N number of tokens to predict (default: 128, -1 = infinity)"
|
Write-Host " -n N, --n_predict N number of tokens to predict (default: 128, -1 = infinity)"
|
||||||
|
|
@ -53,6 +53,15 @@ function starcoder
|
||||||
Invoke-Expression $command
|
Invoke-Expression $command
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function chatglm
|
||||||
|
{
|
||||||
|
$exec_file = "main-chatglm_vnni.exe"
|
||||||
|
$command = "$lib_dir/$exec_file -t $threads -n $n_predict $filteredArguments"
|
||||||
|
Write-Host "$command"
|
||||||
|
Invoke-Expression $command
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
# Remove model_family/x parameter
|
# Remove model_family/x parameter
|
||||||
$filteredArguments = @()
|
$filteredArguments = @()
|
||||||
for ($i = 0; $i -lt $args.Length; $i++) {
|
for ($i = 0; $i -lt $args.Length; $i++) {
|
||||||
|
|
@ -95,6 +104,9 @@ switch ($model_family)
|
||||||
"starcoder" {
|
"starcoder" {
|
||||||
starcoder
|
starcoder
|
||||||
}
|
}
|
||||||
|
"chatglm" {
|
||||||
|
chatglm
|
||||||
|
}
|
||||||
default {
|
default {
|
||||||
Write-Host "Invalid model_family: $model_family"
|
Write-Host "Invalid model_family: $model_family"
|
||||||
Display-Help
|
Display-Help
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue