[LLM] Add chatglm support for llm-cli (#8641)
* add chatglm build * add llm-cli support * update git * install cmake * add unit test for chatglm * add files to setup * fix bug causing a permission error when sf lacks a file
This commit is contained in:
parent
d6cbfc6d2c
commit
cdfbe652ca
5 changed files with 111 additions and 4 deletions
78
.github/workflows/llm-binary-build.yml
vendored
78
.github/workflows/llm-binary-build.yml
vendored
|
|
@ -41,10 +41,18 @@ jobs:
|
|||
- name: Set access token
|
||||
run: |
|
||||
echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV"
|
||||
- name: Update Git
|
||||
shell: bash
|
||||
run: |
|
||||
yum -y remove git
|
||||
yum -y remove git-*
|
||||
yum -y install https://packages.endpointdev.com/rhel/7/os/x86_64/endpoint-repo.x86_64.rpm || true
|
||||
yum -y install git
|
||||
- uses: actions/checkout@v3
|
||||
with:
|
||||
repository: "intel-analytics/llm.cpp"
|
||||
token: ${{ env.github_access_token }}
|
||||
submodules: 'recursive'
|
||||
- name: Install Build Environment
|
||||
shell: bash
|
||||
run: |
|
||||
|
|
@ -72,6 +80,33 @@ jobs:
|
|||
mv build/main-starcoder release/main-starcoder_avx2
|
||||
mv build/quantize-starcoder release/quantize-starcoder
|
||||
mv build/libstarcoder.so release/libstarcoder_avx2.so
|
||||
- name: Download cmake
|
||||
shell: bash
|
||||
run: |
|
||||
export http_proxy=${HTTP_PROXY}
|
||||
export https_proxy=${HTTPS_PROXY}
|
||||
cd ..
|
||||
# Reuse the cached CMake tarball only if its SHA-256 matches the pinned value;
# otherwise fetch it again. -O overwrites any stale file in place: a plain
# wget would keep the bad copy and save the new download as "<name>.1", so the
# tar step that follows would still unpack the corrupted archive.
cmake_tarball="cmake-3.27.1-linux-x86_64.tar.gz"
cmake_sha256="9fef63e1cf87cab1153f9433045df2e43c336e462518b0f5e52d2cc91d762cff"
actual_sha256=""
if [ -f "$cmake_tarball" ]; then
  actual_sha256=$(sha256sum "$cmake_tarball" | awk '{print $1}')
fi
if [ "$actual_sha256" != "$cmake_sha256" ]; then
  wget -O "$cmake_tarball" "https://github.com/Kitware/CMake/releases/download/v3.27.1/$cmake_tarball"
fi
|
||||
tar zxvf cmake-3.27.1-linux-x86_64.tar.gz
|
||||
- name: Build Chatglm
|
||||
shell: bash
|
||||
run: |
|
||||
cmake_path="$(pwd)/../cmake-3.27.1-linux-x86_64/bin/cmake"
|
||||
cd src/chatglm
|
||||
scl enable devtoolset-11 "$cmake_path -B build"
|
||||
scl enable devtoolset-11 "$cmake_path --build build --config Release"
|
||||
- name: Move Chatglm binaries
|
||||
shell: bash
|
||||
run: |
|
||||
mv src/chatglm/build/main release/main-chatglm_vnni
|
||||
|
||||
- name: Archive build files
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
|
|
@ -102,10 +137,18 @@ jobs:
|
|||
- name: Set access token
|
||||
run: |
|
||||
echo "github_access_token=${GITHUB_ACCESS_TOKEN}" >> "$GITHUB_ENV"
|
||||
- name: Update Git
|
||||
shell: bash
|
||||
run: |
|
||||
yum -y remove git
|
||||
yum -y remove git-*
|
||||
yum -y install https://packages.endpointdev.com/rhel/7/os/x86_64/endpoint-repo.x86_64.rpm || true
|
||||
yum -y install git
|
||||
- uses: actions/checkout@v3
|
||||
with:
|
||||
repository: "intel-analytics/llm.cpp"
|
||||
token: ${{ env.github_access_token }}
|
||||
submodules: 'recursive'
|
||||
- name: Install Build Environment
|
||||
shell: bash
|
||||
run: |
|
||||
|
|
@ -133,6 +176,28 @@ jobs:
|
|||
mv build/main-starcoder release/main-starcoder_avx512
|
||||
mv build/quantize-starcoder release/quantize-starcoder_avx512
|
||||
mv build/libstarcoder.so release/libstarcoder_avx512.so
|
||||
# - name: Download cmake
|
||||
# shell: bash
|
||||
# run: |
|
||||
# export http_proxy=${HTTP_PROXY}
|
||||
# export https_proxy=${HTTPS_PROXY}
|
||||
# cd ..
|
||||
# if [ -f "cmake-3.27.1-linux-x86_64.tar.gz" ]; then
|
||||
# actual_sha256=$(sha256sum "cmake-3.27.1-linux-x86_64.tar.gz" | awk '{print $1}')
|
||||
# if [ "$actual_sha256" != "9fef63e1cf87cab1153f9433045df2e43c336e462518b0f5e52d2cc91d762cff" ]; then
|
||||
# wget https://github.com/Kitware/CMake/releases/download/v3.27.1/cmake-3.27.1-linux-x86_64.tar.gz
|
||||
# fi
|
||||
# else
|
||||
# wget https://github.com/Kitware/CMake/releases/download/v3.27.1/cmake-3.27.1-linux-x86_64.tar.gz
|
||||
# fi
|
||||
# tar zxvf cmake-3.27.1-linux-x86_64.tar.gz
|
||||
# - name: Build Chatglm
|
||||
# shell: bash
|
||||
# run: |
|
||||
# cmake_path="$(pwd)/../cmake-3.27.1-linux-x86_64/bin/cmake"
|
||||
# cd src/chatglm
|
||||
# scl enable devtoolset-11 "$cmake_path -B build"
|
||||
# scl enable devtoolset-11 "$cmake_path --build build --config Release"
|
||||
- name: Archive build files
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
|
|
@ -168,6 +233,7 @@ jobs:
|
|||
with:
|
||||
repository: "intel-analytics/llm.cpp"
|
||||
token: ${{ env.github_access_token }}
|
||||
submodules: 'recursive'
|
||||
- name: Add msbuild to PATH
|
||||
uses: microsoft/setup-msbuild@v1.1
|
||||
with:
|
||||
|
|
@ -210,6 +276,7 @@ jobs:
|
|||
with:
|
||||
repository: "intel-analytics/llm.cpp"
|
||||
token: ${{ env.github_access_token }}
|
||||
submodules: 'recursive'
|
||||
- name: Add msbuild to PATH
|
||||
uses: microsoft/setup-msbuild@v1.1
|
||||
with:
|
||||
|
|
@ -241,7 +308,16 @@ jobs:
|
|||
# mv build/Release/main-starcoder.exe release/main-starcoder_vnni.exe
|
||||
mv build/Release/quantize-starcoder.exe release/quantize-starcoder_vnni.exe
|
||||
mv build/Release/starcoder.dll release/libstarcoder_vnni.dll
|
||||
|
||||
- name: Build Chatglm
|
||||
shell: powershell
|
||||
run: |
|
||||
cd src/chatglm
|
||||
cmake -DAVXVNNI=ON -B build
|
||||
cmake --build build --config Release
|
||||
- name: Move Chatglm binaries
|
||||
shell: powershell
|
||||
run: |
|
||||
mv src/chatglm/build/Release/main.exe release/main-chatglm_vnni.exe
|
||||
- name: Archive build files
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
|
|
|
|||
5
.github/workflows/llm_unit_tests_linux.yml
vendored
5
.github/workflows/llm_unit_tests_linux.yml
vendored
|
|
@ -41,6 +41,7 @@ env:
|
|||
GPTNEOX_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_redpajama_7b_q4_0.bin
|
||||
BLOOM_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_bloom_7b_q4_0.bin
|
||||
STARCODER_INT4_CKPT_PATH: ./llm/ggml-actions/stable/bigdl_llm_santacoder_1b_q4_0.bin
|
||||
CHATGLM_INT4_CKPT_PATH: ./llm/ggml-actions/stable/chatglm2-6b-q4_0.bin
|
||||
|
||||
LLM_DIR: ./llm
|
||||
ORIGINAL_CHATGLM2_6B_PATH: ./llm/chatglm2-6b/
|
||||
|
|
@ -101,6 +102,10 @@ jobs:
|
|||
echo "Directory $STARCODER_INT4_CKPT_PATH not found. Downloading from FTP server..."
|
||||
wget --no-verbose $LLM_FTP_URL/${STARCODER_INT4_CKPT_PATH:2} -P $INT4_CKPT_DIR
|
||||
fi
|
||||
# The chatglm INT4 checkpoint is a single .bin file, so test for it with -f;
# -d is never true for a regular file and would trigger a download on every run.
if [ ! -f "$CHATGLM_INT4_CKPT_PATH" ]; then
  echo "File $CHATGLM_INT4_CKPT_PATH not found. Downloading from FTP server..."
  # ${VAR:2} drops the leading "./" so the relative path can be appended to the FTP URL.
  wget --no-verbose "$LLM_FTP_URL/${CHATGLM_INT4_CKPT_PATH:2}" -P "$INT4_CKPT_DIR"
fi
|
||||
if [ ! -d $ORIGINAL_CHATGLM2_6B_PATH ]; then
|
||||
echo "Directory $ORIGINAL_CHATGLM2_6B_PATH not found. Downloading from FTP server..."
|
||||
wget -r -nH --no-verbose --cut-dirs=1 $LLM_FTP_URL/${ORIGINAL_CHATGLM2_6B_PATH:2} -P $LLM_DIR
|
||||
|
|
|
|||
|
|
@ -76,6 +76,8 @@ windows_binarys = [
|
|||
"quantize-gptneox_vnni.exe",
|
||||
"quantize-bloom_vnni.exe",
|
||||
"quantize-starcoder_vnni.exe",
|
||||
|
||||
"main-chatglm_vnni.exe",
|
||||
]
|
||||
linux_binarys = [
|
||||
"libllama_avx2.so",
|
||||
|
|
@ -97,7 +99,9 @@ linux_binarys = [
|
|||
"main-starcoder_avx512",
|
||||
"quantize-starcoder",
|
||||
"libstarcoder_avx2.so",
|
||||
"main-starcoder_avx2"
|
||||
"main-starcoder_avx2",
|
||||
|
||||
"main-chatglm_vnni",
|
||||
]
|
||||
|
||||
|
||||
|
|
@ -208,6 +212,8 @@ def setup_package():
|
|||
# copy built files for github workflow
|
||||
for built_file in glob.glob(os.path.join(github_artifact_dir, '*')):
|
||||
print(f'Copy workflow built file: {built_file}')
|
||||
if change_permission:
|
||||
os.chmod(built_file, 0o775)
|
||||
shutil.copy(built_file, libs_dir)
|
||||
|
||||
lib_urls = obtain_lib_urls()
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ function display_help {
|
|||
echo ""
|
||||
echo "options:"
|
||||
echo " -h, --help show this help message"
|
||||
echo " -x, --model_family {llama,bloom,gptneox}"
|
||||
echo " -x, --model_family {llama,bloom,gptneox,starcoder,chatglm}"
|
||||
echo " family name of model"
|
||||
echo " -t N, --threads N number of threads to use during computation (default: 8)"
|
||||
echo " -n N, --n_predict N number of tokens to predict (default: 128, -1 = infinity)"
|
||||
|
|
@ -59,6 +59,12 @@ function starcoder {
|
|||
eval "$command"
|
||||
}
|
||||
|
||||
function chatglm {
  # Run the chatglm binary from $lib_dir with the thread count, token budget
  # and remaining CLI arguments taken from the script-level variables.
  # The assembled command line is printed before it is evaluated.
  local chatglm_bin="$lib_dir/main-chatglm_vnni"
  command="$chatglm_bin -t $threads -n $n_predict ${filteredArguments[*]}"
  echo "$command"
  eval "$command"
}
|
||||
|
||||
# Remove model_family/x parameter
|
||||
filteredArguments=()
|
||||
while [[ $# -gt 0 ]]; do
|
||||
|
|
@ -99,6 +105,8 @@ elif [[ "$model_family" == "gptneox" ]]; then
|
|||
gptneox
|
||||
elif [[ "$model_family" == "starcoder" ]]; then
|
||||
starcoder
|
||||
elif [[ "$model_family" == "chatglm" ]]; then
|
||||
chatglm
|
||||
else
|
||||
echo "Invalid model_family: $model_family"
|
||||
display_help
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ function Display-Help
|
|||
Write-Host ""
|
||||
Write-Host "options:"
|
||||
Write-Host " -h, --help show this help message"
|
||||
Write-Host " -x, --model_family {llama,bloom,gptneox}"
|
||||
Write-Host " -x, --model_family {llama,bloom,gptneox,starcoder,chatglm}"
|
||||
Write-Host " family name of model"
|
||||
Write-Host " -t N, --threads N number of threads to use during computation (default: 8)"
|
||||
Write-Host " -n N, --n_predict N number of tokens to predict (default: 128, -1 = infinity)"
|
||||
|
|
@ -53,6 +53,15 @@ function starcoder
|
|||
Invoke-Expression $command
|
||||
}
|
||||
|
||||
function chatglm
{
    # Run the chatglm executable from $lib_dir with the thread count, token
    # budget and remaining CLI arguments taken from the script-level variables.
    # The assembled command line is printed before it is executed.
    $binary_name = "main-chatglm_vnni.exe"
    $command = "$lib_dir/$binary_name -t $threads -n $n_predict $filteredArguments"
    Write-Host "$command"
    Invoke-Expression $command
}
|
||||
|
||||
|
||||
# Remove model_family/x parameter
|
||||
$filteredArguments = @()
|
||||
for ($i = 0; $i -lt $args.Length; $i++) {
|
||||
|
|
@ -95,6 +104,9 @@ switch ($model_family)
|
|||
"starcoder" {
|
||||
starcoder
|
||||
}
|
||||
"chatglm" {
|
||||
chatglm
|
||||
}
|
||||
default {
|
||||
Write-Host "Invalid model_family: $model_family"
|
||||
Display-Help
|
||||
|
|
|
|||
Loading…
Reference in a new issue