LLM: Command line wrapper for llama/bloom/gptneox (#8239)

* add llama/bloom/gptneox wrapper
* add readme
* upload binary main file
This commit is contained in:
xingyuan li 2023-06-08 14:55:22 +08:00 committed by GitHub
parent 08bdfce2d8
commit ea3cf6783e
4 changed files with 216 additions and 8 deletions

View file

@ -1 +1,47 @@
# BigDL LLM # BigDL LLM
## llm-cli
llm-cli is a command-line interface tool that allows easy execution of llama/gptneox/bloom models
and generates results based on the provided prompt.
### Usage
```bash
llm-cli.sh -x <llama/gptneox/bloom> [-h] [args]
```
`args` are the arguments provided to the specified model program. You can use `-x MODEL_FAMILY -h`
to retrieve the parameter list for a specific `MODEL_FAMILY`, for example:
```bash
llm-cli.sh -x llama -h
# Output:
# usage: main-llama [options]
#
# options:
# -h, --help show this help message and exit
# -i, --interactive run in interactive mode
# --interactive-first run in interactive mode and wait for input right away
# ...
```
### Examples
Here are some examples of how to use the llm-cli tool:
#### Completion:
```bash
llm-cli.sh -t 16 -x llama -m ./llm-llama-model.bin -p 'Once upon a time,'
```
#### Chatting:
```bash
llm-cli.sh -t 16 -x llama -m ./llm-llama-model.bin -i --color
```
Feel free to explore different options and experiment with the llama/gptneox/bloom models using
llm-cli!

View file

@ -25,13 +25,14 @@
# >>>> Linux # >>>> Linux
# python setup.py clean --all bdist_wheel --linux # python setup.py clean --all bdist_wheel --linux
import os
import sys
import fnmatch import fnmatch
from setuptools import setup import os
import urllib.request
import platform import platform
import shutil import shutil
import sys
import urllib.request
from setuptools import setup
long_description = ''' long_description = '''
BigDL LLM BigDL LLM
@ -51,7 +52,7 @@ def get_llm_packages():
print(dirpath) print(dirpath)
package = dirpath.split(llm_home + os.sep)[1].replace(os.sep, '.') package = dirpath.split(llm_home + os.sep)[1].replace(os.sep, '.')
if any(fnmatch.fnmatchcase(package, pat=pattern) if any(fnmatch.fnmatchcase(package, pat=pattern)
for pattern in exclude_patterns): for pattern in exclude_patterns):
print("excluding", package) print("excluding", package)
else: else:
llm_packages.append(package) llm_packages.append(package)
@ -67,6 +68,9 @@ lib_urls["Windows"] = [
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/quantize-gptneox.exe", "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/quantize-gptneox.exe",
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/bloom.dll", "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/bloom.dll",
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/quantize-bloom.exe", "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/quantize-bloom.exe",
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-llama.exe",
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-bloom.exe",
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-gptneox.exe",
] ]
lib_urls["Linux"] = [ lib_urls["Linux"] = [
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/libllama_avx2.so", "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/libllama_avx2.so",
@ -77,6 +81,12 @@ lib_urls["Linux"] = [
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/quantize-gptneox", "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/quantize-gptneox",
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/libbloom_avx2.so", "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/libbloom_avx2.so",
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/libbloom_avx512.so", "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/libbloom_avx512.so",
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-llama_avx2",
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-bloom_avx2",
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-gptneox_avx2",
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-llama_avx512",
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-bloom_avx512",
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-gptneox_avx512",
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/quantize-bloom", "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/quantize-bloom",
] ]
@ -92,7 +102,6 @@ def download_libs(url: str, change_permission=False):
def setup_package(): def setup_package():
package_data = {} package_data = {}
package_data["Windows"] = [ package_data["Windows"] = [
"libs/llama.dll", "libs/llama.dll",
@ -101,6 +110,9 @@ def setup_package():
"libs/quantize-gptneox.exe", "libs/quantize-gptneox.exe",
"libs/bloom.dll", "libs/bloom.dll",
"libs/quantize-bloom.exe", "libs/quantize-bloom.exe",
"libs/main-bloom.exe",
"libs/main-gptneox.exe",
"libs/main-llama.exe",
] ]
package_data["Linux"] = [ package_data["Linux"] = [
"libs/libllama_avx2.so", "libs/libllama_avx2.so",
@ -112,6 +124,12 @@ def setup_package():
"libs/libbloom_avx2.so", "libs/libbloom_avx2.so",
"libs/libbloom_avx512.so", "libs/libbloom_avx512.so",
"libs/quantize-bloom", "libs/quantize-bloom",
"libs/main-bloom_avx2",
"libs/main-bloom_avx512",
"libs/main-gptneox_avx2",
"libs/main-gptneox_avx512",
"libs/main-llama_avx2",
"libs/main-llama_avx512",
] ]
platform_name = None platform_name = None
@ -167,6 +185,10 @@ def setup_package():
'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: Implementation :: CPython'], 'Programming Language :: Python :: Implementation :: CPython'],
scripts={
'Linux': ['src/bigdl/llm/cli/llm-cli'],
'Windows': ['src/bigdl/llm/cli/llm-cli.ps1'],
}[platform_name],
platforms=['windows'] platforms=['windows']
) )

View file

@ -0,0 +1,84 @@
#!/bin/bash
# llm-cli: thin dispatcher that runs the bundled llama/bloom/gptneox
# native binaries shipped inside the installed bigdl.llm package.

# Default values
model_family=""

# Locate the installed bigdl.llm package directory via Python, then the
# "libs" subdirectory that holds the prebuilt main-* executables.
llm_dir="$(dirname "$(python -c "import bigdl.llm;print(bigdl.llm.__file__)")")"
lib_dir="$llm_dir/libs"
# Detect the best AVX level supported by this CPU.
# Prints "avx512" when lscpu reports avx512_vnni, otherwise "avx2".
# Diagnostics go to stderr: callers capture stdout via $(...), so any
# message on stdout would corrupt the returned flag value.
function get_avx_flags() {
  avx="avx2"
  if command -v lscpu &>/dev/null; then
    msg=$(lscpu)
    if [[ $msg == *"avx512_vnni"* ]]; then
      avx="avx512"
    fi
  else
    # BUG FIX: this warning previously went to stdout and was captured
    # into avx_flag by command substitution; send it to stderr instead.
    echo "lscpu command not found. Please make sure it is installed." >&2
  fi
  echo $avx
}
# Function to display help message
# Prints wrapper usage for llm-cli.sh. Note: model-specific options are
# shown by the underlying binary when -h is forwarded together with -x.
function display_help {
echo "usage: ./llm-cli.sh -x MODEL_FAMILY [-h] [args]"
echo ""
echo "options:"
echo " -h, --help show this help message"
echo " -x, --model_family {llama,bloom,gptneox}"
echo " family name of model"
echo " args parameters passed to the specified model function"
}
# Shared runner: execute the family-specific main binary built for the
# detected AVX level, forwarding the filtered CLI arguments.
# Reads globals: lib_dir, avx_flag, filteredArguments.
# Echoes the command line before running it (same as the original
# per-family functions did).
function run_model() {
  local model="$1"
  local command="$lib_dir/main-${model}_$avx_flag ${filteredArguments[*]}"
  echo "$command"
  eval "$command"
}

# One wrapper per supported model family; the dispatcher below calls
# these by name. Previously each duplicated the echo/eval body.
function llama {
  run_model llama
}

function bloom {
  run_model bloom
}

function gptneox {
  run_model gptneox
}
# Remove model_family/x parameter
# Everything except -x/--model_family is collected into filteredArguments
# and later forwarded verbatim to the model binary. Each argument is
# wrapped in single quotes because the runner builds a single string and
# evals it; the quoting preserves arguments containing spaces.
filteredArguments=()
while [[ $# -gt 0 ]]; do
case "$1" in
-h | --help)
# Show wrapper help, but ALSO forward -h to the binary so the
# model-specific option list is printed too (see README example).
display_help
filteredArguments+=("'$1'")
shift
;;
-x | --model_family)
# Consume the flag and its value; not forwarded to the binary.
model_family="$2"
shift 2
;;
*)
filteredArguments+=("'$1'")
shift
;;
esac
done
avx_flag=$(get_avx_flags)
echo "AVX Flags: $avx_flag"

# Perform actions based on the model_family.
# BUG FIX: an unknown family previously printed the error to stdout and
# exited 0; report on stderr and exit non-zero so scripted callers can
# detect the failure.
case "$model_family" in
  llama)
    llama
    ;;
  bloom)
    bloom
    ;;
  gptneox)
    gptneox
    ;;
  *)
    echo "Invalid model_family: $model_family" >&2
    display_help
    exit 1
    ;;
esac

View file

@ -0,0 +1,56 @@
# llm-cli.ps1: Windows dispatcher for the bundled llama/bloom/gptneox
# native executables shipped inside the installed bigdl.llm package.
# Locate the package directory via Python, then its "libs" subfolder.
$llm_dir = (Split-Path -Parent (python -c "import bigdl.llm;print(bigdl.llm.__file__)"))
$lib_dir = Join-Path $llm_dir "libs"
# Function to display help message
# Prints wrapper usage; model-specific options are shown by the
# underlying executable when -h is forwarded together with -x.
function Display-Help {
Write-Host "usage: ./llm-cli.ps1 -x MODEL_FAMILY [-h] [args]"
Write-Host ""
Write-Host "options:"
Write-Host " -h, --help show this help message"
Write-Host " -x, --model_family {llama,bloom,gptneox}"
Write-Host " family name of model"
Write-Host " args parameters passed to the specified model function"
}
# Shared runner: invoke the family-specific main executable, forwarding
# the filtered CLI arguments. Reads script-scope $lib_dir and
# $filteredArguments. Echoes the command line before running it, same
# as the original per-family functions did.
function Invoke-ModelBinary($model_name) {
    $command = "$lib_dir/main-$model_name.exe $filteredArguments"
    Write-Host "$command"
    Invoke-Expression $command
}

# One wrapper per supported model family; the switch below calls these
# by name. Previously each duplicated the Write-Host/Invoke-Expression
# body.
function llama { Invoke-ModelBinary "llama" }
function bloom { Invoke-ModelBinary "bloom" }
function gptneox { Invoke-ModelBinary "gptneox" }
# Remove model_family/x parameter
# Everything except -x/--model_family is collected into
# $filteredArguments and forwarded verbatim to the executable. Each
# argument is wrapped in single quotes because the runner builds a
# single string and Invoke-Expression's it; the quoting preserves
# arguments containing spaces.
$filteredArguments = @()
for ($i = 0; $i -lt $args.Length; $i++) {
    if ($args[$i] -eq '--model_family' -or $args[$i] -eq '-x') {
        # Consume the flag's value too (skip it on the next iteration),
        # but only when the next token does not look like another flag.
        if ($i + 1 -lt $args.Length -and $args[$i + 1] -notlike '-*') {
            $i++
            $model_family = $args[$i]
        }
    }
    else {
        $filteredArguments += "`'"+$args[$i]+"`'"
    }
}
# Perform actions based on the model_family.
# BUG FIX: an unknown family previously printed the error and returned
# success; exit non-zero so scripted callers can detect the failure.
switch ($model_family) {
    "llama" { llama }
    "bloom" { bloom }
    "gptneox" { gptneox }
    default {
        Write-Host "Invalid model_family: $model_family"
        Display-Help
        exit 1
    }
}