LLM: Command line wrapper for llama/bloom/gptneox (#8239)
* add llama/bloom/gptneox wrapper * add readme * upload binary main file
This commit is contained in:
parent
08bdfce2d8
commit
ea3cf6783e
4 changed files with 216 additions and 8 deletions
|
|
@ -1 +1,47 @@
|
||||||
# BigDL LLM
|
# BigDL LLM
|
||||||
|
|
||||||
|
## llm-cli
|
||||||
|
|
||||||
|
llm-cli is a command-line interface tool that allows easy execution of llama/gptneox/bloom models
|
||||||
|
and generates results based on the provided prompt.
|
||||||
|
|
||||||
|
### Usage
|
||||||
|
|
||||||
|
```bash
|
||||||
|
llm-cli -x <llama/gptneox/bloom> [-h] [args]
|
||||||
|
```
|
||||||
|
|
||||||
|
`args` are the arguments provided to the specified model program. You can use `-x MODEL_FAMILY -h`
|
||||||
|
to retrieve the parameter list for a specific `MODEL_FAMILY`, for example:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
llm-cli -x llama -h
|
||||||
|
|
||||||
|
# Output:
|
||||||
|
# usage: main-llama [options]
|
||||||
|
#
|
||||||
|
# options:
|
||||||
|
# -h, --help show this help message and exit
|
||||||
|
# -i, --interactive run in interactive mode
|
||||||
|
# --interactive-first run in interactive mode and wait for input right away
|
||||||
|
# ...
|
||||||
|
```
|
||||||
|
|
||||||
|
### Examples
|
||||||
|
|
||||||
|
Here are some examples of how to use the llm-cli tool:
|
||||||
|
|
||||||
|
#### Completion:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
llm-cli -t 16 -x llama -m ./llm-llama-model.bin -p 'Once upon a time,'
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Chatting:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
llm-cli -t 16 -x llama -m ./llm-llama-model.bin -i --color
|
||||||
|
```
|
||||||
|
|
||||||
|
Feel free to explore different options and experiment with the llama/gptneox/bloom models using
|
||||||
|
llm-cli!
|
||||||
|
|
@ -25,13 +25,14 @@
|
||||||
# >>>> Linux:
|
# >>>> Linux:
|
||||||
# python setup.py clean --all bdist_wheel --linux
|
# python setup.py clean --all bdist_wheel --linux
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import fnmatch
|
import fnmatch
|
||||||
from setuptools import setup
|
import os
|
||||||
import urllib.request
|
|
||||||
import platform
|
import platform
|
||||||
import shutil
|
import shutil
|
||||||
|
import sys
|
||||||
|
import urllib.request
|
||||||
|
|
||||||
|
from setuptools import setup
|
||||||
|
|
||||||
long_description = '''
|
long_description = '''
|
||||||
BigDL LLM
|
BigDL LLM
|
||||||
|
|
@ -67,6 +68,9 @@ lib_urls["Windows"] = [
|
||||||
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/quantize-gptneox.exe",
|
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/quantize-gptneox.exe",
|
||||||
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/bloom.dll",
|
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/bloom.dll",
|
||||||
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/quantize-bloom.exe",
|
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/quantize-bloom.exe",
|
||||||
|
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-llama.exe",
|
||||||
|
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-bloom.exe",
|
||||||
|
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-gptneox.exe",
|
||||||
]
|
]
|
||||||
lib_urls["Linux"] = [
|
lib_urls["Linux"] = [
|
||||||
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/libllama_avx2.so",
|
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/libllama_avx2.so",
|
||||||
|
|
@ -77,6 +81,12 @@ lib_urls["Linux"] = [
|
||||||
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/quantize-gptneox",
|
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/quantize-gptneox",
|
||||||
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/libbloom_avx2.so",
|
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/libbloom_avx2.so",
|
||||||
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/libbloom_avx512.so",
|
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/libbloom_avx512.so",
|
||||||
|
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-llama_avx2",
|
||||||
|
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-bloom_avx2",
|
||||||
|
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-gptneox_avx2",
|
||||||
|
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-llama_avx512",
|
||||||
|
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-bloom_avx512",
|
||||||
|
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-gptneox_avx512",
|
||||||
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/quantize-bloom",
|
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/quantize-bloom",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@ -92,7 +102,6 @@ def download_libs(url: str, change_permission=False):
|
||||||
|
|
||||||
|
|
||||||
def setup_package():
|
def setup_package():
|
||||||
|
|
||||||
package_data = {}
|
package_data = {}
|
||||||
package_data["Windows"] = [
|
package_data["Windows"] = [
|
||||||
"libs/llama.dll",
|
"libs/llama.dll",
|
||||||
|
|
@ -101,6 +110,9 @@ def setup_package():
|
||||||
"libs/quantize-gptneox.exe",
|
"libs/quantize-gptneox.exe",
|
||||||
"libs/bloom.dll",
|
"libs/bloom.dll",
|
||||||
"libs/quantize-bloom.exe",
|
"libs/quantize-bloom.exe",
|
||||||
|
"libs/main-bloom.exe",
|
||||||
|
"libs/main-gptneox.exe",
|
||||||
|
"libs/main-llama.exe",
|
||||||
]
|
]
|
||||||
package_data["Linux"] = [
|
package_data["Linux"] = [
|
||||||
"libs/libllama_avx2.so",
|
"libs/libllama_avx2.so",
|
||||||
|
|
@ -112,6 +124,12 @@ def setup_package():
|
||||||
"libs/libbloom_avx2.so",
|
"libs/libbloom_avx2.so",
|
||||||
"libs/libbloom_avx512.so",
|
"libs/libbloom_avx512.so",
|
||||||
"libs/quantize-bloom",
|
"libs/quantize-bloom",
|
||||||
|
"libs/main-bloom_avx2",
|
||||||
|
"libs/main-bloom_avx512",
|
||||||
|
"libs/main-gptneox_avx2",
|
||||||
|
"libs/main-gptneox_avx512",
|
||||||
|
"libs/main-llama_avx2",
|
||||||
|
"libs/main-llama_avx512",
|
||||||
]
|
]
|
||||||
|
|
||||||
platform_name = None
|
platform_name = None
|
||||||
|
|
@ -167,6 +185,10 @@ def setup_package():
|
||||||
'Programming Language :: Python :: 3',
|
'Programming Language :: Python :: 3',
|
||||||
'Programming Language :: Python :: 3.9',
|
'Programming Language :: Python :: 3.9',
|
||||||
'Programming Language :: Python :: Implementation :: CPython'],
|
'Programming Language :: Python :: Implementation :: CPython'],
|
||||||
|
scripts={
|
||||||
|
'Linux': ['src/bigdl/llm/cli/llm-cli'],
|
||||||
|
'Windows': ['src/bigdl/llm/cli/llm-cli.ps1'],
|
||||||
|
}[platform_name],
|
||||||
platforms=['windows']
|
platforms=['windows']
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
84
python/llm/src/bigdl/llm/cli/llm-cli
Executable file
84
python/llm/src/bigdl/llm/cli/llm-cli
Executable file
|
|
@ -0,0 +1,84 @@
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# Default values
model_family=""

# Locate the installed bigdl.llm package; the model binaries are looked up
# in the "libs" directory next to its __init__ file.
llm_init_file="$(python -c "import bigdl.llm;print(bigdl.llm.__file__)")"
llm_dir="$(dirname "$llm_init_file")"
lib_dir="${llm_dir}/libs"
|
||||||
|
|
||||||
|
# Print the AVX level ("avx2" or "avx512") that selects the binary variant.
#
# Only that single word is written to stdout, because the caller captures the
# output with $(get_avx_flags) and pastes it into the binary name. Diagnostics
# therefore go to stderr. Falls back to "avx2" when lscpu is unavailable or
# does not report avx512_vnni.
function get_avx_flags() {
  local avx="avx2"
  local msg
  if command -v lscpu &>/dev/null; then
    msg=$(lscpu)
    if [[ $msg == *"avx512_vnni"* ]]; then
      avx="avx512"
    fi
  else
    # stderr, not stdout: otherwise this sentence becomes part of the flag.
    echo "lscpu command not found. Please make sure it is installed." >&2
  fi
  echo "$avx"
}
|
||||||
|
|
||||||
|
# Print the usage summary of this wrapper to stdout.
# The usage line names the command as it is installed (`llm-cli`); the script
# is not shipped with a `.sh` suffix.
function display_help {
  echo "usage: llm-cli -x MODEL_FAMILY [-h] [args]"
  echo ""
  echo "options:"
  echo " -h, --help show this help message"
  echo " -x, --model_family {llama,bloom,gptneox}"
  echo " family name of model"
  echo " args parameters passed to the specified model function"
}
|
||||||
|
|
||||||
|
# Run the bundled main-<family>_<avx> binary with the forwarded arguments.
#   $1  model family name (llama, bloom or gptneox)
# Reads the globals lib_dir, avx_flag and filteredArguments.
#
# The argument parser stores every forwarded argument wrapped in one pair of
# single quotes. That pair is stripped again here and the binary is executed
# directly instead of through `eval`, so that
#   - arguments containing a single quote (e.g. -p "it's") stay intact,
#   - nothing inside an argument is re-interpreted by the shell,
#   - an install path containing spaces still works.
# The command line is echoed first, in the same form as before.
function run_model_binary {
  local binary="$lib_dir/main-${1}_$avx_flag"
  local quoted arg
  local -a model_args=()
  for quoted in "${filteredArguments[@]}"; do
    arg="${quoted#\'}"   # drop the leading quote added by the parser
    arg="${arg%\'}"      # drop the trailing quote added by the parser
    model_args+=("$arg")
  done
  echo "$binary ${filteredArguments[*]}"
  "$binary" "${model_args[@]}"
}

# Launch the llama binary.
function llama {
  run_model_binary llama
}

# Launch the bloom binary.
function bloom {
  run_model_binary bloom
}

# Launch the gptneox binary.
function gptneox {
  run_model_binary gptneox
}
|
||||||
|
|
||||||
|
# Split the command line: -x/--model_family is consumed by this wrapper,
# everything else (including -h/--help) is collected for the model binary.
# Each collected argument is stored wrapped in single quotes, which is the
# form the launcher functions expect.
filteredArguments=()
while [[ $# -gt 0 ]]; do
  case "$1" in
    -h | --help)
      # Show the wrapper help and still forward the flag.
      display_help
      filteredArguments+=("'$1'")
      shift
      ;;
    -x | --model_family)
      if [[ $# -ge 2 ]]; then
        model_family="$2"
        shift 2
      else
        # `shift 2` fails without shifting anything when only one positional
        # parameter is left, which would make this loop spin forever.
        echo "Missing value for $1" >&2
        shift
      fi
      ;;
    *)
      filteredArguments+=("'$1'")
      shift
      ;;
  esac
done
|
||||||
|
|
||||||
|
# Detect the AVX level once; it is part of the name of the binary to launch.
avx_flag=$(get_avx_flags)
echo "AVX Flags: $avx_flag"

# Hand off to the launcher matching the requested model family.
case "$model_family" in
  llama)
    llama
    ;;
  bloom)
    bloom
    ;;
  gptneox)
    gptneox
    ;;
  *)
    echo "Invalid model_family: $model_family"
    display_help
    ;;
esac
|
||||||
56
python/llm/src/bigdl/llm/cli/llm-cli.ps1
Executable file
56
python/llm/src/bigdl/llm/cli/llm-cli.ps1
Executable file
|
|
@ -0,0 +1,56 @@
|
||||||
|
# Locate the installed bigdl.llm package; the model binaries are looked up
# in the "libs" directory next to its __init__ file.
$llm_init_file = python -c "import bigdl.llm;print(bigdl.llm.__file__)"
$llm_dir = Split-Path -Parent $llm_init_file
$lib_dir = Join-Path $llm_dir "libs"
|
||||||
|
|
||||||
|
# Print the usage summary of this wrapper script to the host.
function Display-Help {
    $helpLines = @(
        "usage: ./llm-cli.ps1 -x MODEL_FAMILY [-h] [args]",
        "",
        "options:",
        " -h, --help show this help message",
        " -x, --model_family {llama,bloom,gptneox}",
        " family name of model",
        " args parameters passed to the specified model function"
    )
    foreach ($line in $helpLines) {
        Write-Host $line
    }
}
|
||||||
|
|
||||||
|
# Launch the bundled main-<family>.exe with the forwarded arguments.
#   $family  model family name (llama, bloom or gptneox)
# Reads the script variables $lib_dir and $filteredArguments.
#
# The argument loop stores every forwarded argument wrapped in one pair of
# single quotes. That pair is removed here and the binary is started with the
# call operator instead of Invoke-Expression, so an install path containing
# spaces and arguments containing quotes are passed through literally rather
# than being re-parsed as PowerShell source.
# The command line is printed first, in the same form as before.
function Invoke-ModelBinary($family) {
    $binary = Join-Path $lib_dir "main-${family}.exe"
    $modelArgs = @($filteredArguments | ForEach-Object { $_.Substring(1, $_.Length - 2) })
    Write-Host "$lib_dir/main-${family}.exe $filteredArguments"
    & $binary @modelArgs
}

# Launch the llama binary.
function llama {
    Invoke-ModelBinary "llama"
}

# Launch the bloom binary.
function bloom {
    Invoke-ModelBinary "bloom"
}

# Launch the gptneox binary.
function gptneox {
    Invoke-ModelBinary "gptneox"
}
|
||||||
|
|
||||||
|
# Separate the wrapper's own option (-x / --model_family) from the arguments
# that are forwarded to the model binary. Forwarded arguments are stored
# wrapped in single quotes.
$filteredArguments = @()
$i = 0
while ($i -lt $args.Length) {
    $current = $args[$i]
    $isFamilyFlag = ($current -eq '--model_family') -or ($current -eq '-x')
    if (-not $isFamilyFlag) {
        $filteredArguments += "`'" + $current + "`'"
    }
    elseif (($i + 1 -lt $args.Length) -and ($args[$i + 1] -notlike '-*')) {
        # The next argument is the family name; consume it together with the flag.
        $i++
        $model_family = $args[$i]
    }
    $i++
}
|
||||||
|
|
||||||
|
# Hand off to the launcher matching the requested model family; anything
# else (including a missing family) reports the problem and shows the help.
switch ($model_family) {
    "llama" {
        llama
    }
    "bloom" {
        bloom
    }
    "gptneox" {
        gptneox
    }
    default {
        Write-Host "Invalid model_family: $model_family"
        Display-Help
    }
}
|
||||||
Loading…
Reference in a new issue