[WIP] LLM llm-cli chat mode (#8440)
* fix timezone
* temp
* Update Linux interactive mode
* modify init text for interactive mode
* meet comments
* update
* win script
* meet comments
parent 936d21635f
commit f2bb469847
5 changed files with 187 additions and 4 deletions
@@ -74,6 +74,10 @@ Currently `bigdl-llm` CLI supports *LLaMA* (e.g., *vicuna*), *GPT-NeoX* (e.g., *
 #text completion
 #llama/bloom/gptneox/starcoder model family is currently supported
 llm-cli -t 16 -x gptneox -m "/path/to/output/model.bin" -p 'Once upon a time,'
+
+#chat mode
+#Note: The chat mode only supports LLaMA (e.g., *vicuna*) and GPT-NeoX (e.g., *redpajama*) for now.
+llm-chat -m "/path/to/output/model.bin" -x llama
 ```
 
 #### Hugging Face `transformers`-style API
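
For reference, `llm-chat` accepts the same `-t`/`--threads` and `-n`/`--n_predict` options as `llm-cli`, and forwards any other flags to the underlying native binary. The sketch below is a hypothetical invocation combining these; the model path is a placeholder.

```bash
# Interactive chat with a converted GPT-NeoX model: 16 threads, up to
# 1024 predicted tokens; flags the wrapper does not recognize (such as -m)
# are passed through to the native chat binary.
llm-chat -x gptneox -t 16 -n 1024 -m "/path/to/output/model.bin"
```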
@@ -214,7 +214,7 @@ def setup_package():
 url='https://github.com/intel-analytics/BigDL',
 packages=get_llm_packages(),
 package_dir={"": "src"},
-package_data={"bigdl.llm": package_data[platform_name]},
+package_data={"bigdl.llm": package_data[platform_name] + ["cli/prompts/*.txt"]},
 include_package_data=True,
 entry_points={
 "console_scripts": [
@@ -228,8 +228,8 @@ def setup_package():
 'Programming Language :: Python :: 3.9',
 'Programming Language :: Python :: Implementation :: CPython'],
 scripts={
-'Linux': ['src/bigdl/llm/cli/llm-cli'],
-'Windows': ['src/bigdl/llm/cli/llm-cli.ps1'],
+'Linux': ['src/bigdl/llm/cli/llm-cli', 'src/bigdl/llm/cli/llm-chat'],
+'Windows': ['src/bigdl/llm/cli/llm-cli.ps1', 'src/bigdl/llm/cli/llm-chat.ps1'],
 }[platform_name],
 platforms=['windows']
 )
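
Because the chat prompt template now ships as package data, one quick sanity check after installation is to resolve the package directory the same way the CLI scripts do and confirm the template is present. A minimal sketch, assuming `bigdl-llm` is installed in the active Python environment:

```bash
# Resolve the installed bigdl.llm package directory (the same technique
# the llm-chat script uses) and list the bundled prompt templates.
llm_dir="$(dirname "$(python -c "import bigdl.llm; print(bigdl.llm.__file__)")")"
ls "$llm_dir/cli/prompts"   # expected to contain chat-with-llm.txt
```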

python/llm/src/bigdl/llm/cli/llm-chat (new executable file, 94 lines)
@@ -0,0 +1,94 @@
#!/bin/bash

# Default values
model_family=""
threads=8
# Number of tokens to predict (made it larger than default because we want a long interaction)
n_predict=512

EXTRA_ARGS=('--color')

llm_dir="$(dirname "$(python -c "import bigdl.llm;print(bigdl.llm.__file__)")")"
lib_dir="$llm_dir/libs"
prompts_dir="$llm_dir/cli/prompts"

function get_avx_flags() {
  avx="avx2"
  if command -v lscpu &>/dev/null; then
    msg=$(lscpu)
    if [[ $msg == *"avx512_vnni"* ]]; then
      avx="avx512"
    fi
  else
    echo "lscpu command not found. Please make sure it is installed." >&2
  fi
  echo $avx
}

# Function to display help message
function display_help {
  echo "usage: ./llm-chat -x MODEL_FAMILY [-h] [args]"
  echo ""
  echo "options:"
  echo "  -h, --help             show this help message"
  echo "  -x, --model_family {llama,gptneox}"
  echo "                         family name of model"
  echo "  -t N, --threads N      number of threads to use during computation (default: 8)"
  echo "  -n N, --n_predict N    number of tokens to predict (default: 512, -1 = infinity)"
  echo "  args                   parameters passed to the specified model function"
}

function llama {
  PROMPT_TEMPLATE="$prompts_dir/chat-with-llm.txt"
  EXTRA_ARGS+=('-i' '--file' "'$PROMPT_TEMPLATE'" '--reverse-prompt' "'USER:'" '--in-prefix' "' '")
  command="$lib_dir/main-llama_$avx_flag -t $threads -n $n_predict ${filteredArguments[*]} ${EXTRA_ARGS[*]}"
  echo "$command"
  eval "$command"
}

function gptneox {
  command="$lib_dir/main-gptneox_$avx_flag -t $threads -n $n_predict ${filteredArguments[*]} ${EXTRA_ARGS[*]}"
  echo "$command"
  eval "$command"
}

# Remove model_family/x parameter
filteredArguments=()
while [[ $# -gt 0 ]]; do
  case "$1" in
  -h | --help)
    display_help
    shift
    ;;
  -x | --model_family | --model-family)
    model_family="$2"
    shift 2
    ;;
  -t | --threads)
    threads="$2"
    shift 2
    ;;
  -n | --n_predict | --n-predict)
    n_predict="$2"
    shift 2
    ;;
  *)
    filteredArguments+=("'$1'")
    shift
    ;;
  esac
done

avx_flag=$(get_avx_flags)
echo "AVX Flags: $avx_flag"

# Perform actions based on the model_family
if [[ "$model_family" == "llama" ]]; then
  llama
elif [[ "$model_family" == "gptneox" ]]; then
  # TODO
  gptneox
else
  echo "llm-chat does not support model_family $model_family for now."
  display_help
fi
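
For clarity, the command string that the `llama` function assembles and `eval`s for `llm-chat -m "/path/to/output/model.bin" -x llama` on an AVX2-only CPU expands roughly as follows; the actual `lib_dir` and `prompts_dir` values depend on where `bigdl.llm` is installed.

```bash
# Approximate expansion of $command in the llama() function above
# (defaults: -t 8, -n 512; filtered arguments come before EXTRA_ARGS).
"$lib_dir/main-llama_avx2" -t 8 -n 512 \
  -m '/path/to/output/model.bin' \
  --color -i \
  --file "$prompts_dir/chat-with-llm.txt" \
  --reverse-prompt 'USER:' \
  --in-prefix ' '
```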

python/llm/src/bigdl/llm/cli/llm-chat.ps1 (new file, 80 lines)
@@ -0,0 +1,80 @@
$llm_dir = (Split-Path -Parent (python -c "import bigdl.llm;print(bigdl.llm.__file__)"))
$lib_dir = Join-Path $llm_dir "libs"
$prompt_dir = Join-Path $llm_dir "cli/prompts"

$model_family = ""
$threads = 8
# Number of tokens to predict (made it larger than default because we want a long interaction)
$n_predict = 512

# Function to display help message
function Display-Help
{
    Write-Host "usage: ./llm-chat.ps1 -x MODEL_FAMILY [-h] [args]"
    Write-Host ""
    Write-Host "options:"
    Write-Host "  -h, --help             show this help message"
    Write-Host "  -x, --model_family {llama,gptneox}"
    Write-Host "                         family name of model"
    Write-Host "  -t N, --threads N      number of threads to use during computation (default: 8)"
    Write-Host "  -n N, --n_predict N    number of tokens to predict (default: 512, -1 = infinity)"
    Write-Host "  args                   parameters passed to the specified model function"
}

function llama
{
    $prompt_file = Join-Path $prompt_dir "chat-with-llm.txt"
    $command = "$lib_dir/main-llama.exe -t $threads -n $n_predict -f $prompt_file -i --color --reverse-prompt 'USER:' --in-prefix ' ' $filteredArguments"
    Write-Host "$command"
    Invoke-Expression $command
}

function gptneox
{
    # TODO
    $command = "$lib_dir/main-gptneox.exe -t $threads -n $n_predict $filteredArguments"
    Write-Host "$command"
    Invoke-Expression $command
}

# Remove model_family/x parameter
$filteredArguments = @()
for ($i = 0; $i -lt $args.Length; $i++) {
    if ($args[$i] -eq '--model_family' -or $args[$i] -eq '--model-family' -or $args[$i] -eq '-x')
    {
        if ($i + 1 -lt $args.Length -and $args[$i + 1] -notlike '-*')
        {
            $i++
            $model_family = $args[$i]
        }
    }
    elseif ($args[$i] -eq '--threads' -or $args[$i] -eq '-t')
    {
        $i++
        $threads = $args[$i]
    }
    elseif ($args[$i] -eq '--n_predict' -or $args[$i] -eq '--n-predict' -or $args[$i] -eq '-n')
    {
        $i++
        $n_predict = $args[$i]
    }
    else
    {
        $filteredArguments += "`'" + $args[$i] + "`'"
    }
}

# Perform actions based on the model_family
switch ($model_family)
{
    "llama" {
        llama
    }
    "gptneox" {
        gptneox
    }
    default {
        Write-Host "llm-chat does not support model_family $model_family for now."
        Display-Help
    }
}

python/llm/src/bigdl/llm/cli/prompts/chat-with-llm.txt (new file, 5 lines)
@@ -0,0 +1,5 @@
A chat between a curious human USER and an artificial intelligence assistant ChatLLM. The assistant gives helpful, detailed, and polite answers to the human's questions.

USER: Hello, ChatLLM.
ChatLLM: Hello.
USER: