[WIP] LLM llm-cli chat mode (#8440)

* fix timezone

* temp

* Update Linux interactive mode

* modify init text for interactive mode

* address review comments

* update

* add Windows script

* address review comments
Yina Chen 2023-07-05 14:04:17 +08:00 committed by GitHub
parent 936d21635f
commit f2bb469847
5 changed files with 187 additions and 4 deletions

@@ -74,6 +74,10 @@ Currently `bigdl-llm` CLI supports *LLaMA* (e.g., *vicuna*), *GPT-NeoX* (e.g., *
# text completion
# the llama/bloom/gptneox/starcoder model families are currently supported
llm-cli -t 16 -x gptneox -m "/path/to/output/model.bin" -p 'Once upon a time,'
# chat mode
# Note: the chat mode currently supports only the LLaMA (e.g., *vicuna*) and GPT-NeoX (e.g., *redpajama*) model families.
llm-chat -m "/path/to/output/model.bin" -x llama
```
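For reference, `llm-chat` accepts the same threading and prediction-length options as `llm-cli`; a usage sketch (the model path is a placeholder):

```bash
# chat with a LLaMA-family model using 16 threads, predicting up to 256 tokens per turn
llm-chat -x llama -m "/path/to/output/model.bin" -t 16 -n 256
```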
#### Hugging Face `transformers`-style API

@@ -214,7 +214,7 @@ def setup_package():
url='https://github.com/intel-analytics/BigDL',
packages=get_llm_packages(),
package_dir={"": "src"},
package_data={"bigdl.llm": package_data[platform_name]},
package_data={"bigdl.llm": package_data[platform_name] + ["cli/prompts/*.txt"]},
include_package_data=True,
entry_points={
"console_scripts": [
@@ -228,8 +228,8 @@ def setup_package():
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: Implementation :: CPython'],
scripts={
-'Linux': ['src/bigdl/llm/cli/llm-cli'],
-'Windows': ['src/bigdl/llm/cli/llm-cli.ps1'],
+'Linux': ['src/bigdl/llm/cli/llm-cli', 'src/bigdl/llm/cli/llm-chat'],
+'Windows': ['src/bigdl/llm/cli/llm-cli.ps1', 'src/bigdl/llm/cli/llm-chat.ps1'],
}[platform_name],
platforms=['windows']
)
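With `cli/prompts/*.txt` added to `package_data`, the prompt template ships inside the installed package. A quick sanity check (a sketch; it mirrors how the `llm-chat` scripts locate the package):

```bash
# print the installed prompts directory and its contents
python - <<'EOF'
import os
import bigdl.llm

prompts_dir = os.path.join(os.path.dirname(bigdl.llm.__file__), "cli", "prompts")
print(prompts_dir)
print(os.listdir(prompts_dir))  # expect chat-with-llm.txt
EOF
```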

@@ -0,0 +1,94 @@
#!/bin/bash

# Default values
model_family=""
threads=8
# Number of tokens to predict (made it larger than default because we want a long interaction)
n_predict=512
EXTRA_ARGS=('--color')

llm_dir="$(dirname "$(python -c "import bigdl.llm;print(bigdl.llm.__file__)")")"
lib_dir="$llm_dir/libs"
prompts_dir="$llm_dir/cli/prompts"
# Select the binary variant matching the host CPU's vector extensions
function get_avx_flags() {
  avx="avx2"
  if command -v lscpu &>/dev/null; then
    msg=$(lscpu)
    if [[ $msg == *"avx512_vnni"* ]]; then
      avx="avx512"
    fi
  else
    echo "lscpu command not found. Please make sure it is installed."
  fi
  echo $avx
}
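# Note: the check above is equivalent to `lscpu | grep avx512_vnni`; CPUs
# without avx512_vnni fall back to the avx2 builds of the chat binaries.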
# Function to display help message
function display_help {
  echo "usage: ./llm-chat -x MODEL_FAMILY [-h] [args]"
  echo ""
  echo "options:"
  echo "  -h, --help           show this help message"
  echo "  -x, --model_family {llama,gptneox}"
  echo "                       family name of model"
  echo "  -t N, --threads N    number of threads to use during computation (default: 8)"
  echo "  -n N, --n_predict N  number of tokens to predict (default: 512, -1 = infinity)"
  echo "  args                 parameters passed to the specified model function"
}
function llama {
  # Seed the conversation with the bundled template and run in interactive mode
  PROMPT_TEMPLATE="$prompts_dir/chat-with-llm.txt"
  EXTRA_ARGS+=('-i' '--file' "'$PROMPT_TEMPLATE'" '--reverse-prompt' "'USER:'" '--in-prefix' "' '")
  command="$lib_dir/main-llama_$avx_flag -t $threads -n $n_predict ${filteredArguments[*]} ${EXTRA_ARGS[*]}"
  echo "$command"
  eval "$command"
}

function gptneox {
  command="$lib_dir/main-gptneox_$avx_flag -t $threads -n $n_predict ${filteredArguments[*]} ${EXTRA_ARGS[*]}"
  echo "$command"
  eval "$command"
}
# Parse and remove the options handled here; pass everything else through
filteredArguments=()
while [[ $# -gt 0 ]]; do
  case "$1" in
  -h | --help)
    display_help
    exit 0
    ;;
  -x | --model_family | --model-family)
    model_family="$2"
    shift 2
    ;;
  -t | --threads)
    threads="$2"
    shift 2
    ;;
  -n | --n_predict | --n-predict)
    n_predict="$2"
    shift 2
    ;;
  *)
    filteredArguments+=("'$1'")
    shift
    ;;
  esac
done
avx_flag=$(get_avx_flags)
echo "AVX Flags: $avx_flag"

# Perform actions based on the model_family
if [[ "$model_family" == "llama" ]]; then
  llama
elif [[ "$model_family" == "gptneox" ]]; then
  # TODO
  gptneox
else
  echo "llm-chat does not support model_family $model_family for now."
  display_help
fi
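For illustration, on an AVX2-only machine an invocation like `llm-chat -x llama -m model.bin` echoes and runs roughly the following composed command (a sketch; the actual binary suffix and prompts path depend on the local installation):

```bash
…/bigdl/llm/libs/main-llama_avx2 -t 8 -n 512 '-m' 'model.bin' --color -i \
  --file '…/bigdl/llm/cli/prompts/chat-with-llm.txt' --reverse-prompt 'USER:' --in-prefix ' '
```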

@@ -0,0 +1,80 @@
$llm_dir = (Split-Path -Parent (python -c "import bigdl.llm;print(bigdl.llm.__file__)"))
$lib_dir = Join-Path $llm_dir "libs"
$prompt_dir = Join-Path $llm_dir "cli/prompts"
$model_family = ""
$threads = 8
# Number of tokens to predict (made it larger than default because we want a long interaction)
$n_predict = 512
# Function to display help message
function Display-Help
{
    Write-Host "usage: ./llm-chat.ps1 -x MODEL_FAMILY [-h] [args]"
    Write-Host ""
    Write-Host "options:"
    Write-Host "  -h, --help           show this help message"
    Write-Host "  -x, --model_family {llama,gptneox}"
    Write-Host "                       family name of model"
    Write-Host "  -t N, --threads N    number of threads to use during computation (default: 8)"
    Write-Host "  -n N, --n_predict N  number of tokens to predict (default: 512, -1 = infinity)"
    Write-Host "  args                 parameters passed to the specified model function"
}
function llama
{
    # Seed the conversation with the bundled template and run in interactive mode
    $prompt_file = Join-Path $prompt_dir "chat-with-llm.txt"
    $command = "$lib_dir/main-llama.exe -t $threads -n $n_predict -f $prompt_file -i --color --reverse-prompt 'USER:' --in-prefix ' ' $filteredArguments"
    Write-Host "$command"
    Invoke-Expression $command
}

function gptneox
{
    # TODO
    $command = "$lib_dir/main-gptneox.exe -t $threads -n $n_predict $filteredArguments"
    Write-Host "$command"
    Invoke-Expression $command
}
# Parse and remove the options handled here; pass everything else through
$filteredArguments = @()
for ($i = 0; $i -lt $args.Length; $i++) {
    if ($args[$i] -eq '--model_family' -or $args[$i] -eq '--model-family' -or $args[$i] -eq '-x')
    {
        if ($i + 1 -lt $args.Length -and $args[$i + 1] -notlike '-*')
        {
            $i++
            $model_family = $args[$i]
        }
    }
    elseif ($args[$i] -eq '--threads' -or $args[$i] -eq '-t')
    {
        $i++
        $threads = $args[$i]
    }
    elseif ($args[$i] -eq '--n_predict' -or $args[$i] -eq '--n-predict' -or $args[$i] -eq '-n')
    {
        $i++
        $n_predict = $args[$i]
    }
    else
    {
        $filteredArguments += "`'" + $args[$i] + "`'"
    }
}
# Perform actions based on the model_family
switch ($model_family)
{
    "llama" {
        llama
    }
    "gptneox" {
        gptneox
    }
    default {
        Write-Host "llm-chat does not support model_family $model_family for now."
        Display-Help
    }
}
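On Windows, an equivalent invocation would look like the following (a sketch; the model path is a placeholder, and `-m` is passed through to the underlying binary):

```powershell
# chat with a LLaMA-family model using 8 threads
./llm-chat.ps1 -x llama -m "C:\path\to\model.bin" -t 8
```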

@@ -0,0 +1,5 @@
A chat between a curious human USER and an artificial intelligence assistant ChatLLM. The assistant gives helpful, detailed, and polite answers to the human's questions.
USER: Hello, ChatLLM.
ChatLLM: Hello.
USER:
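How this template is consumed (based on the `llama` function in `llm-chat` above; the flag semantics are those of the underlying llama.cpp-style `main` binary):

```bash
# flags passed by the llama() function in llm-chat:
#   --file "$prompts_dir/chat-with-llm.txt"   # seeds the session with this template
#   --reverse-prompt 'USER:'                  # pauses generation and returns control at each user turn
#   --in-prefix ' '                           # inserts a space between "USER:" and the typed input
#   -i                                        # interactive mode
```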