From f2bb469847e301ce0e17fcc016ae7069597c501d Mon Sep 17 00:00:00 2001
From: Yina Chen <33650826+cyita@users.noreply.github.com>
Date: Wed, 5 Jul 2023 14:04:17 +0800
Subject: [PATCH] [WIP] LLM llm-cli chat mode (#8440)

* fix timezone

* temp

* Update linux interactive mode

* modify init text for interactive mode

* meet comments

* update

* win script

* meet comments
---
 python/llm/README.md                        |  6 +-
 python/llm/setup.py                         |  6 +-
 python/llm/src/bigdl/llm/cli/llm-chat       | 94 +++++++++++++++++++
 python/llm/src/bigdl/llm/cli/llm-chat.ps1   | 80 ++++++++++++++++
 .../bigdl/llm/cli/prompts/chat-with-llm.txt |  5 +
 5 files changed, 187 insertions(+), 4 deletions(-)
 create mode 100755 python/llm/src/bigdl/llm/cli/llm-chat
 create mode 100644 python/llm/src/bigdl/llm/cli/llm-chat.ps1
 create mode 100644 python/llm/src/bigdl/llm/cli/prompts/chat-with-llm.txt

diff --git a/python/llm/README.md b/python/llm/README.md
index a6586c9b..9e314278 100644
--- a/python/llm/README.md
+++ b/python/llm/README.md
@@ -74,8 +74,12 @@ Currently `bigdl-llm` CLI supports *LLaMA* (e.g., *vicuna*), *GPT-NeoX* (e.g., *
   #text completion
   #llama/bloom/gptneox/starcoder model family is currently supported
   llm-cli -t 16 -x gptneox -m "/path/to/output/model.bin" -p 'Once upon a time,'
+
+  #chat mode
+  #Note: The chat mode only supports LLaMA (e.g., *vicuna*) and GPT-NeoX (e.g., *redpajama*) for now.
+  llm-chat -m "/path/to/output/model.bin" -x llama
   ```
-  
+
 #### Hugging Face `transformers`-style API
 
 You may run the models using `transformers`-style API in `bigdl-llm`.
diff --git a/python/llm/setup.py b/python/llm/setup.py
index 07508840..79cde633 100644
--- a/python/llm/setup.py
+++ b/python/llm/setup.py
@@ -214,7 +214,7 @@ def setup_package():
         url='https://github.com/intel-analytics/BigDL',
         packages=get_llm_packages(),
         package_dir={"": "src"},
-        package_data={"bigdl.llm": package_data[platform_name]},
+        package_data={"bigdl.llm": package_data[platform_name] + ["cli/prompts/*.txt"]},
         include_package_data=True,
         entry_points={
             "console_scripts": [
@@ -228,8 +228,8 @@ def setup_package():
                     'Programming Language :: Python :: 3.9',
                     'Programming Language :: Python :: Implementation :: CPython'],
         scripts={
-            'Linux': ['src/bigdl/llm/cli/llm-cli'],
-            'Windows': ['src/bigdl/llm/cli/llm-cli.ps1'],
+            'Linux': ['src/bigdl/llm/cli/llm-cli', 'src/bigdl/llm/cli/llm-chat'],
+            'Windows': ['src/bigdl/llm/cli/llm-cli.ps1', 'src/bigdl/llm/cli/llm-chat.ps1'],
         }[platform_name],
         platforms=['windows']
     )
diff --git a/python/llm/src/bigdl/llm/cli/llm-chat b/python/llm/src/bigdl/llm/cli/llm-chat
new file mode 100755
index 00000000..0defd775
--- /dev/null
+++ b/python/llm/src/bigdl/llm/cli/llm-chat
@@ -0,0 +1,94 @@
+#!/bin/bash
+
+# Default values
+model_family=""
+threads=8
+# Number of tokens to predict (made it larger than default because we want a long interaction)
+n_predict=512
+
+EXTRA_ARGS=('--color')
+
+llm_dir="$(dirname "$(python -c "import bigdl.llm;print(bigdl.llm.__file__)")")"
+lib_dir="$llm_dir/libs"
+prompts_dir="$llm_dir/cli/prompts"
+
+function get_avx_flags() {
+  avx="avx2"
+  if command -v lscpu &>/dev/null; then
+    msg=$(lscpu)
+    if [[ $msg == *"avx512_vnni"* ]]; then
+      avx="avx512"
+    fi
+  else
+    echo "lscpu command not found. Please make sure it is installed." >&2
+  fi
+  echo $avx
+}
+
+# Function to display help message
+function display_help {
+  echo "usage: ./llm-chat -x MODEL_FAMILY [-h] [args]"
+  echo ""
+  echo "options:"
+  echo "  -h, --help           show this help message"
+  echo "  -x, --model_family {llama,gptneox}"
+  echo "                       family name of model"
+  echo "  -t N, --threads N    number of threads to use during computation (default: 8)"
+  echo "  -n N, --n_predict N  number of tokens to predict (default: 512, -1 = infinity)"
+  echo "  args                 parameters passed to the specified model function"
+}
+
+function llama {
+  PROMPT_TEMPLATE="$prompts_dir/chat-with-llm.txt"
+  EXTRA_ARGS+=('-i' '--file' "'$PROMPT_TEMPLATE'" '--reverse-prompt' "'USER:'" '--in-prefix' "' '")
+  command="$lib_dir/main-llama_$avx_flag -t $threads -n $n_predict ${filteredArguments[*]} ${EXTRA_ARGS[*]}"
+  echo "$command"
+  eval "$command"
+}
+
+function gptneox {
+  command="$lib_dir/main-gptneox_$avx_flag -t $threads -n $n_predict ${filteredArguments[*]} ${EXTRA_ARGS[*]}"
+  echo "$command"
+  eval "$command"
+}
+
+# Remove model_family/x parameter
+filteredArguments=()
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    -h | --help)
+      display_help
+      shift
+      ;;
+    -x | --model_family | --model-family)
+      model_family="$2"
+      shift 2
+      ;;
+    -t | --threads)
+      threads="$2"
+      shift 2
+      ;;
+    -n | --n_predict | --n-predict)
+      n_predict="$2"
+      shift 2
+      ;;
+    *)
+      filteredArguments+=("'$1'")
+      shift
+      ;;
+  esac
+done
+
+avx_flag=$(get_avx_flags)
+echo "AVX Flags: $avx_flag"
+
+# Perform actions based on the model_family
+if [[ "$model_family" == "llama" ]]; then
+  llama
+elif [[ "$model_family" == "gptneox" ]]; then
+  # TODO
+  gptneox
+else
+  echo "llm-chat does not support model_family $model_family for now."
+  display_help
+fi
diff --git a/python/llm/src/bigdl/llm/cli/llm-chat.ps1 b/python/llm/src/bigdl/llm/cli/llm-chat.ps1
new file mode 100644
index 00000000..527f4129
--- /dev/null
+++ b/python/llm/src/bigdl/llm/cli/llm-chat.ps1
@@ -0,0 +1,80 @@
+$llm_dir = (Split-Path -Parent (python -c "import bigdl.llm;print(bigdl.llm.__file__)"))
+$lib_dir = Join-Path $llm_dir "libs"
+$prompt_dir = Join-Path $llm_dir "cli/prompts"
+
+$model_family = ""
+$threads = 8
+# Number of tokens to predict (made it larger than default because we want a long interaction)
+$n_predict = 512
+
+# Function to display help message
+function Display-Help
+{
+    Write-Host "usage: ./llm-chat.ps1 -x MODEL_FAMILY [-h] [args]"
+    Write-Host ""
+    Write-Host "options:"
+    Write-Host "  -h, --help           show this help message"
+    Write-Host "  -x, --model_family {llama,gptneox}"
+    Write-Host "                       family name of model"
+    Write-Host "  -t N, --threads N    number of threads to use during computation (default: 8)"
+    Write-Host "  -n N, --n_predict N  number of tokens to predict (default: 512, -1 = infinity)"
+    Write-Host "  args                 parameters passed to the specified model function"
+}
+
+function llama
+{
+    $prompt_file = Join-Path $prompt_dir "chat-with-llm.txt"
+    $command = "$lib_dir/main-llama.exe -t $threads -n $n_predict -f $prompt_file -i --color --reverse-prompt 'USER:' --in-prefix ' ' $filteredArguments"
+    Write-Host "$command"
+    Invoke-Expression $command
+}
+
+function gptneox
+{
+    # TODO
+    $command = "$lib_dir/main-gptneox.exe -t $threads -n $n_predict $filteredArguments"
+    Write-Host "$command"
+    Invoke-Expression $command
+}
+
+# Remove model_family/x parameter
+$filteredArguments = @()
+for ($i = 0; $i -lt $args.Length; $i++) {
+    if ($args[$i] -eq '--model_family' -or $args[$i] -eq '--model-family' -or $args[$i] -eq '-x')
+    {
+        if ($i + 1 -lt $args.Length -and $args[$i + 1] -notlike '-*')
+        {
+            $i++
+            $model_family = $args[$i]
+        }
+    }
+    elseif ($args[$i] -eq '--threads' -or $args[$i] -eq '-t')
+    {
+        $i++
+        $threads = $args[$i]
+    }
+    elseif ($args[$i] -eq '--n_predict' -or $args[$i] -eq '--n-predict' -or $args[$i] -eq '-n')
+    {
+        $i++
+        $n_predict = $args[$i]
+    }
+    else
+    {
+        $filteredArguments += "`'" + $args[$i] + "`'"
+    }
+}
+
+# Perform actions based on the model_family
+switch ($model_family)
+{
+    "llama" {
+        llama
+    }
+    "gptneox" {
+        gptneox
+    }
+    default {
+        Write-Host "llm-chat does not support model_family $model_family for now."
+        Display-Help
+    }
+}
diff --git a/python/llm/src/bigdl/llm/cli/prompts/chat-with-llm.txt b/python/llm/src/bigdl/llm/cli/prompts/chat-with-llm.txt
new file mode 100644
index 00000000..4e5b279a
--- /dev/null
+++ b/python/llm/src/bigdl/llm/cli/prompts/chat-with-llm.txt
@@ -0,0 +1,5 @@
+A chat between a curious human USER and an artificial intelligence assistant ChatLLM. The assistant gives helpful, detailed, and polite answers to the human's questions.
+
+USER: Hello, ChatLLM.
+ChatLLM: Hello.
+USER:
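
Illustrative usage (not part of the patch): once this change is installed, the new chat mode is expected to be launched roughly as sketched below. The model path matches the placeholder used in the README, while the thread and token counts are arbitrary example values, and the exact binary name depends on the AVX level detected by the script.

    # interactive chat with a LLaMA-family model (e.g. vicuna), 16 threads
    llm-chat -x llama -m "/path/to/output/model.bin" -t 16

    # GPT-NeoX-family models (e.g. redpajama) take the same options
    llm-chat -x gptneox -m "/path/to/output/model.bin" -n 256

    # on Linux the first invocation expands to approximately:
    #   $lib_dir/main-llama_avx2 -t 16 -n 512 '-m' '/path/to/output/model.bin' \
    #     --color -i --file "$prompts_dir/chat-with-llm.txt" --reverse-prompt 'USER:' --in-prefix ' '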