From ea3cf6783eb71f3d144e33b58ceb952364a1ba6a Mon Sep 17 00:00:00 2001 From: xingyuan li <108672484+hoshibara@users.noreply.github.com> Date: Thu, 8 Jun 2023 14:55:22 +0800 Subject: [PATCH] LLM: Command line wrapper for llama/bloom/gptneox (#8239) * add llama/bloom/gptneox wrapper * add readme * upload binary main file --- python/llm/README.md | 46 +++++++++++++ python/llm/setup.py | 38 ++++++++--- python/llm/src/bigdl/llm/cli/llm-cli | 84 ++++++++++++++++++++++++ python/llm/src/bigdl/llm/cli/llm-cli.ps1 | 56 ++++++++++++++++ 4 files changed, 216 insertions(+), 8 deletions(-) create mode 100755 python/llm/src/bigdl/llm/cli/llm-cli create mode 100755 python/llm/src/bigdl/llm/cli/llm-cli.ps1 diff --git a/python/llm/README.md b/python/llm/README.md index 83c597cb..22bbdbe5 100644 --- a/python/llm/README.md +++ b/python/llm/README.md @@ -1 +1,47 @@ # BigDL LLM + +## llm-cli + +llm-cli is a command-line interface tool that allows easy execution of llama/gptneox/bloom models +and generates results based on the provided prompt. + +### Usage + +```bash +llm-cli -x MODEL_FAMILY [-h] [args] +``` + +`args` are the arguments provided to the specified model program. You can use `-x MODEL_FAMILY -h` +to retrieve the parameter list for a specific `MODEL_FAMILY`, for example: + +```bash +llm-cli -x llama -h + +# Output: +# usage: main-llama [options] +# +# options: +# -h, --help show this help message and exit +# -i, --interactive run in interactive mode +# --interactive-first run in interactive mode and wait for input right away +# ... +``` + +### Examples + +Here are some examples of how to use the llm-cli tool: + +#### Completion: + +```bash +llm-cli -t 16 -x llama -m ./llm-llama-model.bin -p 'Once upon a time,' +``` + +#### Chatting: + +```bash +llm-cli -t 16 -x llama -m ./llm-llama-model.bin -i --color +``` + +Feel free to explore different options and experiment with the llama/gptneox/bloom models using +llm-cli! 
\ No newline at end of file diff --git a/python/llm/setup.py b/python/llm/setup.py index 153e4738..03d64008 100644 --- a/python/llm/setup.py +++ b/python/llm/setup.py @@ -25,13 +25,14 @@ # >>>> Linux: # python setup.py clean --all bdist_wheel --linux -import os -import sys import fnmatch -from setuptools import setup -import urllib.request +import os import platform import shutil +import sys +import urllib.request + +from setuptools import setup long_description = ''' BigDL LLM @@ -51,7 +52,7 @@ def get_llm_packages(): print(dirpath) package = dirpath.split(llm_home + os.sep)[1].replace(os.sep, '.') if any(fnmatch.fnmatchcase(package, pat=pattern) - for pattern in exclude_patterns): + for pattern in exclude_patterns): print("excluding", package) else: llm_packages.append(package) @@ -67,6 +68,9 @@ lib_urls["Windows"] = [ "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/quantize-gptneox.exe", "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/bloom.dll", "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/quantize-bloom.exe", + "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-llama.exe", + "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-bloom.exe", + "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-gptneox.exe", ] lib_urls["Linux"] = [ "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/libllama_avx2.so", @@ -77,6 +81,12 @@ lib_urls["Linux"] = [ "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/quantize-gptneox", "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/libbloom_avx2.so", "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/libbloom_avx512.so", + "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-llama_avx2", + "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-bloom_avx2", + "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-gptneox_avx2", + 
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-llama_avx512", + "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-bloom_avx512", + "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-gptneox_avx512", "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/quantize-bloom", ] @@ -92,7 +102,6 @@ def download_libs(url: str, change_permission=False): def setup_package(): - package_data = {} package_data["Windows"] = [ "libs/llama.dll", @@ -101,6 +110,9 @@ def setup_package(): "libs/quantize-gptneox.exe", "libs/bloom.dll", "libs/quantize-bloom.exe", + "libs/main-bloom.exe", + "libs/main-gptneox.exe", + "libs/main-llama.exe", ] package_data["Linux"] = [ "libs/libllama_avx2.so", @@ -112,6 +124,12 @@ def setup_package(): "libs/libbloom_avx2.so", "libs/libbloom_avx512.so", "libs/quantize-bloom", + "libs/main-bloom_avx2", + "libs/main-bloom_avx512", + "libs/main-gptneox_avx2", + "libs/main-gptneox_avx512", + "libs/main-llama_avx2", + "libs/main-llama_avx512", ] platform_name = None @@ -121,12 +139,12 @@ def setup_package(): if "--linux" in sys.argv: platform_name = "Linux" sys.argv.remove("--linux") - + if platform_name is None: if platform.platform().startswith('Windows'): platform_name = "Windows" else: - platform_name = "Linux" + platform_name = "Linux" change_permission = True if platform_name == "Linux" else False @@ -167,6 +185,10 @@ def setup_package(): 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: Implementation :: CPython'], + scripts={ + 'Linux': ['src/bigdl/llm/cli/llm-cli'], + 'Windows': ['src/bigdl/llm/cli/llm-cli.ps1'], + }[platform_name], platforms=['windows'] ) diff --git a/python/llm/src/bigdl/llm/cli/llm-cli b/python/llm/src/bigdl/llm/cli/llm-cli new file mode 100755 index 00000000..4f134750 --- /dev/null +++ b/python/llm/src/bigdl/llm/cli/llm-cli @@ -0,0 +1,84 @@ +#!/bin/bash + +# Default values +model_family="" + 
+llm_dir="$(dirname "$(python -c "import bigdl.llm;print(bigdl.llm.__file__)")")" +lib_dir="$llm_dir/libs" + +function get_avx_flags() { + avx="avx2" + if command -v lscpu &>/dev/null; then + msg=$(lscpu) + if [[ $msg == *"avx512_vnni"* ]]; then + avx="avx512" + fi + else + echo "lscpu command not found. Please make sure it is installed." + fi + echo $avx +} + +# Function to display help message +function display_help { + echo "usage: ./llm-cli.sh -x MODEL_FAMILY [-h] [args]" + echo "" + echo "options:" + echo " -h, --help show this help message" + echo " -x, --model_family {llama,bloom,gptneox}" + echo " family name of model" + echo " args parameters passed to the specified model function" +} + +function llama { + command="$lib_dir/main-llama_$avx_flag ${filteredArguments[*]}" + echo "$command" + eval "$command" +} + +function bloom { + command="$lib_dir/main-bloom_$avx_flag ${filteredArguments[*]}" + echo "$command" + eval "$command" +} + +function gptneox { + command="$lib_dir/main-gptneox_$avx_flag ${filteredArguments[*]}" + echo "$command" + eval "$command" +} + +# Remove model_family/x parameter +filteredArguments=() +while [[ $# -gt 0 ]]; do + case "$1" in + -h | --help) + display_help + filteredArguments+=("'$1'") + shift + ;; + -x | --model_family) + model_family="$2" + shift 2 + ;; + *) + filteredArguments+=("'$1'") + shift + ;; + esac +done + +avx_flag=$(get_avx_flags) +echo "AVX Flags: $avx_flag" + +# Perform actions based on the model_family +if [[ "$model_family" == "llama" ]]; then + llama +elif [[ "$model_family" == "bloom" ]]; then + bloom +elif [[ "$model_family" == "gptneox" ]]; then + gptneox +else + echo "Invalid model_family: $model_family" + display_help +fi diff --git a/python/llm/src/bigdl/llm/cli/llm-cli.ps1 b/python/llm/src/bigdl/llm/cli/llm-cli.ps1 new file mode 100755 index 00000000..16b08325 --- /dev/null +++ b/python/llm/src/bigdl/llm/cli/llm-cli.ps1 @@ -0,0 +1,56 @@ +$llm_dir = (Split-Path -Parent (python -c "import 
bigdl.llm;print(bigdl.llm.__file__)")) +$lib_dir = Join-Path $llm_dir "libs" + +# Function to display help message +function Display-Help { + Write-Host "usage: ./llm-cli.ps1 -x MODEL_FAMILY [-h] [args]" + Write-Host "" + Write-Host "options:" + Write-Host " -h, --help show this help message" + Write-Host " -x, --model_family {llama,bloom,gptneox}" + Write-Host " family name of model" + Write-Host " args parameters passed to the specified model function" +} + +function llama { + $command = "$lib_dir/main-llama.exe $filteredArguments" + Write-Host "$command" + Invoke-Expression $command +} + +function bloom { + $command = "$lib_dir/main-bloom.exe $filteredArguments" + Write-Host "$command" + Invoke-Expression $command +} + +function gptneox { + $command = "$lib_dir/main-gptneox.exe $filteredArguments" + Write-Host "$command" + Invoke-Expression $command +} + +# Remove model_family/x parameter +$filteredArguments = @() +for ($i = 0; $i -lt $args.Length; $i++) { + if ($args[$i] -eq '--model_family' -or $args[$i] -eq '-x') { + if ($i + 1 -lt $args.Length -and $args[$i + 1] -notlike '-*') { + $i++ + $model_family = $args[$i] + } + } + else { + $filteredArguments += "`'"+$args[$i]+"`'" + } +} + +# Perform actions based on the model_family +switch ($model_family) { + "llama" { llama } + "bloom" { bloom } + "gptneox" { gptneox } + default { + Write-Host "Invalid model_family: $model_family" + Display-Help + } +}