LLM: Command line wrapper for llama/bloom/gptneox (#8239)

* add llama/bloom/gptneox wrapper
* add readme
* upload binary main file
This commit is contained in:
xingyuan li 2023-06-08 14:55:22 +08:00 committed by GitHub
parent 08bdfce2d8
commit ea3cf6783e
4 changed files with 216 additions and 8 deletions

View file

@ -1 +1,47 @@
# BigDL LLM # BigDL LLM
## llm-cli
llm-cli is a command-line interface tool that allows easy execution of llama/gptneox/bloom models
and generates results based on the provided prompt.
### Usage
```bash
llm-cli.sh -x <llama/gptneox/bloom> [-h] [args]
```
`args` are the arguments provided to the specified model program. You can use `-x MODEL_FAMILY -h`
to retrieve the parameter list for a specific `MODEL_FAMILY`, for example:
```bash
llm-cli.sh -x llama -h
# Output:
# usage: main-llama [options]
#
# options:
# -h, --help show this help message and exit
# -i, --interactive run in interactive mode
# --interactive-first run in interactive mode and wait for input right away
# ...
```
### Examples
Here are some examples of how to use the llm-cli tool:
#### Completion:
```bash
llm-cli.sh -t 16 -x llama -m ./llm-llama-model.bin -p 'Once upon a time,'
```
#### Chatting:
```bash
llm-cli.sh -t 16 -x llama -m ./llm-llama-model.bin -i --color
```
Feel free to explore different options and experiment with the llama/gptneox/bloom models using
llm-cli!

View file

@ -25,13 +25,14 @@
# >>>> Linux # >>>> Linux
# python setup.py clean --all bdist_wheel --linux # python setup.py clean --all bdist_wheel --linux
import os
import sys
import fnmatch import fnmatch
from setuptools import setup import os
import urllib.request
import platform import platform
import shutil import shutil
import sys
import urllib.request
from setuptools import setup
long_description = ''' long_description = '''
BigDL LLM BigDL LLM
@ -51,7 +52,7 @@ def get_llm_packages():
print(dirpath) print(dirpath)
package = dirpath.split(llm_home + os.sep)[1].replace(os.sep, '.') package = dirpath.split(llm_home + os.sep)[1].replace(os.sep, '.')
if any(fnmatch.fnmatchcase(package, pat=pattern) if any(fnmatch.fnmatchcase(package, pat=pattern)
for pattern in exclude_patterns): for pattern in exclude_patterns):
print("excluding", package) print("excluding", package)
else: else:
llm_packages.append(package) llm_packages.append(package)
@ -67,6 +68,9 @@ lib_urls["Windows"] = [
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/quantize-gptneox.exe", "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/quantize-gptneox.exe",
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/bloom.dll", "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/bloom.dll",
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/quantize-bloom.exe", "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/quantize-bloom.exe",
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-llama.exe",
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-bloom.exe",
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-gptneox.exe",
] ]
lib_urls["Linux"] = [ lib_urls["Linux"] = [
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/libllama_avx2.so", "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/libllama_avx2.so",
@ -77,6 +81,12 @@ lib_urls["Linux"] = [
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/quantize-gptneox", "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/quantize-gptneox",
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/libbloom_avx2.so", "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/libbloom_avx2.so",
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/libbloom_avx512.so", "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/libbloom_avx512.so",
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-llama_avx2",
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-bloom_avx2",
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-gptneox_avx2",
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-llama_avx512",
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-bloom_avx512",
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-gptneox_avx512",
"https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/quantize-bloom", "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/quantize-bloom",
] ]
@ -92,7 +102,6 @@ def download_libs(url: str, change_permission=False):
def setup_package(): def setup_package():
package_data = {} package_data = {}
package_data["Windows"] = [ package_data["Windows"] = [
"libs/llama.dll", "libs/llama.dll",
@ -101,6 +110,9 @@ def setup_package():
"libs/quantize-gptneox.exe", "libs/quantize-gptneox.exe",
"libs/bloom.dll", "libs/bloom.dll",
"libs/quantize-bloom.exe", "libs/quantize-bloom.exe",
"libs/main-bloom.exe",
"libs/main-gptneox.exe",
"libs/main-llama.exe",
] ]
package_data["Linux"] = [ package_data["Linux"] = [
"libs/libllama_avx2.so", "libs/libllama_avx2.so",
@ -112,6 +124,12 @@ def setup_package():
"libs/libbloom_avx2.so", "libs/libbloom_avx2.so",
"libs/libbloom_avx512.so", "libs/libbloom_avx512.so",
"libs/quantize-bloom", "libs/quantize-bloom",
"libs/main-bloom_avx2",
"libs/main-bloom_avx512",
"libs/main-gptneox_avx2",
"libs/main-gptneox_avx512",
"libs/main-llama_avx2",
"libs/main-llama_avx512",
] ]
platform_name = None platform_name = None
@ -167,6 +185,10 @@ def setup_package():
'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: Implementation :: CPython'], 'Programming Language :: Python :: Implementation :: CPython'],
scripts={
'Linux': ['src/bigdl/llm/cli/llm-cli'],
'Windows': ['src/bigdl/llm/cli/llm-cli.ps1'],
}[platform_name],
platforms=['windows'] platforms=['windows']
) )

View file

@ -0,0 +1,84 @@
#!/bin/bash
# llm-cli: thin dispatcher that runs the bundled llama/bloom/gptneox
# native binaries shipped inside the installed bigdl.llm package.

# Default values
model_family=""

# Locate the installed bigdl.llm package directory via Python, then the
# "libs" subdirectory that holds the prebuilt main-* executables.
llm_dir="$(dirname "$(python -c "import bigdl.llm;print(bigdl.llm.__file__)")")"
lib_dir="$llm_dir/libs"
# Detect the best AVX level supported by this CPU.
# Prints "avx512" when lscpu reports avx512_vnni, otherwise "avx2".
# Diagnostics go to stderr: callers capture stdout via $(...), so any
# message on stdout would corrupt the returned flag value.
function get_avx_flags() {
  avx="avx2"
  if command -v lscpu &>/dev/null; then
    msg=$(lscpu)
    if [[ $msg == *"avx512_vnni"* ]]; then
      avx="avx512"
    fi
  else
    # BUG FIX: this warning previously went to stdout and was captured
    # into avx_flag by command substitution; send it to stderr instead.
    echo "lscpu command not found. Please make sure it is installed." >&2
  fi
  echo $avx
}
# Function to display help message
# Prints wrapper usage for llm-cli.sh. Note: model-specific options are
# shown by the underlying binary when -h is forwarded together with -x.
function display_help {
echo "usage: ./llm-cli.sh -x MODEL_FAMILY [-h] [args]"
echo ""
echo "options:"
echo " -h, --help show this help message"
echo " -x, --model_family {llama,bloom,gptneox}"
echo " family name of model"
echo " args parameters passed to the specified model function"
}
# Shared runner: execute the family-specific main binary built for the
# detected AVX level, forwarding the filtered CLI arguments.
# Reads globals: lib_dir, avx_flag, filteredArguments.
# Echoes the command line before running it (same as the original
# per-family functions did).
function run_model() {
  local model="$1"
  local command="$lib_dir/main-${model}_$avx_flag ${filteredArguments[*]}"
  echo "$command"
  eval "$command"
}

# One wrapper per supported model family; the dispatcher below calls
# these by name. Previously each duplicated the echo/eval body.
function llama {
  run_model llama
}

function bloom {
  run_model bloom
}

function gptneox {
  run_model gptneox
}
# Remove model_family/x parameter
# Everything except -x/--model_family is collected into filteredArguments
# and later forwarded verbatim to the model binary. Each argument is
# wrapped in single quotes because the runner builds a single string and
# evals it; the quoting preserves arguments containing spaces.
filteredArguments=()
while [[ $# -gt 0 ]]; do
case "$1" in
-h | --help)
# Show wrapper help, but ALSO forward -h to the binary so the
# model-specific option list is printed too (see README example).
display_help
filteredArguments+=("'$1'")
shift
;;
-x | --model_family)
# Consume the flag and its value; not forwarded to the binary.
model_family="$2"
shift 2
;;
*)
filteredArguments+=("'$1'")
shift
;;
esac
done
avx_flag=$(get_avx_flags)
echo "AVX Flags: $avx_flag"

# Perform actions based on the model_family.
# BUG FIX: an unknown family previously printed the error to stdout and
# exited 0; report on stderr and exit non-zero so scripted callers can
# detect the failure.
case "$model_family" in
  llama)
    llama
    ;;
  bloom)
    bloom
    ;;
  gptneox)
    gptneox
    ;;
  *)
    echo "Invalid model_family: $model_family" >&2
    display_help
    exit 1
    ;;
esac

View file

@ -0,0 +1,56 @@
# llm-cli.ps1: Windows dispatcher for the bundled llama/bloom/gptneox
# native executables shipped inside the installed bigdl.llm package.
# Locate the package directory via Python, then its "libs" subfolder.
$llm_dir = (Split-Path -Parent (python -c "import bigdl.llm;print(bigdl.llm.__file__)"))
$lib_dir = Join-Path $llm_dir "libs"
# Function to display help message
# Prints wrapper usage; model-specific options are shown by the
# underlying executable when -h is forwarded together with -x.
function Display-Help {
Write-Host "usage: ./llm-cli.ps1 -x MODEL_FAMILY [-h] [args]"
Write-Host ""
Write-Host "options:"
Write-Host " -h, --help show this help message"
Write-Host " -x, --model_family {llama,bloom,gptneox}"
Write-Host " family name of model"
Write-Host " args parameters passed to the specified model function"
}
# Shared runner: invoke the family-specific main executable, forwarding
# the filtered CLI arguments. Reads script-scope $lib_dir and
# $filteredArguments. Echoes the command line before running it, same
# as the original per-family functions did.
function Invoke-ModelBinary($model_name) {
    $command = "$lib_dir/main-$model_name.exe $filteredArguments"
    Write-Host "$command"
    Invoke-Expression $command
}

# One wrapper per supported model family; the switch below calls these
# by name. Previously each duplicated the Write-Host/Invoke-Expression
# body.
function llama { Invoke-ModelBinary "llama" }
function bloom { Invoke-ModelBinary "bloom" }
function gptneox { Invoke-ModelBinary "gptneox" }
# Remove model_family/x parameter
# Everything except -x/--model_family is collected into
# $filteredArguments and forwarded verbatim to the executable. Each
# argument is wrapped in single quotes because the runner builds a
# single string and Invoke-Expression's it; the quoting preserves
# arguments containing spaces.
$filteredArguments = @()
for ($i = 0; $i -lt $args.Length; $i++) {
    if ($args[$i] -eq '--model_family' -or $args[$i] -eq '-x') {
        # Consume the flag's value too (skip it on the next iteration),
        # but only when the next token does not look like another flag.
        if ($i + 1 -lt $args.Length -and $args[$i + 1] -notlike '-*') {
            $i++
            $model_family = $args[$i]
        }
    }
    else {
        $filteredArguments += "`'"+$args[$i]+"`'"
    }
}
# Perform actions based on the model_family.
# BUG FIX: an unknown family previously printed the error and returned
# success; exit non-zero so scripted callers can detect the failure.
switch ($model_family) {
    "llama" { llama }
    "bloom" { bloom }
    "gptneox" { gptneox }
    default {
        Write-Host "Invalid model_family: $model_family"
        Display-Help
        exit 1
    }
}