LLM: Command line wrapper for llama/bloom/gptneox (#8239)
* add llama/bloom/gptneox wrapper
* add readme
* upload binary main file

parent 08bdfce2d8
commit ea3cf6783e

4 changed files with 216 additions and 8 deletions
@@ -1 +1,47 @@
# BigDL LLM

## llm-cli

llm-cli is a command-line interface tool that allows easy execution of llama/gptneox/bloom models
and generates results based on the provided prompt.

### Usage

```bash
llm-cli -x <llama/gptneox/bloom> [-h] [args]
```

`args` are the arguments provided to the specified model program. You can use `-x MODEL_FAMILY -h`
to retrieve the parameter list for a specific `MODEL_FAMILY`, for example:

```bash
llm-cli.sh -x llama -h

# Output:
# usage: main-llama [options]
#
# options:
#   -h, --help            show this help message and exit
#   -i, --interactive     run in interactive mode
#   --interactive-first   run in interactive mode and wait for input right away
#   ...
```

### Examples

Here are some examples of how to use the llm-cli tool:

#### Completion:

```bash
llm-cli.sh -t 16 -x llama -m ./llm-llama-model.bin -p 'Once upon a time,'
```
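
The same invocation pattern applies to the other model families. As a sketch, assuming a converted GPT-NeoX model saved at `./llm-gptneox-model.bin` (an illustrative path, not shipped with the package):

```bash
# hypothetical model path; substitute your own converted gptneox model
llm-cli.sh -t 16 -x gptneox -m ./llm-gptneox-model.bin -p 'Once upon a time,'
```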

#### Chatting:

```bash
llm-cli.sh -t 16 -x llama -m ./llm-llama-model.bin -i --color
```
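
On Windows, the equivalent `llm-cli.ps1` script accepts the same arguments and dispatches to the bundled `main-*.exe` binaries.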

Feel free to explore different options and experiment with the llama/gptneox/bloom models using
llm-cli!

@@ -25,13 +25,14 @@
# >>>> Linux:
# python setup.py clean --all bdist_wheel --linux

import os
import sys
import fnmatch
from setuptools import setup
import urllib.request
import os
import platform
import shutil
import sys
import urllib.request

from setuptools import setup

long_description = '''
BigDL LLM

@@ -67,6 +68,9 @@ lib_urls["Windows"] = [
    "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/quantize-gptneox.exe",
    "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/bloom.dll",
    "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/quantize-bloom.exe",
    "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-llama.exe",
    "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-bloom.exe",
    "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-gptneox.exe",
]
lib_urls["Linux"] = [
    "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/libllama_avx2.so",

@@ -77,6 +81,12 @@ lib_urls["Linux"] = [
    "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/quantize-gptneox",
    "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/libbloom_avx2.so",
    "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/libbloom_avx512.so",
    "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-llama_avx2",
    "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-bloom_avx2",
    "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-gptneox_avx2",
    "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-llama_avx512",
    "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-bloom_avx512",
    "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/main-gptneox_avx512",
    "https://sourceforge.net/projects/analytics-zoo/files/bigdl-llm/quantize-bloom",
]

@@ -92,7 +102,6 @@ def download_libs(url: str, change_permission=False):


def setup_package():

    package_data = {}
    package_data["Windows"] = [
        "libs/llama.dll",

@@ -101,6 +110,9 @@ def setup_package():
        "libs/quantize-gptneox.exe",
        "libs/bloom.dll",
        "libs/quantize-bloom.exe",
        "libs/main-bloom.exe",
        "libs/main-gptneox.exe",
        "libs/main-llama.exe",
    ]
    package_data["Linux"] = [
        "libs/libllama_avx2.so",

@@ -112,6 +124,12 @@ def setup_package():
        "libs/libbloom_avx2.so",
        "libs/libbloom_avx512.so",
        "libs/quantize-bloom",
        "libs/main-bloom_avx2",
        "libs/main-bloom_avx512",
        "libs/main-gptneox_avx2",
        "libs/main-gptneox_avx512",
        "libs/main-llama_avx2",
        "libs/main-llama_avx512",
    ]

    platform_name = None

@@ -167,6 +185,10 @@ def setup_package():
            'Programming Language :: Python :: 3',
            'Programming Language :: Python :: 3.9',
            'Programming Language :: Python :: Implementation :: CPython'],
        scripts={
            'Linux': ['src/bigdl/llm/cli/llm-cli'],
            'Windows': ['src/bigdl/llm/cli/llm-cli.ps1'],
        }[platform_name],
        platforms=['windows']
    )

python/llm/src/bigdl/llm/cli/llm-cli (new executable file, 84 lines)

@@ -0,0 +1,84 @@
#!/bin/bash

# Default values
model_family=""

llm_dir="$(dirname "$(python -c "import bigdl.llm;print(bigdl.llm.__file__)")")"
lib_dir="$llm_dir/libs"

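# Choose which prebuilt binary variant to run: the avx512 build when lscpu
# reports avx512_vnni support, otherwise the avx2 build.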
function get_avx_flags() {
  avx="avx2"
  if command -v lscpu &>/dev/null; then
    msg=$(lscpu)
    if [[ $msg == *"avx512_vnni"* ]]; then
      avx="avx512"
    fi
  else
    echo "lscpu command not found. Please make sure it is installed."
  fi
  echo $avx
}

# Function to display help message
function display_help {
  echo "usage: ./llm-cli.sh -x MODEL_FAMILY [-h] [args]"
  echo ""
  echo "options:"
  echo "  -h, --help           show this help message"
  echo "  -x, --model_family {llama,bloom,gptneox}"
  echo "                       family name of model"
  echo "  args                 parameters passed to the specified model function"
}

function llama {
  command="$lib_dir/main-llama_$avx_flag ${filteredArguments[*]}"
  echo "$command"
  eval "$command"
}

function bloom {
  command="$lib_dir/main-bloom_$avx_flag ${filteredArguments[*]}"
  echo "$command"
  eval "$command"
}

function gptneox {
  command="$lib_dir/main-gptneox_$avx_flag ${filteredArguments[*]}"
  echo "$command"
  eval "$command"
}

# Remove model_family/x parameter
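# Remaining arguments are wrapped in single quotes so that eval passes each one
# to the model binary as a single token.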
filteredArguments=()
while [[ $# -gt 0 ]]; do
  case "$1" in
    -h | --help)
      display_help
      filteredArguments+=("'$1'")
      shift
      ;;
    -x | --model_family)
      model_family="$2"
      shift 2
      ;;
    *)
      filteredArguments+=("'$1'")
      shift
      ;;
  esac
done

avx_flag=$(get_avx_flags)
echo "AVX Flags: $avx_flag"

# Perform actions based on the model_family
if [[ "$model_family" == "llama" ]]; then
  llama
elif [[ "$model_family" == "bloom" ]]; then
  bloom
elif [[ "$model_family" == "gptneox" ]]; then
  gptneox
else
  echo "Invalid model_family: $model_family"
  display_help
fi

python/llm/src/bigdl/llm/cli/llm-cli.ps1 (new executable file, 56 lines)

@@ -0,0 +1,56 @@
$llm_dir = (Split-Path -Parent (python -c "import bigdl.llm;print(bigdl.llm.__file__)"))
$lib_dir = Join-Path $llm_dir "libs"

# Function to display help message
function Display-Help {
    Write-Host "usage: ./llm-cli.ps1 -x MODEL_FAMILY [-h] [args]"
    Write-Host ""
    Write-Host "options:"
    Write-Host "  -h, --help           show this help message"
    Write-Host "  -x, --model_family {llama,bloom,gptneox}"
    Write-Host "                       family name of model"
    Write-Host "  args                 parameters passed to the specified model function"
}

function llama {
    $command = "$lib_dir/main-llama.exe $filteredArguments"
    Write-Host "$command"
    Invoke-Expression $command
}

function bloom {
    $command = "$lib_dir/main-bloom.exe $filteredArguments"
    Write-Host "$command"
    Invoke-Expression $command
}

function gptneox {
    $command = "$lib_dir/main-gptneox.exe $filteredArguments"
    Write-Host "$command"
    Invoke-Expression $command
}

# Remove model_family/x parameter
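# Remaining arguments are wrapped in single quotes so that Invoke-Expression
# passes each one to the model binary as a single token.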
$filteredArguments = @()
for ($i = 0; $i -lt $args.Length; $i++) {
    if ($args[$i] -eq '--model_family' -or $args[$i] -eq '-x') {
        if ($i + 1 -lt $args.Length -and $args[$i + 1] -notlike '-*') {
            $i++
            $model_family = $args[$i]
        }
    }
    else {
        $filteredArguments += "`'"+$args[$i]+"`'"
    }
}

# Perform actions based on the model_family
switch ($model_family) {
    "llama" { llama }
    "bloom" { bloom }
    "gptneox" { gptneox }
    default {
        Write-Host "Invalid model_family: $model_family"
        Display-Help
    }
}