[LLM] First push for Bloomz pybinding (#8252)

* Initial commit to move bloom pybinding to bigdl-llm

* Revise path for shared library

* Small fix
This commit is contained in:
Yuwen Hu 2023-06-02 14:41:04 +08:00 committed by GitHub
parent 91a1528fce
commit 286b010bf1
3 changed files with 297 additions and 0 deletions

View file

@ -0,0 +1,22 @@
#
# Copyright 2016 The BigDL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This makes sure Python is aware there is more than one sub-package within bigdl,
# physically located elsewhere.
# Otherwise there would be a module-not-found error in a non-pip setting, as Python would
# only search the first bigdl package and end up finding only one sub-package.
from .bloom import Bloom

View file

@ -0,0 +1,129 @@
#
# Copyright 2016 The BigDL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ===========================================================================
#
# This file is adapted from
# https://github.com/abetlen/llama-cpp-python/blob/main/llama_cpp/llama.py
#
# MIT License
#
# Copyright (c) 2023 Andrei Betlen
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# This makes sure Python is aware there is more than one sub-package within bigdl,
# physically located elsewhere.
# Otherwise there would be a module-not-found error in a non-pip setting, as Python would
# only search the first bigdl package and end up finding only one sub-package.
from .bloom_cpp import bloom_load, bloom_free, bloom_run
class Bloom:
    """High-level Python wrapper for a bloom.cpp model."""

    def __init__(self,
                 model_path: str,
                 n_ctx: int = 512,
                 seed: int = 1337,
                 logits_all: bool = False,
                 n_threads: int = -1,
                 n_batch: int = 8,
                 last_n_tokens_size: int = 64,
                 verbose: bool = True,
                 ):
        """Load a bloom.cpp model from `model_path`.

        Args:
            model_path: Path to the model.
            n_ctx: Maximum context size.
            seed: Random seed. 0 for random.
            logits_all: Return logits for all tokens, not just the last token.
            n_threads: Number of threads to use. -1 lets the native library
                pick the thread count automatically.
            n_batch: Maximum number of prompt tokens to batch together when
                calling the native eval function.
            last_n_tokens_size: Maximum number of tokens to keep in the
                last_n_tokens deque.
            verbose: Print verbose output to stderr.

        Raises:
            RuntimeError: If the model cannot be loaded from `model_path`.

        Returns:
            A Bloom instance.
        """
        # The native loader expects the path as UTF-8 encoded bytes.
        self.ctx = bloom_load(bytes(model_path, encoding='utf-8'), n_ctx, n_threads)
        if not self.ctx:
            raise RuntimeError(f"Failed to load model from {model_path}")
        self.n_ctx = n_ctx
        self.seed = seed
        self.logits_all = logits_all
        self.n_threads = n_threads
        self.n_batch = n_batch
        self.last_n_tokens_size = last_n_tokens_size
        self.verbose = verbose

    def __call__(self, prompt: str, max_tokens: int, stream: bool = False):
        """Generate a completion for `prompt`.

        Returns a generator of incremental strings when `stream` is True,
        otherwise the full prompt + completion string.
        """
        if stream:
            return self.stream(prompt, max_tokens)
        return self._eval(prompt, max_tokens, False)

    def _eval(self, prompt: str, max_tokens: int, match_str: bool):
        """Run one native generation call; return prompt plus completion."""
        # "</s>" marks end-of-sequence: nothing left to generate.
        if prompt.endswith("</s>") or max_tokens < 1:
            return prompt
        # Use `buf` to store the prompt and the generated string;
        # assume the average length of words is less than 20 bytes.
        buf = bytes((len(prompt) + max_tokens) * 20)
        # The native call writes its output into `buf`; its integer return
        # value carries no information we use here, so it is ignored.
        bloom_run(ctx=self.ctx,
                  seed=self.seed,
                  n_threads=self.n_threads,
                  n_batch=self.n_batch,
                  n_predict=max_tokens,
                  match_str=match_str,
                  prompt=bytes(prompt, encoding='utf-8'),
                  buf=buf)
        # Strip the trailing NULs left in the fixed-size buffer.
        return str(buf, encoding='utf-8').rstrip("\x00")

    def stream(self, prompt: str, max_tokens: int):
        """Yield the accumulated text one generated token at a time."""
        if prompt.endswith("</s>") or max_tokens < 1:
            yield prompt
        else:
            for i in range(max_tokens):
                if prompt.endswith("</s>"):
                    break
                # After the first step, ask the native side to continue from
                # the previously generated string (match_str=True).
                prompt = self._eval(prompt, 1, i != 0)
                yield prompt

    def free(self):
        """Release the native model context."""
        bloom_free(self.ctx)

View file

@ -0,0 +1,146 @@
#
# Copyright 2016 The BigDL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ===========================================================================
#
# This file is adapted from
# https://github.com/abetlen/llama-cpp-python/blob/main/llama_cpp/llama_cpp.py
#
# MIT License
#
# Copyright (c) 2023 Andrei Betlen
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# This makes sure Python is aware there is more than one sub-package within bigdl,
# physically located elsewhere.
# Otherwise there would be a module-not-found error in a non-pip setting, as Python would
# only search the first bigdl package and end up finding only one sub-package.
import sys
import os
import ctypes
from ctypes import (
c_int,
c_float,
c_char_p,
c_void_p,
c_bool,
POINTER,
Structure,
Array,
c_uint8,
c_size_t,
)
import pathlib
# Load the library
def _load_shared_library(lib_base_name: str):
    """Locate the bloom shared library and load it with ctypes.

    Looks in ``<package root>/libs`` for ``lib<name>`` and ``<name>`` with the
    platform-specific extension, unless the ``BLOOM_CPP_LIB`` environment
    variable points at an explicit library file.

    Raises:
        RuntimeError: On an unsupported platform or a failed dlopen/LoadLibrary.
        FileNotFoundError: When no candidate library file exists.
    """
    # Map the current platform to its shared-library file extension.
    if sys.platform.startswith("linux") or sys.platform == "darwin":
        lib_ext = ".so"
    elif sys.platform == "win32":
        lib_ext = ".dll"
    else:
        raise RuntimeError("Unsupported platform")

    # Candidate libraries live in python/llm/src/bigdl/llm/libs, four levels
    # up from this file. Try both the "libbloom" naming (bloomcpp default)
    # and the plain "bloom" naming used by this repo.
    lib_dir = pathlib.Path(__file__).parent.parent.parent.parent.resolve() / 'libs'
    candidates = [
        lib_dir / f"lib{lib_base_name}{lib_ext}",
        lib_dir / f"{lib_base_name}{lib_ext}",
    ]

    # An explicit BLOOM_CPP_LIB path replaces the default search list.
    if "BLOOM_CPP_LIB" in os.environ:
        lib_base_name = os.environ["BLOOM_CPP_LIB"]
        override = pathlib.Path(lib_base_name)
        lib_dir = override.parent.resolve()
        candidates = [override.resolve()]

    # On Windows the library directory must be on the DLL search path.
    if sys.platform == "win32" and sys.version_info >= (3, 8):
        os.add_dll_directory(str(lib_dir))

    # Load the first candidate that exists, surfacing loader errors clearly.
    for candidate in candidates:
        if candidate.exists():
            try:
                return ctypes.CDLL(str(candidate))
            except Exception as e:
                raise RuntimeError(f"Failed to load shared library '{candidate}': {e}")
    raise FileNotFoundError(f"Shared library with base name '{lib_base_name}' not found")
# Specify the base name of the shared library to load
_lib_base_name = "bloom"
# Load the library once at import time; a missing library raises
# FileNotFoundError here (see _load_shared_library).
_lib = _load_shared_library(_lib_base_name)
def bloom_load(fname: bytes, n_ctx: c_int, n_threads: c_int) -> c_void_p:
    """Load a bloom model from the UTF-8 path bytes `fname`.

    Returns an opaque native context pointer; callers treat a falsy
    result as a load failure (see Bloom.__init__).
    """
    return _lib.bloom_load(fname, n_ctx, n_threads)
_lib.bloom_load.argtypes = [c_char_p, c_int, c_int]
_lib.bloom_load.restype = c_void_p
def bloom_free(ctx: c_void_p):
    """Release the native context previously returned by bloom_load."""
    return _lib.bloom_free(ctx)
_lib.bloom_free.argtypes = [c_void_p]
_lib.bloom_free.restype = None
def bloom_run(ctx: c_void_p,
              seed: c_int,
              n_threads: c_int,
              n_batch: c_int,
              n_predict: c_int,
              match_str: c_bool,
              prompt: bytes,
              buf: bytes) -> c_int:
    """Run generation on `ctx` for up to `n_predict` tokens.

    The native code writes the NUL-terminated output string into the
    caller-allocated `buf`; the int return value is a native status code
    (callers in this package ignore it — see Bloom._eval).
    """
    return _lib.bloom_run(ctx, seed, n_threads, n_batch, n_predict, match_str, prompt, buf)
_lib.bloom_run.argtypes = [c_void_p, c_int, c_int, c_int, c_int, c_bool, c_char_p, c_char_p]
_lib.bloom_run.restype = c_int
# ------------------------------------------------------------------- #