diff --git a/python/llm/src/bigdl/llm/ggml/model/bloom/__init__.py b/python/llm/src/bigdl/llm/ggml/model/bloom/__init__.py new file mode 100644 index 00000000..b578caae --- /dev/null +++ b/python/llm/src/bigdl/llm/ggml/model/bloom/__init__.py @@ -0,0 +1,22 @@ +# +# Copyright 2016 The BigDL Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This would makes sure Python is aware there is more than one sub-package within bigdl, +# physically located elsewhere. +# Otherwise there would be module not found error in non-pip's setting as Python would +# only search the first bigdl package and end up finding only one sub-package. + +from .bloom import Bloom diff --git a/python/llm/src/bigdl/llm/ggml/model/bloom/bloom.py b/python/llm/src/bigdl/llm/ggml/model/bloom/bloom.py new file mode 100644 index 00000000..52c883ee --- /dev/null +++ b/python/llm/src/bigdl/llm/ggml/model/bloom/bloom.py @@ -0,0 +1,129 @@ +# +# Copyright 2016 The BigDL Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +# =========================================================================== +# +# This file is adapted from +# https://github.com/abetlen/llama-cpp-python/blob/main/llama_cpp/llama.py +# +# MIT License +# +# Copyright (c) 2023 Andrei Betlen +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# This would makes sure Python is aware there is more than one sub-package within bigdl, +# physically located elsewhere. +# Otherwise there would be module not found error in non-pip's setting as Python would +# only search the first bigdl package and end up finding only one sub-package. 
from .bloom_cpp import bloom_load, bloom_free, bloom_run


class Bloom:
    """High-level Python wrapper for a bloom.cpp model."""

    def __init__(self,
                 model_path: str,
                 n_ctx: int = 512,
                 seed: int = 1337,
                 logits_all: bool = False,
                 n_threads: int = -1,
                 n_batch: int = 8,
                 last_n_tokens_size: int = 64,
                 verbose: bool = True,
                 ):
        """Load a bloom.cpp model from `model_path`.

        Args:
            model_path: Path to the model file.
            n_ctx: Maximum context size.
            seed: Random seed. 0 for random.
            logits_all: Return logits for all tokens, not just the last token.
            n_threads: Number of threads to use. -1 lets the native library
                choose the thread count.
            n_batch: Maximum number of prompt tokens to batch together when
                calling the native evaluation function.
            last_n_tokens_size: Maximum number of tokens to keep in the
                last_n_tokens deque.
            verbose: Print verbose output to stderr.

        Raises:
            RuntimeError: If the model cannot be loaded from `model_path`.

        Returns:
            A Bloom instance.
        """
        self.ctx = bloom_load(bytes(model_path, encoding='utf-8'), n_ctx, n_threads)
        if not self.ctx:
            raise RuntimeError(f"Failed to load model from {model_path}")
        self.n_ctx = n_ctx
        self.seed = seed
        self.logits_all = logits_all
        self.n_threads = n_threads
        self.n_batch = n_batch
        self.last_n_tokens_size = last_n_tokens_size
        self.verbose = verbose

    def __call__(self, prompt: str, max_tokens: int, stream: bool = False):
        """Generate up to `max_tokens` tokens continuing `prompt`.

        When `stream` is True, return a generator that yields the growing
        text one step at a time; otherwise return the full completion.
        """
        if stream:
            return self.stream(prompt, max_tokens)
        else:
            return self._eval(prompt, max_tokens, False)

    def _eval(self, prompt: str, max_tokens: int, match_str: bool):
        """Run the native model once and return prompt plus generated text.

        BUG FIX: the end-of-sequence check previously read
        `prompt.endswith("")`, which is True for every string (all strings
        end with the empty string), so this method always returned the
        prompt untouched and no generation ever ran. The intended sentinel
        is the "</s>" EOS marker used by BLOOM models.
        """
        if prompt.endswith("</s>") or max_tokens < 1:
            return prompt
        # use `buf` to store prompt and generated string,
        # assume the average length of words is less than 20 bytes
        # NOTE(review): `bytes` is immutable; the native side writing into it
        # works under CPython but a ctypes `create_string_buffer` would be
        # the strictly correct choice -- confirm against the C signature.
        buf = bytes((len(prompt) + max_tokens) * 20)
        ret = bloom_run(ctx=self.ctx,
                        seed=self.seed,
                        n_threads=self.n_threads,
                        n_batch=self.n_batch,
                        n_predict=max_tokens,
                        match_str=match_str,
                        prompt=bytes(prompt, encoding='utf-8'),
                        buf=buf)
        # The native side NUL-terminates; strip trailing NUL padding.
        s = str(buf, encoding='utf-8').rstrip("\x00")
        return s

    def stream(self, prompt: str, max_tokens: int):
        """Yield the completion incrementally, one token per iteration."""
        if prompt.endswith("</s>") or max_tokens < 1:
            yield prompt
        else:
            for i in range(max_tokens):
                if prompt.endswith("</s>"):
                    break
                # match_str is False only on the first step so the full
                # prompt is evaluated once; later steps continue from it.
                prompt = self._eval(prompt, 1, i != 0)
                yield prompt

    def free(self):
        """Release the native model context."""
        bloom_free(self.ctx)
+# +# =========================================================================== +# +# This file is adapted from +# https://github.com/abetlen/llama-cpp-python/blob/main/llama_cpp/llama_cpp.py +# +# MIT License +# +# Copyright (c) 2023 Andrei Betlen +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# This would makes sure Python is aware there is more than one sub-package within bigdl, +# physically located elsewhere. +# Otherwise there would be module not found error in non-pip's setting as Python would +# only search the first bigdl package and end up finding only one sub-package. 
import sys
import os
import ctypes
from ctypes import (
    c_int,
    c_float,
    c_char_p,
    c_void_p,
    c_bool,
    POINTER,
    Structure,
    Array,
    c_uint8,
    c_size_t,
)
import pathlib


# Load the library
def _load_shared_library(lib_base_name: str):
    """Locate and load the native bloom shared library via ctypes.

    Search order:
      1. The explicit path in the ``BLOOM_CPP_LIB`` environment variable,
         if set (overrides the default candidates).
      2. ``lib{base}{ext}`` and ``{base}{ext}`` under the package's
         ``libs`` directory (python/llm/src/bigdl/llm/libs).

    Args:
        lib_base_name: Base name of the library, without the ``lib``
            prefix or platform extension.

    Returns:
        The loaded ``ctypes.CDLL`` handle.

    Raises:
        RuntimeError: If the platform is unsupported or loading fails.
        FileNotFoundError: If no candidate library file exists.
    """
    # Determine the file extension based on the platform
    if sys.platform.startswith("linux"):
        lib_ext = ".so"
    elif sys.platform == "darwin":
        # NOTE(review): macOS normally uses .dylib; this project appears to
        # ship .so binaries on darwin too -- confirm against the build output.
        lib_ext = ".so"
    elif sys.platform == "win32":
        lib_ext = ".dll"
    else:
        raise RuntimeError("Unsupported platform")

    # Construct the paths to the possible shared library names
    # (python/llm/src/bigdl/llm/libs)
    _base_path = pathlib.Path(__file__).parent.parent.parent.parent.resolve()
    _base_path = _base_path / 'libs'
    # Searching for the library in the current directory under the name "libbloom"
    # (default name for bloomcpp) and "bloom" (default name for this repo)
    _lib_paths = [
        _base_path / f"lib{lib_base_name}{lib_ext}",
        _base_path / f"{lib_base_name}{lib_ext}",
    ]

    if "BLOOM_CPP_LIB" in os.environ:
        lib_base_name = os.environ["BLOOM_CPP_LIB"]
        _lib = pathlib.Path(lib_base_name)
        _base_path = _lib.parent.resolve()
        _lib_paths = [_lib.resolve()]

    # Add the library directory to the DLL search path on Windows (if needed)
    if sys.platform == "win32" and sys.version_info >= (3, 8):
        os.add_dll_directory(str(_base_path))

    # Try to load the shared library, handling potential errors
    for _lib_path in _lib_paths:
        if _lib_path.exists():
            try:
                return ctypes.CDLL(str(_lib_path))
            except Exception as e:
                # Chain the original loader error explicitly so the real
                # dlopen/LoadLibrary failure is preserved in the traceback.
                raise RuntimeError(
                    f"Failed to load shared library '{_lib_path}': {e}") from e

    raise FileNotFoundError(f"Shared library with base name '{lib_base_name}' not found")


# Specify the base name of the shared library to load
_lib_base_name = "bloom"

# Load the library
_lib = _load_shared_library(_lib_base_name)


def bloom_load(fname: bytes, n_ctx: c_int, n_threads: c_int) -> c_void_p:
    """Load a bloom model file; returns an opaque native context pointer.

    `fname` must already be an encoded bytes path (the native side takes
    a C string).
    """
    return _lib.bloom_load(fname, n_ctx, n_threads)


_lib.bloom_load.argtypes = [c_char_p, c_int, c_int]
_lib.bloom_load.restype = c_void_p


def bloom_free(ctx: c_void_p):
    """Free a context previously returned by `bloom_load`."""
    return _lib.bloom_free(ctx)


_lib.bloom_free.argtypes = [c_void_p]
_lib.bloom_free.restype = None


def bloom_run(ctx: c_void_p,
              seed: c_int,
              n_threads: c_int,
              n_batch: c_int,
              n_predict: c_int,
              match_str: c_bool,
              prompt: bytes,
              buf: bytes) -> c_int:
    """Run generation on `ctx`; the native side writes output text into `buf`.

    Returns the native status code.
    """
    return _lib.bloom_run(ctx, seed, n_threads, n_batch, n_predict, match_str, prompt, buf)


_lib.bloom_run.argtypes = [c_void_p, c_int, c_int, c_int, c_int, c_bool, c_char_p, c_char_p]
_lib.bloom_run.restype = c_int

# ------------------------------------------------------------------- #