From 02c583144c0884db3efe352688e2fe21b4e9cd38 Mon Sep 17 00:00:00 2001 From: Shengsheng Huang Date: Mon, 12 Jun 2023 19:22:07 +0800 Subject: [PATCH] [LLM] langchain integrations and examples (#8256) * langchain intergrations and examples * add licences and rename * add licences * fix license issues and change backbone to model_family * update examples to use model_family param * fix linting * fix code style * exclude langchain integration from stylecheck * update langchain examples and update integrations based on latets changes * update simple llama-cpp-python style API example * remove bloom in README * change default n_threads to 2 and remove redundant code --------- Co-authored-by: leonardozcm --- python/llm/dev/test/lint-python | 3 +- python/llm/example/langchain/README.md | 46 +++ python/llm/example/langchain/docqa.py | 89 +++++ python/llm/example/langchain/streamchat.py | 70 ++++ python/llm/example/simple.py | 56 +++ .../src/bigdl/llm/ggml/model/bloom/bloom.py | 2 +- .../llm/src/bigdl/llm/langchain/__init__.py | 20 + .../llm/langchain/embeddings/__init__.py | 26 ++ .../llm/langchain/embeddings/bigdlllm.py | 192 ++++++++++ .../src/bigdl/llm/langchain/llms/__init__.py | 34 ++ .../src/bigdl/llm/langchain/llms/bigdlllm.py | 353 ++++++++++++++++++ 11 files changed, 889 insertions(+), 2 deletions(-) create mode 100644 python/llm/example/langchain/README.md create mode 100644 python/llm/example/langchain/docqa.py create mode 100644 python/llm/example/langchain/streamchat.py create mode 100644 python/llm/example/simple.py create mode 100644 python/llm/src/bigdl/llm/langchain/__init__.py create mode 100644 python/llm/src/bigdl/llm/langchain/embeddings/__init__.py create mode 100644 python/llm/src/bigdl/llm/langchain/embeddings/bigdlllm.py create mode 100644 python/llm/src/bigdl/llm/langchain/llms/__init__.py create mode 100644 python/llm/src/bigdl/llm/langchain/llms/bigdlllm.py diff --git a/python/llm/dev/test/lint-python b/python/llm/dev/test/lint-python index 
df5e8def..2cc866c8 100755 --- a/python/llm/dev/test/lint-python +++ b/python/llm/dev/test/lint-python @@ -21,6 +21,7 @@ SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )" PYTHON_ROOT_DIR="$SCRIPT_DIR/.." echo $PYTHON_ROOT_DIR PATHS_TO_CHECK="$SCRIPT_DIR/../../src" +PATTERNS_TO_EXCLUDE="__init__.py,log4Error.py,$SCRIPT_DIR/../../src/bigdl/llm/langchain/*" PEP8_REPORT_PATH="$PYTHON_ROOT_DIR/test/pep8-report.txt" PYLINT_REPORT_PATH="$PYTHON_ROOT_DIR/test/pylint-report.txt" PYLINT_INSTALL_INFO="$PYTHON_ROOT_DIR/test/pylint-info.txt" @@ -52,7 +53,7 @@ export "PATH=$PYTHONPATH:$PATH" #+ first, but we do so so that the check status can #+ be output before the report, like with the #+ scalastyle and RAT checks. -python "$PEP8_SCRIPT_PATH" --ignore=E402,E731,E241,W503,E226 --exclude=__init__.py,log4Error.py --config=dev/tox.ini $PATHS_TO_CHECK >> "$PEP8_REPORT_PATH" +python "$PEP8_SCRIPT_PATH" --ignore=E402,E731,E241,W503,E226 --exclude=$PATTERNS_TO_EXCLUDE --config=dev/tox.ini $PATHS_TO_CHECK >> "$PEP8_REPORT_PATH" pep8_status="${PIPESTATUS[0]}" if [ "$compile_status" -eq 0 -a "$pep8_status" -eq 0 ]; then diff --git a/python/llm/example/langchain/README.md b/python/llm/example/langchain/README.md new file mode 100644 index 00000000..c41084a3 --- /dev/null +++ b/python/llm/example/langchain/README.md @@ -0,0 +1,46 @@ +# Langchain examples + +The examples here shows how to use langchain with `bigdl-llm`. + +## Install bigdl-llm +Follow the instructions in [bigdl-llm docs: Install](). + +## Install Required Dependencies for langchain examples. + +```bash +pip install langchain==0.0.184 +pip install -U chromadb==0.3.25 +pip install -U typing_extensions==4.5.0 +``` + +Note that typing_extensions==4.5.0 is required, or you may encounter error `TypeError: dataclass_transform() got an unexpected keyword argument 'field_specifiers'` when running the examples. + + +## Convert Models using bigdl-llm +Follow the instructions in [bigdl-llm docs: Convert Models](). 
+
+
+## Run the examples
+
+### 1. Streaming Chat
+
+```bash
+python ./streamchat.py -m MODEL_PATH -x MODEL_FAMILY -t THREAD_NUM -q "What is AI?"
+```
+arguments info:
+- `-m MODEL_PATH`: path to the converted model
+- `-x MODEL_FAMILY`: the model family of the model specified in `-m`, available options are `llama`, `gptneox`
+- `-q QUESTION`: question to ask. Default is `What is AI?`.
+- `-t THREAD_NUM`: number of threads to use for inference. Default is `2`.
+
+### 2. Question Answering over Docs
+```bash
+python ./docqa.py -t THREAD_NUM -m CONVERTED_MODEL_PATH -x MODEL_FAMILY -i DOC_PATH -q QUESTION
+```
+arguments info:
+- `-m CONVERTED_MODEL_PATH`: path to the converted model in above step
+- `-x MODEL_FAMILY`: the model family of the model specified in `-m`, available options are `llama`, `gptneox`
+- `-i DOC_PATH`: path to the input document that the question is answered from
+- `-q QUESTION`: question to ask. Default is `What is AI?`.
+- `-t THREAD_NUM`: number of threads to use for inference. Default is `2`.
+
diff --git a/python/llm/example/langchain/docqa.py b/python/llm/example/langchain/docqa.py
new file mode 100644
index 00000000..839edd53
--- /dev/null
+++ b/python/llm/example/langchain/docqa.py
@@ -0,0 +1,89 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This would makes sure Python is aware there is more than one sub-package within bigdl,
+# physically located elsewhere.
+# Otherwise there would be module not found error in non-pip's setting as Python would
+# only search the first bigdl package and end up finding only one sub-package.
+
+import argparse
+
+from langchain.vectorstores import Chroma
+from langchain.chains.chat_vector_db.prompts import (CONDENSE_QUESTION_PROMPT,
+                                                     QA_PROMPT)
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.chains.question_answering import load_qa_chain
+from langchain.callbacks.manager import CallbackManager
+from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+
+from bigdl.llm.langchain.llms import BigdlLLM
+from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings
+
+
+def main(args):
+    """Answer `args.question` from the document at `args.input_path`, streaming to stdout."""
+
+    input_path = args.input_path
+    model_path = args.model_path
+    model_family = args.model_family
+    query = args.question
+    n_ctx = args.n_ctx
+    n_threads = args.thread_num
+
+
+    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
+
+    # split texts of input doc
+    with open(input_path) as f:
+        input_doc = f.read()
+    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+    texts = text_splitter.split_text(input_doc)
+
+    # create embeddings and store into vectordb
+    embeddings = BigdlLLMEmbeddings(model_path=model_path, model_family=model_family, n_threads=n_threads, n_ctx=n_ctx)
+    docsearch = Chroma.from_texts(texts, embeddings, metadatas=[{"source": str(i)} for i in range(len(texts))]).as_retriever()
+
+    # get relevant texts
+    docs = docsearch.get_relevant_documents(query)
+
+    bigdl_llm = BigdlLLM(
+        model_path=model_path, model_family=model_family, n_ctx=n_ctx, n_threads=n_threads, callback_manager=callback_manager
+    )
+
+    doc_chain = load_qa_chain(
+        bigdl_llm, chain_type="stuff", prompt=QA_PROMPT, callback_manager=callback_manager
+    )
+
+    doc_chain.run(input_documents=docs, question=query)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='BigDL-LLM Langchain Question Answering over Docs Example')
+    parser.add_argument('-x','--model-family', type=str, required=True,
+                        help='the model family')
+    parser.add_argument('-m','--model-path', type=str, required=True,
+                        help='the path to the converted llm model')
+    parser.add_argument('-i', '--input-path', type=str, required=True,
+                        help='the path to the input doc.')
+    parser.add_argument('-q', '--question', type=str, default='What is AI?',
+                        help='question you want to ask.')
+    parser.add_argument('-c','--n-ctx', type=int, default=2048,
+                        help='size of the token context window')
+    parser.add_argument('-t','--thread-num', type=int, default=2,
+                        help='number of threads to use for inference')
+    args = parser.parse_args()
+
+    main(args)
diff --git a/python/llm/example/langchain/streamchat.py b/python/llm/example/langchain/streamchat.py
new file mode 100644
index 00000000..4ee80211
--- /dev/null
+++ b/python/llm/example/langchain/streamchat.py
@@ -0,0 +1,70 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This would makes sure Python is aware there is more than one sub-package within bigdl,
+# physically located elsewhere.
+# Otherwise there would be module not found error in non-pip's setting as Python would
+# only search the first bigdl package and end up finding only one sub-package.
+
+import argparse
+
+from bigdl.llm.langchain.llms import BigdlLLM
+from langchain import PromptTemplate, LLMChain
+from langchain.callbacks.manager import CallbackManager
+from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+
+
+def main(args):
+    """Run `args.question` through an LLMChain backed by BigdlLLM, streaming tokens to stdout."""
+    question = args.question
+    model_path = args.model_path
+    model_family = args.model_family
+    n_threads = args.thread_num
+
+    template = """{question}"""
+
+    prompt = PromptTemplate(template=template, input_variables=["question"])
+
+    # Callbacks support token-wise streaming
+    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
+
+    # Verbose is required to pass to the callback manager
+    llm = BigdlLLM(
+        model_path=model_path,
+        model_family=model_family,
+        n_threads=n_threads,
+        callback_manager=callback_manager,
+        verbose=True
+    )
+
+    llm_chain = LLMChain(prompt=prompt, llm=llm)
+
+    llm_chain.run(question)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='BigDL-LLM Langchain Streaming Chat Example')
+    parser.add_argument('-x','--model-family', type=str, required=True,
+                        help='the model family')
+    parser.add_argument('-m','--model-path', type=str, required=True,
+                        help='the path to the converted llm model')
+    parser.add_argument('-q', '--question', type=str, default='What is AI?',
+                        help='question you want to ask.')
+    parser.add_argument('-t','--thread-num', type=int, default=2,
+                        help='Number of threads to use for inference')
+    args = parser.parse_args()
+
+    main(args)
diff --git a/python/llm/example/simple.py b/python/llm/example/simple.py
new file mode 100644
index 00000000..9e7b8ee1
--- /dev/null
+++ b/python/llm/example/simple.py
@@ -0,0 +1,56 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This would makes sure Python is aware there is more than one sub-package within bigdl,
+# physically located elsewhere.
+# Otherwise there would be module not found error in non-pip's setting as Python would
+# only search the first bigdl package and end up finding only one sub-package.
+
+import argparse
+
+def main(args):
+    """Load the model for `args.model_family` and print its response to `args.prompt`."""
+    model_family = args.model_family
+    model_path = args.model_path
+    prompt = args.prompt
+    n_threads = args.thread_num
+    if model_family == "llama":
+        from bigdl.llm.models import Llama as modelclass
+    elif model_family == "bloom":
+        from bigdl.llm.models import Bloom as modelclass
+    elif model_family == "gptneox":
+        from bigdl.llm.models import Gptneox as modelclass
+    else:
+        raise ValueError(f"Unsupported model family '{model_family}', "
+                         "valid values are llama, bloom, gptneox")
+
+    model = modelclass(model_path, n_threads=n_threads)
+    response = model(prompt)
+    print(response)
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Llama-CPP-Python style API Simple Example')
+    parser.add_argument('-x','--model-family', type=str, required=True,
+                        help='the model family')
+    parser.add_argument('-m','--model-path', type=str, required=True,
+                        help='the path to the converted llm model')
+    parser.add_argument('-p', '--prompt', type=str, default='What is AI?',
+                        help='question you want to ask.')
+    parser.add_argument('-t','--thread-num', type=int, default=2,
+                        help='number of threads to use for inference')
+    args = parser.parse_args()
+
+    main(args)
\ No newline at end of file
diff --git a/python/llm/src/bigdl/llm/ggml/model/bloom/bloom.py b/python/llm/src/bigdl/llm/ggml/model/bloom/bloom.py
index 
376e2fbc..09432482 100644 --- a/python/llm/src/bigdl/llm/ggml/model/bloom/bloom.py +++ b/python/llm/src/bigdl/llm/ggml/model/bloom/bloom.py @@ -94,7 +94,7 @@ class Bloom: self.last_n_tokens_size = last_n_tokens_size self.verbose = verbose - def __call__(self, prompt: str, max_tokens: int, stream: bool = False, + def __call__(self, prompt: str, max_tokens: int = 128, stream: bool = False, stop: Optional[List[str]] = []): if stream: return self.stream(prompt, max_tokens, stop) diff --git a/python/llm/src/bigdl/llm/langchain/__init__.py b/python/llm/src/bigdl/llm/langchain/__init__.py new file mode 100644 index 00000000..dbdafd2a --- /dev/null +++ b/python/llm/src/bigdl/llm/langchain/__init__.py @@ -0,0 +1,20 @@ +# +# Copyright 2016 The BigDL Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This would makes sure Python is aware there is more than one sub-package within bigdl, +# physically located elsewhere. +# Otherwise there would be module not found error in non-pip's setting as Python would +# only search the first bigdl package and end up finding only one sub-package. diff --git a/python/llm/src/bigdl/llm/langchain/embeddings/__init__.py b/python/llm/src/bigdl/llm/langchain/embeddings/__init__.py new file mode 100644 index 00000000..1b19ac26 --- /dev/null +++ b/python/llm/src/bigdl/llm/langchain/embeddings/__init__.py @@ -0,0 +1,26 @@ +# +# Copyright 2016 The BigDL Authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This would makes sure Python is aware there is more than one sub-package within bigdl, +# physically located elsewhere. +# Otherwise there would be module not found error in non-pip's setting as Python would +# only search the first bigdl package and end up finding only one sub-package. + +from .bigdlllm import BigdlLLMEmbeddings + +__all__ = [ + "BigdlLLMEmbeddings", +] diff --git a/python/llm/src/bigdl/llm/langchain/embeddings/bigdlllm.py b/python/llm/src/bigdl/llm/langchain/embeddings/bigdlllm.py new file mode 100644 index 00000000..f9c90da3 --- /dev/null +++ b/python/llm/src/bigdl/llm/langchain/embeddings/bigdlllm.py @@ -0,0 +1,192 @@ +# +# Copyright 2016 The BigDL Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This would makes sure Python is aware there is more than one sub-package within bigdl, +# physically located elsewhere. 
+# Otherwise there would be module not found error in non-pip's setting as Python would
+# only search the first bigdl package and end up finding only one sub-package.
+
+# This file is adapted from
+# https://github.com/hwchase17/langchain/blob/master/langchain/embeddings/llamacpp.py
+
+# The MIT License
+
+# Copyright (c) Harrison Chase
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+"""Wrapper around BigdlLLM embedding models."""
+import importlib
+from typing import Any, Dict, List, Optional
+
+from pydantic import BaseModel, Extra, Field, root_validator
+
+from langchain.embeddings.base import Embeddings
+
+
+class BigdlLLMEmbeddings(BaseModel, Embeddings):
+    """Wrapper around bigdl-llm embedding models.
+
+    Example:
+        .. code-block:: python
+
+            from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings
+            llama = BigdlLLMEmbeddings(model_path="/path/to/model.bin")
+    """
+
+    model_family: str = "llama"
+    """the model family: currently supports llama, gptneox, and bloom."""
+
+    family_info = {
+        'llama': {'module': "bigdl.llm.models", 'class': "Llama"},
+        'bloom': {'module': "bigdl.llm.models", 'class': "Bloom"},
+        'gptneox': {'module': "bigdl.llm.models", 'class': "Gptneox"},
+    }  #: :meta private:
+    """info necessary for different model family initiation and configure"""
+
+    client: Any  #: :meta private:
+    model_path: str
+
+    n_ctx: int = Field(512, alias="n_ctx")
+    """Token context window."""
+
+    n_parts: int = Field(-1, alias="n_parts")
+    """Number of parts to split the model into.
+    If -1, the number of parts is automatically determined."""
+
+    seed: int = Field(-1, alias="seed")
+    """Seed. If -1, a random seed is used."""
+
+    f16_kv: bool = Field(False, alias="f16_kv")
+    """Use half-precision for key/value cache."""
+
+    logits_all: bool = Field(False, alias="logits_all")
+    """Return logits for all tokens, not just the last token."""
+
+    vocab_only: bool = Field(False, alias="vocab_only")
+    """Only load the vocabulary, no weights."""
+
+    use_mlock: bool = Field(False, alias="use_mlock")
+    """Force system to keep model in RAM."""
+
+    n_threads: Optional[int] = Field(2, alias="n_threads")
+    """Number of threads to use."""
+
+    n_batch: Optional[int] = Field(8, alias="n_batch")
+    """Number of tokens to process in parallel.
+    Should be a number between 1 and n_ctx."""
+
+    n_gpu_layers: Optional[int] = Field(None, alias="n_gpu_layers")
+    """Number of layers to be loaded into gpu memory. Default None."""
+
+    class Config:
+        """Configuration for this pydantic object."""
+
+        extra = Extra.forbid
+
+    @root_validator()
+    def validate_environment(cls, values: Dict) -> Dict:
+        """Validate the model family and load the bigdl-llm embedding client.
+
+        Raises ModuleNotFoundError if bigdl-llm cannot be imported, and
+        ValueError if the family is unsupported or the model fails to load.
+        """
+        model_path = values["model_path"]
+        model_param_names = [
+            "n_ctx",
+            "n_parts",
+            "seed",
+            "f16_kv",
+            "logits_all",
+            "vocab_only",
+            "use_mlock",
+            "n_threads",
+            "n_batch",
+        ]
+        model_params = {k: values[k] for k in model_param_names}
+        # For backwards compatibility, only include if non-null.
+        if values["n_gpu_layers"] is not None:
+            model_params["n_gpu_layers"] = values["n_gpu_layers"]
+
+        model_family = values["model_family"].lower()
+        if model_family not in values["family_info"]:
+            raise ValueError("Model family '%s' is not supported. Valid"
+                             " values are %s" % (values["model_family"],
+                                                 ','.join(values["family_info"])))
+
+        try:
+            # Look up the model class for this family and instantiate it in embedding mode.
+            b_info = values["family_info"][model_family]
+            module = importlib.import_module(b_info['module'])
+            class_ = getattr(module, b_info['class'])
+
+            values["client"] = class_(model_path, embedding=True, **model_params)
+
+        except ImportError as e:
+            raise ModuleNotFoundError(
+                "Could not import bigdl-llm library. "
+                "Please install the bigdl-llm library to "
+                "use this embedding model: pip install bigdl-llm"
+            ) from e
+        except Exception as e:
+            raise ValueError(
+                f"Could not load model from path: {model_path}. "
+                f"Please make sure the model family {model_family} matches "
+                "the model you want to load. "
+                f"Received error {e}"
+            ) from e
+
+        return values
+
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        """Embed a list of documents using the loaded model.
+
+        Args:
+            texts: The list of texts to embed.
+
+        Returns:
+            List of embeddings, one for each text.
+        """
+        embeddings = [self.client.embed(text) for text in texts]
+        return [list(map(float, e)) for e in embeddings]
+
+    def embed_query(self, text: str) -> List[float]:
+        """Embed a query using the loaded model.
+
+        Args:
+            text: The text to embed.
+
+        Returns:
+            Embeddings for the text.
+        """
+        embedding = self.client.embed(text)
+        return list(map(float, embedding))
diff --git a/python/llm/src/bigdl/llm/langchain/llms/__init__.py b/python/llm/src/bigdl/llm/langchain/llms/__init__.py
new file mode 100644
index 00000000..dbc75f61
--- /dev/null
+++ b/python/llm/src/bigdl/llm/langchain/llms/__init__.py
@@ -0,0 +1,34 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This would makes sure Python is aware there is more than one sub-package within bigdl,
+# physically located elsewhere.
+# Otherwise there would be module not found error in non-pip's setting as Python would
+# only search the first bigdl package and end up finding only one sub-package.
+ +"""Wrappers on top of large language models APIs.""" +from typing import Dict, Type +from langchain.llms.base import BaseLLM + +from .bigdlllm import BigdlLLM + +__all__ = [ + "BigdlLLM", +] + +type_to_cls_dict: Dict[str, Type[BaseLLM]] = { + "BigdlLLM": BigdlLLM, +} \ No newline at end of file diff --git a/python/llm/src/bigdl/llm/langchain/llms/bigdlllm.py b/python/llm/src/bigdl/llm/langchain/llms/bigdlllm.py new file mode 100644 index 00000000..b732311c --- /dev/null +++ b/python/llm/src/bigdl/llm/langchain/llms/bigdlllm.py @@ -0,0 +1,353 @@ +# +# Copyright 2016 The BigDL Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This would makes sure Python is aware there is more than one sub-package within bigdl, +# physically located elsewhere. +# Otherwise there would be module not found error in non-pip's setting as Python would +# only search the first bigdl package and end up finding only one sub-package. 
+
+# This file is adapted from
+# https://github.com/hwchase17/langchain/blob/master/langchain/llms/llamacpp.py
+
+# The MIT License
+
+# Copyright (c) Harrison Chase
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+import importlib
+from typing import Any, Dict, Generator, List, Optional
+
+from pydantic import Field, root_validator
+
+from langchain.callbacks.manager import CallbackManagerForLLMRun
+from langchain.llms.base import LLM
+
+
+class BigdlLLM(LLM):
+    """Wrapper around the BigDL-LLM
+
+    Example:
+        .. code-block:: python
+
+            from bigdl.llm.langchain.llms import BigdlLLM
+            llm = BigdlLLM(model_path="/path/to/llama/model")
+    """
+
+    model_family: str = "llama"
+    """the model family: currently supports llama, gptneox, and bloom."""
+
+    family_info = {
+        'llama': {'module': "bigdl.llm.models", 'class': "Llama"},
+        'bloom': {'module': "bigdl.llm.models", 'class': "Bloom"},
+        'gptneox': {'module': "bigdl.llm.models", 'class': "Gptneox"},
+    }  #: :meta private:
+    """info necessary for different model families initiation and configure"""
+
+    client: Any  #: :meta private:
+    """the actual model"""
+
+    model_path: str
+    """The path to the Llama model file."""
+
+    lora_base: Optional[str] = None
+    """The path to the Llama LoRA base model."""
+
+    lora_path: Optional[str] = None
+    """The path to the Llama LoRA. If None, no LoRa is loaded."""
+
+    n_ctx: int = Field(512, alias="n_ctx")
+    """Token context window."""
+
+    n_parts: int = Field(-1, alias="n_parts")
+    """Number of parts to split the model into.
+    If -1, the number of parts is automatically determined."""
+
+    seed: int = Field(-1, alias="seed")
+    """Seed. If -1, a random seed is used."""
+
+    f16_kv: bool = Field(True, alias="f16_kv")
+    """Use half-precision for key/value cache."""
+
+    logits_all: bool = Field(False, alias="logits_all")
+    """Return logits for all tokens, not just the last token."""
+
+    vocab_only: bool = Field(False, alias="vocab_only")
+    """Only load the vocabulary, no weights."""
+
+    use_mlock: bool = Field(False, alias="use_mlock")
+    """Force system to keep model in RAM."""
+
+    n_threads: Optional[int] = Field(2, alias="n_threads")
+    """Number of threads to use."""
+
+    n_batch: Optional[int] = Field(8, alias="n_batch")
+    """Number of tokens to process in parallel.
+    Should be a number between 1 and n_ctx."""
+
+    n_gpu_layers: Optional[int] = Field(None, alias="n_gpu_layers")
+    """Number of layers to be loaded into gpu memory. Default None."""
+
+    suffix: Optional[str] = Field(None)
+    """A suffix to append to the generated text. If None, no suffix is appended."""
+
+    max_tokens: Optional[int] = 256
+    """The maximum number of tokens to generate."""
+
+    temperature: Optional[float] = 0.8
+    """The temperature to use for sampling."""
+
+    top_p: Optional[float] = 0.95
+    """The top-p value to use for sampling."""
+
+    logprobs: Optional[int] = Field(None)
+    """The number of logprobs to return. If None, no logprobs are returned."""
+
+    echo: Optional[bool] = False
+    """Whether to echo the prompt."""
+
+    stop: Optional[List[str]] = []
+    """A list of strings to stop generation when encountered."""
+
+    repeat_penalty: Optional[float] = 1.1
+    """The penalty to apply to repeated tokens."""
+
+    top_k: Optional[int] = 40
+    """The top-k value to use for sampling."""
+
+    last_n_tokens_size: Optional[int] = 64
+    """The number of tokens to look back when applying the repeat_penalty."""
+
+    use_mmap: Optional[bool] = True
+    """Whether to keep the model loaded in RAM"""
+
+    streaming: bool = True
+    """Whether to stream the results, token by token."""
+
+    @root_validator()
+    def validate_environment(cls, values: Dict) -> Dict:
+        """Validate the model family and load the bigdl-llm client.
+
+        Raises ModuleNotFoundError if bigdl-llm cannot be imported, and
+        ValueError if the family is unsupported or the model fails to load.
+        """
+        model_path = values["model_path"]
+        model_param_names = [
+            "lora_path",
+            "lora_base",
+            "n_ctx",
+            "n_parts",
+            "seed",
+            "f16_kv",
+            "logits_all",
+            "vocab_only",
+            "use_mlock",
+            "n_threads",
+            "n_batch",
+            "use_mmap",
+            "last_n_tokens_size",
+        ]
+        model_params = {k: values[k] for k in model_param_names}
+        # For backwards compatibility, only include if non-null.
+        if values["n_gpu_layers"] is not None:
+            model_params["n_gpu_layers"] = values["n_gpu_layers"]
+
+        model_family = values["model_family"].lower()
+        if model_family not in values["family_info"]:
+            raise ValueError("Model family '%s' is not supported. Valid"
+                             " values are %s" % (values["model_family"],
+                                                 ','.join(values["family_info"])))
+
+        try:
+            b_info = values["family_info"][model_family]
+            module = importlib.import_module(b_info['module'])
+            class_ = getattr(module, b_info['class'])
+
+            values["client"] = class_(model_path, **model_params)
+
+        except ImportError as e:
+            raise ModuleNotFoundError(
+                "Could not import bigdl-llm library. "
+                "Please install the bigdl-llm library to "
+                "use this model: pip install bigdl-llm"
+            ) from e
+        except Exception as e:
+            raise ValueError(
+                f"Could not load model from path: {model_path}. "
+                f"Please make sure the model family {model_family} matches "
+                "the model you want to load. "
+                f"Received error {e}"
+            ) from e
+
+        return values
+
+    @property
+    def _default_params(self) -> Dict[str, Any]:
+        """Get the default parameters for calling llama_cpp."""
+        return {
+            "suffix": self.suffix,
+            "max_tokens": self.max_tokens,
+            "temperature": self.temperature,
+            "top_p": self.top_p,
+            "logprobs": self.logprobs,
+            "echo": self.echo,
+            "stop_sequences": self.stop,  # key here is convention among LLM classes
+            "repeat_penalty": self.repeat_penalty,
+            "top_k": self.top_k,
+        }
+
+    @property
+    def _identifying_params(self) -> Dict[str, Any]:
+        """Get the identifying parameters."""
+        return {**{"model_path": self.model_path,
+                   "model_family": self.model_family},
+                **self._default_params}
+
+    @property
+    def _llm_type(self) -> str:
+        """Return type of llm."""
+        return "BigDL"
+
+    def _get_parameters(self, stop: Optional[List[str]] = None) -> Dict[str, Any]:
+        """
+        Performs sanity check, preparing parameters in format needed by llama_cpp.
+
+        Args:
+            stop (Optional[List[str]]): List of stop sequences for llama_cpp.
+
+        Returns:
+            Dictionary containing the combined parameters.
+        """
+
+        # Raise error if stop sequences are in both input and default params
+        if self.stop and stop is not None:
+            raise ValueError("`stop` found in both the input and default params.")
+
+        params = self._default_params
+
+        # llama_cpp expects the "stop" key not this, so we remove it:
+        params.pop("stop_sequences")
+
+        # then sets it as configured, or default to an empty list:
+        params["stop"] = self.stop or stop or []
+
+        return params
+
+    def _call(
+        self,
+        prompt: str,
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+    ) -> str:
+        """Call the Llama model and return the output.
+
+        Args:
+            prompt: The prompt to use for generation.
+            stop: A list of strings to stop generation when encountered.
+
+        Returns:
+            The generated text.
+
+        Example:
+            .. code-block:: python
+
+                from bigdl.llm.langchain.llms import BigdlLLM
+                llm = BigdlLLM(model_path="/path/to/local/llama/model.bin")
+                llm("This is a prompt.")
+        """
+        if self.streaming:
+            # If streaming is enabled, we use the stream
+            # method that yields as they are generated
+            # and return the combined strings from the first choices's text:
+            return "".join(
+                chunk["choices"][0]["text"]
+                for chunk in self.stream(prompt=prompt, stop=stop, run_manager=run_manager)
+            )
+        else:
+            params = self._get_parameters(stop)
+            result = self.client(prompt=prompt, **params)
+            return result["choices"][0]["text"]
+
+    def stream(
+        self,
+        prompt: str,
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+    ) -> Generator[Dict, None, None]:
+        """Yields results objects as they are generated in real time.
+
+        BETA: this is a beta feature while we figure out the right abstraction.
+        Once that happens, this interface could change.
+
+        It also calls the callback manager's on_llm_new_token event with
+        similar parameters to the OpenAI LLM class method of the same name.
+
+        Args:
+            prompt: The prompts to pass into the model.
+            stop: Optional list of stop words to use when generating.
+
+        Returns:
+            A generator representing the stream of tokens being generated.
+
+        Yields:
+            A dictionary like objects containing a string token and metadata.
+            See llama-cpp-python docs and below for more.
+
+        Example:
+            .. code-block:: python
+
+                from bigdl.llm.langchain.llms import BigdlLLM
+                llm = BigdlLLM(
+                    model_path="/path/to/local/model.bin",
+                    temperature = 0.5
+                )
+                for chunk in llm.stream("Ask 'Hi, how are you?' like a pirate:'",
+                        stop=["'","\n"]):
+                    result = chunk["choices"][0]
+                    print(result["text"], end='', flush=True)
+
+        """
+        params = self._get_parameters(stop)
+        result = self.client(prompt=prompt, stream=True, **params)
+        for chunk in result:
+            token = chunk["choices"][0]["text"]
+            log_probs = chunk["choices"][0].get("logprobs", None)
+            if run_manager:
+                run_manager.on_llm_new_token(
+                    token=token, verbose=self.verbose, log_probs=log_probs
+                )
+            yield chunk
+
+    def get_num_tokens(self, text: str) -> int:
+        """Return the number of tokens the client's tokenizer produces for `text`."""
+        tokenized_text = self.client.tokenize(text.encode("utf-8"))
+        return len(tokenized_text)