[LLM] langchain integrations and examples (#8256)
* langchain integrations and examples * add licences and rename * add licences * fix license issues and change backbone to model_family * update examples to use model_family param * fix linting * fix code style * exclude langchain integration from stylecheck * update langchain examples and update integrations based on latest changes * update simple llama-cpp-python style API example * remove bloom in README * change default n_threads to 2 and remove redundant code --------- Co-authored-by: leonardozcm <changmin.zhao@intel.com>
This commit is contained in:
parent
f83c48280f
commit
02c583144c
11 changed files with 889 additions and 2 deletions
|
|
@ -21,6 +21,7 @@ SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )"
|
|||
PYTHON_ROOT_DIR="$SCRIPT_DIR/.."
|
||||
echo $PYTHON_ROOT_DIR
|
||||
PATHS_TO_CHECK="$SCRIPT_DIR/../../src"
|
||||
PATTERNS_TO_EXCLUDE="__init__.py,log4Error.py,$SCRIPT_DIR/../../src/bigdl/llm/langchain/*"
|
||||
PEP8_REPORT_PATH="$PYTHON_ROOT_DIR/test/pep8-report.txt"
|
||||
PYLINT_REPORT_PATH="$PYTHON_ROOT_DIR/test/pylint-report.txt"
|
||||
PYLINT_INSTALL_INFO="$PYTHON_ROOT_DIR/test/pylint-info.txt"
|
||||
|
|
@ -52,7 +53,7 @@ export "PATH=$PYTHONPATH:$PATH"
|
|||
#+ first, but we do so so that the check status can
|
||||
#+ be output before the report, like with the
|
||||
#+ scalastyle and RAT checks.
|
||||
python "$PEP8_SCRIPT_PATH" --ignore=E402,E731,E241,W503,E226 --exclude=__init__.py,log4Error.py --config=dev/tox.ini $PATHS_TO_CHECK >> "$PEP8_REPORT_PATH"
|
||||
python "$PEP8_SCRIPT_PATH" --ignore=E402,E731,E241,W503,E226 --exclude=$PATTERNS_TO_EXCLUDE --config=dev/tox.ini $PATHS_TO_CHECK >> "$PEP8_REPORT_PATH"
|
||||
pep8_status="${PIPESTATUS[0]}"
|
||||
|
||||
if [ "$compile_status" -eq 0 -a "$pep8_status" -eq 0 ]; then
|
||||
|
|
|
|||
46
python/llm/example/langchain/README.md
Normal file
46
python/llm/example/langchain/README.md
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
# Langchain examples
|
||||
|
||||
The examples here show how to use langchain with `bigdl-llm`.
|
||||
|
||||
## Install bigdl-llm
|
||||
Follow the instructions in [bigdl-llm docs: Install]().
|
||||
|
||||
## Install Required Dependencies for langchain examples.
|
||||
|
||||
```bash
|
||||
pip install langchain==0.0.184
|
||||
pip install -U chromadb==0.3.25
|
||||
pip install -U typing_extensions==4.5.0
|
||||
```
|
||||
|
||||
Note that `typing_extensions==4.5.0` is required; otherwise you may encounter the error `TypeError: dataclass_transform() got an unexpected keyword argument 'field_specifiers'` when running the examples.
|
||||
|
||||
|
||||
## Convert Models using bigdl-llm
|
||||
Follow the instructions in [bigdl-llm docs: Convert Models]().
|
||||
|
||||
|
||||
## Run the examples
|
||||
|
||||
### 1. Streaming Chat
|
||||
|
||||
```bash
|
||||
python ./streamchat.py -m MODEL_PATH -x MODEL_FAMILY -t THREAD_NUM -q "What is AI?"
|
||||
```
|
||||
arguments info:
|
||||
- `-m MODEL_PATH`: path to the converted model
|
||||
- `-x MODEL_FAMILY`: the model family of the model specified in `-m`, available options are `llama`, `gptneox`
|
||||
- `-q QUESTION `: question to ask. Default is `What is AI?`.
|
||||
- `-t THREAD_NUM`: optional argument defining the number of threads to use for inference. Default is `2`.
|
||||
|
||||
### 2. Question Answering over Docs
|
||||
```bash
|
||||
python ./docqa.py -m CONVERTED_MODEL_PATH -x MODEL_FAMILY -i DOC_PATH -q QUESTION -t THREAD_NUM
|
||||
```
|
||||
arguments info:
|
||||
- `-m CONVERTED_MODEL_PATH`: path to the converted model in above step
|
||||
- `-x MODEL_FAMILY`: the model family of the model specified in `-m`, available options are `llama`, `gptneox`
|
||||
- `-i DOC_PATH`: path to the input document to answer questions about
- `-q QUESTION`: question to ask. Default is `What is AI?`.
|
||||
- `-t THREAD_NUM`: optional argument defining the number of threads to use for inference. Default is `2`.
|
||||
|
||||
|
||||
89
python/llm/example/langchain/docqa.py
Normal file
89
python/llm/example/langchain/docqa.py
Normal file
|
|
@ -0,0 +1,89 @@
|
|||
#
|
||||
# Copyright 2016 The BigDL Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# This would make sure Python is aware there is more than one sub-package within bigdl,
|
||||
# physically located elsewhere.
|
||||
# Otherwise there would be module not found error in non-pip's setting as Python would
|
||||
# only search the first bigdl package and end up finding only one sub-package.
|
||||
|
||||
import argparse
|
||||
|
||||
from langchain.vectorstores import Chroma
|
||||
from langchain.chains.chat_vector_db.prompts import (CONDENSE_QUESTION_PROMPT,
|
||||
QA_PROMPT)
|
||||
from langchain.text_splitter import CharacterTextSplitter
|
||||
from langchain.chains.question_answering import load_qa_chain
|
||||
from langchain.callbacks.manager import CallbackManager
|
||||
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
||||
|
||||
from bigdl.llm.langchain.llms import BigdlLLM
|
||||
from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings
|
||||
|
||||
|
||||
|
||||
def main(args):
    """Answer a question about a local text document with a bigdl-llm model.

    The document is split into chunks, embedded into a Chroma vector store,
    the chunks relevant to the question are retrieved, and a "stuff" QA chain
    is run over them. The chain is given a streaming stdout callback handler.

    Args:
        args: parsed command-line arguments providing ``input_path``,
            ``model_path``, ``model_family``, ``question``, ``n_ctx`` and
            ``thread_num``.

    Raises:
        ValueError: if no input document path was given.
    """
    input_path = args.input_path
    # Fail early with a clear message instead of letting open(None) raise
    # an opaque TypeError.
    if input_path is None:
        raise ValueError("An input document is required: pass it with -i/--input-path.")

    model_path = args.model_path
    model_family = args.model_family
    query = args.question
    n_ctx = args.n_ctx
    n_threads = args.thread_num

    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

    # split texts of input doc
    # An explicit encoding keeps the result independent of the platform default.
    with open(input_path, encoding="utf-8") as f:
        input_doc = f.read()
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    texts = text_splitter.split_text(input_doc)

    # create embeddings and store into vectordb
    embeddings = BigdlLLMEmbeddings(model_path=model_path, model_family=model_family,
                                    n_threads=n_threads, n_ctx=n_ctx)
    metadatas = [{"source": str(i)} for i in range(len(texts))]
    docsearch = Chroma.from_texts(texts, embeddings, metadatas=metadatas).as_retriever()

    # get relevant texts
    docs = docsearch.get_relevant_documents(query)

    bigdl_llm = BigdlLLM(
        model_path=model_path, model_family=model_family, n_ctx=n_ctx,
        n_threads=n_threads, callback_manager=callback_manager
    )

    doc_chain = load_qa_chain(
        bigdl_llm, chain_type="stuff", prompt=QA_PROMPT, callback_manager=callback_manager
    )

    doc_chain.run(input_documents=docs, question=query)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: parse the options and run the example.
    parser = argparse.ArgumentParser(description='Langchain Question Answering over Docs Example')
    parser.add_argument('-x', '--model-family', type=str, required=True,
                        help='the model family')
    parser.add_argument('-m', '--model-path', type=str, required=True,
                        help='the path to the converted llm model')
    # main() always opens the input document, so the path is mandatory.
    parser.add_argument('-i', '--input-path', type=str, required=True,
                        help='the path to the input doc.')
    parser.add_argument('-q', '--question', type=str, default='What is AI?',
                        help='question you want to ask.')
    parser.add_argument('-c', '--n-ctx', type=int, default=2048,
                        help='size of the token context window')
    parser.add_argument('-t', '--thread-num', type=int, default=2,
                        help='number of threads to use for inference')
    args = parser.parse_args()

    main(args)
|
||||
70
python/llm/example/langchain/streamchat.py
Normal file
70
python/llm/example/langchain/streamchat.py
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
#
|
||||
# Copyright 2016 The BigDL Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# This would make sure Python is aware there is more than one sub-package within bigdl,
|
||||
# physically located elsewhere.
|
||||
# Otherwise there would be module not found error in non-pip's setting as Python would
|
||||
# only search the first bigdl package and end up finding only one sub-package.
|
||||
|
||||
import argparse
|
||||
|
||||
from bigdl.llm.langchain.llms import BigdlLLM
|
||||
from langchain import PromptTemplate, LLMChain
|
||||
from langchain.callbacks.manager import CallbackManager
|
||||
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
||||
|
||||
|
||||
def main(args):
    """Run one question through a bigdl-llm model via a langchain LLMChain.

    Args:
        args: parsed command-line arguments providing ``question``,
            ``model_path``, ``model_family`` and ``thread_num``.
    """
    user_question, path, family, threads = (
        args.question,
        args.model_path,
        args.model_family,
        args.thread_num,
    )

    # The prompt is just the question itself, passed through unchanged.
    qa_prompt = PromptTemplate(template="{question}", input_variables=["question"])

    # Callbacks support token-wise streaming
    streaming_callbacks = CallbackManager([StreamingStdOutCallbackHandler()])

    # Verbose is required to pass to the callback manager
    llm_kwargs = dict(
        model_path=path,
        model_family=family,
        n_threads=threads,
        callback_manager=streaming_callbacks,
        verbose=True,
    )
    chat_model = BigdlLLM(**llm_kwargs)

    LLMChain(prompt=qa_prompt, llm=chat_model).run(user_question)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: parse the options and run the example.
    parser = argparse.ArgumentParser(description='Langchain Streaming Chat Example')
    parser.add_argument('-x', '--model-family', type=str, required=True,
                        help='the model family')
    parser.add_argument('-m', '--model-path', type=str, required=True,
                        help='the path to the converted llm model')
    parser.add_argument('-q', '--question', type=str, default='What is AI?',
                        help='question you want to ask.')
    parser.add_argument('-t', '--thread-num', type=int, default=2,
                        help='Number of threads to use for inference')
    args = parser.parse_args()

    main(args)
|
||||
56
python/llm/example/simple.py
Normal file
56
python/llm/example/simple.py
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
#
|
||||
# Copyright 2016 The BigDL Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# This would make sure Python is aware there is more than one sub-package within bigdl,
|
||||
# physically located elsewhere.
|
||||
# Otherwise there would be module not found error in non-pip's setting as Python would
|
||||
# only search the first bigdl package and end up finding only one sub-package.
|
||||
|
||||
import argparse
|
||||
|
||||
def main(args):
    """Load a converted model of the requested family and print its response.

    Args:
        args: parsed command-line arguments providing ``model_family``,
            ``model_path``, ``prompt`` and ``thread_num``.

    Raises:
        ValueError: if ``model_family`` is not one of ``llama``, ``bloom``
            or ``gptneox``.
    """
    model_family = args.model_family
    model_path = args.model_path
    prompt = args.prompt
    n_threads = args.thread_num

    # The model classes are imported lazily so only the selected family's
    # class is imported.
    if model_family == "llama":
        from bigdl.llm.models import Llama
        modelclass = Llama
    elif model_family == "bloom":
        from bigdl.llm.models import Bloom
        modelclass = Bloom
    elif model_family == "gptneox":
        from bigdl.llm.models import Gptneox
        modelclass = Gptneox
    else:
        # Without this branch an unknown family left `modelclass` unbound.
        raise ValueError(
            "Model family '%s' is not supported. Valid values are "
            "llama,bloom,gptneox" % model_family
        )

    model = modelclass(model_path, n_threads=n_threads)
    response = model(prompt)
    print(response)
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: parse the options and run the example.
    parser = argparse.ArgumentParser(description='Llama-CPP-Python style API Simple Example')
    parser.add_argument('-x', '--model-family', type=str, required=True,
                        help='the model family')
    parser.add_argument('-m', '--model-path', type=str, required=True,
                        help='the path to the converted llm model')
    parser.add_argument('-p', '--prompt', type=str, default='What is AI?',
                        help='question you want to ask.')
    parser.add_argument('-t', '--thread-num', type=int, default=2,
                        help='number of threads to use for inference')
    args = parser.parse_args()

    main(args)
|
||||
|
|
@ -94,7 +94,7 @@ class Bloom:
|
|||
self.last_n_tokens_size = last_n_tokens_size
|
||||
self.verbose = verbose
|
||||
|
||||
def __call__(self, prompt: str, max_tokens: int, stream: bool = False,
|
||||
def __call__(self, prompt: str, max_tokens: int = 128, stream: bool = False,
|
||||
stop: Optional[List[str]] = []):
|
||||
if stream:
|
||||
return self.stream(prompt, max_tokens, stop)
|
||||
|
|
|
|||
20
python/llm/src/bigdl/llm/langchain/__init__.py
Normal file
20
python/llm/src/bigdl/llm/langchain/__init__.py
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
#
|
||||
# Copyright 2016 The BigDL Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# This would make sure Python is aware there is more than one sub-package within bigdl,
|
||||
# physically located elsewhere.
|
||||
# Otherwise there would be module not found error in non-pip's setting as Python would
|
||||
# only search the first bigdl package and end up finding only one sub-package.
|
||||
26
python/llm/src/bigdl/llm/langchain/embeddings/__init__.py
Normal file
26
python/llm/src/bigdl/llm/langchain/embeddings/__init__.py
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
#
|
||||
# Copyright 2016 The BigDL Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# This would make sure Python is aware there is more than one sub-package within bigdl,
|
||||
# physically located elsewhere.
|
||||
# Otherwise there would be module not found error in non-pip's setting as Python would
|
||||
# only search the first bigdl package and end up finding only one sub-package.
|
||||
|
||||
from .bigdlllm import BigdlLLMEmbeddings
|
||||
|
||||
# Names exported by this package.
__all__ = ["BigdlLLMEmbeddings"]
|
||||
192
python/llm/src/bigdl/llm/langchain/embeddings/bigdlllm.py
Normal file
192
python/llm/src/bigdl/llm/langchain/embeddings/bigdlllm.py
Normal file
|
|
@ -0,0 +1,192 @@
|
|||
#
|
||||
# Copyright 2016 The BigDL Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# This would make sure Python is aware there is more than one sub-package within bigdl,
|
||||
# physically located elsewhere.
|
||||
# Otherwise there would be module not found error in non-pip's setting as Python would
|
||||
# only search the first bigdl package and end up finding only one sub-package.
|
||||
|
||||
# This file is adapted from
|
||||
# https://github.com/hwchase17/langchain/blob/master/langchain/embeddings/llamacpp.py
|
||||
|
||||
# The MIT License
|
||||
|
||||
# Copyright (c) Harrison Chase
|
||||
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
|
||||
"""Wrapper around BigdlLLM embedding models."""
|
||||
import importlib
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from pydantic import BaseModel, Extra, Field, root_validator
|
||||
|
||||
from langchain.embeddings.base import Embeddings
|
||||
|
||||
|
||||
class BigdlLLMEmbeddings(BaseModel, Embeddings):
    """Wrapper around bigdl-llm embedding models.

    During validation the class registered for ``model_family`` in
    ``family_info`` is imported and instantiated with ``embedding=True``;
    the resulting object is stored in ``client`` and used to embed text.

    Example:
        .. code-block:: python

            from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings
            llama = BigdlLLMEmbeddings(model_path="/path/to/model.bin")
    """

    model_family: str = "llama"
    """the model family: currently supports llama, gptneox, and bloom."""

    family_info = {
        'llama': {'module': "bigdl.llm.models", 'class': "Llama"},
        'bloom': {'module': "bigdl.llm.models", 'class': "Bloom"},
        'gptneox': {'module': "bigdl.llm.models", 'class': "Gptneox"},
    }  #: :meta private:
    """info necessary for different model family initiation and configure"""

    client: Any  #: :meta private:
    model_path: str

    n_ctx: int = Field(512, alias="n_ctx")
    """Token context window."""

    n_parts: int = Field(-1, alias="n_parts")
    """Number of parts to split the model into.
    If -1, the number of parts is automatically determined."""

    seed: int = Field(-1, alias="seed")
    """Seed. If -1, a random seed is used."""

    f16_kv: bool = Field(False, alias="f16_kv")
    """Use half-precision for key/value cache."""

    logits_all: bool = Field(False, alias="logits_all")
    """Return logits for all tokens, not just the last token."""

    vocab_only: bool = Field(False, alias="vocab_only")
    """Only load the vocabulary, no weights."""

    use_mlock: bool = Field(False, alias="use_mlock")
    """Force system to keep model in RAM."""

    n_threads: Optional[int] = Field(2, alias="n_threads")
    """Number of threads to use."""

    n_batch: Optional[int] = Field(8, alias="n_batch")
    """Number of tokens to process in parallel.
    Should be a number between 1 and n_ctx."""

    n_gpu_layers: Optional[int] = Field(None, alias="n_gpu_layers")
    """Number of layers to be loaded into gpu memory. Default None."""

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Load the model for the configured family and store it in ``client``.

        Args:
            values: the field values collected for this model.

        Returns:
            ``values`` with ``client`` set to the loaded model object.

        Raises:
            ValueError: if ``model_family`` is not a key of ``family_info``,
                or if the model cannot be loaded from ``model_path``.
            ModuleNotFoundError: if the model module cannot be imported.
        """
        model_path = values["model_path"]
        model_param_names = [
            "n_ctx",
            "n_parts",
            "seed",
            "f16_kv",
            "logits_all",
            "vocab_only",
            "use_mlock",
            "n_threads",
            "n_batch",
        ]
        model_params = {k: values[k] for k in model_param_names}
        # For backwards compatibility, only include if non-null.
        if values["n_gpu_layers"] is not None:
            model_params["n_gpu_layers"] = values["n_gpu_layers"]

        family_info = values["family_info"]
        # Family names are matched case-insensitively.
        model_family = values["model_family"].lower()
        if model_family not in family_info:
            raise ValueError("Model family '%s' is not supported. Valid"
                             " values are %s" % (values["model_family"],
                                                 ','.join(family_info)))

        try:
            b_info = family_info[model_family]
            module = importlib.import_module(b_info['module'])
            class_ = getattr(module, b_info['class'])

            values["client"] = class_(model_path, embedding=True, **model_params)
        except ImportError as e:
            raise ModuleNotFoundError(
                "Could not import bigdl-llm library. "
                "Please install the bigdl-llm library to "
                "use this embedding model: pip install bigdl-llm"
            ) from e
        except Exception as e:
            raise ValueError(
                f"Could not load model from path: {model_path}. "
                f"Please make sure the model family {model_family} matches "
                "the model you want to load. "
                f"Received error {e}"
            ) from e

        return values

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed a list of documents using the loaded model.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text.
        """
        embeddings = [self.client.embed(text) for text in texts]
        return [list(map(float, e)) for e in embeddings]

    def embed_query(self, text: str) -> List[float]:
        """Embed a query using the loaded model.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        """
        embedding = self.client.embed(text)
        return list(map(float, embedding))
|
||||
34
python/llm/src/bigdl/llm/langchain/llms/__init__.py
Normal file
34
python/llm/src/bigdl/llm/langchain/llms/__init__.py
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
#
|
||||
# Copyright 2016 The BigDL Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# This would make sure Python is aware there is more than one sub-package within bigdl,
|
||||
# physically located elsewhere.
|
||||
# Otherwise there would be module not found error in non-pip's setting as Python would
|
||||
# only search the first bigdl package and end up finding only one sub-package.
|
||||
|
||||
"""Wrappers on top of large language models APIs."""
|
||||
from typing import Dict, Type
|
||||
from langchain.llms.base import BaseLLM
|
||||
|
||||
from .bigdlllm import BigdlLLM
|
||||
|
||||
# Names exported by this package.
__all__ = ["BigdlLLM"]

# Lookup from an LLM type name to the class implementing it.
type_to_cls_dict: Dict[str, Type[BaseLLM]] = {"BigdlLLM": BigdlLLM}
|
||||
353
python/llm/src/bigdl/llm/langchain/llms/bigdlllm.py
Normal file
353
python/llm/src/bigdl/llm/langchain/llms/bigdlllm.py
Normal file
|
|
@ -0,0 +1,353 @@
|
|||
#
|
||||
# Copyright 2016 The BigDL Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# This would make sure Python is aware there is more than one sub-package within bigdl,
|
||||
# physically located elsewhere.
|
||||
# Otherwise there would be module not found error in non-pip's setting as Python would
|
||||
# only search the first bigdl package and end up finding only one sub-package.
|
||||
|
||||
# This file is adapted from
|
||||
# https://github.com/hwchase17/langchain/blob/master/langchain/llms/llamacpp.py
|
||||
|
||||
# The MIT License
|
||||
|
||||
# Copyright (c) Harrison Chase
|
||||
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
|
||||
import importlib
|
||||
from typing import Any, Dict, Generator, List, Optional
|
||||
|
||||
from pydantic import Field, root_validator
|
||||
|
||||
from langchain.callbacks.manager import CallbackManagerForLLMRun
|
||||
from langchain.llms.base import LLM
|
||||
|
||||
|
||||
|
||||
class BigdlLLM(LLM):
|
||||
"""Wrapper around the BigDL-LLM
|
||||
|
||||
Example:
|
||||
.. code-block:: python
|
||||
|
||||
from bigdl.llm.langchain.llms import BigdlLLM
|
||||
llm = BigdlLLM(model_path="/path/to/llama/model")
|
||||
"""
|
||||
|
||||
|
||||
model_family: str = "llama"
|
||||
"""the model family: currently supports llama, gptneox, and bloom."""
|
||||
|
||||
|
||||
family_info = {
|
||||
'llama': {'module': "bigdl.llm.models" , 'class': "Llama"},
|
||||
'bloom': {'module': "bigdl.llm.models", 'class': "Bloom"},
|
||||
'gptneox': {'module': "bigdl.llm.models", 'class': "Gptneox"},
|
||||
} #: :meta private:
|
||||
"""info necessary for different model families initiation and configure"""
|
||||
|
||||
client: Any #: :meta private:
|
||||
"""the actual model"""
|
||||
|
||||
model_path: str
|
||||
"""The path to the Llama model file."""
|
||||
|
||||
lora_base: Optional[str] = None
|
||||
"""The path to the Llama LoRA base model."""
|
||||
|
||||
lora_path: Optional[str] = None
|
||||
"""The path to the Llama LoRA. If None, no LoRa is loaded."""
|
||||
|
||||
n_ctx: int = Field(512, alias="n_ctx")
|
||||
"""Token context window."""
|
||||
|
||||
n_parts: int = Field(-1, alias="n_parts")
|
||||
"""Number of parts to split the model into.
|
||||
If -1, the number of parts is automatically determined."""
|
||||
|
||||
seed: int = Field(-1, alias="seed")
|
||||
"""Seed. If -1, a random seed is used."""
|
||||
|
||||
f16_kv: bool = Field(True, alias="f16_kv")
|
||||
"""Use half-precision for key/value cache."""
|
||||
|
||||
logits_all: bool = Field(False, alias="logits_all")
|
||||
"""Return logits for all tokens, not just the last token."""
|
||||
|
||||
vocab_only: bool = Field(False, alias="vocab_only")
|
||||
"""Only load the vocabulary, no weights."""
|
||||
|
||||
use_mlock: bool = Field(False, alias="use_mlock")
|
||||
"""Force system to keep model in RAM."""
|
||||
|
||||
n_threads: Optional[int] = Field(2, alias="n_threads")
|
||||
"""Number of threads to use."""
|
||||
|
||||
n_batch: Optional[int] = Field(8, alias="n_batch")
|
||||
"""Number of tokens to process in parallel.
|
||||
Should be a number between 1 and n_ctx."""
|
||||
|
||||
n_gpu_layers: Optional[int] = Field(None, alias="n_gpu_layers")
|
||||
"""Number of layers to be loaded into gpu memory. Default None."""
|
||||
|
||||
suffix: Optional[str] = Field(None)
|
||||
"""A suffix to append to the generated text. If None, no suffix is appended."""
|
||||
|
||||
max_tokens: Optional[int] = 256
|
||||
"""The maximum number of tokens to generate."""
|
||||
|
||||
temperature: Optional[float] = 0.8
|
||||
"""The temperature to use for sampling."""
|
||||
|
||||
top_p: Optional[float] = 0.95
|
||||
"""The top-p value to use for sampling."""
|
||||
|
||||
logprobs: Optional[int] = Field(None)
|
||||
"""The number of logprobs to return. If None, no logprobs are returned."""
|
||||
|
||||
echo: Optional[bool] = False
|
||||
"""Whether to echo the prompt."""
|
||||
|
||||
stop: Optional[List[str]] = []
|
||||
"""A list of strings to stop generation when encountered."""
|
||||
|
||||
repeat_penalty: Optional[float] = 1.1
|
||||
"""The penalty to apply to repeated tokens."""
|
||||
|
||||
top_k: Optional[int] = 40
|
||||
"""The top-k value to use for sampling."""
|
||||
|
||||
last_n_tokens_size: Optional[int] = 64
|
||||
"""The number of tokens to look back when applying the repeat_penalty."""
|
||||
|
||||
use_mmap: Optional[bool] = True
|
||||
"""Whether to keep the model loaded in RAM"""
|
||||
|
||||
streaming: bool = True
|
||||
"""Whether to stream the results, token by token."""
|
||||
|
||||
@root_validator()
def validate_environment(cls, values: Dict) -> Dict:
    """Validate that bigdl-llm is installed, family is supported

    The class registered for ``model_family`` in ``family_info`` is imported
    and instantiated with the model parameters; the result is stored in
    ``client``.

    Args:
        values: the field values collected for this model.

    Returns:
        ``values`` with ``client`` set to the loaded model object.

    Raises:
        ValueError: if ``model_family`` is not a key of ``family_info``,
            or if the model cannot be loaded from ``model_path``.
        ModuleNotFoundError: if the model module cannot be imported.
    """
    model_path = values["model_path"]
    model_param_names = [
        "lora_path",
        "lora_base",
        "n_ctx",
        "n_parts",
        "seed",
        "f16_kv",
        "logits_all",
        "vocab_only",
        "use_mlock",
        "n_threads",
        "n_batch",
        "use_mmap",
        "last_n_tokens_size",
    ]
    model_params = {k: values[k] for k in model_param_names}
    # For backwards compatibility, only include if non-null.
    if values["n_gpu_layers"] is not None:
        model_params["n_gpu_layers"] = values["n_gpu_layers"]

    family_info = values["family_info"]
    # Family names are matched case-insensitively.
    model_family = values["model_family"].lower()
    if model_family not in family_info:
        raise ValueError("Model family '%s' is not supported. Valid"
                         " values are %s" % (values["model_family"],
                                             ','.join(family_info)))

    try:
        b_info = family_info[model_family]
        module = importlib.import_module(b_info['module'])
        class_ = getattr(module, b_info['class'])

        values["client"] = class_(model_path, **model_params)
    except ImportError as e:
        # The module imported above is a bigdl-llm one, so point the user
        # at bigdl-llm rather than llama-cpp-python.
        raise ModuleNotFoundError(
            "Could not import bigdl-llm library. "
            "Please install the bigdl-llm library to "
            "use this model: pip install bigdl-llm"
        ) from e
    except Exception as e:
        raise ValueError(
            f"Could not load model from path: {model_path}. "
            f"Please make sure the model family {model_family} matches "
            "the model you want to load. "
            f"Received error {e}"
        ) from e

    return values
|
||||
|
||||
@property
def _default_params(self) -> Dict[str, Any]:
    """Build the generation parameters from this object's attributes.

    Returns:
        A new dict mapping each parameter key to the current value of the
        corresponding attribute. ``self.stop`` is exposed under the key
        ``"stop_sequences"``.
    """
    # (parameter key, attribute name) pairs, in the order they are emitted.
    key_to_attr = (
        ("suffix", "suffix"),
        ("max_tokens", "max_tokens"),
        ("temperature", "temperature"),
        ("top_p", "top_p"),
        ("logprobs", "logprobs"),
        ("echo", "echo"),
        ("stop_sequences", "stop"),  # key here is convention among LLM classes
        ("repeat_penalty", "repeat_penalty"),
        ("top_k", "top_k"),
    )
    return {key: getattr(self, attr) for key, attr in key_to_attr}
|
||||
|
||||
@property
def _identifying_params(self) -> Dict[str, Any]:
    """Return the model path and family together with the default params.

    Returns:
        A dict starting with ``"model_path"`` and ``"model_family"``,
        followed by every entry of ``self._default_params``.
    """
    identity: Dict[str, Any] = {
        "model_path": self.model_path,
        "model_family": self.model_family,
    }
    identity.update(self._default_params)
    return identity
|
||||
|
||||
@property
def _llm_type(self) -> str:
    """Name identifying this kind of LLM; always the string ``"BigDL"``."""
    return "BigDL"
|
||||
|
||||
def _get_parameters(self, stop: Optional[List[str]] = None) -> Dict[str, Any]:
    """Assemble the keyword arguments used to call the client.

    Args:
        stop: Optional per-call list of stop sequences.

    Returns:
        The dict obtained from ``self._default_params`` with the
        ``"stop_sequences"`` entry removed and a ``"stop"`` entry added.
        ``"stop"`` is ``self.stop`` when that is non-empty, otherwise
        the ``stop`` argument, otherwise an empty list.

    Raises:
        ValueError: If ``self.stop`` is non-empty and ``stop`` is also
            given (anything other than None).
    """
    # Stop sequences may come from the instance or the call, not both.
    if stop is not None and self.stop:
        raise ValueError("`stop` found in both the input and default params.")

    call_kwargs = self._default_params

    # The client is called with a "stop" key, so the conventional
    # "stop_sequences" key is dropped first.
    del call_kwargs["stop_sequences"]

    if self.stop:
        chosen_stop = self.stop
    elif stop:
        chosen_stop = stop
    else:
        chosen_stop = []
    call_kwargs["stop"] = chosen_stop

    return call_kwargs
|
||||
|
||||
def _call(
    self,
    prompt: str,
    stop: Optional[List[str]] = None,
    run_manager: Optional[CallbackManagerForLLMRun] = None,
) -> str:
    """Run the model on ``prompt`` and return the generated text.

    When ``self.streaming`` is true the text is collected from
    ``self.stream``; otherwise the client is called once.

    Args:
        prompt: The prompt to use for generation.
        stop: A list of strings to stop generation when encountered.
        run_manager: Optional callback manager, forwarded to
            ``self.stream`` in streaming mode.

    Returns:
        The text of the first choice. In streaming mode this is the
        concatenation of the first-choice text of every chunk.

    Example:
        .. code-block:: python

            llm = BigdlLLM(model_path="/path/to/local/llama/model.bin")
            llm("This is a prompt.")
    """
    if not self.streaming:
        call_kwargs = self._get_parameters(stop)
        completion = self.client(prompt=prompt, **call_kwargs)
        return completion["choices"][0]["text"]

    # Streaming: consume the chunks as they are generated and stitch
    # together the text of each chunk's first choice.
    chunks = self.stream(prompt=prompt, stop=stop, run_manager=run_manager)
    return "".join(chunk["choices"][0]["text"] for chunk in chunks)
|
||||
|
||||
def stream(
    self,
    prompt: str,
    stop: Optional[List[str]] = None,
    run_manager: Optional[CallbackManagerForLLMRun] = None,
) -> Generator[Dict, None, None]:
    """Yield result chunks from the client as they are generated.

    BETA: this is a beta feature while we figure out the right abstraction.
    Once that happens, this interface could change.

    For every chunk, ``run_manager.on_llm_new_token`` is called (when a
    run manager is given) with the chunk's first-choice text, the
    instance's ``verbose`` flag and the first choice's ``"logprobs"``
    entry (None when absent).

    Args:
        prompt: The prompt to pass into the model.
        stop: Optional list of stop words to use when generating.
        run_manager: Optional callback manager notified of each token.

    Yields:
        The chunks produced by the client, unchanged. Each chunk is
        indexed as ``chunk["choices"][0]["text"]``.

    Example:
        .. code-block:: python

            llm = BigdlLLM(
                model_path="/path/to/local/model.bin",
                temperature = 0.5
            )
            for chunk in llm.stream("Ask 'Hi, how are you?' like a pirate:'",
                    stop=["'","\\n"]):
                result = chunk["choices"][0]
                print(result["text"], end='', flush=True)
    """
    call_kwargs = self._get_parameters(stop)
    chunk_iter = self.client(prompt=prompt, stream=True, **call_kwargs)
    for piece in chunk_iter:
        first_choice = piece["choices"][0]
        text = first_choice["text"]
        token_logprobs = first_choice.get("logprobs", None)
        if run_manager:
            run_manager.on_llm_new_token(
                token=text, verbose=self.verbose, log_probs=token_logprobs
            )
        yield piece
|
||||
|
||||
def get_num_tokens(self, text: str) -> int:
    """Count the tokens the client produces for ``text``.

    Args:
        text: The string to tokenize; it is UTF-8 encoded before being
            handed to ``self.client.tokenize``.

    Returns:
        The length of the tokenizer's output.
    """
    encoded = text.encode("utf-8")
    return len(self.client.tokenize(encoded))
|
||||
Loading…
Reference in a new issue