LLM: refactor transformers and langchain class name (#8470)

binbin Deng 2023-07-06 17:16:44 +08:00 committed by GitHub
parent 70bc8ea8ae
commit 14626fe05b
11 changed files with 41 additions and 41 deletions


@@ -55,7 +55,7 @@ Currently `bigdl-llm` CLI supports *LLaMA* (e.g., *vicuna*), *GPT-NeoX* (e.g., *
```bash
#convert PyTorch (fp16 or fp32) model;
#llama/bloom/gptneox/starcoder model family is currently supported
-lm-convert "/path/to/model/" --model-format pth --model-family "bloom" --outfile "/path/to/output/"
+llm-convert "/path/to/model/" --model-format pth --model-family "bloom" --outfile "/path/to/output/"
#convert GPTQ-4bit model
#only llama model family is currently supported
@@ -115,8 +115,8 @@ You may run the models using `transformers`-style API in `bigdl-llm`.
            outfile='/path/to/output/', outtype='int4', model_family="llama")
#load the converted model
-from bigdl.llm.transformers import BigdlForCausalLM
-llm = BigdlForCausalLM.from_pretrained("/path/to/output/model.bin",...)
+from bigdl.llm.transformers import BigdlNativeForCausalLM
+llm = BigdlNativeForCausalLM.from_pretrained("/path/to/output/model.bin",...)
#run the converted model
input_ids = llm.tokenize(prompt)
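With the rename applied, the native INT4 `transformers`-style flow from this README section reads as below. This is a minimal sketch with placeholder paths; the `llm_convert` entry point and the final `generate`/`batch_decode` calls follow the usual `transformers`-style pattern and are assumptions, not lines from this diff.

```python
from bigdl.llm import llm_convert                            # convert entry point (assumed name)
from bigdl.llm.transformers import BigdlNativeForCausalLM    # formerly BigdlForCausalLM

# convert a Hugging Face checkpoint into the native INT4 format
bigdl_llm_path = llm_convert(model='/path/to/model/',
                             outfile='/path/to/output/', outtype='int4',
                             model_family="llama")

# load the converted model with the renamed class
llm = BigdlNativeForCausalLM.from_pretrained("/path/to/output/model.bin",
                                              model_family="llama")

# run the converted model (generate/batch_decode are assumed, transformers-style calls)
prompt = "Once upon a time, there existed a little girl"
input_ids = llm.tokenize(prompt)
output_ids = llm.generate(input_ids, max_new_tokens=32)
output = llm.batch_decode(output_ids)
print(output)
```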
@@ -130,13 +130,13 @@ You may run the models using `transformers`-style API in `bigdl-llm`.
You may convert Hugging Face *Transformers* models into *native INT4* format (currently only *llama*/*bloom*/*gptneox*/*starcoder* model family is supported), and then run the converted models using the LangChain API in `bigdl-llm` as follows.
```python
-from bigdl.llm.langchain.llms import BigdlLLM
-from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings
+from bigdl.llm.langchain.llms import BigdlNativeLLM
+from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings
from langchain.chains.question_answering import load_qa_chain
-embeddings = BigdlLLMEmbeddings(model_path='/path/to/converted/model.bin',
+embeddings = BigdlNativeEmbeddings(model_path='/path/to/converted/model.bin',
                                 model_family="llama",...)
-bigdl_llm = BigdlLLM(model_path='/path/to/converted/model.bin',
+bigdl_llm = BigdlNativeLLM(model_path='/path/to/converted/model.bin',
                                 model_family="llama",...)
doc_chain = load_qa_chain(bigdl_llm, ...)
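For downstream LangChain code, this change is purely a rename: constructor arguments are untouched and only the imported class names move. A short migration sketch under that assumption:

```python
# before this commit
#   from bigdl.llm.langchain.llms import BigdlLLM
#   from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings

# after this commit
from bigdl.llm.langchain.llms import BigdlNativeLLM
from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings

embeddings = BigdlNativeEmbeddings(model_path='/path/to/converted/model.bin',
                                   model_family="llama")
bigdl_llm = BigdlNativeLLM(model_path='/path/to/converted/model.bin',
                           model_family="llama")
```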


@@ -29,8 +29,8 @@ from langchain.chains.question_answering import load_qa_chain
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
-from bigdl.llm.langchain.llms import BigdlLLM
-from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings
+from bigdl.llm.langchain.llms import BigdlNativeLLM
+from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings
@@ -53,13 +53,13 @@ def main(args):
    texts = text_splitter.split_text(input_doc)
    # create embeddings and store into vectordb
-    embeddings = BigdlLLMEmbeddings(model_path=model_path, model_family=model_family, n_threads=n_threads, n_ctx=n_ctx)
+    embeddings = BigdlNativeEmbeddings(model_path=model_path, model_family=model_family, n_threads=n_threads, n_ctx=n_ctx)
    docsearch = Chroma.from_texts(texts, embeddings, metadatas=[{"source": str(i)} for i in range(len(texts))]).as_retriever()
    #get relavant texts
    docs = docsearch.get_relevant_documents(query)
-    bigdl_llm = BigdlLLM(
+    bigdl_llm = BigdlNativeLLM(
        model_path=model_path, model_family=model_family, n_ctx=n_ctx, n_threads=n_threads, callback_manager=callback_manager
    )
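Taken together, the hunks above build a Chroma retriever over the split texts and answer the query with the renamed LLM class. The sketch below fills in the surrounding glue with placeholder inputs; the `chain_type` argument and the final `doc_chain.run(...)` call sit outside the hunks shown here and are assumptions about how the example finishes.

```python
from langchain.vectorstores import Chroma
from langchain.chains.question_answering import load_qa_chain
from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings
from bigdl.llm.langchain.llms import BigdlNativeLLM

# placeholder inputs standing in for the example's command-line arguments
model_path = '/path/to/converted/model.bin'
texts = ["BigDL-LLM runs large language models with INT4 optimizations.",
         "Models are first converted into a native INT4 format."]
query = "What format are the models converted into?"

# create embeddings and store them in an in-memory Chroma vectordb
embeddings = BigdlNativeEmbeddings(model_path=model_path, model_family="llama")
docsearch = Chroma.from_texts(
    texts, embeddings,
    metadatas=[{"source": str(i)} for i in range(len(texts))]).as_retriever()
docs = docsearch.get_relevant_documents(query)

# answer the query over the retrieved chunks with the renamed LLM class
bigdl_llm = BigdlNativeLLM(model_path=model_path, model_family="llama")
doc_chain = load_qa_chain(bigdl_llm, chain_type="stuff")       # "stuff" chain assumed
answer = doc_chain.run(input_documents=docs, question=query)   # assumed final step
print(answer)
```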


@@ -21,7 +21,7 @@
import argparse
-from bigdl.llm.langchain.llms import BigdlLLM
+from bigdl.llm.langchain.llms import BigdlNativeLLM
from langchain import PromptTemplate, LLMChain
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
@@ -42,7 +42,7 @@ def main(args):
    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
    # Verbose is required to pass to the callback manager
-    llm = BigdlLLM(
+    llm = BigdlNativeLLM(
        model_path=model_path,
        model_family=model_family,
        n_threads=n_threads,
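This example streams tokens through a stdout callback while generating. A sketch of how the renamed class plugs into that setup; the prompt template and the `LLMChain.run` call are illustrative additions, not lines from this diff.

```python
from langchain import PromptTemplate, LLMChain
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from bigdl.llm.langchain.llms import BigdlNativeLLM

# stream generated tokens to stdout as they arrive
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Verbose is required to pass to the callback manager
llm = BigdlNativeLLM(
    model_path='/path/to/converted/model.bin',
    model_family="llama",
    n_threads=2,
    callback_manager=callback_manager,
    verbose=True)

template = "Question: {question}\n\nAnswer:"
prompt = PromptTemplate(template=template, input_variables=["question"])
llm_chain = LLMChain(prompt=prompt, llm=llm)
llm_chain.run("What is AI?")
```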


@@ -31,8 +31,8 @@ def convert(repo_id_or_model_path, model_family, tmp_path):
    return bigdl_llm_path

def load(model_path, model_family, n_threads):
-    from bigdl.llm.transformers import BigdlForCausalLM
-    llm = BigdlForCausalLM.from_pretrained(
+    from bigdl.llm.transformers import BigdlNativeForCausalLM
+    llm = BigdlNativeForCausalLM.from_pretrained(
        pretrained_model_name_or_path=model_path,
        model_family=model_family,
        n_threads=n_threads)
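Only the `load` helper changes here; `convert` is untouched. A hypothetical caller chaining the two helpers defined in this file (the repo id, temporary directory, and thread count are placeholders):

```python
# convert once, then load the resulting native INT4 binary with the renamed class
bigdl_llm_path = convert(repo_id_or_model_path="decapoda-research/llama-7b-hf",
                         model_family="llama", tmp_path="/tmp/bigdl_llm_models")
llm = load(model_path=bigdl_llm_path, model_family="llama", n_threads=2)
```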


@@ -19,8 +19,8 @@
# Otherwise there would be module not found error in non-pip's setting as Python would
# only search the first bigdl package and end up finding only one sub-package.
-from .bigdlllm import BigdlLLMEmbeddings
+from .bigdlllm import BigdlNativeEmbeddings

__all__ = [
-    "BigdlLLMEmbeddings",
+    "BigdlNativeEmbeddings",
]


@@ -44,7 +44,7 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
-"""Wrapper around BigdlLLM embedding models."""
+"""Wrapper around BigdlNative embedding models."""
import importlib
from typing import Any, Dict, List, Optional
@@ -53,14 +53,14 @@ from pydantic import BaseModel, Extra, Field, root_validator
from langchain.embeddings.base import Embeddings

-class BigdlLLMEmbeddings(BaseModel, Embeddings):
+class BigdlNativeEmbeddings(BaseModel, Embeddings):
    """Wrapper around bigdl-llm embedding models.

    Example:
        .. code-block:: python

-            from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings
-            llama = BigdlLLMEmbeddings(model_path="/path/to/model.bin")
+            from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings
+            llama = BigdlNativeEmbeddings(model_path="/path/to/model.bin")
    """

    model_family: str = "llama"
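Apart from the class name, the embedding wrapper keeps the standard LangChain `Embeddings` interface. A short usage sketch consistent with the docstring above and with the tests later in this commit (`embed_query` comes from the base interface and is assumed unchanged):

```python
from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings

llama = BigdlNativeEmbeddings(model_path="/path/to/model.bin", model_family="llama")
doc_vectors = llama.embed_documents(["This is a test document."])
query_vector = llama.embed_query("This is a test document.")
```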


@@ -23,12 +23,12 @@
from typing import Dict, Type
from langchain.llms.base import BaseLLM
-from .bigdlllm import BigdlLLM
+from .bigdlllm import BigdlNativeLLM

__all__ = [
-    "BigdlLLM",
+    "BigdlNativeLLM",
]

type_to_cls_dict: Dict[str, Type[BaseLLM]] = {
-    "BigdlLLM": BigdlLLM,
+    "BigdlNativeLLM": BigdlNativeLLM,
}
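The string key in `type_to_cls_dict` is part of the public surface, so it is renamed along with the class itself. A small sketch of how such a registry might be consumed; the lookup helper is hypothetical and not part of this diff:

```python
from typing import Type
from langchain.llms.base import BaseLLM
from bigdl.llm.langchain.llms import type_to_cls_dict

def llm_class_for(name: str) -> Type[BaseLLM]:
    """Hypothetical helper: resolve an LLM class from its registered name."""
    return type_to_cls_dict[name]

cls = llm_class_for("BigdlNativeLLM")   # the old "BigdlLLM" key no longer exists
llm = cls(model_path="/path/to/converted/model.bin", model_family="llama")
```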


@@ -54,14 +54,14 @@ from langchain.llms.base import LLM

-class BigdlLLM(LLM):
+class BigdlNativeLLM(LLM):
    """Wrapper around the BigDL-LLM

    Example:
        .. code-block:: python

-            from langchain.llms import BigdlLLM
-            llm = BigdlLLM(model_path="/path/to/llama/model")
+            from langchain.llms import BigdlNativeLLM
+            llm = BigdlNativeLLM(model_path="/path/to/llama/model")
    """
@@ -281,8 +281,8 @@ class BigdlLLM(LLM):
        Example:
            .. code-block:: python

-                from langchain.llms import BigdlLLM
-                llm = BigdlLLM(model_path="/path/to/local/llama/model.bin")
+                from langchain.llms import BigdlNativeLLM
+                llm = BigdlNativeLLM(model_path="/path/to/local/llama/model.bin")
                llm("This is a prompt.")
        """
        if self.streaming:
@@ -326,8 +326,8 @@ class BigdlLLM(LLM):
        Example:
            .. code-block:: python

-                from langchain.llms import BigdlLLM
-                llm = BigdlLLM(
+                from langchain.llms import BigdlNativeLLM
+                llm = BigdlNativeLLM(
                    model_path="/path/to/local/model.bin",
                    temperature = 0.5
                )
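The docstring examples change only the class name; constructor parameters such as `model_path`, `model_family`, `max_tokens`, `n_threads`, and `temperature` keep their meaning. A combined sketch of the renamed class used as a plain callable:

```python
from bigdl.llm.langchain.llms import BigdlNativeLLM

llm = BigdlNativeLLM(
    model_path="/path/to/local/model.bin",
    model_family="llama",
    max_tokens=32,
    n_threads=2,
    temperature=0.5)
print(llm("What is AI?"))
```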


@@ -16,4 +16,4 @@
from .convert import ggml_convert_int4
from .model import AutoModelForCausalLM, AutoModel
-from .modelling_bigdl import BigdlForCausalLM
+from .modelling_bigdl import BigdlNativeForCausalLM


@@ -22,7 +22,7 @@
from bigdl.llm.utils.common import invalidInputError

-class BigdlForCausalLM:
+class BigdlNativeForCausalLM:
    """
    A generic model class that mimics the behavior of
    ``transformers.LlamaForCausalLM.from_pretrained`` API
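Since this class mirrors the `transformers` `from_pretrained` entry point, switching to it is mostly a matter of changing the import and pointing at the converted binary. A brief comparison sketch; the commented Hugging Face lines are ordinary `transformers` usage for contrast, not part of this diff:

```python
# ordinary Hugging Face loading, for comparison only
#   from transformers import LlamaForCausalLM
#   model = LlamaForCausalLM.from_pretrained("/path/to/hf/llama")

# BigDL-LLM native INT4 loading with the renamed class
from bigdl.llm.transformers import BigdlNativeForCausalLM

llm = BigdlNativeForCausalLM.from_pretrained(
    pretrained_model_name_or_path="/path/to/output/model.bin",
    model_family="llama",
    n_threads=2)
```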


@@ -15,8 +15,8 @@
#
from bigdl.llm.utils import get_avx_flags
-from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings
-from bigdl.llm.langchain.llms import BigdlLLM
+from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings
+from bigdl.llm.langchain.llms import BigdlNativeLLM
import pytest
from unittest import TestCase
import os
@@ -35,7 +35,7 @@ class Test_Models_Basics(TestCase):
    def test_langchain_llm_embedding_llama(self):
-        bigdl_embeddings = BigdlLLMEmbeddings(
+        bigdl_embeddings = BigdlNativeEmbeddings(
            model_path=self.llama_model_path,
            model_family="llama")
        text = "This is a test document."
@@ -43,7 +43,7 @@ class Test_Models_Basics(TestCase):
        doc_result = bigdl_embeddings.embed_documents([text])

    def test_langchain_llm_embedding_gptneox(self):
-        bigdl_embeddings = BigdlLLMEmbeddings(
+        bigdl_embeddings = BigdlNativeEmbeddings(
            model_path=self.gptneox_model_path,
            model_family="gptneox")
        text = "This is a test document."
@@ -51,7 +51,7 @@ class Test_Models_Basics(TestCase):
        doc_result = bigdl_embeddings.embed_documents([text])

    def test_langchain_llm_llama(self):
-        llm = BigdlLLM(
+        llm = BigdlNativeLLM(
            model_path=self.llama_model_path,
            max_tokens=32,
            n_threads=self.n_threads)
@@ -59,7 +59,7 @@ class Test_Models_Basics(TestCase):
        result = llm(question)

    def test_langchain_llm_gptneox(self):
-        llm = BigdlLLM(
+        llm = BigdlNativeLLM(
            model_path=self.gptneox_model_path,
            model_family="gptneox",
            max_tokens=32,
@@ -68,7 +68,7 @@ class Test_Models_Basics(TestCase):
        result = llm(question)

    def test_langchain_llm_bloom(self):
-        llm = BigdlLLM(
+        llm = BigdlNativeLLM(
            model_path=self.bloom_model_path,
            model_family="bloom",
            max_tokens=32,