LLM: refactor transformers and langchain class name (#8470)
This commit is contained in:
parent 70bc8ea8ae
commit 14626fe05b
11 changed files with 41 additions and 41 deletions
@@ -55,7 +55,7 @@ Currently `bigdl-llm` CLI supports *LLaMA* (e.g., *vicuna*), *GPT-NeoX* (e.g., *
 ```bash
 #convert PyTorch (fp16 or fp32) model;
 #llama/bloom/gptneox/starcoder model family is currently supported
-lm-convert "/path/to/model/" --model-format pth --model-family "bloom" --outfile "/path/to/output/"
+llm-convert "/path/to/model/" --model-format pth --model-family "bloom" --outfile "/path/to/output/"

 #convert GPTQ-4bit model
 #only llama model family is currently supported
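The CLI shown above mirrors a Python conversion entry point used later in this README. A minimal sketch of that route; the `llm_convert` name and its `model=` keyword are inferred from the snippet in a hunk below and should be treated as assumptions, while `outfile`, `outtype`, and `model_family` appear there verbatim:

```python
# Hedged sketch: convert a PyTorch checkpoint to BigDL-LLM's native INT4 format.
# `llm_convert` and the `model=` keyword are assumptions inferred from this diff.
from bigdl.llm import llm_convert

bigdl_llm_path = llm_convert(model='/path/to/model/',
                             outfile='/path/to/output/',
                             outtype='int4',
                             model_family="bloom")
```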
@@ -102,7 +102,7 @@ You may run the models using `transformers`-style API in `bigdl-llm`.

   See the complete example [here](example/transformers/transformers_int4_pipeline.py).

-- ##### Using native INT4 format
+- ##### Using native INT4 format

   You may also convert Hugging Face *Transformers* models into native INT4 format for maximum performance as follows.
@@ -115,8 +115,8 @@ You may run the models using `transformers`-style API in `bigdl-llm`.
                outfile='/path/to/output/', outtype='int4', model_family="llama")

   #load the converted model
-  from bigdl.llm.transformers import BigdlForCausalLM
-  llm = BigdlForCausalLM.from_pretrained("/path/to/output/model.bin",...)
+  from bigdl.llm.transformers import BigdlNativeForCausalLM
+  llm = BigdlNativeForCausalLM.from_pretrained("/path/to/output/model.bin",...)

   #run the converted model
   input_ids = llm.tokenize(prompt)
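For context, a hedged end-to-end sketch of the native INT4 flow after this rename. `from_pretrained` with `model_family` and `llm.tokenize` appear in this diff; `generate` and `batch_decode` follow the transformers-style API the README describes but are assumptions here:

```python
# Hedged sketch of loading and running a converted model with the new class name.
from bigdl.llm.transformers import BigdlNativeForCausalLM

# load the converted model (model_family mirrors the load() helper in this diff)
llm = BigdlNativeForCausalLM.from_pretrained("/path/to/output/model.bin",
                                             model_family="llama")

# run the converted model; generate()/batch_decode() are assumed to mirror
# the transformers-style API and are not shown in this diff
input_ids = llm.tokenize("What is AI?")
output_ids = llm.generate(input_ids, max_new_tokens=32)
print(llm.batch_decode(output_ids))
```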
@@ -130,13 +130,13 @@ You may run the models using `transformers`-style API in `bigdl-llm`.
 You may convert Hugging Face *Transformers* models into *native INT4* format (currently only *llama*/*bloom*/*gptneox*/*starcoder* model family is supported), and then run the converted models using the LangChain API in `bigdl-llm` as follows.

 ```python
-from bigdl.llm.langchain.llms import BigdlLLM
-from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings
+from bigdl.llm.langchain.llms import BigdlNativeLLM
+from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings
 from langchain.chains.question_answering import load_qa_chain

-embeddings = BigdlLLMEmbeddings(model_path='/path/to/converted/model.bin',
+embeddings = BigdlNativeEmbeddings(model_path='/path/to/converted/model.bin',
                                 model_family="llama",...)
-bigdl_llm = BigdlLLM(model_path='/path/to/converted/model.bin',
+bigdl_llm = BigdlNativeLLM(model_path='/path/to/converted/model.bin',
                      model_family="llama",...)

 doc_chain = load_qa_chain(bigdl_llm, ...)
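A hedged sketch assembling the renamed LangChain classes into a complete question-answering flow; the retriever setup follows the docqa example later in this diff, while `chain_type="stuff"` and `doc_chain.run(...)` are standard (legacy) LangChain usage rather than code from this commit:

```python
from langchain.chains.question_answering import load_qa_chain
from langchain.vectorstores import Chroma

from bigdl.llm.langchain.llms import BigdlNativeLLM
from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings

# embed the documents and build a retriever (mirrors the docqa example below)
embeddings = BigdlNativeEmbeddings(model_path='/path/to/converted/model.bin',
                                   model_family="llama")
texts = ["BigDL-LLM runs large language models with INT4 optimizations."]
docsearch = Chroma.from_texts(texts, embeddings).as_retriever()
docs = docsearch.get_relevant_documents("What does bigdl-llm do?")

# answer the question over the retrieved documents
bigdl_llm = BigdlNativeLLM(model_path='/path/to/converted/model.bin',
                           model_family="llama")
doc_chain = load_qa_chain(bigdl_llm, chain_type="stuff")
print(doc_chain.run(input_documents=docs, question="What does bigdl-llm do?"))
```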
@@ -29,8 +29,8 @@ from langchain.chains.question_answering import load_qa_chain
 from langchain.callbacks.manager import CallbackManager
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

-from bigdl.llm.langchain.llms import BigdlLLM
-from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings
+from bigdl.llm.langchain.llms import BigdlNativeLLM
+from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings
@@ -53,13 +53,13 @@ def main(args):
     texts = text_splitter.split_text(input_doc)

     # create embeddings and store into vectordb
-    embeddings = BigdlLLMEmbeddings(model_path=model_path, model_family=model_family, n_threads=n_threads, n_ctx=n_ctx)
+    embeddings = BigdlNativeEmbeddings(model_path=model_path, model_family=model_family, n_threads=n_threads, n_ctx=n_ctx)
     docsearch = Chroma.from_texts(texts, embeddings, metadatas=[{"source": str(i)} for i in range(len(texts))]).as_retriever()

     #get relavant texts
     docs = docsearch.get_relevant_documents(query)

-    bigdl_llm = BigdlLLM(
+    bigdl_llm = BigdlNativeLLM(
         model_path=model_path, model_family=model_family, n_ctx=n_ctx, n_threads=n_threads, callback_manager=callback_manager
     )
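The hunk ends before the chain built from these pieces is invoked; a hedged completion using standard (legacy) LangChain calls that are not part of this diff:

```python
# Hedged completion of the docqa flow above; chain_type="stuff" is an assumption.
doc_chain = load_qa_chain(bigdl_llm, chain_type="stuff")
result = doc_chain.run(input_documents=docs, question=query)
```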
@@ -21,7 +21,7 @@

 import argparse

-from bigdl.llm.langchain.llms import BigdlLLM
+from bigdl.llm.langchain.llms import BigdlNativeLLM
 from langchain import PromptTemplate, LLMChain
 from langchain.callbacks.manager import CallbackManager
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
@@ -42,7 +42,7 @@ def main(args):
     callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

     # Verbose is required to pass to the callback manager
-    llm = BigdlLLM(
+    llm = BigdlNativeLLM(
         model_path=model_path,
         model_family=model_family,
         n_threads=n_threads,
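A hedged sketch of the full `LLMChain` flow this example file implements with the renamed class; the chain calls are standard (legacy) LangChain API, and the prompt template text is illustrative rather than taken from the diff:

```python
from langchain import PromptTemplate, LLMChain
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

from bigdl.llm.langchain.llms import BigdlNativeLLM

# stream generated tokens to stdout as they arrive
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

llm = BigdlNativeLLM(model_path="/path/to/converted/model.bin",
                     model_family="llama",
                     n_threads=4,
                     callback_manager=callback_manager)

prompt = PromptTemplate(template="Question: {question}\n\nAnswer:",
                        input_variables=["question"])
llm_chain = LLMChain(prompt=prompt, llm=llm)
llm_chain.run("What is AI?")
```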
@@ -31,8 +31,8 @@ def convert(repo_id_or_model_path, model_family, tmp_path):
     return bigdl_llm_path

 def load(model_path, model_family, n_threads):
-    from bigdl.llm.transformers import BigdlForCausalLM
-    llm = BigdlForCausalLM.from_pretrained(
+    from bigdl.llm.transformers import BigdlNativeForCausalLM
+    llm = BigdlNativeForCausalLM.from_pretrained(
         pretrained_model_name_or_path=model_path,
         model_family=model_family,
         n_threads=n_threads)
@@ -19,8 +19,8 @@
 # Otherwise there would be module not found error in non-pip's setting as Python would
 # only search the first bigdl package and end up finding only one sub-package.

-from .bigdlllm import BigdlLLMEmbeddings
+from .bigdlllm import BigdlNativeEmbeddings

 __all__ = [
-    "BigdlLLMEmbeddings",
+    "BigdlNativeEmbeddings",
 ]
@@ -44,7 +44,7 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 # THE SOFTWARE.

-"""Wrapper around BigdlLLM embedding models."""
+"""Wrapper around BigdlNative embedding models."""
 import importlib
 from typing import Any, Dict, List, Optional
@@ -53,14 +53,14 @@ from pydantic import BaseModel, Extra, Field, root_validator
 from langchain.embeddings.base import Embeddings


-class BigdlLLMEmbeddings(BaseModel, Embeddings):
+class BigdlNativeEmbeddings(BaseModel, Embeddings):
     """Wrapper around bigdl-llm embedding models.

     Example:
         .. code-block:: python

-            from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings
-            llama = BigdlLLMEmbeddings(model_path="/path/to/model.bin")
+            from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings
+            llama = BigdlNativeEmbeddings(model_path="/path/to/model.bin")
     """

     model_family: str = "llama"
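A minimal usage sketch for the renamed embeddings class; `embed_documents` appears in this repo's tests further down, and `embed_query` comes from LangChain's base `Embeddings` interface:

```python
from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings

embeddings = BigdlNativeEmbeddings(model_path="/path/to/model.bin",
                                   model_family="llama")
doc_vectors = embeddings.embed_documents(["This is a test document."])
query_vector = embeddings.embed_query("This is a query.")
```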
@@ -23,12 +23,12 @@
 from typing import Dict, Type
 from langchain.llms.base import BaseLLM

-from .bigdlllm import BigdlLLM
+from .bigdlllm import BigdlNativeLLM

 __all__ = [
-    "BigdlLLM",
+    "BigdlNativeLLM",
 ]

 type_to_cls_dict: Dict[str, Type[BaseLLM]] = {
-    "BigdlLLM": BigdlLLM,
+    "BigdlNativeLLM": BigdlNativeLLM,
 }
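`type_to_cls_dict` keeps the renamed class reachable by its registered name; a hedged lookup sketch (whether the dict is intended for external import is an assumption):

```python
from bigdl.llm.langchain.llms import type_to_cls_dict

llm_cls = type_to_cls_dict["BigdlNativeLLM"]  # resolves to BigdlNativeLLM
llm = llm_cls(model_path="/path/to/converted/model.bin", model_family="llama")
```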
@@ -54,14 +54,14 @@ from langchain.llms.base import LLM


-class BigdlLLM(LLM):
+class BigdlNativeLLM(LLM):
     """Wrapper around the BigDL-LLM

     Example:
         .. code-block:: python

-            from langchain.llms import BigdlLLM
-            llm = BigdlLLM(model_path="/path/to/llama/model")
+            from langchain.llms import BigdlNativeLLM
+            llm = BigdlNativeLLM(model_path="/path/to/llama/model")
     """
@@ -281,8 +281,8 @@ class BigdlLLM(LLM):
         Example:
             .. code-block:: python

-                from langchain.llms import BigdlLLM
-                llm = BigdlLLM(model_path="/path/to/local/llama/model.bin")
+                from langchain.llms import BigdlNativeLLM
+                llm = BigdlNativeLLM(model_path="/path/to/local/llama/model.bin")
                 llm("This is a prompt.")
         """
         if self.streaming:
@@ -326,8 +326,8 @@ class BigdlLLM(LLM):
         Example:
             .. code-block:: python

-                from langchain.llms import BigdlLLM
-                llm = BigdlLLM(
+                from langchain.llms import BigdlNativeLLM
+                llm = BigdlNativeLLM(
                     model_path="/path/to/local/model.bin",
                     temperature = 0.5
                 )
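Since the `_call` path above branches on `self.streaming`, a hedged streaming sketch; passing `streaming=True` explicitly is an assumption (its default is not visible in this diff), while the `callback_manager` usage matches the examples elsewhere in this commit:

```python
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

from bigdl.llm.langchain.llms import BigdlNativeLLM

llm = BigdlNativeLLM(
    model_path="/path/to/local/model.bin",
    streaming=True,  # assumption: mirrors the self.streaming branch above
    callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
)
llm("Tell me a joke.")  # tokens print to stdout as they are generated
```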
@@ -16,4 +16,4 @@

 from .convert import ggml_convert_int4
 from .model import AutoModelForCausalLM, AutoModel
-from .modelling_bigdl import BigdlForCausalLM
+from .modelling_bigdl import BigdlNativeForCausalLM
@@ -22,7 +22,7 @@
 from bigdl.llm.utils.common import invalidInputError


-class BigdlForCausalLM:
+class BigdlNativeForCausalLM:
     """
     A generic model class that mimics the behavior of
     ``transformers.LlamaForCausalLM.from_pretrained`` API
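How the renamed class is constructed elsewhere in this diff (the `load()` helper in the examples); the argument names are taken verbatim from that hunk, with only the literal values illustrative:

```python
from bigdl.llm.transformers import BigdlNativeForCausalLM

llm = BigdlNativeForCausalLM.from_pretrained(
    pretrained_model_name_or_path="/path/to/converted/model.bin",
    model_family="llama",
    n_threads=4)
```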
@@ -15,8 +15,8 @@
 #

 from bigdl.llm.utils import get_avx_flags
-from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings
-from bigdl.llm.langchain.llms import BigdlLLM
+from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings
+from bigdl.llm.langchain.llms import BigdlNativeLLM
 import pytest
 from unittest import TestCase
 import os
@@ -35,7 +35,7 @@ class Test_Models_Basics(TestCase):


     def test_langchain_llm_embedding_llama(self):
-        bigdl_embeddings = BigdlLLMEmbeddings(
+        bigdl_embeddings = BigdlNativeEmbeddings(
             model_path=self.llama_model_path,
             model_family="llama")
         text = "This is a test document."
@@ -43,7 +43,7 @@ class Test_Models_Basics(TestCase):
         doc_result = bigdl_embeddings.embed_documents([text])

     def test_langchain_llm_embedding_gptneox(self):
-        bigdl_embeddings = BigdlLLMEmbeddings(
+        bigdl_embeddings = BigdlNativeEmbeddings(
             model_path=self.gptneox_model_path,
             model_family="gptneox")
         text = "This is a test document."
@@ -51,7 +51,7 @@ class Test_Models_Basics(TestCase):
         doc_result = bigdl_embeddings.embed_documents([text])

     def test_langchain_llm_llama(self):
-        llm = BigdlLLM(
+        llm = BigdlNativeLLM(
             model_path=self.llama_model_path,
             max_tokens=32,
             n_threads=self.n_threads)
@@ -59,7 +59,7 @@ class Test_Models_Basics(TestCase):
         result = llm(question)

     def test_langchain_llm_gptneox(self):
-        llm = BigdlLLM(
+        llm = BigdlNativeLLM(
             model_path=self.gptneox_model_path,
             model_family="gptneox",
             max_tokens=32,
@@ -68,7 +68,7 @@ class Test_Models_Basics(TestCase):
         result = llm(question)

     def test_langchain_llm_bloom(self):
-        llm = BigdlLLM(
+        llm = BigdlNativeLLM(
             model_path=self.bloom_model_path,
             model_family="bloom",
             max_tokens=32,
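Taken together, the commit renames three public classes. A compact migration reference derived directly from the hunks above:

```python
# old name (pre-#8470)                              -> new name
# bigdl.llm.transformers.BigdlForCausalLM           -> BigdlNativeForCausalLM
# bigdl.llm.langchain.llms.BigdlLLM                 -> BigdlNativeLLM
# bigdl.llm.langchain.embeddings.BigdlLLMEmbeddings -> BigdlNativeEmbeddings
RENAMES = {
    "BigdlForCausalLM": "BigdlNativeForCausalLM",
    "BigdlLLM": "BigdlNativeLLM",
    "BigdlLLMEmbeddings": "BigdlNativeEmbeddings",
}
```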