LLM: refactor transformers and langchain class name (#8470)
parent 70bc8ea8ae
commit 14626fe05b
11 changed files with 41 additions and 41 deletions
@@ -55,7 +55,7 @@ Currently `bigdl-llm` CLI supports *LLaMA* (e.g., *vicuna*), *GPT-NeoX* (e.g., *
 ```bash
 #convert PyTorch (fp16 or fp32) model;
 #llama/bloom/gptneox/starcoder model family is currently supported
-lm-convert "/path/to/model/" --model-format pth --model-family "bloom" --outfile "/path/to/output/"
+llm-convert "/path/to/model/" --model-format pth --model-family "bloom" --outfile "/path/to/output/"

 #convert GPTQ-4bit model
 #only llama model family is currently supported
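The CLI fix above (`lm-convert` → `llm-convert`) has a Python counterpart: the `llm_convert` call whose tail appears as context in the next hunk. A minimal sketch, assuming `llm_convert` is exported from the top-level `bigdl.llm` package and takes the checkpoint path as its `model` argument:

```python
# a sketch of the Python-side conversion; the `model=` keyword is an
# assumption, since the first argument is truncated in this diff view
from bigdl.llm import llm_convert

bigdl_llm_path = llm_convert(model="/path/to/model/",
                             outfile="/path/to/output/",
                             outtype="int4",
                             model_family="bloom")
```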
@@ -115,8 +115,8 @@ You may run the models using `transformers`-style API in `bigdl-llm`.
                   outfile='/path/to/output/', outtype='int4', model_family="llama")

 #load the converted model
-from bigdl.llm.transformers import BigdlForCausalLM
-llm = BigdlForCausalLM.from_pretrained("/path/to/output/model.bin",...)
+from bigdl.llm.transformers import BigdlNativeForCausalLM
+llm = BigdlNativeForCausalLM.from_pretrained("/path/to/output/model.bin",...)

 #run the converted model
 input_ids = llm.tokenize(prompt)
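Read end to end, the renamed `transformers`-style flow looks roughly like this. A sketch: the `generate`/`batch_decode` step is assumed for illustration, since the hunk above stops at tokenization:

```python
from bigdl.llm.transformers import BigdlNativeForCausalLM

# load the converted model (placeholder path)
llm = BigdlNativeForCausalLM.from_pretrained("/path/to/output/model.bin",
                                             model_family="llama")

# run the converted model
prompt = "What is AI?"
input_ids = llm.tokenize(prompt)
output_ids = llm.generate(input_ids, max_new_tokens=32)  # assumed call
output = llm.batch_decode(output_ids)                    # assumed call
print(output)
```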
@@ -130,13 +130,13 @@ You may run the models using `transformers`-style API in `bigdl-llm`.
 You may convert Hugging Face *Transformers* models into *native INT4* format (currently only *llama*/*bloom*/*gptneox*/*starcoder* model family is supported), and then run the converted models using the LangChain API in `bigdl-llm` as follows.

 ```python
-from bigdl.llm.langchain.llms import BigdlLLM
-from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings
+from bigdl.llm.langchain.llms import BigdlNativeLLM
+from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings
 from langchain.chains.question_answering import load_qa_chain

-embeddings = BigdlLLMEmbeddings(model_path='/path/to/converted/model.bin',
+embeddings = BigdlNativeEmbeddings(model_path='/path/to/converted/model.bin',
                                 model_family="llama",...)
-bigdl_llm = BigdlLLM(model_path='/path/to/converted/model.bin',
+bigdl_llm = BigdlNativeLLM(model_path='/path/to/converted/model.bin',
                      model_family="llama",...)

 doc_chain = load_qa_chain(bigdl_llm, ...)
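Combining this hunk with the retrieval code changed later in the commit gives a runnable picture of the renamed LangChain API. A sketch; the `Chroma` wiring and the `chain_type`/`run` arguments follow stock LangChain usage rather than anything shown in this diff:

```python
from bigdl.llm.langchain.llms import BigdlNativeLLM
from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings
from langchain.chains.question_answering import load_qa_chain
from langchain.vectorstores import Chroma

embeddings = BigdlNativeEmbeddings(model_path="/path/to/converted/model.bin",
                                   model_family="llama")
bigdl_llm = BigdlNativeLLM(model_path="/path/to/converted/model.bin",
                           model_family="llama")

# index a toy document set and retrieve context for a question
texts = ["BigDL-LLM runs large language models on Intel hardware."]
docsearch = Chroma.from_texts(
    texts, embeddings,
    metadatas=[{"source": str(i)} for i in range(len(texts))]).as_retriever()
query = "What does BigDL-LLM do?"
docs = docsearch.get_relevant_documents(query)

doc_chain = load_qa_chain(bigdl_llm, chain_type="stuff")
doc_chain.run(input_documents=docs, question=query)
```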
@@ -29,8 +29,8 @@ from langchain.chains.question_answering import load_qa_chain
 from langchain.callbacks.manager import CallbackManager
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

-from bigdl.llm.langchain.llms import BigdlLLM
-from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings
+from bigdl.llm.langchain.llms import BigdlNativeLLM
+from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings


@@ -53,13 +53,13 @@ def main(args):
     texts = text_splitter.split_text(input_doc)

     # create embeddings and store into vectordb
-    embeddings = BigdlLLMEmbeddings(model_path=model_path, model_family=model_family, n_threads=n_threads, n_ctx=n_ctx)
+    embeddings = BigdlNativeEmbeddings(model_path=model_path, model_family=model_family, n_threads=n_threads, n_ctx=n_ctx)
     docsearch = Chroma.from_texts(texts, embeddings, metadatas=[{"source": str(i)} for i in range(len(texts))]).as_retriever()

     #get relavant texts
     docs = docsearch.get_relevant_documents(query)

-    bigdl_llm = BigdlLLM(
+    bigdl_llm = BigdlNativeLLM(
         model_path=model_path, model_family=model_family, n_ctx=n_ctx, n_threads=n_threads, callback_manager=callback_manager
     )
@@ -21,7 +21,7 @@

 import argparse

-from bigdl.llm.langchain.llms import BigdlLLM
+from bigdl.llm.langchain.llms import BigdlNativeLLM
 from langchain import PromptTemplate, LLMChain
 from langchain.callbacks.manager import CallbackManager
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
@@ -42,7 +42,7 @@ def main(args):
     callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

     # Verbose is required to pass to the callback manager
-    llm = BigdlLLM(
+    llm = BigdlNativeLLM(
         model_path=model_path,
         model_family=model_family,
         n_threads=n_threads,
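With the rename in place, the streaming-chat script wires the class into an ordinary `LLMChain`; a sketch under the same imports (the prompt template text and model path are placeholders):

```python
from langchain import PromptTemplate, LLMChain
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from bigdl.llm.langchain.llms import BigdlNativeLLM

callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Verbose is required to pass to the callback manager
llm = BigdlNativeLLM(model_path="/path/to/converted/model.bin",
                     model_family="llama",
                     callback_manager=callback_manager,
                     verbose=True)

prompt = PromptTemplate(template="Question: {question}\nAnswer:",
                        input_variables=["question"])
llm_chain = LLMChain(prompt=prompt, llm=llm)
llm_chain.run("What is AI?")  # tokens stream to stdout via the callback
```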
@@ -31,8 +31,8 @@ def convert(repo_id_or_model_path, model_family, tmp_path):
     return bigdl_llm_path

 def load(model_path, model_family, n_threads):
-    from bigdl.llm.transformers import BigdlForCausalLM
-    llm = BigdlForCausalLM.from_pretrained(
+    from bigdl.llm.transformers import BigdlNativeForCausalLM
+    llm = BigdlNativeForCausalLM.from_pretrained(
         pretrained_model_name_or_path=model_path,
         model_family=model_family,
         n_threads=n_threads)
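Used together, the two helpers above chain naturally; a sketch with placeholder arguments:

```python
# convert a checkpoint, then load the native INT4 artifact it produces
bigdl_llm_path = convert(repo_id_or_model_path="/path/to/llama/model",
                         model_family="llama",
                         tmp_path="/tmp/bigdl")
llm = load(model_path=bigdl_llm_path, model_family="llama", n_threads=4)
```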
@@ -19,8 +19,8 @@
 # Otherwise there would be module not found error in non-pip's setting as Python would
 # only search the first bigdl package and end up finding only one sub-package.

-from .bigdlllm import BigdlLLMEmbeddings
+from .bigdlllm import BigdlNativeEmbeddings

 __all__ = [
-    "BigdlLLMEmbeddings",
+    "BigdlNativeEmbeddings",
 ]
@@ -44,7 +44,7 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 # THE SOFTWARE.

-"""Wrapper around BigdlLLM embedding models."""
+"""Wrapper around BigdlNative embedding models."""
 import importlib
 from typing import Any, Dict, List, Optional

@@ -53,14 +53,14 @@ from pydantic import BaseModel, Extra, Field, root_validator
 from langchain.embeddings.base import Embeddings


-class BigdlLLMEmbeddings(BaseModel, Embeddings):
+class BigdlNativeEmbeddings(BaseModel, Embeddings):
     """Wrapper around bigdl-llm embedding models.

     Example:
         .. code-block:: python

-            from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings
-            llama = BigdlLLMEmbeddings(model_path="/path/to/model.bin")
+            from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings
+            llama = BigdlNativeEmbeddings(model_path="/path/to/model.bin")
     """

     model_family: str = "llama"
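Since the renamed class subclasses LangChain's `Embeddings`, both embedding entry points are available; a short sketch (`embed_query` comes from that base interface, not from this diff, and the path is a placeholder):

```python
from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings

llama = BigdlNativeEmbeddings(model_path="/path/to/model.bin",
                              model_family="llama")
doc_vectors = llama.embed_documents(["This is a test document."])
query_vector = llama.embed_query("What is in the document?")
```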
@@ -23,12 +23,12 @@
 from typing import Dict, Type
 from langchain.llms.base import BaseLLM

-from .bigdlllm import BigdlLLM
+from .bigdlllm import BigdlNativeLLM

 __all__ = [
-    "BigdlLLM",
+    "BigdlNativeLLM",
 ]

 type_to_cls_dict: Dict[str, Type[BaseLLM]] = {
-    "BigdlLLM": BigdlLLM,
+    "BigdlNativeLLM": BigdlNativeLLM,
 }
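The `type_to_cls_dict` registry resolves the new string key to the class, so callers that look LLMs up by name keep working after the rename; a sketch with a placeholder model path:

```python
from bigdl.llm.langchain.llms import type_to_cls_dict

# look up the renamed class by its string key and instantiate it
llm_cls = type_to_cls_dict["BigdlNativeLLM"]
llm = llm_cls(model_path="/path/to/converted/model.bin", model_family="llama")
```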
@@ -54,14 +54,14 @@ from langchain.llms.base import LLM



-class BigdlLLM(LLM):
+class BigdlNativeLLM(LLM):
     """Wrapper around the BigDL-LLM

     Example:
         .. code-block:: python

-            from langchain.llms import BigdlLLM
-            llm = BigdlLLM(model_path="/path/to/llama/model")
+            from langchain.llms import BigdlNativeLLM
+            llm = BigdlNativeLLM(model_path="/path/to/llama/model")
     """

@@ -281,8 +281,8 @@ class BigdlLLM(LLM):
         Example:
             .. code-block:: python

-                from langchain.llms import BigdlLLM
-                llm = BigdlLLM(model_path="/path/to/local/llama/model.bin")
+                from langchain.llms import BigdlNativeLLM
+                llm = BigdlNativeLLM(model_path="/path/to/local/llama/model.bin")
                 llm("This is a prompt.")
         """
         if self.streaming:
@@ -326,8 +326,8 @@ class BigdlLLM(LLM):
         Example:
             .. code-block:: python

-                from langchain.llms import BigdlLLM
-                llm = BigdlLLM(
+                from langchain.llms import BigdlNativeLLM
+                llm = BigdlNativeLLM(
                     model_path="/path/to/local/model.bin",
                     temperature = 0.5
                 )
@@ -16,4 +16,4 @@

 from .convert import ggml_convert_int4
 from .model import AutoModelForCausalLM, AutoModel
-from .modelling_bigdl import BigdlForCausalLM
+from .modelling_bigdl import BigdlNativeForCausalLM
@@ -22,7 +22,7 @@
 from bigdl.llm.utils.common import invalidInputError


-class BigdlForCausalLM:
+class BigdlNativeForCausalLM:
     """
     A generic model class that mimics the behavior of
     ``transformers.LlamaForCausalLM.from_pretrained`` API
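As its docstring says, the renamed class mimics `transformers.LlamaForCausalLM.from_pretrained`; the keyword arguments visible in the `load` helper earlier in this commit suggest a call of roughly this shape (path and thread count are placeholders):

```python
from bigdl.llm.transformers import BigdlNativeForCausalLM

llm = BigdlNativeForCausalLM.from_pretrained(
    pretrained_model_name_or_path="/path/to/output/model.bin",
    model_family="llama",
    n_threads=4)
```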
@@ -15,8 +15,8 @@
 #

 from bigdl.llm.utils import get_avx_flags
-from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings
-from bigdl.llm.langchain.llms import BigdlLLM
+from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings
+from bigdl.llm.langchain.llms import BigdlNativeLLM
 import pytest
 from unittest import TestCase
 import os
@@ -35,7 +35,7 @@ class Test_Models_Basics(TestCase):


     def test_langchain_llm_embedding_llama(self):
-        bigdl_embeddings = BigdlLLMEmbeddings(
+        bigdl_embeddings = BigdlNativeEmbeddings(
             model_path=self.llama_model_path,
             model_family="llama")
         text = "This is a test document."
@@ -43,7 +43,7 @@ class Test_Models_Basics(TestCase):
         doc_result = bigdl_embeddings.embed_documents([text])

     def test_langchain_llm_embedding_gptneox(self):
-        bigdl_embeddings = BigdlLLMEmbeddings(
+        bigdl_embeddings = BigdlNativeEmbeddings(
             model_path=self.gptneox_model_path,
             model_family="gptneox")
         text = "This is a test document."
@@ -51,7 +51,7 @@ class Test_Models_Basics(TestCase):
         doc_result = bigdl_embeddings.embed_documents([text])

     def test_langchain_llm_llama(self):
-        llm = BigdlLLM(
+        llm = BigdlNativeLLM(
             model_path=self.llama_model_path,
             max_tokens=32,
             n_threads=self.n_threads)
@@ -59,7 +59,7 @@ class Test_Models_Basics(TestCase):
         result = llm(question)

     def test_langchain_llm_gptneox(self):
-        llm = BigdlLLM(
+        llm = BigdlNativeLLM(
             model_path=self.gptneox_model_path,
             model_family="gptneox",
             max_tokens=32,
@@ -68,7 +68,7 @@ class Test_Models_Basics(TestCase):
         result = llm(question)

     def test_langchain_llm_bloom(self):
-        llm = BigdlLLM(
+        llm = BigdlNativeLLM(
             model_path=self.bloom_model_path,
             model_family="bloom",
             max_tokens=32,