diff --git a/python/llm/README.md b/python/llm/README.md
index 9e314278..192952c0 100644
--- a/python/llm/README.md
+++ b/python/llm/README.md
@@ -55,7 +55,7 @@ Currently `bigdl-llm` CLI supports *LLaMA* (e.g., *vicuna*), *GPT-NeoX* (e.g., *
   ```bash
   #convert PyTorch (fp16 or fp32) model;
   #llama/bloom/gptneox/starcoder model family is currently supported
-  lm-convert "/path/to/model/" --model-format pth --model-family "bloom" --outfile "/path/to/output/"
+  llm-convert "/path/to/model/" --model-format pth --model-family "bloom" --outfile "/path/to/output/"

   #convert GPTQ-4bit model
   #only llama model family is currently supported
@@ -102,7 +102,7 @@ You may run the models using `transformers`-style API in `bigdl-llm`.

   See the complete example [here](example/transformers/transformers_int4_pipeline.py).

- - ##### Using native INT4 format
+- ##### Using native INT4 format

   You may also convert Hugging Face *Transformers* models into native INT4 format for maximum performance as follows.

@@ -115,8 +115,8 @@ You may run the models using `transformers`-style API in `bigdl-llm`.
           outfile='/path/to/output/', outtype='int4', model_family="llama")

   #load the converted model
-  from bigdl.llm.transformers import BigdlForCausalLM
-  llm = BigdlForCausalLM.from_pretrained("/path/to/output/model.bin",...)
+  from bigdl.llm.transformers import BigdlNativeForCausalLM
+  llm = BigdlNativeForCausalLM.from_pretrained("/path/to/output/model.bin",...)

   #run the converted model
   input_ids = llm.tokenize(prompt)
@@ -130,13 +130,13 @@ You may run the models using `transformers`-style API in `bigdl-llm`.
 You may convert Hugging Face *Transformers* models into *native INT4* format (currently only *llama*/*bloom*/*gptneox*/*starcoder* model family is supported), and then run the converted models using the LangChain API in `bigdl-llm` as follows.

 ```python
-from bigdl.llm.langchain.llms import BigdlLLM
-from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings
+from bigdl.llm.langchain.llms import BigdlNativeLLM
+from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings
 from langchain.chains.question_answering import load_qa_chain

-embeddings = BigdlLLMEmbeddings(model_path='/path/to/converted/model.bin',
+embeddings = BigdlNativeEmbeddings(model_path='/path/to/converted/model.bin',
                                 model_family="llama",...)
-bigdl_llm = BigdlLLM(model_path='/path/to/converted/model.bin',
+bigdl_llm = BigdlNativeLLM(model_path='/path/to/converted/model.bin',
                      model_family="llama",...)

 doc_chain = load_qa_chain(bigdl_llm, ...)
diff --git a/python/llm/example/langchain/docqa.py b/python/llm/example/langchain/docqa.py
index 7f508fb8..42d808bf 100644
--- a/python/llm/example/langchain/docqa.py
+++ b/python/llm/example/langchain/docqa.py
@@ -29,8 +29,8 @@ from langchain.chains.question_answering import load_qa_chain
 from langchain.callbacks.manager import CallbackManager
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler


-from bigdl.llm.langchain.llms import BigdlLLM
-from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings
+from bigdl.llm.langchain.llms import BigdlNativeLLM
+from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings


@@ -53,13 +53,13 @@ def main(args):
     texts = text_splitter.split_text(input_doc)

     # create embeddings and store into vectordb
-    embeddings = BigdlLLMEmbeddings(model_path=model_path, model_family=model_family, n_threads=n_threads, n_ctx=n_ctx)
+    embeddings = BigdlNativeEmbeddings(model_path=model_path, model_family=model_family, n_threads=n_threads, n_ctx=n_ctx)
     docsearch = Chroma.from_texts(texts, embeddings, metadatas=[{"source": str(i)} for i in range(len(texts))]).as_retriever()

     #get relavant texts
     docs = docsearch.get_relevant_documents(query)

-    bigdl_llm = BigdlLLM(
+    bigdl_llm = BigdlNativeLLM(
         model_path=model_path, model_family=model_family, n_ctx=n_ctx, n_threads=n_threads, callback_manager=callback_manager
     )

diff --git a/python/llm/example/langchain/streamchat.py b/python/llm/example/langchain/streamchat.py
index b070a642..f3b32e91 100644
--- a/python/llm/example/langchain/streamchat.py
+++ b/python/llm/example/langchain/streamchat.py
@@ -21,7 +21,7 @@

 import argparse

-from bigdl.llm.langchain.llms import BigdlLLM
+from bigdl.llm.langchain.llms import BigdlNativeLLM
 from langchain import PromptTemplate, LLMChain
 from langchain.callbacks.manager import CallbackManager
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
@@ -42,7 +42,7 @@ def main(args):
     callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

     # Verbose is required to pass to the callback manager
-    llm = BigdlLLM(
+    llm = BigdlNativeLLM(
         model_path=model_path,
         model_family=model_family,
         n_threads=n_threads,
diff --git a/python/llm/example/transformers/native_int4_pipeline.py b/python/llm/example/transformers/native_int4_pipeline.py
index 1d0b5856..b39ccb13 100644
--- a/python/llm/example/transformers/native_int4_pipeline.py
+++ b/python/llm/example/transformers/native_int4_pipeline.py
@@ -31,8 +31,8 @@ def convert(repo_id_or_model_path, model_family, tmp_path):
     return bigdl_llm_path

 def load(model_path, model_family, n_threads):
-    from bigdl.llm.transformers import BigdlForCausalLM
-    llm = BigdlForCausalLM.from_pretrained(
+    from bigdl.llm.transformers import BigdlNativeForCausalLM
+    llm = BigdlNativeForCausalLM.from_pretrained(
         pretrained_model_name_or_path=model_path,
         model_family=model_family,
         n_threads=n_threads)
diff --git a/python/llm/src/bigdl/llm/langchain/embeddings/__init__.py b/python/llm/src/bigdl/llm/langchain/embeddings/__init__.py
index 1b19ac26..f5c9fac3 100644
--- a/python/llm/src/bigdl/llm/langchain/embeddings/__init__.py
+++ b/python/llm/src/bigdl/llm/langchain/embeddings/__init__.py
@@ -19,8 +19,8 @@
 # Otherwise there would be module not found error in non-pip's setting as Python would
 # only search the first bigdl package and end up finding only one sub-package.

-from .bigdlllm import BigdlLLMEmbeddings
+from .bigdlllm import BigdlNativeEmbeddings

 __all__ = [
-    "BigdlLLMEmbeddings",
+    "BigdlNativeEmbeddings",
 ]
diff --git a/python/llm/src/bigdl/llm/langchain/embeddings/bigdlllm.py b/python/llm/src/bigdl/llm/langchain/embeddings/bigdlllm.py
index 5049f9c8..94ed9d61 100644
--- a/python/llm/src/bigdl/llm/langchain/embeddings/bigdlllm.py
+++ b/python/llm/src/bigdl/llm/langchain/embeddings/bigdlllm.py
@@ -44,7 +44,7 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 # THE SOFTWARE.

-"""Wrapper around BigdlLLM embedding models."""
+"""Wrapper around BigdlNative embedding models."""

 import importlib
 from typing import Any, Dict, List, Optional
@@ -53,14 +53,14 @@
 from pydantic import BaseModel, Extra, Field, root_validator
 from langchain.embeddings.base import Embeddings


-class BigdlLLMEmbeddings(BaseModel, Embeddings):
+class BigdlNativeEmbeddings(BaseModel, Embeddings):
     """Wrapper around bigdl-llm embedding models.

     Example:
         .. code-block:: python

-            from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings
-            llama = BigdlLLMEmbeddings(model_path="/path/to/model.bin")
+            from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings
+            llama = BigdlNativeEmbeddings(model_path="/path/to/model.bin")
     """
     model_family: str = "llama"
diff --git a/python/llm/src/bigdl/llm/langchain/llms/__init__.py b/python/llm/src/bigdl/llm/langchain/llms/__init__.py
index dbc75f61..5ec5b38d 100644
--- a/python/llm/src/bigdl/llm/langchain/llms/__init__.py
+++ b/python/llm/src/bigdl/llm/langchain/llms/__init__.py
@@ -23,12 +23,12 @@
 from typing import Dict, Type
 from langchain.llms.base import BaseLLM

-from .bigdlllm import BigdlLLM
+from .bigdlllm import BigdlNativeLLM

 __all__ = [
-    "BigdlLLM",
+    "BigdlNativeLLM",
 ]

 type_to_cls_dict: Dict[str, Type[BaseLLM]] = {
-    "BigdlLLM": BigdlLLM,
+    "BigdlNativeLLM": BigdlNativeLLM,
 }
\ No newline at end of file
diff --git a/python/llm/src/bigdl/llm/langchain/llms/bigdlllm.py b/python/llm/src/bigdl/llm/langchain/llms/bigdlllm.py
index ecefde0d..ef1f6590 100644
--- a/python/llm/src/bigdl/llm/langchain/llms/bigdlllm.py
+++ b/python/llm/src/bigdl/llm/langchain/llms/bigdlllm.py
@@ -54,14 +54,14 @@
 from langchain.llms.base import LLM


-class BigdlLLM(LLM):
+class BigdlNativeLLM(LLM):
     """Wrapper around the BigDL-LLM

     Example:
         .. code-block:: python

-            from langchain.llms import BigdlLLM
-            llm = BigdlLLM(model_path="/path/to/llama/model")
+            from langchain.llms import BigdlNativeLLM
+            llm = BigdlNativeLLM(model_path="/path/to/llama/model")
     """


@@ -281,8 +281,8 @@ class BigdlLLM(LLM):
         Example:
             .. code-block:: python

-                from langchain.llms import BigdlLLM
-                llm = BigdlLLM(model_path="/path/to/local/llama/model.bin")
+                from langchain.llms import BigdlNativeLLM
+                llm = BigdlNativeLLM(model_path="/path/to/local/llama/model.bin")
                 llm("This is a prompt.")
         """
         if self.streaming:
@@ -326,8 +326,8 @@ class BigdlLLM(LLM):
         Example:
             .. code-block:: python

-                from langchain.llms import BigdlLLM
-                llm = BigdlLLM(
+                from langchain.llms import BigdlNativeLLM
+                llm = BigdlNativeLLM(
                     model_path="/path/to/local/model.bin",
                     temperature = 0.5
                 )
diff --git a/python/llm/src/bigdl/llm/transformers/__init__.py b/python/llm/src/bigdl/llm/transformers/__init__.py
index eaef6320..67bd2474 100644
--- a/python/llm/src/bigdl/llm/transformers/__init__.py
+++ b/python/llm/src/bigdl/llm/transformers/__init__.py
@@ -16,4 +16,4 @@

 from .convert import ggml_convert_int4
 from .model import AutoModelForCausalLM, AutoModel
-from .modelling_bigdl import BigdlForCausalLM
+from .modelling_bigdl import BigdlNativeForCausalLM
diff --git a/python/llm/src/bigdl/llm/transformers/modelling_bigdl.py b/python/llm/src/bigdl/llm/transformers/modelling_bigdl.py
index 2b067a3b..dc89b573 100644
--- a/python/llm/src/bigdl/llm/transformers/modelling_bigdl.py
+++ b/python/llm/src/bigdl/llm/transformers/modelling_bigdl.py
@@ -22,7 +22,7 @@
 from bigdl.llm.utils.common import invalidInputError


-class BigdlForCausalLM:
+class BigdlNativeForCausalLM:
     """
     A generic model class that mimics the behavior of
     ``transformers.LlamaForCausalLM.from_pretrained`` API
diff --git a/python/llm/test/langchain/test_langchain.py b/python/llm/test/langchain/test_langchain.py
index 1d221225..b333fe64 100644
--- a/python/llm/test/langchain/test_langchain.py
+++ b/python/llm/test/langchain/test_langchain.py
@@ -15,8 +15,8 @@
 #

 from bigdl.llm.utils import get_avx_flags
-from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings
-from bigdl.llm.langchain.llms import BigdlLLM
+from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings
+from bigdl.llm.langchain.llms import BigdlNativeLLM
 import pytest
 from unittest import TestCase
 import os
@@ -35,7 +35,7 @@ class Test_Models_Basics(TestCase):


     def test_langchain_llm_embedding_llama(self):
-        bigdl_embeddings = BigdlLLMEmbeddings(
+        bigdl_embeddings = BigdlNativeEmbeddings(
             model_path=self.llama_model_path,
             model_family="llama")
         text = "This is a test document."
@@ -43,7 +43,7 @@ class Test_Models_Basics(TestCase):
         doc_result = bigdl_embeddings.embed_documents([text])

     def test_langchain_llm_embedding_gptneox(self):
-        bigdl_embeddings = BigdlLLMEmbeddings(
+        bigdl_embeddings = BigdlNativeEmbeddings(
             model_path=self.gptneox_model_path,
             model_family="gptneox")
         text = "This is a test document."
@@ -51,7 +51,7 @@ class Test_Models_Basics(TestCase):
         doc_result = bigdl_embeddings.embed_documents([text])

     def test_langchain_llm_llama(self):
-        llm = BigdlLLM(
+        llm = BigdlNativeLLM(
             model_path=self.llama_model_path,
             max_tokens=32,
             n_threads=self.n_threads)
         question = "What is AI?"
         result = llm(question)

     def test_langchain_llm_gptneox(self):
-        llm = BigdlLLM(
+        llm = BigdlNativeLLM(
             model_path=self.gptneox_model_path,
             model_family="gptneox",
             max_tokens=32,
@@ -68,7 +68,7 @@ class Test_Models_Basics(TestCase):
         question = "What is AI?"
         result = llm(question)

     def test_langchain_llm_bloom(self):
-        llm = BigdlLLM(
+        llm = BigdlNativeLLM(
             model_path=self.bloom_model_path,
             model_family="bloom",
             max_tokens=32,
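A minimal usage sketch of the renamed classes follows, mirroring the README examples updated in this patch; the model paths are placeholders, and only keyword arguments already shown in the patch are used.

```python
# Sketch of the renamed bigdl-llm APIs after this patch; paths are placeholders.
from bigdl.llm.transformers import BigdlNativeForCausalLM
from bigdl.llm.langchain.llms import BigdlNativeLLM
from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings

# transformers-style API: load a previously converted native INT4 model
llm = BigdlNativeForCausalLM.from_pretrained(
    pretrained_model_name_or_path="/path/to/output/model.bin",
    model_family="llama")

# LangChain-style API: the same converted model behind the LLM and embeddings wrappers
embeddings = BigdlNativeEmbeddings(model_path="/path/to/converted/model.bin",
                                   model_family="llama")
bigdl_llm = BigdlNativeLLM(model_path="/path/to/converted/model.bin",
                           model_family="llama")
```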