LLM: refactor transformers and langchain class names (#8470)
parent: 70bc8ea8ae
commit: 14626fe05b

11 changed files with 41 additions and 41 deletions
@@ -55,7 +55,7 @@ Currently `bigdl-llm` CLI supports *LLaMA* (e.g., *vicuna*), *GPT-NeoX* (e.g., *
   ```bash
   #convert PyTorch (fp16 or fp32) model; 
   #llama/bloom/gptneox/starcoder model family is currently supported
-  lm-convert "/path/to/model/" --model-format pth --model-family "bloom" --outfile "/path/to/output/"
+  llm-convert "/path/to/model/" --model-format pth --model-family "bloom" --outfile "/path/to/output/"
 
   #convert GPTQ-4bit model
   #only llama model family is currently supported
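For reference, the same conversion can be driven from Python. A minimal sketch, assuming the `llm_convert` helper exported from `bigdl.llm` (its trailing keyword arguments appear as context in the `-115,8` hunk below); all paths are placeholders:

```python
from bigdl.llm import llm_convert

# convert a PyTorch (fp16 or fp32) checkpoint into native INT4 format;
# the llama/bloom/gptneox/starcoder families are supported, as above
bigdl_llm_path = llm_convert(model='/path/to/model/',
                             outfile='/path/to/output/',
                             outtype='int4',
                             model_family='bloom')
print(bigdl_llm_path)  # path of the generated .bin checkpoint
```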
@@ -102,7 +102,7 @@ You may run the models using `transformers`-style API in `bigdl-llm`.
  See the complete example [here](example/transformers/transformers_int4_pipeline.py). 
  
- - ##### Using native INT4 format
+- ##### Using native INT4 format
 
  You may also convert Hugging Face *Transformers* models into native INT4 format for maximum performance as follows.
 
@@ -115,8 +115,8 @@ You may run the models using `transformers`-style API in `bigdl-llm`.
      outfile='/path/to/output/', outtype='int4', model_family="llama")
 
  #load the converted model
- from bigdl.llm.transformers import BigdlForCausalLM
- llm = BigdlForCausalLM.from_pretrained("/path/to/output/model.bin",...)
+ from bigdl.llm.transformers import BigdlNativeForCausalLM
+ llm = BigdlNativeForCausalLM.from_pretrained("/path/to/output/model.bin",...)
  
  #run the converted  model
  input_ids = llm.tokenize(prompt)
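Putting the fragments above together, a runnable sketch of the full native INT4 pipeline under the new class name (only `from_pretrained` and `tokenize` appear verbatim in this diff; the `generate` and `batch_decode` calls are assumptions modeled on the surrounding pipeline example, and the paths are placeholders):

```python
from bigdl.llm.transformers import BigdlNativeForCausalLM

# load the converted native INT4 model
llm = BigdlNativeForCausalLM.from_pretrained("/path/to/output/model.bin",
                                             model_family="llama")

# run the converted model; generate/batch_decode are assumed to mirror
# the rest of the native pipeline API
prompt = "Once upon a time, there existed a little girl"
input_ids = llm.tokenize(prompt)
output_ids = llm.generate(input_ids, max_new_tokens=32)
output = llm.batch_decode(output_ids)
print(output)
```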
@@ -130,13 +130,13 @@ You may run the models using `transformers`-style API in `bigdl-llm`.
 You may convert Hugging Face *Transformers* models into *native INT4* format (currently only *llama*/*bloom*/*gptneox*/*starcoder* model family is supported), and then run the converted models using the LangChain API in `bigdl-llm` as follows.
 
 ```python
-from bigdl.llm.langchain.llms import BigdlLLM
-from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings
+from bigdl.llm.langchain.llms import BigdlNativeLLM
+from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings
 from langchain.chains.question_answering import load_qa_chain
 
-embeddings = BigdlLLMEmbeddings(model_path='/path/to/converted/model.bin',
+embeddings = BigdlNativeEmbeddings(model_path='/path/to/converted/model.bin',
                                 model_family="llama",...)
-bigdl_llm = BigdlLLM(model_path='/path/to/converted/model.bin',
+bigdl_llm = BigdlNativeLLM(model_path='/path/to/converted/model.bin',
                      model_family="llama",...)
 
 doc_chain = load_qa_chain(bigdl_llm, ...)
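Continuing the renamed LangChain snippet above, a sketch of exercising the chain end to end (the `Chroma` retriever and the `run` call are standard LangChain usage borrowed from the example script later in this diff, not lines changed by this commit; the sample text and question are placeholders):

```python
from langchain.vectorstores import Chroma

# build a tiny retriever over the BigdlNativeEmbeddings defined above
texts = ["BigDL-LLM runs large language models with INT4 optimizations."]
docsearch = Chroma.from_texts(texts, embeddings).as_retriever()

# retrieve relevant documents and answer a question with the QA chain
query = "What does BigDL-LLM do?"
docs = docsearch.get_relevant_documents(query)
answer = doc_chain.run(input_documents=docs, question=query)
print(answer)
```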
@@ -29,8 +29,8 @@ from langchain.chains.question_answering import load_qa_chain
 from langchain.callbacks.manager import CallbackManager
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 
-from bigdl.llm.langchain.llms import BigdlLLM
-from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings
+from bigdl.llm.langchain.llms import BigdlNativeLLM
+from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings
 
 
 
@@ -53,13 +53,13 @@ def main(args):
     texts = text_splitter.split_text(input_doc)
 
     # create embeddings and store into vectordb
-    embeddings = BigdlLLMEmbeddings(model_path=model_path, model_family=model_family, n_threads=n_threads, n_ctx=n_ctx)
+    embeddings = BigdlNativeEmbeddings(model_path=model_path, model_family=model_family, n_threads=n_threads, n_ctx=n_ctx)
     docsearch = Chroma.from_texts(texts, embeddings, metadatas=[{"source": str(i)} for i in range(len(texts))]).as_retriever()
 
     #get relavant texts
     docs = docsearch.get_relevant_documents(query)
 
-    bigdl_llm = BigdlLLM(
+    bigdl_llm = BigdlNativeLLM(
         model_path=model_path, model_family=model_family, n_ctx=n_ctx, n_threads=n_threads, callback_manager=callback_manager
     )
 
@@ -21,7 +21,7 @@
 
 import argparse
 
-from bigdl.llm.langchain.llms import BigdlLLM
+from bigdl.llm.langchain.llms import BigdlNativeLLM
 from langchain import PromptTemplate, LLMChain
 from langchain.callbacks.manager import CallbackManager
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
@@ -42,7 +42,7 @@ def main(args):
     callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
 
     # Verbose is required to pass to the callback manager
-    llm = BigdlLLM(
+    llm = BigdlNativeLLM(
         model_path=model_path,
         model_family=model_family,
         n_threads=n_threads,
@@ -31,8 +31,8 @@ def convert(repo_id_or_model_path, model_family, tmp_path):
     return bigdl_llm_path
 
 def load(model_path, model_family, n_threads):
-    from bigdl.llm.transformers import BigdlForCausalLM
-    llm = BigdlForCausalLM.from_pretrained(
+    from bigdl.llm.transformers import BigdlNativeForCausalLM
+    llm = BigdlNativeForCausalLM.from_pretrained(
         pretrained_model_name_or_path=model_path,
         model_family=model_family,
         n_threads=n_threads)
@@ -19,8 +19,8 @@
 # Otherwise there would be module not found error in non-pip's setting as Python would
 # only search the first bigdl package and end up finding only one sub-package.
 
-from .bigdlllm import BigdlLLMEmbeddings
+from .bigdlllm import BigdlNativeEmbeddings
 
 __all__ = [
-    "BigdlLLMEmbeddings",
+    "BigdlNativeEmbeddings",
 ]
@@ -44,7 +44,7 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 # THE SOFTWARE.
 
-"""Wrapper around BigdlLLM embedding models."""
+"""Wrapper around BigdlNative embedding models."""
 import importlib
 from typing import Any, Dict, List, Optional
 
@@ -53,14 +53,14 @@ from pydantic import BaseModel, Extra, Field, root_validator
 from langchain.embeddings.base import Embeddings
 
 
-class BigdlLLMEmbeddings(BaseModel, Embeddings):
+class BigdlNativeEmbeddings(BaseModel, Embeddings):
     """Wrapper around bigdl-llm embedding models.
 
     Example:
         .. code-block:: python
 
-            from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings
-            llama = BigdlLLMEmbeddings(model_path="/path/to/model.bin")
+            from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings
+            llama = BigdlNativeEmbeddings(model_path="/path/to/model.bin")
     """
 
     model_family: str = "llama"
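A quick usage sketch for the renamed embeddings class, mirroring the docstring above and the unit tests later in this diff (`embed_query` and `embed_documents` are the standard LangChain `Embeddings` interface; the model path is a placeholder):

```python
from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings

# model_path points at a converted native-format checkpoint
embeddings = BigdlNativeEmbeddings(model_path="/path/to/model.bin",
                                   model_family="llama")

# both halves of the LangChain Embeddings interface
query_vector = embeddings.embed_query("This is a test document.")
doc_vectors = embeddings.embed_documents(["This is a test document."])
```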
@@ -23,12 +23,12 @@
 from typing import Dict, Type
 from langchain.llms.base import BaseLLM
 
-from .bigdlllm import BigdlLLM
+from .bigdlllm import BigdlNativeLLM
 
 __all__ = [
-    "BigdlLLM",
+    "BigdlNativeLLM",
 ]
 
 type_to_cls_dict: Dict[str, Type[BaseLLM]] = {
-    "BigdlLLM": BigdlLLM,
+    "BigdlNativeLLM": BigdlNativeLLM,
 }
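Because `type_to_cls_dict` keys classes by their string name, any downstream code that resolves an LLM class through this registry must switch to the new key. A small sketch of the lookup pattern (the constructor arguments are placeholders):

```python
from bigdl.llm.langchain.llms import type_to_cls_dict

# "BigdlLLM" is no longer a valid key after this change
llm_cls = type_to_cls_dict["BigdlNativeLLM"]
llm = llm_cls(model_path="/path/to/converted/model.bin",
              model_family="llama")
```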
@@ -54,14 +54,14 @@ from langchain.llms.base import LLM
 
 
 
-class BigdlLLM(LLM):
+class BigdlNativeLLM(LLM):
     """Wrapper around the BigDL-LLM
 
     Example:
         .. code-block:: python
 
-            from langchain.llms import BigdlLLM
-            llm = BigdlLLM(model_path="/path/to/llama/model")
+            from langchain.llms import BigdlNativeLLM
+            llm = BigdlNativeLLM(model_path="/path/to/llama/model")
     """
 
 
@@ -281,8 +281,8 @@ class BigdlLLM(LLM):
         Example:
             .. code-block:: python
 
-                from langchain.llms import BigdlLLM
-                llm = BigdlLLM(model_path="/path/to/local/llama/model.bin")
+                from langchain.llms import BigdlNativeLLM
+                llm = BigdlNativeLLM(model_path="/path/to/local/llama/model.bin")
                 llm("This is a prompt.")
         """
         if self.streaming:
@@ -326,8 +326,8 @@ class BigdlLLM(LLM):
         Example:
             .. code-block:: python
 
-                from langchain.llms import BigdlLLM
-                llm = BigdlLLM(
+                from langchain.llms import BigdlNativeLLM
+                llm = BigdlNativeLLM(
                     model_path="/path/to/local/model.bin",
                     temperature = 0.5
                 )
@@ -16,4 +16,4 @@
 
 from .convert import ggml_convert_int4
 from .model import AutoModelForCausalLM, AutoModel
-from .modelling_bigdl import BigdlForCausalLM
+from .modelling_bigdl import BigdlNativeForCausalLM
@@ -22,7 +22,7 @@
 from bigdl.llm.utils.common import invalidInputError
 
 
-class BigdlForCausalLM:
+class BigdlNativeForCausalLM:
     """
     A generic model class that mimics the behavior of
     ``transformers.LlamaForCausalLM.from_pretrained`` API
@@ -15,8 +15,8 @@
 #
 
 from bigdl.llm.utils import get_avx_flags
-from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings
-from bigdl.llm.langchain.llms import BigdlLLM
+from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings
+from bigdl.llm.langchain.llms import BigdlNativeLLM
 import pytest
 from unittest import TestCase
 import os
@@ -35,7 +35,7 @@ class Test_Models_Basics(TestCase):
 
 
     def test_langchain_llm_embedding_llama(self):
-        bigdl_embeddings = BigdlLLMEmbeddings(
+        bigdl_embeddings = BigdlNativeEmbeddings(
             model_path=self.llama_model_path,
             model_family="llama")
         text = "This is a test document."
@@ -43,7 +43,7 @@ class Test_Models_Basics(TestCase):
         doc_result = bigdl_embeddings.embed_documents([text])
 
     def test_langchain_llm_embedding_gptneox(self):
-        bigdl_embeddings = BigdlLLMEmbeddings(
+        bigdl_embeddings = BigdlNativeEmbeddings(
             model_path=self.gptneox_model_path,
             model_family="gptneox")
         text = "This is a test document."
@@ -51,7 +51,7 @@ class Test_Models_Basics(TestCase):
         doc_result = bigdl_embeddings.embed_documents([text])
 
     def test_langchain_llm_llama(self):
-        llm = BigdlLLM(
+        llm = BigdlNativeLLM(
             model_path=self.llama_model_path, 
             max_tokens=32,
             n_threads=self.n_threads)
@@ -59,7 +59,7 @@ class Test_Models_Basics(TestCase):
         result = llm(question)
 
     def test_langchain_llm_gptneox(self):
-        llm = BigdlLLM(
+        llm = BigdlNativeLLM(
             model_path=self.gptneox_model_path,
             model_family="gptneox", 
             max_tokens=32,
@@ -68,7 +68,7 @@ class Test_Models_Basics(TestCase):
         result = llm(question)
 
     def test_langchain_llm_bloom(self):
-        llm = BigdlLLM(
+        llm = BigdlNativeLLM(
             model_path=self.bloom_model_path, 
             model_family="bloom",
             max_tokens=32,
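Since this commit is a pure rename with no behavioral change, imports of the old names now fail outright. A possible compatibility shim for downstream code during migration (not part of this PR, purely illustrative):

```python
# alias the new names back to the old ones so existing call sites keep working
from bigdl.llm.transformers import BigdlNativeForCausalLM as BigdlForCausalLM
from bigdl.llm.langchain.llms import BigdlNativeLLM as BigdlLLM
from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings as BigdlLLMEmbeddings
```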