LLM: refactor transformers and langchain class name (#8470)
parent 70bc8ea8ae
commit 14626fe05b

11 changed files with 41 additions and 41 deletions
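
In short, this commit renames three user-facing classes in `bigdl-llm`. A minimal before/after sketch of the imports, built only from names that appear in the hunks below:

```python
# before this commit (old class names, removed here):
# from bigdl.llm.transformers import BigdlForCausalLM
# from bigdl.llm.langchain.llms import BigdlLLM
# from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings

# after this commit (new class names, added here):
from bigdl.llm.transformers import BigdlNativeForCausalLM
from bigdl.llm.langchain.llms import BigdlNativeLLM
from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings
```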
				
			
@@ -55,7 +55,7 @@ Currently `bigdl-llm` CLI supports *LLaMA* (e.g., *vicuna*), *GPT-NeoX* (e.g., *
    ```bash
    #convert PyTorch (fp16 or fp32) model;
    #llama/bloom/gptneox/starcoder model family is currently supported
-   lm-convert "/path/to/model/" --model-format pth --model-family "bloom" --outfile "/path/to/output/"
+   llm-convert "/path/to/model/" --model-format pth --model-family "bloom" --outfile "/path/to/output/"
 
    #convert GPTQ-4bit model
    #only llama model family is currently supported
@@ -102,7 +102,7 @@ You may run the models using `transformers`-style API in `bigdl-llm`.
 
  See the complete example [here](example/transformers/transformers_int4_pipeline.py).
 
-  - ##### Using native INT4 format
+- ##### Using native INT4 format
 
 You may also convert Hugging Face *Transformers* models into native INT4 format for maximum performance as follows.
 
@@ -115,8 +115,8 @@ You may run the models using `transformers`-style API in `bigdl-llm`.
      outfile='/path/to/output/', outtype='int4', model_family="llama")
 
  #load the converted model
- from bigdl.llm.transformers import BigdlForCausalLM
- llm = BigdlForCausalLM.from_pretrained("/path/to/output/model.bin",...)
+ from bigdl.llm.transformers import BigdlNativeForCausalLM
+ llm = BigdlNativeForCausalLM.from_pretrained("/path/to/output/model.bin",...)
 
  #run the converted  model
  input_ids = llm.tokenize(prompt)
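
For orientation, a minimal sketch of the renamed `transformers`-style API after this hunk, assuming a model already converted to native INT4 format; the path, thread count, and prompt are placeholders (the keyword names come from the test-utility hunk further down):

```python
from bigdl.llm.transformers import BigdlNativeForCausalLM

# load a previously converted native INT4 model (placeholder path)
llm = BigdlNativeForCausalLM.from_pretrained(
    pretrained_model_name_or_path="/path/to/output/model.bin",
    model_family="llama",   # llama/bloom/gptneox/starcoder families
    n_threads=2)            # hypothetical thread count

# tokenize a prompt with the loaded model, as in the README snippet above
prompt = "Once upon a time,"
input_ids = llm.tokenize(prompt)
```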
@@ -130,13 +130,13 @@ You may run the models using `transformers`-style API in `bigdl-llm`.
 You may convert Hugging Face *Transformers* models into *native INT4* format (currently only *llama*/*bloom*/*gptneox*/*starcoder* model family is supported), and then run the converted models using the LangChain API in `bigdl-llm` as follows.
 
 ```python
-from bigdl.llm.langchain.llms import BigdlLLM
-from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings
+from bigdl.llm.langchain.llms import BigdlNativeLLM
+from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings
 from langchain.chains.question_answering import load_qa_chain
 
-embeddings = BigdlLLMEmbeddings(model_path='/path/to/converted/model.bin',
+embeddings = BigdlNativeEmbeddings(model_path='/path/to/converted/model.bin',
                                 model_family="llama",...)
-bigdl_llm = BigdlLLM(model_path='/path/to/converted/model.bin',
+bigdl_llm = BigdlNativeLLM(model_path='/path/to/converted/model.bin',
                      model_family="llama",...)
 
 doc_chain = load_qa_chain(bigdl_llm, ...)
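
A hedged end-to-end sketch of the renamed LangChain wrappers from this README hunk; the placeholder paths and the `chain_type` argument are assumptions, everything else mirrors the diff:

```python
from bigdl.llm.langchain.llms import BigdlNativeLLM
from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings
from langchain.chains.question_answering import load_qa_chain

# both wrappers point at the same converted native INT4 model (placeholder path)
embeddings = BigdlNativeEmbeddings(model_path='/path/to/converted/model.bin',
                                   model_family="llama")
bigdl_llm = BigdlNativeLLM(model_path='/path/to/converted/model.bin',
                           model_family="llama")

# build a question-answering chain over retrieved documents
doc_chain = load_qa_chain(bigdl_llm, chain_type="stuff")  # chain_type is an assumption
```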
@@ -29,8 +29,8 @@ from langchain.chains.question_answering import load_qa_chain
 from langchain.callbacks.manager import CallbackManager
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 
-from bigdl.llm.langchain.llms import BigdlLLM
-from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings
+from bigdl.llm.langchain.llms import BigdlNativeLLM
+from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings
 
 
 
@@ -53,13 +53,13 @@ def main(args):
     texts = text_splitter.split_text(input_doc)
 
     # create embeddings and store into vectordb
-    embeddings = BigdlLLMEmbeddings(model_path=model_path, model_family=model_family, n_threads=n_threads, n_ctx=n_ctx)
+    embeddings = BigdlNativeEmbeddings(model_path=model_path, model_family=model_family, n_threads=n_threads, n_ctx=n_ctx)
     docsearch = Chroma.from_texts(texts, embeddings, metadatas=[{"source": str(i)} for i in range(len(texts))]).as_retriever()
 
     #get relavant texts
     docs = docsearch.get_relevant_documents(query)
 
-    bigdl_llm = BigdlLLM(
+    bigdl_llm = BigdlNativeLLM(
         model_path=model_path, model_family=model_family, n_ctx=n_ctx, n_threads=n_threads, callback_manager=callback_manager
     )
 
@@ -21,7 +21,7 @@
 
 import argparse
 
-from bigdl.llm.langchain.llms import BigdlLLM
+from bigdl.llm.langchain.llms import BigdlNativeLLM
 from langchain import PromptTemplate, LLMChain
 from langchain.callbacks.manager import CallbackManager
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
@@ -42,7 +42,7 @@ def main(args):
     callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
 
     # Verbose is required to pass to the callback manager
-    llm = BigdlLLM(
+    llm = BigdlNativeLLM(
         model_path=model_path,
         model_family=model_family,
         n_threads=n_threads,
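
For orientation, a hedged sketch of what this streaming example assembles, using only the imports and keyword names visible in the two hunks above; the model path, thread count, prompt template, and `verbose=True` are assumptions:

```python
from bigdl.llm.langchain.llms import BigdlNativeLLM
from langchain import PromptTemplate, LLMChain
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# stream generated tokens to stdout as they are produced
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Verbose is required to pass to the callback manager
llm = BigdlNativeLLM(
    model_path="/path/to/converted/model.bin",  # placeholder path
    model_family="llama",
    n_threads=2,                                # hypothetical thread count
    callback_manager=callback_manager,
    verbose=True)                               # assumption, per the comment above

prompt = PromptTemplate(input_variables=["question"],
                        template="Q: {question} A:")  # hypothetical template
llm_chain = LLMChain(prompt=prompt, llm=llm)
llm_chain.run("What is BigDL?")
```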
@@ -31,8 +31,8 @@ def convert(repo_id_or_model_path, model_family, tmp_path):
     return bigdl_llm_path
 
 def load(model_path, model_family, n_threads):
-    from bigdl.llm.transformers import BigdlForCausalLM
-    llm = BigdlForCausalLM.from_pretrained(
+    from bigdl.llm.transformers import BigdlNativeForCausalLM
+    llm = BigdlNativeForCausalLM.from_pretrained(
         pretrained_model_name_or_path=model_path,
         model_family=model_family,
         n_threads=n_threads)
@@ -19,8 +19,8 @@
 # Otherwise there would be module not found error in non-pip's setting as Python would
 # only search the first bigdl package and end up finding only one sub-package.
 
-from .bigdlllm import BigdlLLMEmbeddings
+from .bigdlllm import BigdlNativeEmbeddings
 
 __all__ = [
-    "BigdlLLMEmbeddings",
+    "BigdlNativeEmbeddings",
 ]
@@ -44,7 +44,7 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 # THE SOFTWARE.
 
-"""Wrapper around BigdlLLM embedding models."""
+"""Wrapper around BigdlNative embedding models."""
 import importlib
 from typing import Any, Dict, List, Optional
 
@@ -53,14 +53,14 @@ from pydantic import BaseModel, Extra, Field, root_validator
 from langchain.embeddings.base import Embeddings
 
 
-class BigdlLLMEmbeddings(BaseModel, Embeddings):
+class BigdlNativeEmbeddings(BaseModel, Embeddings):
     """Wrapper around bigdl-llm embedding models.
 
     Example:
         .. code-block:: python
 
-            from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings
-            llama = BigdlLLMEmbeddings(model_path="/path/to/model.bin")
+            from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings
+            llama = BigdlNativeEmbeddings(model_path="/path/to/model.bin")
     """
 
     model_family: str = "llama"
@@ -23,12 +23,12 @@
 from typing import Dict, Type
 from langchain.llms.base import BaseLLM
 
-from .bigdlllm import BigdlLLM
+from .bigdlllm import BigdlNativeLLM
 
 __all__ = [
-    "BigdlLLM",
+    "BigdlNativeLLM",
 ]
 
 type_to_cls_dict: Dict[str, Type[BaseLLM]] = {
-    "BigdlLLM": BigdlLLM,
+    "BigdlNativeLLM": BigdlNativeLLM,
 }
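
The `type_to_cls_dict` entry is renamed along with the class; a small illustrative sketch of resolving the wrapper by its registered name (the lookup itself is not part of the diff, and the path is a placeholder):

```python
from bigdl.llm.langchain.llms import type_to_cls_dict

llm_cls = type_to_cls_dict["BigdlNativeLLM"]              # resolves to BigdlNativeLLM
llm = llm_cls(model_path="/path/to/converted/model.bin",  # placeholder path
              model_family="llama")
```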
@@ -54,14 +54,14 @@ from langchain.llms.base import LLM
 
 
 
-class BigdlLLM(LLM):
+class BigdlNativeLLM(LLM):
     """Wrapper around the BigDL-LLM
 
     Example:
         .. code-block:: python
 
-            from langchain.llms import BigdlLLM
-            llm = BigdlLLM(model_path="/path/to/llama/model")
+            from langchain.llms import BigdlNativeLLM
+            llm = BigdlNativeLLM(model_path="/path/to/llama/model")
     """
 
 
@@ -281,8 +281,8 @@ class BigdlLLM(LLM):
         Example:
             .. code-block:: python
 
-                from langchain.llms import BigdlLLM
-                llm = BigdlLLM(model_path="/path/to/local/llama/model.bin")
+                from langchain.llms import BigdlNativeLLM
+                llm = BigdlNativeLLM(model_path="/path/to/local/llama/model.bin")
                 llm("This is a prompt.")
         """
         if self.streaming:
@@ -326,8 +326,8 @@ class BigdlLLM(LLM):
         Example:
             .. code-block:: python
 
-                from langchain.llms import BigdlLLM
-                llm = BigdlLLM(
+                from langchain.llms import BigdlNativeLLM
+                llm = BigdlNativeLLM(
                     model_path="/path/to/local/model.bin",
                     temperature = 0.5
                 )
@@ -16,4 +16,4 @@
 
 from .convert import ggml_convert_int4
 from .model import AutoModelForCausalLM, AutoModel
-from .modelling_bigdl import BigdlForCausalLM
+from .modelling_bigdl import BigdlNativeForCausalLM
@@ -22,7 +22,7 @@
 from bigdl.llm.utils.common import invalidInputError
 
 
-class BigdlForCausalLM:
+class BigdlNativeForCausalLM:
     """
     A generic model class that mimics the behavior of
     ``transformers.LlamaForCausalLM.from_pretrained`` API
@@ -15,8 +15,8 @@
 #
 
 from bigdl.llm.utils import get_avx_flags
-from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings
-from bigdl.llm.langchain.llms import BigdlLLM
+from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings
+from bigdl.llm.langchain.llms import BigdlNativeLLM
 import pytest
 from unittest import TestCase
 import os
@@ -35,7 +35,7 @@ class Test_Models_Basics(TestCase):
 
 
     def test_langchain_llm_embedding_llama(self):
-        bigdl_embeddings = BigdlLLMEmbeddings(
+        bigdl_embeddings = BigdlNativeEmbeddings(
             model_path=self.llama_model_path,
             model_family="llama")
         text = "This is a test document."
@@ -43,7 +43,7 @@ class Test_Models_Basics(TestCase):
         doc_result = bigdl_embeddings.embed_documents([text])
 
     def test_langchain_llm_embedding_gptneox(self):
-        bigdl_embeddings = BigdlLLMEmbeddings(
+        bigdl_embeddings = BigdlNativeEmbeddings(
             model_path=self.gptneox_model_path,
             model_family="gptneox")
         text = "This is a test document."
@@ -51,7 +51,7 @@ class Test_Models_Basics(TestCase):
         doc_result = bigdl_embeddings.embed_documents([text])
 
     def test_langchain_llm_llama(self):
-        llm = BigdlLLM(
+        llm = BigdlNativeLLM(
             model_path=self.llama_model_path,
             max_tokens=32,
             n_threads=self.n_threads)
@@ -59,7 +59,7 @@ class Test_Models_Basics(TestCase):
         result = llm(question)
 
     def test_langchain_llm_gptneox(self):
-        llm = BigdlLLM(
+        llm = BigdlNativeLLM(
             model_path=self.gptneox_model_path,
             model_family="gptneox",
             max_tokens=32,
@@ -68,7 +68,7 @@ class Test_Models_Basics(TestCase):
         result = llm(question)
 
     def test_langchain_llm_bloom(self):
-        llm = BigdlLLM(
+        llm = BigdlNativeLLM(
             model_path=self.bloom_model_path,
             model_family="bloom",
             max_tokens=32,