[LLM Docs] Remaining API Docs Issues Solution (#8780)

* langchain readthedocs update

* solve langchain.llms.transformersllm issues

* langchain.embeddings.transformersembeddings/transformersllm issues

* update docs for get_num_tokens

* add low_bit api doc

* add optimizer model api doc

* update rst index

* fix comments style

* update docs following the comments

* update api doc
This commit is contained in:
SONG Ge 2023-09-06 16:29:34 +08:00 committed by GitHub
parent cf6a620bae
commit 7a71ced78f
6 changed files with 164 additions and 11 deletions

View file

@ -2,7 +2,8 @@ BigDL-LLM API
================== ==================
.. toctree:: .. toctree::
:maxdepth: 2 :maxdepth: 3
transformers.rst transformers.rst
langchain.rst langchain.rst
optimize.rst

View file

@ -4,10 +4,59 @@ BigDL-LLM LangChain API
llm.langchain.embeddings.bigdlllm llm.langchain.embeddings.bigdlllm
---------------------------------------- ----------------------------------------
.. automodule:: bigdl.llm.langchain.embeddings.bigdlllm .. autoclass:: bigdl.llm.langchain.embeddings.bigdlllm.LlamaEmbeddings
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: embed_documents
.. automethod:: embed_query
.. autoclass:: bigdl.llm.langchain.embeddings.bigdlllm.BloomEmbeddings
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: embed_documents
.. automethod:: embed_query
.. autoclass:: bigdl.llm.langchain.embeddings.bigdlllm.GptneoxEmbeddings
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: embed_documents
.. automethod:: embed_query
.. autoclass:: bigdl.llm.langchain.embeddings.bigdlllm.ChatGLMEmbeddings
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: embed_documents
.. automethod:: embed_query
.. autoclass:: bigdl.llm.langchain.embeddings.bigdlllm.StarcoderEmbeddings
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: embed_documents
.. automethod:: embed_query
llm.langchain.embeddings.transformersembeddings llm.langchain.embeddings.transformersembeddings
@ -22,10 +71,59 @@ llm.langchain.embeddings.transformersembeddings
llm.langchain.llms.bigdlllm llm.langchain.llms.bigdlllm
---------------------------------------- ----------------------------------------
.. automodule:: bigdl.llm.langchain.llms.bigdlllm .. autoclass:: bigdl.llm.langchain.llms.bigdlllm.LlamaLLM
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: stream
.. automethod:: get_num_tokens
.. autoclass:: bigdl.llm.langchain.llms.bigdlllm.BloomLLM
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: stream
.. automethod:: get_num_tokens
.. autoclass:: bigdl.llm.langchain.llms.bigdlllm.GptneoxLLM
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: stream
.. automethod:: get_num_tokens
.. autoclass:: bigdl.llm.langchain.llms.bigdlllm.ChatGLMLLM
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: stream
.. automethod:: get_num_tokens
.. autoclass:: bigdl.llm.langchain.llms.bigdlllm.StarcoderLLM
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: stream
.. automethod:: get_num_tokens
llm.langchain.llms.transformersllm llm.langchain.llms.transformersllm

View file

@ -0,0 +1,10 @@
BigDL-LLM Optimize API
======================
llm.optimize
----------------------------------------
.. automodule:: bigdl.llm.optimize
:members: optimize_model
:undoc-members:
:show-inheritance:

View file

@ -68,9 +68,11 @@ class TransformersEmbeddings(BaseModel, Embeddings):
""" """
model: Any #: :meta private: model: Any #: :meta private:
"""BigDL-LLM Transformers-INT4 model."""
tokenizer: Any #: :meta private: tokenizer: Any #: :meta private:
"""Huggingface tokenizer model."""
model_id: str = DEFAULT_MODEL_NAME model_id: str = DEFAULT_MODEL_NAME
"""Model id to use.""" """Model name or model path to use."""
model_kwargs: Dict[str, Any] = Field(default_factory=dict) model_kwargs: Dict[str, Any] = Field(default_factory=dict)
"""Keyword arguments to pass to the model.""" """Keyword arguments to pass to the model."""
encode_kwargs: Dict[str, Any] = Field(default_factory=dict) encode_kwargs: Dict[str, Any] = Field(default_factory=dict)
@ -83,7 +85,18 @@ class TransformersEmbeddings(BaseModel, Embeddings):
model_kwargs: Optional[dict] = None, model_kwargs: Optional[dict] = None,
**kwargs: Any, **kwargs: Any,
): ):
"""Construct object from model_id""" """
Construct object from model_id.
Args:
model_id: Path for the huggingface repo id to be downloaded or the huggingface
checkpoint folder.
model_kwargs: Keyword arguments that will be passed to the model and tokenizer.
kwargs: Extra arguments that will be passed to the model and tokenizer.
Returns: An object of TransformersEmbeddings.
"""
try: try:
from bigdl.llm.transformers import AutoModel from bigdl.llm.transformers import AutoModel
from transformers import AutoTokenizer, LlamaTokenizer from transformers import AutoTokenizer, LlamaTokenizer

View file

@ -659,6 +659,16 @@ class _BaseCausalLM(LLM):
yield chunk yield chunk
def get_num_tokens(self, text: str) -> int: def get_num_tokens(self, text: str) -> int:
"""Get the number of tokens that present in the text.
Useful for checking if an input will fit in a model's context window.
Args:
text: The string input to tokenize.
Returns:
The number of tokens in the text.
"""
tokenized_text = self.client.tokenize(text.encode("utf-8")) tokenized_text = self.client.tokenize(text.encode("utf-8"))
return len(tokenized_text) return len(tokenized_text)

View file

@ -73,7 +73,7 @@ class TransformersLLM(LLM):
model_kwargs: Optional[dict] = None model_kwargs: Optional[dict] = None
"""Keyword arguments passed to the model.""" """Keyword arguments passed to the model."""
model: Any #: :meta private: model: Any #: :meta private:
"""BigDL-LLM Transformer-INT4 model.""" """BigDL-LLM Transformers-INT4 model."""
tokenizer: Any #: :meta private: tokenizer: Any #: :meta private:
"""Huggingface tokenizer model.""" """Huggingface tokenizer model."""
streaming: bool = True streaming: bool = True
@ -91,7 +91,18 @@ class TransformersLLM(LLM):
model_kwargs: Optional[dict] = None, model_kwargs: Optional[dict] = None,
**kwargs: Any, **kwargs: Any,
) -> LLM: ) -> LLM:
"""Construct object from model_id""" """
Construct object from model_id
Args:
model_id: Path for the huggingface repo id to be downloaded or
the huggingface checkpoint folder.
model_kwargs: Keyword arguments that will be passed to the model and tokenizer.
kwargs: Extra arguments that will be passed to the model and tokenizer.
Returns: An object of TransformersLLM.
"""
try: try:
from bigdl.llm.transformers import ( from bigdl.llm.transformers import (
AutoModel, AutoModel,
@ -139,7 +150,17 @@ class TransformersLLM(LLM):
model_kwargs: Optional[dict] = None, model_kwargs: Optional[dict] = None,
**kwargs: Any, **kwargs: Any,
) -> LLM: ) -> LLM:
"""Construct object from model_id""" """
Construct low_bit object from model_id
Args:
model_id: Path for the bigdl transformers low-bit model checkpoint folder.
model_kwargs: Keyword arguments that will be passed to the model and tokenizer.
kwargs: Extra arguments that will be passed to the model and tokenizer.
Returns: An object of TransformersLLM.
"""
try: try:
from bigdl.llm.transformers import ( from bigdl.llm.transformers import (
AutoModel, AutoModel,