[LLM Docs] Remaining API Docs Issues Solution (#8780)

* langchain readthedocs update

* solve langchain.llms.transformersllm issues

* langchain.embeddings.transformersembeddings/transfortmersllms issues

* update docs for get_num_tokens

* add low_bit api doc

* add optimizer model api doc

* update rst index

* fix comments style

* update docs following the comments

* update api doc
This commit is contained in:
SONG Ge 2023-09-06 16:29:34 +08:00 committed by GitHub
parent cf6a620bae
commit 7a71ced78f
6 changed files with 164 additions and 11 deletions

View file

@ -2,7 +2,8 @@ BigDL-LLM API
==================
.. toctree::
:maxdepth: 2
:maxdepth: 3
transformers.rst
langchain.rst
optimize.rst

View file

@ -4,10 +4,59 @@ BigDL-LLM LangChain API
llm.langchain.embeddings.bigdlllm
----------------------------------------
.. automodule:: bigdl.llm.langchain.embeddings.bigdlllm
.. autoclass:: bigdl.llm.langchain.embeddings.bigdlllm.LlamaEmbeddings
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: embed_documents
.. automethod:: embed_query
.. autoclass:: bigdl.llm.langchain.embeddings.bigdlllm.BloomEmbeddings
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: embed_documents
.. automethod:: embed_query
.. autoclass:: bigdl.llm.langchain.embeddings.bigdlllm.GptneoxEmbeddings
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: embed_documents
.. automethod:: embed_query
.. autoclass:: bigdl.llm.langchain.embeddings.bigdlllm.ChatGLMEmbeddings
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: embed_documents
.. automethod:: embed_query
.. autoclass:: bigdl.llm.langchain.embeddings.bigdlllm.StarcoderEmbeddings
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: embed_documents
.. automethod:: embed_query
llm.langchain.embeddings.transformersembeddings
@ -22,10 +71,59 @@ llm.langchain.embeddings.transformersembeddings
llm.langchain.llms.bigdlllm
----------------------------------------
.. automodule:: bigdl.llm.langchain.llms.bigdlllm
.. autoclass:: bigdl.llm.langchain.llms.bigdlllm.LlamaLLM
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: stream
.. automethod:: get_num_tokens
.. autoclass:: bigdl.llm.langchain.llms.bigdlllm.BloomLLM
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: stream
.. automethod:: get_num_tokens
.. autoclass:: bigdl.llm.langchain.llms.bigdlllm.GptneoxLLM
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: stream
.. automethod:: get_num_tokens
.. autoclass:: bigdl.llm.langchain.llms.bigdlllm.ChatGLMLLM
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: stream
.. automethod:: get_num_tokens
.. autoclass:: bigdl.llm.langchain.llms.bigdlllm.StarcoderLLM
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: stream
.. automethod:: get_num_tokens
llm.langchain.llms.transformersllm

View file

@ -0,0 +1,10 @@
BigDL-LLM Optimize API
======================
llm.optimize
----------------------------------------
.. automodule:: bigdl.llm.optimize
:members: optimize_model
:undoc-members:
:show-inheritance:

View file

@ -68,13 +68,15 @@ class TransformersEmbeddings(BaseModel, Embeddings):
"""
model: Any #: :meta private:
"""BigDL-LLM Transformers-INT4 model."""
tokenizer: Any #: :meta private:
"""Huggingface tokenizer model."""
model_id: str = DEFAULT_MODEL_NAME
"""Model id to use."""
"""Model name or model path to use."""
model_kwargs: Dict[str, Any] = Field(default_factory=dict)
"""Key word arguments to pass to the model."""
"""Keyword arguments to pass to the model."""
encode_kwargs: Dict[str, Any] = Field(default_factory=dict)
"""Key word arguments to pass when calling the `encode` method of the model."""
"""Keyword arguments to pass when calling the `encode` method of the model."""
@classmethod
def from_model_id(
@ -83,7 +85,18 @@ class TransformersEmbeddings(BaseModel, Embeddings):
model_kwargs: Optional[dict] = None,
**kwargs: Any,
):
"""Construct object from model_id"""
"""
Construct object from model_id.
Args:
model_id: Path for the huggingface repo id to be downloaded or the huggingface
checkpoint folder.
model_kwargs: Keyword arguments that will be passed to the model and tokenizer.
kwargs: Extra arguments that will be passed to the model and tokenizer.
Returns: An object of TransformersEmbeddings.
"""
try:
from bigdl.llm.transformers import AutoModel
from transformers import AutoTokenizer, LlamaTokenizer

View file

@ -659,6 +659,16 @@ class _BaseCausalLM(LLM):
yield chunk
def get_num_tokens(self, text: str) -> int:
"""Get the number of tokens that present in the text.
Useful for checking if an input will fit in a model's context window.
Args:
text: The string input to tokenize.
Returns:
The number of tokens in the text.
"""
tokenized_text = self.client.tokenize(text.encode("utf-8"))
return len(tokenized_text)

View file

@ -71,9 +71,9 @@ class TransformersLLM(LLM):
model_id: str = DEFAULT_MODEL_ID
"""Model name or model path to use."""
model_kwargs: Optional[dict] = None
"""Key word arguments passed to the model."""
"""Keyword arguments passed to the model."""
model: Any #: :meta private:
"""BigDL-LLM Transformer-INT4 model."""
"""BigDL-LLM Transformers-INT4 model."""
tokenizer: Any #: :meta private:
"""Huggingface tokenizer model."""
streaming: bool = True
@ -91,7 +91,18 @@ class TransformersLLM(LLM):
model_kwargs: Optional[dict] = None,
**kwargs: Any,
) -> LLM:
"""Construct object from model_id"""
"""
Construct object from model_id
Args:
model_id: Path for the huggingface repo id to be downloaded or
the huggingface checkpoint folder.
model_kwargs: Keyword arguments that will be passed to the model and tokenizer.
kwargs: Extra arguments that will be passed to the model and tokenizer.
Returns: An object of TransformersLLM.
"""
try:
from bigdl.llm.transformers import (
AutoModel,
@ -139,7 +150,17 @@ class TransformersLLM(LLM):
model_kwargs: Optional[dict] = None,
**kwargs: Any,
) -> LLM:
"""Construct object from model_id"""
"""
Construct low_bit object from model_id
Args:
model_id: Path for the bigdl transformers low-bit model checkpoint folder.
model_kwargs: Keyword arguments that will be passed to the model and tokenizer.
kwargs: Extra arguments that will be passed to the model and tokenizer.
Returns: An object of TransformersLLM.
"""
try:
from bigdl.llm.transformers import (
AutoModel,