[LLM Docs] Remaining API Docs Issues Solution (#8780)

* langchain readthedocs update

* solve langchain.llms.transformersllm issues

* langchain.embeddings.transformersembeddings/transfortmersllms issues

* update docs for get_num_tokens

* add low_bit api doc

* add optimizer model api doc

* update rst index

* fix comments style

* update docs following the comments

* update api doc
This commit is contained in:
SONG Ge 2023-09-06 16:29:34 +08:00 committed by GitHub
parent cf6a620bae
commit 7a71ced78f
6 changed files with 164 additions and 11 deletions

View file

@ -2,7 +2,8 @@ BigDL-LLM API
==================
.. toctree::
:maxdepth: 2
:maxdepth: 3
transformers.rst
langchain.rst
optimize.rst

View file

@ -4,10 +4,59 @@ BigDL-LLM LangChain API
llm.langchain.embeddings.bigdlllm
----------------------------------------
.. automodule:: bigdl.llm.langchain.embeddings.bigdlllm
.. autoclass:: bigdl.llm.langchain.embeddings.bigdlllm.LlamaEmbeddings
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: embed_documents
.. automethod:: embed_query
.. autoclass:: bigdl.llm.langchain.embeddings.bigdlllm.BloomEmbeddings
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: embed_documents
.. automethod:: embed_query
.. autoclass:: bigdl.llm.langchain.embeddings.bigdlllm.GptneoxEmbeddings
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: embed_documents
.. automethod:: embed_query
.. autoclass:: bigdl.llm.langchain.embeddings.bigdlllm.ChatGLMEmbeddings
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: embed_documents
.. automethod:: embed_query
.. autoclass:: bigdl.llm.langchain.embeddings.bigdlllm.StarcoderEmbeddings
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: embed_documents
.. automethod:: embed_query
llm.langchain.embeddings.transformersembeddings
@ -22,10 +71,59 @@ llm.langchain.embeddings.transformersembeddings
llm.langchain.llms.bigdlllm
----------------------------------------
.. automodule:: bigdl.llm.langchain.llms.bigdlllm
.. autoclass:: bigdl.llm.langchain.llms.bigdlllm.LlamaLLM
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: stream
.. automethod:: get_num_tokens
.. autoclass:: bigdl.llm.langchain.llms.bigdlllm.BloomLLM
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: stream
.. automethod:: get_num_tokens
.. autoclass:: bigdl.llm.langchain.llms.bigdlllm.GptneoxLLM
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: stream
.. automethod:: get_num_tokens
.. autoclass:: bigdl.llm.langchain.llms.bigdlllm.ChatGLMLLM
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: stream
.. automethod:: get_num_tokens
.. autoclass:: bigdl.llm.langchain.llms.bigdlllm.StarcoderLLM
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: stream
.. automethod:: get_num_tokens
llm.langchain.llms.transformersllm

View file

@ -0,0 +1,10 @@
BigDL-LLM Optimize API
======================
llm.optimize
----------------------------------------
.. automodule:: bigdl.llm.optimize
:members: optimize_model
:undoc-members:
:show-inheritance:

View file

@ -68,13 +68,15 @@ class TransformersEmbeddings(BaseModel, Embeddings):
"""
model: Any #: :meta private:
"""BigDL-LLM Transformers-INT4 model."""
tokenizer: Any #: :meta private:
"""Huggingface tokenizer model."""
model_id: str = DEFAULT_MODEL_NAME
"""Model id to use."""
"""Model name or model path to use."""
model_kwargs: Dict[str, Any] = Field(default_factory=dict)
"""Key word arguments to pass to the model."""
"""Keyword arguments to pass to the model."""
encode_kwargs: Dict[str, Any] = Field(default_factory=dict)
"""Key word arguments to pass when calling the `encode` method of the model."""
"""Keyword arguments to pass when calling the `encode` method of the model."""
@classmethod
def from_model_id(
@ -83,7 +85,18 @@ class TransformersEmbeddings(BaseModel, Embeddings):
model_kwargs: Optional[dict] = None,
**kwargs: Any,
):
"""Construct object from model_id"""
"""
Construct object from model_id.
Args:
model_id: Path for the huggingface repo id to be downloaded or the huggingface
checkpoint folder.
model_kwargs: Keyword arguments that will be passed to the model and tokenizer.
kwargs: Extra arguments that will be passed to the model and tokenizer.
Returns: An object of TransformersEmbeddings.
"""
try:
from bigdl.llm.transformers import AutoModel
from transformers import AutoTokenizer, LlamaTokenizer

View file

@ -659,6 +659,16 @@ class _BaseCausalLM(LLM):
yield chunk
def get_num_tokens(self, text: str) -> int:
"""Get the number of tokens that present in the text.
Useful for checking if an input will fit in a model's context window.
Args:
text: The string input to tokenize.
Returns:
The number of tokens in the text.
"""
tokenized_text = self.client.tokenize(text.encode("utf-8"))
return len(tokenized_text)

View file

@ -71,9 +71,9 @@ class TransformersLLM(LLM):
model_id: str = DEFAULT_MODEL_ID
"""Model name or model path to use."""
model_kwargs: Optional[dict] = None
"""Key word arguments passed to the model."""
"""Keyword arguments passed to the model."""
model: Any #: :meta private:
"""BigDL-LLM Transformer-INT4 model."""
"""BigDL-LLM Transformers-INT4 model."""
tokenizer: Any #: :meta private:
"""Huggingface tokenizer model."""
streaming: bool = True
@ -91,7 +91,18 @@ class TransformersLLM(LLM):
model_kwargs: Optional[dict] = None,
**kwargs: Any,
) -> LLM:
"""Construct object from model_id"""
"""
Construct object from model_id
Args:
model_id: Path for the huggingface repo id to be downloaded or
the huggingface checkpoint folder.
model_kwargs: Keyword arguments that will be passed to the model and tokenizer.
kwargs: Extra arguments that will be passed to the model and tokenizer.
Returns: An object of TransformersLLM.
"""
try:
from bigdl.llm.transformers import (
AutoModel,
@ -139,7 +150,17 @@ class TransformersLLM(LLM):
model_kwargs: Optional[dict] = None,
**kwargs: Any,
) -> LLM:
"""Construct object from model_id"""
"""
Construct low_bit object from model_id
Args:
model_id: Path for the bigdl transformers low-bit model checkpoint folder.
model_kwargs: Keyword arguments that will be passed to the model and tokenizer.
kwargs: Extra arguments that will be passed to the model and tokenizer.
Returns: An object of TransformersLLM.
"""
try:
from bigdl.llm.transformers import (
AutoModel,