[LLM Docs] Remaining API Docs Issues Solution (#8780)

* langchain readthedocs update

* solve langchain.llms.transformersllm issues

* langchain.embeddings.transformersembeddings/transformersllm issues

* update docs for get_num_tokens

* add low_bit api doc

* add optimizer model api doc

* update rst index

* fix comments style

* update docs following the comments

* update api doc
This commit is contained in:
SONG Ge 2023-09-06 16:29:34 +08:00 committed by GitHub
parent cf6a620bae
commit 7a71ced78f
6 changed files with 164 additions and 11 deletions

View file

@ -2,7 +2,8 @@ BigDL-LLM API
================== ==================
.. toctree:: .. toctree::
:maxdepth: 2 :maxdepth: 3
transformers.rst transformers.rst
langchain.rst langchain.rst
optimize.rst

View file

@ -4,10 +4,59 @@ BigDL-LLM LangChain API
llm.langchain.embeddings.bigdlllm llm.langchain.embeddings.bigdlllm
---------------------------------------- ----------------------------------------
.. automodule:: bigdl.llm.langchain.embeddings.bigdlllm .. autoclass:: bigdl.llm.langchain.embeddings.bigdlllm.LlamaEmbeddings
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: embed_documents
.. automethod:: embed_query
.. autoclass:: bigdl.llm.langchain.embeddings.bigdlllm.BloomEmbeddings
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: embed_documents
.. automethod:: embed_query
.. autoclass:: bigdl.llm.langchain.embeddings.bigdlllm.GptneoxEmbeddings
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: embed_documents
.. automethod:: embed_query
.. autoclass:: bigdl.llm.langchain.embeddings.bigdlllm.ChatGLMEmbeddings
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: embed_documents
.. automethod:: embed_query
.. autoclass:: bigdl.llm.langchain.embeddings.bigdlllm.StarcoderEmbeddings
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: embed_documents
.. automethod:: embed_query
llm.langchain.embeddings.transformersembeddings llm.langchain.embeddings.transformersembeddings
@ -22,10 +71,59 @@ llm.langchain.embeddings.transformersembeddings
llm.langchain.llms.bigdlllm llm.langchain.llms.bigdlllm
---------------------------------------- ----------------------------------------
.. automodule:: bigdl.llm.langchain.llms.bigdlllm .. autoclass:: bigdl.llm.langchain.llms.bigdlllm.LlamaLLM
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: stream
.. automethod:: get_num_tokens
.. autoclass:: bigdl.llm.langchain.llms.bigdlllm.BloomLLM
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: stream
.. automethod:: get_num_tokens
.. autoclass:: bigdl.llm.langchain.llms.bigdlllm.GptneoxLLM
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: stream
.. automethod:: get_num_tokens
.. autoclass:: bigdl.llm.langchain.llms.bigdlllm.ChatGLMLLM
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: stream
.. automethod:: get_num_tokens
.. autoclass:: bigdl.llm.langchain.llms.bigdlllm.StarcoderLLM
:members:
:undoc-members:
:show-inheritance:
:exclude-members: ggml_model , ggml_module
.. automethod:: validate_environment
.. automethod:: stream
.. automethod:: get_num_tokens
llm.langchain.llms.transformersllm llm.langchain.llms.transformersllm

View file

@ -0,0 +1,10 @@
BigDL-LLM Optimize API
======================
llm.optimize
----------------------------------------
.. automodule:: bigdl.llm.optimize
:members: optimize_model
:undoc-members:
:show-inheritance:

View file

@ -68,9 +68,11 @@ class TransformersEmbeddings(BaseModel, Embeddings):
""" """
model: Any #: :meta private: model: Any #: :meta private:
"""BigDL-LLM Transformers-INT4 model."""
tokenizer: Any #: :meta private: tokenizer: Any #: :meta private:
"""Huggingface tokenizer model."""
model_id: str = DEFAULT_MODEL_NAME model_id: str = DEFAULT_MODEL_NAME
"""Model id to use.""" """Model name or model path to use."""
model_kwargs: Dict[str, Any] = Field(default_factory=dict) model_kwargs: Dict[str, Any] = Field(default_factory=dict)
"""Keyword arguments to pass to the model.""" """Keyword arguments to pass to the model."""
encode_kwargs: Dict[str, Any] = Field(default_factory=dict) encode_kwargs: Dict[str, Any] = Field(default_factory=dict)
@ -83,7 +85,18 @@ class TransformersEmbeddings(BaseModel, Embeddings):
model_kwargs: Optional[dict] = None, model_kwargs: Optional[dict] = None,
**kwargs: Any, **kwargs: Any,
): ):
"""Construct object from model_id""" """
Construct object from model_id.
Args:
model_id: Path for the huggingface repo id to be downloaded or the huggingface
checkpoint folder.
model_kwargs: Keyword arguments that will be passed to the model and tokenizer.
kwargs: Extra arguments that will be passed to the model and tokenizer.
Returns: An object of TransformersEmbeddings.
"""
try: try:
from bigdl.llm.transformers import AutoModel from bigdl.llm.transformers import AutoModel
from transformers import AutoTokenizer, LlamaTokenizer from transformers import AutoTokenizer, LlamaTokenizer

View file

@ -659,6 +659,16 @@ class _BaseCausalLM(LLM):
yield chunk yield chunk
def get_num_tokens(self, text: str) -> int: def get_num_tokens(self, text: str) -> int:
"""Get the number of tokens that present in the text.
Useful for checking if an input will fit in a model's context window.
Args:
text: The string input to tokenize.
Returns:
The number of tokens in the text.
"""
tokenized_text = self.client.tokenize(text.encode("utf-8")) tokenized_text = self.client.tokenize(text.encode("utf-8"))
return len(tokenized_text) return len(tokenized_text)

View file

@ -73,7 +73,7 @@ class TransformersLLM(LLM):
model_kwargs: Optional[dict] = None model_kwargs: Optional[dict] = None
"""Keyword arguments passed to the model.""" """Keyword arguments passed to the model."""
model: Any #: :meta private: model: Any #: :meta private:
"""BigDL-LLM Transformer-INT4 model.""" """BigDL-LLM Transformers-INT4 model."""
tokenizer: Any #: :meta private: tokenizer: Any #: :meta private:
"""Huggingface tokenizer model.""" """Huggingface tokenizer model."""
streaming: bool = True streaming: bool = True
@ -91,7 +91,18 @@ class TransformersLLM(LLM):
model_kwargs: Optional[dict] = None, model_kwargs: Optional[dict] = None,
**kwargs: Any, **kwargs: Any,
) -> LLM: ) -> LLM:
"""Construct object from model_id""" """
Construct object from model_id
Args:
model_id: Path for the huggingface repo id to be downloaded or
the huggingface checkpoint folder.
model_kwargs: Keyword arguments that will be passed to the model and tokenizer.
kwargs: Extra arguments that will be passed to the model and tokenizer.
Returns: An object of TransformersLLM.
"""
try: try:
from bigdl.llm.transformers import ( from bigdl.llm.transformers import (
AutoModel, AutoModel,
@ -139,7 +150,17 @@ class TransformersLLM(LLM):
model_kwargs: Optional[dict] = None, model_kwargs: Optional[dict] = None,
**kwargs: Any, **kwargs: Any,
) -> LLM: ) -> LLM:
"""Construct object from model_id""" """
Construct low_bit object from model_id
Args:
model_id: Path for the bigdl transformers low-bit model checkpoint folder.
model_kwargs: Keyword arguments that will be passed to the model and tokenizer.
kwargs: Extra arguments that will be passed to the model and tokenizer.
Returns: An object of TransformersLLM.
"""
try: try:
from bigdl.llm.transformers import ( from bigdl.llm.transformers import (
AutoModel, AutoModel,