From 7a71ced78f78f17c9a4c65847c16ff0e7e6a08e0 Mon Sep 17 00:00:00 2001 From: SONG Ge <38711238+sgwhat@users.noreply.github.com> Date: Wed, 6 Sep 2023 16:29:34 +0800 Subject: [PATCH] [LLM Docs] Remain API Docs Issues Solution (#8780) * langchain readthedocs update * solve langchain.llms.transformersllm issues * langchain.embeddings.transformersembeddings/transfortmersllms issues * update docs for get_num_tokens * add low_bit api doc * add optimizer model api doc * update rst index * fix coomments style * update docs following the comments * update api doc --- .../source/doc/PythonAPI/LLM/index.rst | 3 +- .../source/doc/PythonAPI/LLM/langchain.rst | 102 +++++++++++++++++- .../source/doc/PythonAPI/LLM/optimize.rst | 10 ++ .../embeddings/transformersembeddings.py | 21 +++- .../src/bigdl/llm/langchain/llms/bigdlllm.py | 10 ++ .../llm/langchain/llms/transformersllm.py | 29 ++++- 6 files changed, 164 insertions(+), 11 deletions(-) create mode 100644 docs/readthedocs/source/doc/PythonAPI/LLM/optimize.rst diff --git a/docs/readthedocs/source/doc/PythonAPI/LLM/index.rst b/docs/readthedocs/source/doc/PythonAPI/LLM/index.rst index e70ab862..ea8d4fc0 100644 --- a/docs/readthedocs/source/doc/PythonAPI/LLM/index.rst +++ b/docs/readthedocs/source/doc/PythonAPI/LLM/index.rst @@ -2,7 +2,8 @@ BigDL-LLM API ================== .. toctree:: - :maxdepth: 2 + :maxdepth: 3 transformers.rst langchain.rst + optimize.rst diff --git a/docs/readthedocs/source/doc/PythonAPI/LLM/langchain.rst b/docs/readthedocs/source/doc/PythonAPI/LLM/langchain.rst index ac6eb250..d95b7bee 100644 --- a/docs/readthedocs/source/doc/PythonAPI/LLM/langchain.rst +++ b/docs/readthedocs/source/doc/PythonAPI/LLM/langchain.rst @@ -4,10 +4,59 @@ BigDL-LLM LangChain API llm.langchain.embeddings.bigdlllm ---------------------------------------- -.. automodule:: bigdl.llm.langchain.embeddings.bigdlllm +.. 
autoclass:: bigdl.llm.langchain.embeddings.bigdlllm.LlamaEmbeddings :members: :undoc-members: :show-inheritance: + :exclude-members: ggml_model , ggml_module + + .. automethod:: validate_environment + .. automethod:: embed_documents + .. automethod:: embed_query + + +.. autoclass:: bigdl.llm.langchain.embeddings.bigdlllm.BloomEmbeddings + :members: + :undoc-members: + :show-inheritance: + :exclude-members: ggml_model , ggml_module + + .. automethod:: validate_environment + .. automethod:: embed_documents + .. automethod:: embed_query + + +.. autoclass:: bigdl.llm.langchain.embeddings.bigdlllm.GptneoxEmbeddings + :members: + :undoc-members: + :show-inheritance: + :exclude-members: ggml_model , ggml_module + + .. automethod:: validate_environment + .. automethod:: embed_documents + .. automethod:: embed_query + + +.. autoclass:: bigdl.llm.langchain.embeddings.bigdlllm.ChatGLMEmbeddings + :members: + :undoc-members: + :show-inheritance: + :exclude-members: ggml_model , ggml_module + + .. automethod:: validate_environment + .. automethod:: embed_documents + .. automethod:: embed_query + + +.. autoclass:: bigdl.llm.langchain.embeddings.bigdlllm.StarcoderEmbeddings + :members: + :undoc-members: + :show-inheritance: + :exclude-members: ggml_model , ggml_module + + .. automethod:: validate_environment + .. automethod:: embed_documents + .. automethod:: embed_query llm.langchain.embeddings.transformersembeddings @@ -22,10 +71,59 @@ llm.langchain.embeddings.transformersembeddings llm.langchain.llms.bigdlllm ---------------------------------------- -.. automodule:: bigdl.llm.langchain.llms.bigdlllm +.. autoclass:: bigdl.llm.langchain.llms.bigdlllm.LlamaLLM :members: :undoc-members: :show-inheritance: + :exclude-members: ggml_model , ggml_module + + .. automethod:: validate_environment + .. automethod:: stream + .. automethod:: get_num_tokens + + +.. 
autoclass:: bigdl.llm.langchain.llms.bigdlllm.BloomLLM
+    :members:
+    :undoc-members:
+    :show-inheritance:
+    :exclude-members: ggml_model , ggml_module
+
+    .. automethod:: validate_environment
+    .. automethod:: stream
+    .. automethod:: get_num_tokens
+
+
+.. autoclass:: bigdl.llm.langchain.llms.bigdlllm.GptneoxLLM
+    :members:
+    :undoc-members:
+    :show-inheritance:
+    :exclude-members: ggml_model , ggml_module
+
+    .. automethod:: validate_environment
+    .. automethod:: stream
+    .. automethod:: get_num_tokens
+
+
+.. autoclass:: bigdl.llm.langchain.llms.bigdlllm.ChatGLMLLM
+    :members:
+    :undoc-members:
+    :show-inheritance:
+    :exclude-members: ggml_model , ggml_module
+
+    .. automethod:: validate_environment
+    .. automethod:: stream
+    .. automethod:: get_num_tokens
+
+
+.. autoclass:: bigdl.llm.langchain.llms.bigdlllm.StarcoderLLM
+    :members:
+    :undoc-members:
+    :show-inheritance:
+    :exclude-members: ggml_model , ggml_module
+
+    .. automethod:: validate_environment
+    .. automethod:: stream
+    .. automethod:: get_num_tokens
 
 
 llm.langchain.llms.transformersllm
diff --git a/docs/readthedocs/source/doc/PythonAPI/LLM/optimize.rst b/docs/readthedocs/source/doc/PythonAPI/LLM/optimize.rst
new file mode 100644
index 00000000..a6949247
--- /dev/null
+++ b/docs/readthedocs/source/doc/PythonAPI/LLM/optimize.rst
@@ -0,0 +1,10 @@
+BigDL-LLM Optimize API
+======================
+
+llm.optimize
+----------------------------------------
+
+.. 
automodule:: bigdl.llm.optimize + :members: optimize_model + :undoc-members: + :show-inheritance: diff --git a/python/llm/src/bigdl/llm/langchain/embeddings/transformersembeddings.py b/python/llm/src/bigdl/llm/langchain/embeddings/transformersembeddings.py index 51a98dc1..a4b78200 100644 --- a/python/llm/src/bigdl/llm/langchain/embeddings/transformersembeddings.py +++ b/python/llm/src/bigdl/llm/langchain/embeddings/transformersembeddings.py @@ -68,13 +68,15 @@ class TransformersEmbeddings(BaseModel, Embeddings): """ model: Any #: :meta private: + """BigDL-LLM Transformers-INT4 model.""" tokenizer: Any #: :meta private: + """Huggingface tokenizer model.""" model_id: str = DEFAULT_MODEL_NAME - """Model id to use.""" + """Model name or model path to use.""" model_kwargs: Dict[str, Any] = Field(default_factory=dict) - """Key word arguments to pass to the model.""" + """Keyword arguments to pass to the model.""" encode_kwargs: Dict[str, Any] = Field(default_factory=dict) - """Key word arguments to pass when calling the `encode` method of the model.""" + """Keyword arguments to pass when calling the `encode` method of the model.""" @classmethod def from_model_id( @@ -83,7 +85,18 @@ class TransformersEmbeddings(BaseModel, Embeddings): model_kwargs: Optional[dict] = None, **kwargs: Any, ): - """Construct object from model_id""" + """ + Construct object from model_id. + + Args: + + model_id: Path for the huggingface repo id to be downloaded or the huggingface + checkpoint folder. + model_kwargs: Keyword arguments that will be passed to the model and tokenizer. + kwargs: Extra arguments that will be passed to the model and tokenizer. + + Returns: An object of TransformersEmbeddings. 
+        """
         try:
             from bigdl.llm.transformers import AutoModel
             from transformers import AutoTokenizer, LlamaTokenizer
diff --git a/python/llm/src/bigdl/llm/langchain/llms/bigdlllm.py b/python/llm/src/bigdl/llm/langchain/llms/bigdlllm.py
index 31d93503..dce33518 100644
--- a/python/llm/src/bigdl/llm/langchain/llms/bigdlllm.py
+++ b/python/llm/src/bigdl/llm/langchain/llms/bigdlllm.py
@@ -659,6 +659,16 @@ class _BaseCausalLM(LLM):
             yield chunk
 
     def get_num_tokens(self, text: str) -> int:
+        """Get the number of tokens present in the text.
+
+        Useful for checking if an input will fit in a model's context window.
+
+        Args:
+            text: The string input to tokenize.
+
+        Returns:
+            The number of tokens in the text.
+        """
         tokenized_text = self.client.tokenize(text.encode("utf-8"))
         return len(tokenized_text)
 
diff --git a/python/llm/src/bigdl/llm/langchain/llms/transformersllm.py b/python/llm/src/bigdl/llm/langchain/llms/transformersllm.py
index 6b5385a8..787d2504 100644
--- a/python/llm/src/bigdl/llm/langchain/llms/transformersllm.py
+++ b/python/llm/src/bigdl/llm/langchain/llms/transformersllm.py
@@ -71,9 +71,9 @@ class TransformersLLM(LLM):
     model_id: str = DEFAULT_MODEL_ID
     """Model name or model path to use."""
     model_kwargs: Optional[dict] = None
-    """Key word arguments passed to the model."""
+    """Keyword arguments passed to the model."""
     model: Any #: :meta private:
-    """BigDL-LLM Transformer-INT4 model."""
+    """BigDL-LLM Transformers-INT4 model."""
     tokenizer: Any #: :meta private:
     """Huggingface tokenizer model."""
     streaming: bool = True
@@ -91,7 +91,18 @@ class TransformersLLM(LLM):
         model_kwargs: Optional[dict] = None,
         **kwargs: Any,
     ) -> LLM:
-        """Construct object from model_id"""
+        """
+        Construct object from model_id.
+
+        Args:
+
+            model_id: Path for the huggingface repo id to be downloaded or
+                the huggingface checkpoint folder.
+            model_kwargs: Keyword arguments that will be passed to the model and tokenizer. 
+            kwargs: Extra arguments that will be passed to the model and tokenizer.
+
+        Returns: An object of TransformersLLM.
+        """
         try:
             from bigdl.llm.transformers import (
                 AutoModel,
@@ -139,7 +150,17 @@ class TransformersLLM(LLM):
         model_kwargs: Optional[dict] = None,
         **kwargs: Any,
     ) -> LLM:
-        """Construct object from model_id"""
+        """
+        Construct low_bit object from model_id.
+
+        Args:
+
+            model_id: Path for the bigdl transformers low-bit model checkpoint folder.
+            model_kwargs: Keyword arguments that will be passed to the model and tokenizer.
+            kwargs: Extra arguments that will be passed to the model and tokenizer.
+
+        Returns: An object of TransformersLLM.
+        """
         try:
             from bigdl.llm.transformers import (
                 AutoModel,