From db37edae8a5a961f337fa4f042db1ab22c786471 Mon Sep 17 00:00:00 2001 From: binbin Deng <108676127+plusbang@users.noreply.github.com> Date: Tue, 24 Oct 2023 10:13:41 +0800 Subject: [PATCH] LLM: update langchain api document page (#9222) --- .../KeyFeatures/hugging_face_format.md | 6 +- .../LLM/Overview/KeyFeatures/langchain_api.md | 4 +- .../LLM/Overview/KeyFeatures/native_format.md | 2 +- .../source/doc/PythonAPI/LLM/langchain.rst | 244 ++++++++++-------- docs/readthedocs/source/index.rst | 4 +- .../embeddings/transformersembeddings.py | 3 +- .../llm/langchain/llms/transformersllm.py | 6 +- 7 files changed, 146 insertions(+), 123 deletions(-) diff --git a/docs/readthedocs/source/doc/LLM/Overview/KeyFeatures/hugging_face_format.md b/docs/readthedocs/source/doc/LLM/Overview/KeyFeatures/hugging_face_format.md index 11b1a040..ef3c6238 100644 --- a/docs/readthedocs/source/doc/LLM/Overview/KeyFeatures/hugging_face_format.md +++ b/docs/readthedocs/source/doc/LLM/Overview/KeyFeatures/hugging_face_format.md @@ -25,7 +25,7 @@ output = tokenizer.batch_decode(output_ids) ```eval_rst .. seealso:: - See the complete examples `here `_ + See the complete CPU examples `here `_ and GPU examples `here `_. .. note:: @@ -35,7 +35,7 @@ output = tokenizer.batch_decode(output_ids) model = AutoModelForCausalLM.from_pretrained('/path/to/model/', load_in_low_bit="sym_int5") - See the complete example `here `_. + See the CPU example `here `_ and GPU example `here `_. ``` ## Save & Load @@ -50,5 +50,5 @@ new_model = AutoModelForCausalLM.load_low_bit(model_path) ```eval_rst .. 
seealso:: - See the examples `here `_ + See the CPU example `here `_ and GPU example `here `_ ``` \ No newline at end of file diff --git a/docs/readthedocs/source/doc/LLM/Overview/KeyFeatures/langchain_api.md b/docs/readthedocs/source/doc/LLM/Overview/KeyFeatures/langchain_api.md index c954402b..8ec3f433 100644 --- a/docs/readthedocs/source/doc/LLM/Overview/KeyFeatures/langchain_api.md +++ b/docs/readthedocs/source/doc/LLM/Overview/KeyFeatures/langchain_api.md @@ -21,7 +21,7 @@ output = doc_chain.run(...) ```eval_rst .. seealso:: - See the examples `here `_. + See the examples `here `_. ``` ## Using Native INT4 Format @@ -53,5 +53,5 @@ doc_chain.run(...) ```eval_rst .. seealso:: - See the examples `here `_. + See the examples `here `_. ``` diff --git a/docs/readthedocs/source/doc/LLM/Overview/KeyFeatures/native_format.md b/docs/readthedocs/source/doc/LLM/Overview/KeyFeatures/native_format.md index ce70a980..e66d68fd 100644 --- a/docs/readthedocs/source/doc/LLM/Overview/KeyFeatures/native_format.md +++ b/docs/readthedocs/source/doc/LLM/Overview/KeyFeatures/native_format.md @@ -28,5 +28,5 @@ output = llm.batch_decode(output_ids) ```eval_rst .. seealso:: - See the complete example `here `_ + See the complete example `here `_ ``` \ No newline at end of file diff --git a/docs/readthedocs/source/doc/PythonAPI/LLM/langchain.rst b/docs/readthedocs/source/doc/PythonAPI/LLM/langchain.rst index d95b7bee..445e71f8 100644 --- a/docs/readthedocs/source/doc/PythonAPI/LLM/langchain.rst +++ b/docs/readthedocs/source/doc/PythonAPI/LLM/langchain.rst @@ -1,144 +1,164 @@ BigDL-LLM LangChain API ===================== -llm.langchain.embeddings.bigdlllm +LLM Wrapper of LangChain ---------------------------------------- -.. autoclass:: bigdl.llm.langchain.embeddings.bigdlllm.LlamaEmbeddings - :members: - :undoc-members: - :show-inheritance: - :exclude-members: ggml_model , ggml_module +Hugging Face ``transformers`` Format +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - .. 
automethod:: validate_environment - .. automethod:: embed_documents - .. automethod:: embed_query +BigDL-LLM provides ``TransformersLLM`` and ``TransformersPipelineLLM``, which implement the standard interface of LLM wrapper of LangChain. + +.. tabs:: + + .. tab:: AutoModel + + .. automodule:: bigdl.llm.langchain.llms.transformersllm + :members: + :undoc-members: + :show-inheritance: + :exclude-members: model_id, model_kwargs, model, tokenizer, streaming, Config + + .. tab:: pipeline + + .. automodule:: bigdl.llm.langchain.llms.transformerspipelinellm + :members: + :undoc-members: + :show-inheritance: + :exclude-members: pipeline, model_id, model_kwargs, pipeline_kwargs, Config -.. autoclass:: bigdl.llm.langchain.embeddings.bigdlllm.BloomEmbeddings - :members: - :undoc-members: - :show-inheritance: - :exclude-members: ggml_model , ggml_module +Native Model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - .. automethod:: validate_environment - .. automethod:: embed_documents - .. automethod:: embed_query +For ``llama``/``chatglm``/``bloom``/``gptneox``/``starcoder`` model families, you could also use the following LLM wrappers with the native (cpp) implementation for maximum performance. + +.. tabs:: + + .. tab:: Llama + + .. autoclass:: bigdl.llm.langchain.llms.bigdlllm.LlamaLLM + :members: + :undoc-members: + :show-inheritance: + :exclude-members: ggml_model, ggml_module, client, model_path, kwargs + + .. automethod:: validate_environment + .. automethod:: stream + .. automethod:: get_num_tokens + + .. tab:: ChatGLM + + .. autoclass:: bigdl.llm.langchain.llms.bigdlllm.ChatGLMLLM + :members: + :undoc-members: + :show-inheritance: + :exclude-members: ggml_model, ggml_module, client, model_path, kwargs + + .. automethod:: validate_environment + .. automethod:: stream + .. automethod:: get_num_tokens + + .. tab:: Bloom + + .. 
autoclass:: bigdl.llm.langchain.llms.bigdlllm.BloomLLM + :members: + :undoc-members: + :show-inheritance: + :exclude-members: ggml_model, ggml_module, client, model_path, kwargs + + .. automethod:: validate_environment + .. automethod:: stream + .. automethod:: get_num_tokens + + .. tab:: Gptneox + + .. autoclass:: bigdl.llm.langchain.llms.bigdlllm.GptneoxLLM + :members: + :undoc-members: + :show-inheritance: + :exclude-members: ggml_model, ggml_module, client, model_path, kwargs + + .. automethod:: validate_environment + .. automethod:: stream + .. automethod:: get_num_tokens + + .. tab:: Starcoder + + .. autoclass:: bigdl.llm.langchain.llms.bigdlllm.StarcoderLLM + :members: + :undoc-members: + :show-inheritance: + :exclude-members: ggml_model, ggml_module, client, model_path, kwargs + + .. automethod:: validate_environment + .. automethod:: stream + .. automethod:: get_num_tokens -.. autoclass:: bigdl.llm.langchain.embeddings.bigdlllm.GptneoxEmbeddings - :members: - :undoc-members: - :show-inheritance: - :exclude-members: ggml_model , ggml_module +Embeddings Wrapper of LangChain +---------------------------------------- - .. automethod:: validate_environment - .. automethod:: embed_documents - .. automethod:: embed_query - - -.. autoclass:: bigdl.llm.langchain.embeddings.bigdlllm.ChatGLMEmbeddings - :members: - :undoc-members: - :show-inheritance: - :exclude-members: ggml_model , ggml_module - - .. automethod:: validate_environment - .. automethod:: embed_documents - .. automethod:: embed_query - - -.. autoclass:: bigdl.llm.langchain.embeddings.bigdlllm.StarcoderEmbeddings - :members: - :undoc-members: - :show-inheritance: - :exclude-members: ggml_model , ggml_module - - .. automethod:: validate_environment - .. automethod:: embed_documents - .. automethod:: embed_query - - -llm.langchain.embeddings.transformersembeddings --------------------------------------------------- +Hugging Face ``transformers`` AutoModel +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
automodule:: bigdl.llm.langchain.embeddings.transformersembeddings :members: :undoc-members: :show-inheritance: + :exclude-members: model, tokenizer, model_id, model_kwargs, encode_kwargs, Config +Native Model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -llm.langchain.llms.bigdlllm ----------------------------------------- +For ``llama``/``bloom``/``gptneox``/``starcoder`` model families, you could also use the following wrappers. -.. autoclass:: bigdl.llm.langchain.llms.bigdlllm.LlamaLLM - :members: - :undoc-members: - :show-inheritance: - :exclude-members: ggml_model , ggml_module +.. tabs:: - .. automethod:: validate_environment - .. automethod:: stream - .. automethod:: get_num_tokens + .. tab:: Llama + .. autoclass:: bigdl.llm.langchain.embeddings.bigdlllm.LlamaEmbeddings + :members: + :undoc-members: + :show-inheritance: + :exclude-members: ggml_model, ggml_module, client, model_path, kwargs -.. autoclass:: bigdl.llm.langchain.llms.bigdlllm.BloomLLM - :members: - :undoc-members: - :show-inheritance: - :exclude-members: ggml_model , ggml_module + .. automethod:: validate_environment + .. automethod:: embed_documents + .. automethod:: embed_query - .. automethod:: validate_environment - .. automethod:: stream - .. automethod:: get_num_tokens + .. tab:: Bloom + .. autoclass:: bigdl.llm.langchain.embeddings.bigdlllm.BloomEmbeddings + :members: + :undoc-members: + :show-inheritance: + :exclude-members: ggml_model, ggml_module, client, model_path, kwargs -.. autoclass:: bigdl.llm.langchain.llms.bigdlllm.GptneoxLLM - :members: - :undoc-members: - :show-inheritance: - :exclude-members: ggml_model , ggml_module + .. automethod:: validate_environment + .. automethod:: embed_documents + .. automethod:: embed_query - .. automethod:: validate_environment - .. automethod:: stream - .. automethod:: get_num_tokens + .. tab:: Gptneox + .. 
autoclass:: bigdl.llm.langchain.embeddings.bigdlllm.GptneoxEmbeddings + :members: + :undoc-members: + :show-inheritance: + :exclude-members: ggml_model, ggml_module, client, model_path, kwargs -.. autoclass:: bigdl.llm.langchain.llms.bigdlllm.ChatGLMLLM - :members: - :undoc-members: - :show-inheritance: - :exclude-members: ggml_model , ggml_module + .. automethod:: validate_environment + .. automethod:: embed_documents + .. automethod:: embed_query - .. automethod:: validate_environment - .. automethod:: stream - .. automethod:: get_num_tokens + .. tab:: Starcoder + .. autoclass:: bigdl.llm.langchain.embeddings.bigdlllm.StarcoderEmbeddings + :members: + :undoc-members: + :show-inheritance: + :exclude-members: ggml_model, ggml_module, client, model_path, kwargs -.. autoclass:: bigdl.llm.langchain.llms.bigdlllm.StarcoderLLM - :members: - :undoc-members: - :show-inheritance: - :exclude-members: ggml_model , ggml_module - - .. automethod:: validate_environment - .. automethod:: stream - .. automethod:: get_num_tokens - - -llm.langchain.llms.transformersllm ----------------------------------------- - -.. automodule:: bigdl.llm.langchain.llms.transformersllm - :members: - :undoc-members: - :show-inheritance: - - -llm.langchain.llms.transformerspipelinellm ---------------------------------------------- - -.. automodule:: bigdl.llm.langchain.llms.transformerspipelinellm - :members: - :undoc-members: - :show-inheritance: + .. automethod:: validate_environment + .. automethod:: embed_documents + .. automethod:: embed_query diff --git a/docs/readthedocs/source/index.rst b/docs/readthedocs/source/index.rst index 85a1ad5c..15040696 100644 --- a/docs/readthedocs/source/index.rst +++ b/docs/readthedocs/source/index.rst @@ -24,8 +24,8 @@ BigDL-LLM: low-Bit LLM library ============================================ Latest update ============================================ -- **[New]** ``bigdl-llm`` now supports QLoRA fintuning on Intel GPU; see the the example `here `_. 
-- ``bigdl-llm`` now supports Intel GPU (including Arc, Flex and MAX); see the the latest GPU examples `here `_. +- **[New]** ``bigdl-llm`` now supports QLoRA finetuning on Intel GPU; see the example `here `_. +- ``bigdl-llm`` now supports Intel GPU (including Arc, Flex and MAX); see the latest GPU examples `here `_. - ``bigdl-llm`` tutorial is released `here `_. - Over 20 models have been verified on ``bigdl-llm``, including *LLaMA/LLaMA2, ChatGLM/ChatGLM2, MPT, Falcon, Dolly, StarCoder, Whisper, InternLM, QWen, Baichuan, Aquila, MOSS* and more; see the complete list `here `_. diff --git a/python/llm/src/bigdl/llm/langchain/embeddings/transformersembeddings.py b/python/llm/src/bigdl/llm/langchain/embeddings/transformersembeddings.py index a4b78200..d3974893 100644 --- a/python/llm/src/bigdl/llm/langchain/embeddings/transformersembeddings.py +++ b/python/llm/src/bigdl/llm/langchain/embeddings/transformersembeddings.py @@ -95,7 +95,8 @@ class TransformersEmbeddings(BaseModel, Embeddings): model_kwargs: Keyword arguments that will be passed to the model and tokenizer. kwargs: Extra arguments that will be passed to the model and tokenizer. - Returns: An object of TransformersEmbeddings. + Returns: + An object of TransformersEmbeddings. """ try: from bigdl.llm.transformers import AutoModel diff --git a/python/llm/src/bigdl/llm/langchain/llms/transformersllm.py b/python/llm/src/bigdl/llm/langchain/llms/transformersllm.py index 787d2504..34dc4e03 100644 --- a/python/llm/src/bigdl/llm/langchain/llms/transformersllm.py +++ b/python/llm/src/bigdl/llm/langchain/llms/transformersllm.py @@ -101,7 +101,8 @@ class TransformersLLM(LLM): model_kwargs: Keyword arguments that will be passed to the model and tokenizer. kwargs: Extra arguments that will be passed to the model and tokenizer. - Returns: An object of TransformersLLM. + Returns: + An object of TransformersLLM. 
""" try: from bigdl.llm.transformers import ( @@ -159,7 +160,8 @@ class TransformersLLM(LLM): model_kwargs: Keyword arguments that will be passed to the model and tokenizer. kwargs: Extra arguments that will be passed to the model and tokenizer. - Returns: An object of TransformersLLM. + Returns: + An object of TransformersLLM. """ try: from bigdl.llm.transformers import (