From 1b65288bdb065afd58fc7d3280feff6c1c0302c3 Mon Sep 17 00:00:00 2001
From: Kai Huang
Date: Tue, 8 Aug 2023 18:17:16 +0800
Subject: [PATCH] Add api doc for LLM (#8605)

* api doc initial

* update desc
---
 docs/readthedocs/source/_toc.yml              |  7 +++
 docs/readthedocs/source/conf.py               |  7 ++-
 docs/readthedocs/source/doc/LLM/index.rst     | 26 +++++++++
 .../source/doc/PythonAPI/LLM/index.rst        |  8 +++
 .../source/doc/PythonAPI/LLM/langchain.rst    | 46 ++++++++++++++++
 .../source/doc/PythonAPI/LLM/transformers.rst | 54 +++++++++++++++++++
 .../source/doc/PythonAPI/Orca/orca.rst        | 11 ----
 .../bigdl/llm/ggml/model/bloom/bloom_cpp.py   |  2 +-
 .../llm/ggml/model/gptneox/gptneox_cpp.py     |  2 +-
 .../bigdl/llm/ggml/model/llama/llama_cpp.py   |  2 +-
 .../llm/ggml/model/starcoder/starcoder_cpp.py |  2 +-
 .../llm/langchain/embeddings/bigdlllm.py      |  2 +-
 .../src/bigdl/llm/langchain/llms/bigdlllm.py  |  2 +-
 .../llm/src/bigdl/llm/transformers/convert.py |  2 +-
 .../llm/src/bigdl/llm/transformers/model.py   | 12 ++--
 .../bigdl/llm/transformers/modelling_bigdl.py |  2 +-
 python/llm/src/bigdl/llm/utils/__init__.py    |  2 -
 17 files changed, 160 insertions(+), 29 deletions(-)
 create mode 100644 docs/readthedocs/source/doc/LLM/index.rst
 create mode 100644 docs/readthedocs/source/doc/PythonAPI/LLM/index.rst
 create mode 100644 docs/readthedocs/source/doc/PythonAPI/LLM/langchain.rst
 create mode 100644 docs/readthedocs/source/doc/PythonAPI/LLM/transformers.rst

diff --git a/docs/readthedocs/source/_toc.yml b/docs/readthedocs/source/_toc.yml
index fcfeaf6a..fdba5c9a 100644
--- a/docs/readthedocs/source/_toc.yml
+++ b/docs/readthedocs/source/_toc.yml
@@ -329,6 +329,13 @@ subtrees:
           - file: doc/PPML/QuickStart/tpc-ds_with_sparksql_on_k8s
           - file: doc/PPML/Overview/azure_ppml_occlum
           - file: doc/PPML/Overview/secure_lightgbm_on_spark
+  - entries:
+    - file: doc/LLM/index
+      title: "LLM"
+      subtrees:
+        - entries:
+          - file: doc/PythonAPI/LLM/index
+            title: "API Reference"
 
   - entries:
     - file: doc/UserGuide/contributor
diff --git a/docs/readthedocs/source/conf.py b/docs/readthedocs/source/conf.py
index f77875af..c38c45e2 100644
--- a/docs/readthedocs/source/conf.py
+++ b/docs/readthedocs/source/conf.py
@@ -18,7 +18,9 @@
 import glob
 import shutil
 import urllib
-autodoc_mock_imports = ["openvino", "pytorch_lightning", "keras", "cpuinfo", "sigfig", "prophet"]
+autodoc_mock_imports = ["openvino", "pytorch_lightning", "keras", "cpuinfo", "sigfig", "prophet",
+                        "accelerate", "langchain", "pydantic", "transformers", "ray", "sklearn", "torchmetrics",
+                        "pandas", "pmdarima", "scipy", "optuna", "cloudpickle", "xgboost", "filelock"]
 
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #sys.path.insert(0, '.')
@@ -29,11 +31,12 @@ sys.path.insert(0, os.path.abspath("../../../python/dllib/src/"))
 sys.path.insert(0, os.path.abspath("../../../python/orca/src/"))
 sys.path.insert(0, os.path.abspath("../../../python/serving/src/"))
 sys.path.insert(0, os.path.abspath("../../../python/nano/src/"))
+sys.path.insert(0, os.path.abspath("../../../python/llm/src/"))
 
 # -- Project information -----------------------------------------------------
 html_theme = "pydata_sphinx_theme"
 html_theme_options = {
-    "header_links_before_dropdown": 8,
+    "header_links_before_dropdown": 9,
     "icon_links": [
         {
             "name": "GitHub Repository for BigDL",
diff --git a/docs/readthedocs/source/doc/LLM/index.rst b/docs/readthedocs/source/doc/LLM/index.rst
new file mode 100644
index 00000000..383ada99
--- /dev/null
+++ b/docs/readthedocs/source/doc/LLM/index.rst
@@ -0,0 +1,26 @@
+BigDL-LLM
+=========================
+
+
+BigDL-LLM is a library for running LLMs (Large Language Models) on your Intel laptop using INT4 with very low latency (for any Hugging Face Transformers model).
+
+-------
+
+.. grid:: 1 2 2 2
+    :gutter: 2
+
+    .. grid-item-card::
+
+        **API Document**
+        ^^^
+
+        API Document provides a detailed description of the LLM APIs.
+
+        +++
+
+        :bdg-link:`API Document <../PythonAPI/LLM/index.html>`
+
+.. toctree::
+    :hidden:
+
+    BigDL-LLM Document
\ No newline at end of file
diff --git a/docs/readthedocs/source/doc/PythonAPI/LLM/index.rst b/docs/readthedocs/source/doc/PythonAPI/LLM/index.rst
new file mode 100644
index 00000000..599645cf
--- /dev/null
+++ b/docs/readthedocs/source/doc/PythonAPI/LLM/index.rst
@@ -0,0 +1,8 @@
+LLM API
+==================
+
+.. toctree::
+    :maxdepth: 2
+
+    transformers.rst
+    langchain.rst
diff --git a/docs/readthedocs/source/doc/PythonAPI/LLM/langchain.rst b/docs/readthedocs/source/doc/PythonAPI/LLM/langchain.rst
new file mode 100644
index 00000000..047a54fb
--- /dev/null
+++ b/docs/readthedocs/source/doc/PythonAPI/LLM/langchain.rst
@@ -0,0 +1,46 @@
+LLM LangChain API
+=====================
+
+llm.langchain.embeddings.bigdlllm
+----------------------------------------
+
+.. automodule:: bigdl.llm.langchain.embeddings.bigdlllm
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+
+llm.langchain.embeddings.transformersembeddings
+--------------------------------------------------
+
+.. automodule:: bigdl.llm.langchain.embeddings.transformersembeddings
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+
+llm.langchain.llms.bigdlllm
+----------------------------------------
+
+.. automodule:: bigdl.llm.langchain.llms.bigdlllm
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+
+llm.langchain.llms.transformersllm
+----------------------------------------
+
+.. automodule:: bigdl.llm.langchain.llms.transformersllm
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+
+llm.langchain.llms.transformerspipelinellm
+---------------------------------------------
+
+.. automodule:: bigdl.llm.langchain.llms.transformerspipelinellm
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/readthedocs/source/doc/PythonAPI/LLM/transformers.rst b/docs/readthedocs/source/doc/PythonAPI/LLM/transformers.rst
new file mode 100644
index 00000000..58a08162
--- /dev/null
+++ b/docs/readthedocs/source/doc/PythonAPI/LLM/transformers.rst
@@ -0,0 +1,54 @@
+LLM Transformers API
+=====================
+
+llm.transformers.model
+---------------------------
+
+.. autoclass:: bigdl.llm.transformers.model.AutoModelForCausalLM
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+    .. automethod:: from_pretrained
+    .. automethod:: load_convert
+    .. automethod:: load_low_bit
+
+
+.. autoclass:: bigdl.llm.transformers.model.AutoModel
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+    .. automethod:: from_pretrained
+    .. automethod:: load_convert
+    .. automethod:: load_low_bit
+
+
+.. autoclass:: bigdl.llm.transformers.model.AutoModelForSpeechSeq2Seq
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+    .. automethod:: from_pretrained
+    .. automethod:: load_convert
+    .. automethod:: load_low_bit
+
+
+.. autoclass:: bigdl.llm.transformers.model.AutoModelForSeq2SeqLM
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+    .. automethod:: from_pretrained
+    .. automethod:: load_convert
+    .. automethod:: load_low_bit
+
+
+
+llm.transformers.modelling_bigdl
+----------------------------------------
+
+.. automodule:: bigdl.llm.transformers.modelling_bigdl
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/readthedocs/source/doc/PythonAPI/Orca/orca.rst b/docs/readthedocs/source/doc/PythonAPI/Orca/orca.rst
index 749e94bd..a2a45a27 100644
--- a/docs/readthedocs/source/doc/PythonAPI/Orca/orca.rst
+++ b/docs/readthedocs/source/doc/PythonAPI/Orca/orca.rst
@@ -70,17 +70,6 @@ Orca Pytorch Estimator with backend of "horovod" or "ray".
     :show-inheritance:
 
 
-orca.learn.pytorch.pytorch_spark_estimator
-^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Orca Pytorch Estimator with backend of "bigdl".
-
-.. autoclass:: bigdl.orca.learn.pytorch.pytorch_spark_estimator.PyTorchSparkEstimator
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-
 orca.learn.openvino.estimator
 ------------------------------
 
diff --git a/python/llm/src/bigdl/llm/ggml/model/bloom/bloom_cpp.py b/python/llm/src/bigdl/llm/ggml/model/bloom/bloom_cpp.py
index 16824654..912fa729 100644
--- a/python/llm/src/bigdl/llm/ggml/model/bloom/bloom_cpp.py
+++ b/python/llm/src/bigdl/llm/ggml/model/bloom/bloom_cpp.py
@@ -64,7 +64,7 @@ from ctypes import (
     c_size_t,
 )
 import pathlib
-from bigdl.llm.utils import get_shared_lib_info
+from bigdl.llm.utils.utils import get_shared_lib_info
 from bigdl.llm.utils.common import invalidInputError
 
 
diff --git a/python/llm/src/bigdl/llm/ggml/model/gptneox/gptneox_cpp.py b/python/llm/src/bigdl/llm/ggml/model/gptneox/gptneox_cpp.py
index d7d9b26c..edc5cc81 100644
--- a/python/llm/src/bigdl/llm/ggml/model/gptneox/gptneox_cpp.py
+++ b/python/llm/src/bigdl/llm/ggml/model/gptneox/gptneox_cpp.py
@@ -64,7 +64,7 @@ from ctypes import (
 )
 import pathlib
 from bigdl.llm.utils.common import invalidInputError
-from bigdl.llm.utils import get_shared_lib_info
+from bigdl.llm.utils.utils import get_shared_lib_info
 
 
 # Load the library
diff --git a/python/llm/src/bigdl/llm/ggml/model/llama/llama_cpp.py b/python/llm/src/bigdl/llm/ggml/model/llama/llama_cpp.py
index bb456c9b..e9a84a94 100644
--- a/python/llm/src/bigdl/llm/ggml/model/llama/llama_cpp.py
+++ b/python/llm/src/bigdl/llm/ggml/model/llama/llama_cpp.py
@@ -64,7 +64,7 @@ from ctypes import (
 )
 import pathlib
 from bigdl.llm.utils.common import invalidInputError
-from bigdl.llm.utils import get_shared_lib_info
+from bigdl.llm.utils.utils import get_shared_lib_info
 
 
 # Load the library
diff --git a/python/llm/src/bigdl/llm/ggml/model/starcoder/starcoder_cpp.py b/python/llm/src/bigdl/llm/ggml/model/starcoder/starcoder_cpp.py
index 7f50d0fa..d2cb9631 100644
--- a/python/llm/src/bigdl/llm/ggml/model/starcoder/starcoder_cpp.py
+++ b/python/llm/src/bigdl/llm/ggml/model/starcoder/starcoder_cpp.py
@@ -64,7 +64,7 @@ from ctypes import (
     c_size_t,
 )
 import pathlib
-from bigdl.llm.utils import get_shared_lib_info
+from bigdl.llm.utils.utils import get_shared_lib_info
 from bigdl.llm.utils.common import invalidInputError
 
 
diff --git a/python/llm/src/bigdl/llm/langchain/embeddings/bigdlllm.py b/python/llm/src/bigdl/llm/langchain/embeddings/bigdlllm.py
index 94ed9d61..4a1718dc 100644
--- a/python/llm/src/bigdl/llm/langchain/embeddings/bigdlllm.py
+++ b/python/llm/src/bigdl/llm/langchain/embeddings/bigdlllm.py
@@ -74,7 +74,7 @@ class BigdlNativeEmbeddings(BaseModel, Embeddings):
     """info necessary for different model family initiation and configure"""
 
     client: Any  #: :meta private:
-    model_path: str
+    model_path: str  # TODO: missing doc
 
     n_ctx: int = Field(512, alias="n_ctx")
     """Token context window."""
diff --git a/python/llm/src/bigdl/llm/langchain/llms/bigdlllm.py b/python/llm/src/bigdl/llm/langchain/llms/bigdlllm.py
index ef1f6590..30b2d628 100644
--- a/python/llm/src/bigdl/llm/langchain/llms/bigdlllm.py
+++ b/python/llm/src/bigdl/llm/langchain/llms/bigdlllm.py
@@ -332,7 +332,7 @@ class BigdlNativeLLM(LLM):
                 temperature = 0.5
             )
             for chunk in llm.stream("Ask 'Hi, how are you?' like a pirate:'",
-                    stop=["'","\n"]):
+                    stop=["'","\\n"]):
                 result = chunk["choices"][0]
                 print(result["text"], end='', flush=True)
diff --git a/python/llm/src/bigdl/llm/transformers/convert.py b/python/llm/src/bigdl/llm/transformers/convert.py
index 07fba239..2cf92ad1 100644
--- a/python/llm/src/bigdl/llm/transformers/convert.py
+++ b/python/llm/src/bigdl/llm/transformers/convert.py
@@ -38,7 +38,6 @@
 import torch
 import torch.nn as nn
 from accelerate import init_empty_weights
-from bigdl.llm.transformers.linear_quant import LinearQuant, ParamsQuant
 import warnings
 import transformers
 import importlib
@@ -46,6 +45,7 @@ import importlib
 
 def _replace_with_quant_linear(model, qtype, modules_to_not_convert=None,
                                current_key_name=None, convert_shape_only=False):
+    from bigdl.llm.transformers.linear_quant import LinearQuant, ParamsQuant
     has_been_replaced = False
 
     # Through our method, certain layers that were initialized on the device "meta"
diff --git a/python/llm/src/bigdl/llm/transformers/model.py b/python/llm/src/bigdl/llm/transformers/model.py
index 8c198fe6..3b40b596 100644
--- a/python/llm/src/bigdl/llm/transformers/model.py
+++ b/python/llm/src/bigdl/llm/transformers/model.py
@@ -48,12 +48,12 @@ class _BaseAutoModelClass:
         the weight of model's linears can be loaded to low-bit format, like int4, int5 and int8.
         Two new arguments are added to extend Hugging Face's from_pretrained method as follows:
 
-        New Arguments:
-        load_in_4bit: boolean value, True means load linear's weight to symmetric int 4.
-        load_in_low_bit: str value, options are sym_int4, asym_int4, sym_int5, asym_int5
-                         or sym_int8. (sym_int4 means symmetric int 4, asym_int4 means
-                         asymmetric int 4, etc.). Relevant low bit optimizations will
-                         be applied to the model.
+
+        :param load_in_4bit: boolean value, True means load linear's weight to symmetric int 4.
+        :param load_in_low_bit: str value, options are sym_int4, asym_int4, sym_int5, asym_int5
+                                or sym_int8. sym_int4 means symmetric int 4, asym_int4 means
+                                asymmetric int 4, etc. Relevant low bit optimizations will
+                                be applied to the model.
""" pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None) \ if len(args) == 0 else args[0] diff --git a/python/llm/src/bigdl/llm/transformers/modelling_bigdl.py b/python/llm/src/bigdl/llm/transformers/modelling_bigdl.py index 9170f5ec..f928ffcd 100644 --- a/python/llm/src/bigdl/llm/transformers/modelling_bigdl.py +++ b/python/llm/src/bigdl/llm/transformers/modelling_bigdl.py @@ -48,7 +48,7 @@ class BigdlNativeForCausalLM: It indicates the saving path for the converted low precision model. :param tmp_path: (optional) Which path to store the intermediate fp16 model during the conversion process. Default to `None` so that intermediate model will not be saved. - :param **kwargs: keyword arguments which will be passed to the model instance + :param kwargs: keyword arguments which will be passed to the model instance :return: a model instance """ diff --git a/python/llm/src/bigdl/llm/utils/__init__.py b/python/llm/src/bigdl/llm/utils/__init__.py index 0656c8b5..dbdafd2a 100644 --- a/python/llm/src/bigdl/llm/utils/__init__.py +++ b/python/llm/src/bigdl/llm/utils/__init__.py @@ -18,5 +18,3 @@ # physically located elsewhere. # Otherwise there would be module not found error in non-pip's setting as Python would # only search the first bigdl package and end up finding only one sub-package. - -from .utils import *