Add api doc for LLM (#8605)

* api doc initial * update desc
2023-08-08 18:17:16 +08:00 · 2023-08-08 18:17:16 +08:00 · 1b65288bdb
commit 1b65288bdb
parent 4c44153584
17 changed files with 160 additions and 29 deletions
--- a/docs/readthedocs/source/_toc.yml
+++ b/docs/readthedocs/source/_toc.yml
@ -329,6 +329,13 @@ subtrees:
                  - file: doc/PPML/QuickStart/tpc-ds_with_sparksql_on_k8s
                  - file: doc/PPML/Overview/azure_ppml_occlum
                  - file: doc/PPML/Overview/secure_lightgbm_on_spark
+  - entries:
+    - file: doc/LLM/index
+      title: "LLM"
+      subtrees:
+        - entries:
+          - file: doc/PythonAPI/LLM/index
+            title: "API Reference"

  - entries:
    - file: doc/UserGuide/contributor
--- a/docs/readthedocs/source/conf.py
+++ b/docs/readthedocs/source/conf.py
@ -18,7 +18,9 @@ import glob
 import shutil
 import urllib

-autodoc_mock_imports = ["openvino", "pytorch_lightning", "keras", "cpuinfo", "sigfig", "prophet"]
+autodoc_mock_imports = ["openvino", "pytorch_lightning", "keras", "cpuinfo", "sigfig", "prophet",
+                        "accelerate", "langchain", "pydantic", "transformers", "ray", "sklearn", "torchmetrics",
+                        "pandas", "pmdarima", "scipy", "optuna", "cloudpickle", "xgboost", "filelock"]

 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #sys.path.insert(0, '.')
@ -29,11 +31,12 @@ sys.path.insert(0, os.path.abspath("../../../python/dllib/src/"))
 sys.path.insert(0, os.path.abspath("../../../python/orca/src/"))
 sys.path.insert(0, os.path.abspath("../../../python/serving/src/"))
 sys.path.insert(0, os.path.abspath("../../../python/nano/src/"))
+sys.path.insert(0, os.path.abspath("../../../python/llm/src/"))

 # -- Project information -----------------------------------------------------
 html_theme = "pydata_sphinx_theme"
 html_theme_options = {
-  "header_links_before_dropdown": 8,
+  "header_links_before_dropdown": 9,
  "icon_links": [
        {
            "name": "GitHub Repository for BigDL",
--- a/docs/readthedocs/source/doc/LLM/index.rst
+++ b/docs/readthedocs/source/doc/LLM/index.rst
@ -0,0 +1,26 @@
+BigDL-LLM
+=========================
+
+
+BigDL-LLM is a library for running LLMs (large language models) on your Intel laptop using INT4 with very low latency (for any Hugging Face Transformers model).
+
+-------
+
+.. grid:: 1 2 2 2
+    :gutter: 2
+
+    .. grid-item-card::
+
+        **API Document**
+        ^^^
+
+        The API Document provides detailed descriptions of the LLM APIs.
+
+        +++
+
+        :bdg-link:`API Document <../PythonAPI/LLM/index.html>`
+
+..  toctree::
+    :hidden:
+
+    BigDL-LLM Document <self>
--- a/docs/readthedocs/source/doc/PythonAPI/LLM/index.rst
+++ b/docs/readthedocs/source/doc/PythonAPI/LLM/index.rst
@ -0,0 +1,8 @@
+LLM API
+==================
+
+.. toctree::
+    :maxdepth: 2
+
+    transformers.rst
+    langchain.rst
--- a/docs/readthedocs/source/doc/PythonAPI/LLM/langchain.rst
+++ b/docs/readthedocs/source/doc/PythonAPI/LLM/langchain.rst
@ -0,0 +1,46 @@
+LLM LangChain API
+=====================
+
+llm.langchain.embeddings.bigdlllm
+----------------------------------------
+
+.. automodule:: bigdl.llm.langchain.embeddings.bigdlllm
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+
+llm.langchain.embeddings.transformersembeddings
+--------------------------------------------------
+
+.. automodule:: bigdl.llm.langchain.embeddings.transformersembeddings
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+
+llm.langchain.llms.bigdlllm
+----------------------------------------
+
+.. automodule:: bigdl.llm.langchain.llms.bigdlllm
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+
+llm.langchain.llms.transformersllm
+----------------------------------------
+
+.. automodule:: bigdl.llm.langchain.llms.transformersllm
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+
+llm.langchain.llms.transformerspipelinellm
+---------------------------------------------
+
+.. automodule:: bigdl.llm.langchain.llms.transformerspipelinellm
+    :members:
+    :undoc-members:
+    :show-inheritance:
--- a/docs/readthedocs/source/doc/PythonAPI/LLM/transformers.rst
+++ b/docs/readthedocs/source/doc/PythonAPI/LLM/transformers.rst
@ -0,0 +1,54 @@
+LLM Transformers API
+=====================
+
+llm.transformers.model
+---------------------------
+
+.. autoclass:: bigdl.llm.transformers.model.AutoModelForCausalLM
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+    .. automethod:: from_pretrained
+    .. automethod:: load_convert
+    .. automethod:: load_low_bit
+
+
+.. autoclass:: bigdl.llm.transformers.model.AutoModel
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+    .. automethod:: from_pretrained
+    .. automethod:: load_convert
+    .. automethod:: load_low_bit
+
+
+.. autoclass:: bigdl.llm.transformers.model.AutoModelForSpeechSeq2Seq
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+    .. automethod:: from_pretrained
+    .. automethod:: load_convert
+    .. automethod:: load_low_bit
+
+
+.. autoclass:: bigdl.llm.transformers.model.AutoModelForSeq2SeqLM
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+    .. automethod:: from_pretrained
+    .. automethod:: load_convert
+    .. automethod:: load_low_bit
+
+
+
+llm.transformers.modelling_bigdl
+----------------------------------------
+
+.. automodule:: bigdl.llm.transformers.modelling_bigdl
+    :members:
+    :undoc-members:
+    :show-inheritance:
--- a/docs/readthedocs/source/doc/PythonAPI/Orca/orca.rst
+++ b/docs/readthedocs/source/doc/PythonAPI/Orca/orca.rst
@ -70,17 +70,6 @@ Orca Pytorch Estimator with backend of "horovod" or "ray".
    :show-inheritance:


-orca.learn.pytorch.pytorch_spark_estimator
-^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Orca Pytorch Estimator with backend of "bigdl".
-
-.. autoclass:: bigdl.orca.learn.pytorch.pytorch_spark_estimator.PyTorchSparkEstimator
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-
 orca.learn.openvino.estimator
 ------------------------------

--- a/python/llm/src/bigdl/llm/ggml/model/bloom/bloom_cpp.py
+++ b/python/llm/src/bigdl/llm/ggml/model/bloom/bloom_cpp.py
@ -64,7 +64,7 @@ from ctypes import (
    c_size_t,
 )
 import pathlib
-from bigdl.llm.utils import get_shared_lib_info
+from bigdl.llm.utils.utils import get_shared_lib_info
 from bigdl.llm.utils.common import invalidInputError


--- a/python/llm/src/bigdl/llm/ggml/model/gptneox/gptneox_cpp.py
+++ b/python/llm/src/bigdl/llm/ggml/model/gptneox/gptneox_cpp.py
@ -64,7 +64,7 @@ from ctypes import (
 )
 import pathlib
 from bigdl.llm.utils.common import invalidInputError
-from bigdl.llm.utils import get_shared_lib_info
+from bigdl.llm.utils.utils import get_shared_lib_info


 # Load the library
--- a/python/llm/src/bigdl/llm/ggml/model/llama/llama_cpp.py
+++ b/python/llm/src/bigdl/llm/ggml/model/llama/llama_cpp.py
@ -64,7 +64,7 @@ from ctypes import (
 )
 import pathlib
 from bigdl.llm.utils.common import invalidInputError
-from bigdl.llm.utils import get_shared_lib_info
+from bigdl.llm.utils.utils import get_shared_lib_info


 # Load the library
--- a/python/llm/src/bigdl/llm/ggml/model/starcoder/starcoder_cpp.py
+++ b/python/llm/src/bigdl/llm/ggml/model/starcoder/starcoder_cpp.py
@ -64,7 +64,7 @@ from ctypes import (
    c_size_t,
 )
 import pathlib
-from bigdl.llm.utils import get_shared_lib_info
+from bigdl.llm.utils.utils import get_shared_lib_info
 from bigdl.llm.utils.common import invalidInputError


--- a/python/llm/src/bigdl/llm/langchain/embeddings/bigdlllm.py
+++ b/python/llm/src/bigdl/llm/langchain/embeddings/bigdlllm.py
@ -74,7 +74,7 @@ class BigdlNativeEmbeddings(BaseModel, Embeddings):
    """info necessary for different model family initiation and configure"""

    client: Any  #: :meta private:
-    model_path: str
+    model_path: str  # TODO: missing doc

    n_ctx: int = Field(512, alias="n_ctx")
    """Token context window."""
--- a/python/llm/src/bigdl/llm/langchain/llms/bigdlllm.py
+++ b/python/llm/src/bigdl/llm/langchain/llms/bigdlllm.py
@ -332,7 +332,7 @@ class BigdlNativeLLM(LLM):
                    temperature = 0.5
                )
                for chunk in llm.stream("Ask 'Hi, how are you?' like a pirate:'",
-                        stop=["'","\n"]):
+                        stop=["'","\\n"]):
                    result = chunk["choices"][0]
                    print(result["text"], end='', flush=True)

--- a/python/llm/src/bigdl/llm/transformers/convert.py
+++ b/python/llm/src/bigdl/llm/transformers/convert.py
@ -38,7 +38,6 @@
 import torch
 import torch.nn as nn
 from accelerate import init_empty_weights
-from bigdl.llm.transformers.linear_quant import LinearQuant, ParamsQuant
 import warnings
 import transformers
 import importlib
@ -46,6 +45,7 @@ import importlib

 def _replace_with_quant_linear(model, qtype, modules_to_not_convert=None,
                               current_key_name=None, convert_shape_only=False):
+    from bigdl.llm.transformers.linear_quant import LinearQuant, ParamsQuant
    has_been_replaced = False

    # Through our method, certain layers that were initialized on the device "meta"
--- a/python/llm/src/bigdl/llm/transformers/model.py
+++ b/python/llm/src/bigdl/llm/transformers/model.py
@ -48,12 +48,12 @@ class _BaseAutoModelClass:
        the weight of model's linears can be loaded to low-bit format, like int4, int5 and int8.

        Two new arguments are added to extend Hugging Face's from_pretrained method as follows:
-        New Arguments:
-            load_in_4bit: boolean value, True means load linear's weight to symmetric int 4.
-            load_in_low_bit: str value, options are sym_int4, asym_int4, sym_int5, asym_int5
-                             or sym_int8. (sym_int4 means symmetric int 4, asym_int4 means
-                             asymmetric int 4, etc.). Relevant low bit optimizations will
-                             be applied to the model.
+
+        :param load_in_4bit: boolean value, True means load linear's weight to symmetric int 4.
+        :param load_in_low_bit: str value, options are sym_int4, asym_int4, sym_int5, asym_int5
+                                or sym_int8. sym_int4 means symmetric int 4, asym_int4 means
+                                asymmetric int 4, etc. Relevant low bit optimizations will
+                                be applied to the model.
        """
        pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None) \
            if len(args) == 0 else args[0]
--- a/python/llm/src/bigdl/llm/transformers/modelling_bigdl.py
+++ b/python/llm/src/bigdl/llm/transformers/modelling_bigdl.py
@ -48,7 +48,7 @@ class BigdlNativeForCausalLM:
               It indicates the saving path for the converted low precision model.
        :param tmp_path: (optional) Which path to store the intermediate fp16 model during the
               conversion process. Default to `None` so that intermediate model will not be saved.
-        :param **kwargs: keyword arguments which will be passed to the model instance
+        :param kwargs: keyword arguments which will be passed to the model instance

        :return: a model instance
        """
--- a/python/llm/src/bigdl/llm/utils/__init__.py
+++ b/python/llm/src/bigdl/llm/utils/__init__.py
@ -18,5 +18,3 @@
 # physically located elsewhere.
 # Otherwise there would be module not found error in non-pip's setting as Python would
 # only search the first bigdl package and end up finding only one sub-package.
-
-from .utils import *