Add api doc for LLM (#8605)

* api doc initial

* update desc
This commit is contained in:
Kai Huang 2023-08-08 18:17:16 +08:00 committed by GitHub
parent 4c44153584
commit 1b65288bdb
17 changed files with 160 additions and 29 deletions

View file

@ -329,6 +329,13 @@ subtrees:
- file: doc/PPML/QuickStart/tpc-ds_with_sparksql_on_k8s - file: doc/PPML/QuickStart/tpc-ds_with_sparksql_on_k8s
- file: doc/PPML/Overview/azure_ppml_occlum - file: doc/PPML/Overview/azure_ppml_occlum
- file: doc/PPML/Overview/secure_lightgbm_on_spark - file: doc/PPML/Overview/secure_lightgbm_on_spark
- entries:
- file: doc/LLM/index
title: "LLM"
subtrees:
- entries:
- file: doc/PythonAPI/LLM/index
title: "API Reference"
- entries: - entries:
- file: doc/UserGuide/contributor - file: doc/UserGuide/contributor

View file

@ -18,7 +18,9 @@ import glob
import shutil import shutil
import urllib import urllib
autodoc_mock_imports = ["openvino", "pytorch_lightning", "keras", "cpuinfo", "sigfig", "prophet"] autodoc_mock_imports = ["openvino", "pytorch_lightning", "keras", "cpuinfo", "sigfig", "prophet",
"accelerate", "langchain", "pydantic", "transformers", "ray", "sklearn", "torchmetrics",
"pandas", "pmdarima", "scipy", "optuna", "cloudpickle", "xgboost", "filelock"]
# documentation root, use os.path.abspath to make it absolute, like shown here. # documentation root, use os.path.abspath to make it absolute, like shown here.
#sys.path.insert(0, '.') #sys.path.insert(0, '.')
@ -29,11 +31,12 @@ sys.path.insert(0, os.path.abspath("../../../python/dllib/src/"))
sys.path.insert(0, os.path.abspath("../../../python/orca/src/")) sys.path.insert(0, os.path.abspath("../../../python/orca/src/"))
sys.path.insert(0, os.path.abspath("../../../python/serving/src/")) sys.path.insert(0, os.path.abspath("../../../python/serving/src/"))
sys.path.insert(0, os.path.abspath("../../../python/nano/src/")) sys.path.insert(0, os.path.abspath("../../../python/nano/src/"))
sys.path.insert(0, os.path.abspath("../../../python/llm/src/"))
# -- Project information ----------------------------------------------------- # -- Project information -----------------------------------------------------
html_theme = "pydata_sphinx_theme" html_theme = "pydata_sphinx_theme"
html_theme_options = { html_theme_options = {
"header_links_before_dropdown": 8, "header_links_before_dropdown": 9,
"icon_links": [ "icon_links": [
{ {
"name": "GitHub Repository for BigDL", "name": "GitHub Repository for BigDL",

View file

@ -0,0 +1,26 @@
BigDL-LLM
=========================
BigDL-LLM is a library for running LLMs (large language models) on your Intel laptop using INT4 with very low latency (for any Hugging Face Transformers model).
-------
.. grid:: 1 2 2 2
:gutter: 2
.. grid-item-card::
**API Document**
^^^
The API Document provides detailed descriptions of the LLM APIs.
+++
:bdg-link:`API Document <../PythonAPI/LLM/index.html>`
.. toctree::
:hidden:
BigDL-LLM Document <self>

View file

@ -0,0 +1,8 @@
LLM API
==================
.. toctree::
:maxdepth: 2
transformers.rst
langchain.rst

View file

@ -0,0 +1,46 @@
LLM LangChain API
=====================
llm.langchain.embeddings.bigdlllm
----------------------------------------
.. automodule:: bigdl.llm.langchain.embeddings.bigdlllm
:members:
:undoc-members:
:show-inheritance:
llm.langchain.embeddings.transformersembeddings
--------------------------------------------------
.. automodule:: bigdl.llm.langchain.embeddings.transformersembeddings
:members:
:undoc-members:
:show-inheritance:
llm.langchain.llms.bigdlllm
----------------------------------------
.. automodule:: bigdl.llm.langchain.llms.bigdlllm
:members:
:undoc-members:
:show-inheritance:
llm.langchain.llms.transformersllm
----------------------------------------
.. automodule:: bigdl.llm.langchain.llms.transformersllm
:members:
:undoc-members:
:show-inheritance:
llm.langchain.llms.transformerspipelinellm
---------------------------------------------
.. automodule:: bigdl.llm.langchain.llms.transformerspipelinellm
:members:
:undoc-members:
:show-inheritance:

View file

@ -0,0 +1,54 @@
LLM Transformers API
=====================
llm.transformers.model
---------------------------
.. autoclass:: bigdl.llm.transformers.model.AutoModelForCausalLM
:members:
:undoc-members:
:show-inheritance:
.. automethod:: from_pretrained
.. automethod:: load_convert
.. automethod:: load_low_bit
.. autoclass:: bigdl.llm.transformers.model.AutoModel
:members:
:undoc-members:
:show-inheritance:
.. automethod:: from_pretrained
.. automethod:: load_convert
.. automethod:: load_low_bit
.. autoclass:: bigdl.llm.transformers.model.AutoModelForSpeechSeq2Seq
:members:
:undoc-members:
:show-inheritance:
.. automethod:: from_pretrained
.. automethod:: load_convert
.. automethod:: load_low_bit
.. autoclass:: bigdl.llm.transformers.model.AutoModelForSeq2SeqLM
:members:
:undoc-members:
:show-inheritance:
.. automethod:: from_pretrained
.. automethod:: load_convert
.. automethod:: load_low_bit
llm.transformers.modelling_bigdl
----------------------------------------
.. automodule:: bigdl.llm.transformers.modelling_bigdl
:members:
:undoc-members:
:show-inheritance:

View file

@ -70,17 +70,6 @@ Orca Pytorch Estimator with backend of "horovod" or "ray".
:show-inheritance: :show-inheritance:
orca.learn.pytorch.pytorch_spark_estimator
^^^^^^^^^^^^^^^^^^^^^^^^^^^
Orca Pytorch Estimator with backend of "bigdl".
.. autoclass:: bigdl.orca.learn.pytorch.pytorch_spark_estimator.PyTorchSparkEstimator
:members:
:undoc-members:
:show-inheritance:
orca.learn.openvino.estimator orca.learn.openvino.estimator
------------------------------ ------------------------------

View file

@ -64,7 +64,7 @@ from ctypes import (
c_size_t, c_size_t,
) )
import pathlib import pathlib
from bigdl.llm.utils import get_shared_lib_info from bigdl.llm.utils.utils import get_shared_lib_info
from bigdl.llm.utils.common import invalidInputError from bigdl.llm.utils.common import invalidInputError

View file

@ -64,7 +64,7 @@ from ctypes import (
) )
import pathlib import pathlib
from bigdl.llm.utils.common import invalidInputError from bigdl.llm.utils.common import invalidInputError
from bigdl.llm.utils import get_shared_lib_info from bigdl.llm.utils.utils import get_shared_lib_info
# Load the library # Load the library

View file

@ -64,7 +64,7 @@ from ctypes import (
) )
import pathlib import pathlib
from bigdl.llm.utils.common import invalidInputError from bigdl.llm.utils.common import invalidInputError
from bigdl.llm.utils import get_shared_lib_info from bigdl.llm.utils.utils import get_shared_lib_info
# Load the library # Load the library

View file

@ -64,7 +64,7 @@ from ctypes import (
c_size_t, c_size_t,
) )
import pathlib import pathlib
from bigdl.llm.utils import get_shared_lib_info from bigdl.llm.utils.utils import get_shared_lib_info
from bigdl.llm.utils.common import invalidInputError from bigdl.llm.utils.common import invalidInputError

View file

@ -74,7 +74,7 @@ class BigdlNativeEmbeddings(BaseModel, Embeddings):
"""info necessary for different model family initiation and configure""" """info necessary for different model family initiation and configure"""
client: Any #: :meta private: client: Any #: :meta private:
model_path: str model_path: str # TODO: missing doc
n_ctx: int = Field(512, alias="n_ctx") n_ctx: int = Field(512, alias="n_ctx")
"""Token context window.""" """Token context window."""

View file

@ -332,7 +332,7 @@ class BigdlNativeLLM(LLM):
temperature = 0.5 temperature = 0.5
) )
for chunk in llm.stream("Ask 'Hi, how are you?' like a pirate:'", for chunk in llm.stream("Ask 'Hi, how are you?' like a pirate:'",
stop=["'","\n"]): stop=["'","\\n"]):
result = chunk["choices"][0] result = chunk["choices"][0]
print(result["text"], end='', flush=True) print(result["text"], end='', flush=True)

View file

@ -38,7 +38,6 @@
import torch import torch
import torch.nn as nn import torch.nn as nn
from accelerate import init_empty_weights from accelerate import init_empty_weights
from bigdl.llm.transformers.linear_quant import LinearQuant, ParamsQuant
import warnings import warnings
import transformers import transformers
import importlib import importlib
@ -46,6 +45,7 @@ import importlib
def _replace_with_quant_linear(model, qtype, modules_to_not_convert=None, def _replace_with_quant_linear(model, qtype, modules_to_not_convert=None,
current_key_name=None, convert_shape_only=False): current_key_name=None, convert_shape_only=False):
from bigdl.llm.transformers.linear_quant import LinearQuant, ParamsQuant
has_been_replaced = False has_been_replaced = False
# Through our method, certain layers that were initialized on the device "meta" # Through our method, certain layers that were initialized on the device "meta"

View file

@ -48,12 +48,12 @@ class _BaseAutoModelClass:
the weight of model's linears can be loaded to low-bit format, like int4, int5 and int8. the weight of model's linears can be loaded to low-bit format, like int4, int5 and int8.
Two new arguments are added to extend Hugging Face's from_pretrained method as follows: Two new arguments are added to extend Hugging Face's from_pretrained method as follows:
New Arguments:
load_in_4bit: boolean value, True means load linear's weight to symmetric int 4. :param load_in_4bit: boolean value, True means load linear's weight to symmetric int 4.
load_in_low_bit: str value, options are sym_int4, asym_int4, sym_int5, asym_int5 :param load_in_low_bit: str value, options are sym_int4, asym_int4, sym_int5, asym_int5
or sym_int8. (sym_int4 means symmetric int 4, asym_int4 means or sym_int8. sym_int4 means symmetric int 4, asym_int4 means
asymmetric int 4, etc.). Relevant low bit optimizations will asymmetric int 4, etc. Relevant low bit optimizations will
be applied to the model. be applied to the model.
""" """
pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None) \ pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None) \
if len(args) == 0 else args[0] if len(args) == 0 else args[0]

View file

@ -48,7 +48,7 @@ class BigdlNativeForCausalLM:
It indicates the saving path for the converted low precision model. It indicates the saving path for the converted low precision model.
:param tmp_path: (optional) Which path to store the intermediate fp16 model during the :param tmp_path: (optional) Which path to store the intermediate fp16 model during the
conversion process. Default to `None` so that intermediate model will not be saved. conversion process. Default to `None` so that intermediate model will not be saved.
:param **kwargs: keyword arguments which will be passed to the model instance :param kwargs: keyword arguments which will be passed to the model instance
:return: a model instance :return: a model instance
""" """

View file

@ -18,5 +18,3 @@
# physically located elsewhere. # physically located elsewhere.
# Otherwise there would be module not found error in non-pip's setting as Python would # Otherwise there would be module not found error in non-pip's setting as Python would
# only search the first bigdl package and end up finding only one sub-package. # only search the first bigdl package and end up finding only one sub-package.
from .utils import *