parent
4c44153584
commit
1b65288bdb
17 changed files with 160 additions and 29 deletions
|
|
@ -329,6 +329,13 @@ subtrees:
|
|||
- file: doc/PPML/QuickStart/tpc-ds_with_sparksql_on_k8s
|
||||
- file: doc/PPML/Overview/azure_ppml_occlum
|
||||
- file: doc/PPML/Overview/secure_lightgbm_on_spark
|
||||
- entries:
|
||||
- file: doc/LLM/index
|
||||
title: "LLM"
|
||||
subtrees:
|
||||
- entries:
|
||||
- file: doc/PythonAPI/LLM/index
|
||||
title: "API Reference"
|
||||
|
||||
- entries:
|
||||
- file: doc/UserGuide/contributor
|
||||
|
|
|
|||
|
|
@ -18,7 +18,9 @@ import glob
|
|||
import shutil
|
||||
import urllib
|
||||
|
||||
autodoc_mock_imports = ["openvino", "pytorch_lightning", "keras", "cpuinfo", "sigfig", "prophet"]
|
||||
autodoc_mock_imports = ["openvino", "pytorch_lightning", "keras", "cpuinfo", "sigfig", "prophet",
|
||||
"accelerate", "langchain", "pydantic", "transformers", "ray", "sklearn", "torchmetrics",
|
||||
"pandas", "pmdarima", "scipy", "optuna", "cloudpickle", "xgboost", "filelock"]
|
||||
|
||||
# documentation root, use os.path.abspath to make it absolute, like shown here.
|
||||
#sys.path.insert(0, '.')
|
||||
|
|
@ -29,11 +31,12 @@ sys.path.insert(0, os.path.abspath("../../../python/dllib/src/"))
|
|||
sys.path.insert(0, os.path.abspath("../../../python/orca/src/"))
|
||||
sys.path.insert(0, os.path.abspath("../../../python/serving/src/"))
|
||||
sys.path.insert(0, os.path.abspath("../../../python/nano/src/"))
|
||||
sys.path.insert(0, os.path.abspath("../../../python/llm/src/"))
|
||||
|
||||
# -- Project information -----------------------------------------------------
|
||||
html_theme = "pydata_sphinx_theme"
|
||||
html_theme_options = {
|
||||
"header_links_before_dropdown": 8,
|
||||
"header_links_before_dropdown": 9,
|
||||
"icon_links": [
|
||||
{
|
||||
"name": "GitHub Repository for BigDL",
|
||||
|
|
|
|||
26
docs/readthedocs/source/doc/LLM/index.rst
Normal file
26
docs/readthedocs/source/doc/LLM/index.rst
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
BigDL-LLM
|
||||
=========================
|
||||
|
||||
|
||||
BigDL-LLM is a library for running LLMs (large language models) on your Intel laptop using INT4 with very low latency (for any Hugging Face Transformers model).
|
||||
|
||||
-------
|
||||
|
||||
.. grid:: 1 2 2 2
|
||||
:gutter: 2
|
||||
|
||||
.. grid-item-card::
|
||||
|
||||
**API Document**
|
||||
^^^
|
||||
|
||||
The API Document provides a detailed description of the LLM APIs.
|
||||
|
||||
+++
|
||||
|
||||
:bdg-link:`API Document <../PythonAPI/LLM/index.html>`
|
||||
|
||||
.. toctree::
|
||||
:hidden:
|
||||
|
||||
BigDL-LLM Document <self>
|
||||
8
docs/readthedocs/source/doc/PythonAPI/LLM/index.rst
Normal file
8
docs/readthedocs/source/doc/PythonAPI/LLM/index.rst
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
LLM API
|
||||
==================
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
transformers.rst
|
||||
langchain.rst
|
||||
46
docs/readthedocs/source/doc/PythonAPI/LLM/langchain.rst
Normal file
46
docs/readthedocs/source/doc/PythonAPI/LLM/langchain.rst
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
LLM LangChain API
|
||||
=====================
|
||||
|
||||
llm.langchain.embeddings.bigdlllm
|
||||
----------------------------------------
|
||||
|
||||
.. automodule:: bigdl.llm.langchain.embeddings.bigdlllm
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
llm.langchain.embeddings.transformersembeddings
|
||||
--------------------------------------------------
|
||||
|
||||
.. automodule:: bigdl.llm.langchain.embeddings.transformersembeddings
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
llm.langchain.llms.bigdlllm
|
||||
----------------------------------------
|
||||
|
||||
.. automodule:: bigdl.llm.langchain.llms.bigdlllm
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
llm.langchain.llms.transformersllm
|
||||
----------------------------------------
|
||||
|
||||
.. automodule:: bigdl.llm.langchain.llms.transformersllm
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
llm.langchain.llms.transformerspipelinellm
|
||||
---------------------------------------------
|
||||
|
||||
.. automodule:: bigdl.llm.langchain.llms.transformerspipelinellm
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
54
docs/readthedocs/source/doc/PythonAPI/LLM/transformers.rst
Normal file
54
docs/readthedocs/source/doc/PythonAPI/LLM/transformers.rst
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
LLM Transformers API
|
||||
=====================
|
||||
|
||||
llm.transformers.model
|
||||
---------------------------
|
||||
|
||||
.. autoclass:: bigdl.llm.transformers.model.AutoModelForCausalLM
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
.. automethod:: from_pretrained
|
||||
.. automethod:: load_convert
|
||||
.. automethod:: load_low_bit
|
||||
|
||||
|
||||
.. autoclass:: bigdl.llm.transformers.model.AutoModel
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
.. automethod:: from_pretrained
|
||||
.. automethod:: load_convert
|
||||
.. automethod:: load_low_bit
|
||||
|
||||
|
||||
.. autoclass:: bigdl.llm.transformers.model.AutoModelForSpeechSeq2Seq
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
.. automethod:: from_pretrained
|
||||
.. automethod:: load_convert
|
||||
.. automethod:: load_low_bit
|
||||
|
||||
|
||||
.. autoclass:: bigdl.llm.transformers.model.AutoModelForSeq2SeqLM
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
.. automethod:: from_pretrained
|
||||
.. automethod:: load_convert
|
||||
.. automethod:: load_low_bit
|
||||
|
||||
|
||||
|
||||
llm.transformers.modelling_bigdl
|
||||
----------------------------------------
|
||||
|
||||
.. automodule:: bigdl.llm.transformers.modelling_bigdl
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
|
@ -70,17 +70,6 @@ Orca Pytorch Estimator with backend of "horovod" or "ray".
|
|||
:show-inheritance:
|
||||
|
||||
|
||||
orca.learn.pytorch.pytorch_spark_estimator
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Orca Pytorch Estimator with backend of "bigdl".
|
||||
|
||||
.. autoclass:: bigdl.orca.learn.pytorch.pytorch_spark_estimator.PyTorchSparkEstimator
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
orca.learn.openvino.estimator
|
||||
------------------------------
|
||||
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ from ctypes import (
|
|||
c_size_t,
|
||||
)
|
||||
import pathlib
|
||||
from bigdl.llm.utils import get_shared_lib_info
|
||||
from bigdl.llm.utils.utils import get_shared_lib_info
|
||||
from bigdl.llm.utils.common import invalidInputError
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ from ctypes import (
|
|||
)
|
||||
import pathlib
|
||||
from bigdl.llm.utils.common import invalidInputError
|
||||
from bigdl.llm.utils import get_shared_lib_info
|
||||
from bigdl.llm.utils.utils import get_shared_lib_info
|
||||
|
||||
|
||||
# Load the library
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ from ctypes import (
|
|||
)
|
||||
import pathlib
|
||||
from bigdl.llm.utils.common import invalidInputError
|
||||
from bigdl.llm.utils import get_shared_lib_info
|
||||
from bigdl.llm.utils.utils import get_shared_lib_info
|
||||
|
||||
|
||||
# Load the library
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ from ctypes import (
|
|||
c_size_t,
|
||||
)
|
||||
import pathlib
|
||||
from bigdl.llm.utils import get_shared_lib_info
|
||||
from bigdl.llm.utils.utils import get_shared_lib_info
|
||||
from bigdl.llm.utils.common import invalidInputError
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -74,7 +74,7 @@ class BigdlNativeEmbeddings(BaseModel, Embeddings):
|
|||
"""info necessary for different model family initiation and configure"""
|
||||
|
||||
client: Any #: :meta private:
|
||||
model_path: str
|
||||
model_path: str # TODO: missing doc
|
||||
|
||||
n_ctx: int = Field(512, alias="n_ctx")
|
||||
"""Token context window."""
|
||||
|
|
|
|||
|
|
@ -332,7 +332,7 @@ class BigdlNativeLLM(LLM):
|
|||
temperature = 0.5
|
||||
)
|
||||
for chunk in llm.stream("Ask 'Hi, how are you?' like a pirate:'",
|
||||
stop=["'","\n"]):
|
||||
stop=["'","\\n"]):
|
||||
result = chunk["choices"][0]
|
||||
print(result["text"], end='', flush=True)
|
||||
|
||||
|
|
|
|||
|
|
@ -38,7 +38,6 @@
|
|||
import torch
|
||||
import torch.nn as nn
|
||||
from accelerate import init_empty_weights
|
||||
from bigdl.llm.transformers.linear_quant import LinearQuant, ParamsQuant
|
||||
import warnings
|
||||
import transformers
|
||||
import importlib
|
||||
|
|
@ -46,6 +45,7 @@ import importlib
|
|||
|
||||
def _replace_with_quant_linear(model, qtype, modules_to_not_convert=None,
|
||||
current_key_name=None, convert_shape_only=False):
|
||||
from bigdl.llm.transformers.linear_quant import LinearQuant, ParamsQuant
|
||||
has_been_replaced = False
|
||||
|
||||
# Through our method, certain layers that were initialized on the device "meta"
|
||||
|
|
|
|||
|
|
@ -48,12 +48,12 @@ class _BaseAutoModelClass:
|
|||
the weight of model's linears can be loaded to low-bit format, like int4, int5 and int8.
|
||||
|
||||
Two new arguments are added to extend Hugging Face's from_pretrained method as follows:
|
||||
New Arguments:
|
||||
load_in_4bit: boolean value, True means load linear's weight to symmetric int 4.
|
||||
load_in_low_bit: str value, options are sym_int4, asym_int4, sym_int5, asym_int5
|
||||
or sym_int8. (sym_int4 means symmetric int 4, asym_int4 means
|
||||
asymmetric int 4, etc.). Relevant low bit optimizations will
|
||||
be applied to the model.
|
||||
|
||||
:param load_in_4bit: boolean value, True means load linear's weight to symmetric int 4.
|
||||
:param load_in_low_bit: str value, options are sym_int4, asym_int4, sym_int5, asym_int5
|
||||
or sym_int8. sym_int4 means symmetric int 4, asym_int4 means
|
||||
asymmetric int 4, etc. Relevant low bit optimizations will
|
||||
be applied to the model.
|
||||
"""
|
||||
pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None) \
|
||||
if len(args) == 0 else args[0]
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ class BigdlNativeForCausalLM:
|
|||
It indicates the saving path for the converted low precision model.
|
||||
:param tmp_path: (optional) Which path to store the intermediate fp16 model during the
|
||||
conversion process. Defaults to `None`, in which case the intermediate model is not saved.
|
||||
:param **kwargs: keyword arguments which will be passed to the model instance
|
||||
:param kwargs: keyword arguments which will be passed to the model instance
|
||||
|
||||
:return: a model instance
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -18,5 +18,3 @@
|
|||
# physically located elsewhere.
|
||||
# Otherwise there would be module not found error in non-pip's setting as Python would
|
||||
# only search the first bigdl package and end up finding only one sub-package.
|
||||
|
||||
from .utils import *
|
||||
|
|
|
|||
Loading…
Reference in a new issue