diff --git a/docs/readthedocs/source/doc/PythonAPI/LLM/optimize.rst b/docs/readthedocs/source/doc/PythonAPI/LLM/optimize.rst
index 01903ada..fa6ba7fa 100644
--- a/docs/readthedocs/source/doc/PythonAPI/LLM/optimize.rst
+++ b/docs/readthedocs/source/doc/PythonAPI/LLM/optimize.rst
@@ -1,7 +1,7 @@
 BigDL-LLM PyTorch API
 =====================
 
-llm.optimize
+optimize model
 ----------------------------------------
 
 .. automodule:: bigdl.llm.optimize
diff --git a/docs/readthedocs/source/doc/PythonAPI/LLM/transformers.rst b/docs/readthedocs/source/doc/PythonAPI/LLM/transformers.rst
index 62b05ac7..23aa10a3 100644
--- a/docs/readthedocs/source/doc/PythonAPI/LLM/transformers.rst
+++ b/docs/readthedocs/source/doc/PythonAPI/LLM/transformers.rst
@@ -1,10 +1,16 @@
-BigDL-LLM `transformers`-style API
-=====================
+BigDL-LLM ``transformers``-style API
+====================================
 
-llm.transformers.model
----------------------------
+Hugging Face ``transformers`` AutoModel
+------------------------------------
 
-.. autoclass:: bigdl.llm.transformers.model.AutoModelForCausalLM
+You can apply BigDL-LLM optimizations on any Hugging Face Transformers models by using the standard AutoModel APIs.
+
+
+AutoModelForCausalLM
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: bigdl.llm.transformers.AutoModelForCausalLM
     :members:
     :undoc-members:
     :show-inheritance:
@@ -13,8 +19,10 @@ llm.transformers.model
     .. automethod:: load_convert
     .. automethod:: load_low_bit
 
+AutoModel
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-.. autoclass:: bigdl.llm.transformers.model.AutoModel
+.. autoclass:: bigdl.llm.transformers.AutoModel
     :members:
     :undoc-members:
     :show-inheritance:
@@ -23,8 +31,10 @@ llm.transformers.model
     .. automethod:: load_convert
     .. automethod:: load_low_bit
 
+AutoModelForSpeechSeq2Seq
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-.. autoclass:: bigdl.llm.transformers.model.AutoModelForSpeechSeq2Seq
+.. autoclass:: bigdl.llm.transformers.AutoModelForSpeechSeq2Seq
    :members:
    :undoc-members:
    :show-inheritance:
@@ -33,8 +43,10 @@ llm.transformers.model
    .. automethod:: load_convert
    .. automethod:: load_low_bit
 
+AutoModelForSeq2SeqLM
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-.. autoclass:: bigdl.llm.transformers.model.AutoModelForSeq2SeqLM
+.. autoclass:: bigdl.llm.transformers.AutoModelForSeq2SeqLM
    :members:
    :undoc-members:
    :show-inheritance:
@@ -45,48 +57,59 @@ llm.transformers.model
 
 
-llm.transformers.modelling_bigdl
+Native Model
 ----------------------------------------
 
-.. autoclass:: bigdl.llm.transformers.modelling_bigdl.LlamaForCausalLM
-    :members:
-    :undoc-members:
-    :show-inheritance:
-    :exclude-members: GGML_Model, GGML_Module, HF_Class
-
-    .. automethod:: from_pretrained
+For ``llama``/``chatglm``/``bloom``/``gptneox``/``starcoder`` model families, you may also convert and run LLM using the native (cpp) implementation for maximum performance.
 
-.. autoclass:: bigdl.llm.transformers.modelling_bigdl.ChatGLMForCausalLM
-    :members:
-    :undoc-members:
-    :show-inheritance:
-    :exclude-members: GGML_Model, GGML_Module, HF_Class
+.. tabs::
 
-    .. automethod:: from_pretrained
+    .. tab:: Llama
 
+        .. autoclass:: bigdl.llm.transformers.LlamaForCausalLM
+            :members:
+            :undoc-members:
+            :show-inheritance:
+            :exclude-members: GGML_Model, GGML_Module, HF_Class
 
-.. autoclass:: bigdl.llm.transformers.modelling_bigdl.GptneoxForCausalLM
-    :members:
-    :undoc-members:
-    :show-inheritance:
-    :exclude-members: GGML_Model, GGML_Module, HF_Class
+            .. automethod:: from_pretrained
 
-    .. automethod:: from_pretrained
+    .. tab:: ChatGLM
 
+        .. autoclass:: bigdl.llm.transformers.ChatGLMForCausalLM
+            :members:
+            :undoc-members:
+            :show-inheritance:
+            :exclude-members: GGML_Model, GGML_Module, HF_Class
 
-.. autoclass:: bigdl.llm.transformers.modelling_bigdl.BloomForCausalLM
-    :members:
-    :undoc-members:
-    :show-inheritance:
-    :exclude-members: GGML_Model, GGML_Module, HF_Class
+            .. automethod:: from_pretrained
 
-    .. automethod:: from_pretrained
+    .. tab:: Gptneox
 
-.. autoclass:: bigdl.llm.transformers.modelling_bigdl.StarcoderForCausalLM
-    :members:
-    :undoc-members:
-    :show-inheritance:
-    :exclude-members: GGML_Model, GGML_Module, HF_Class
+        .. autoclass:: bigdl.llm.transformers.GptneoxForCausalLM
+            :members:
+            :undoc-members:
+            :show-inheritance:
+            :exclude-members: GGML_Model, GGML_Module, HF_Class
 
-    .. automethod:: from_pretrained
+            .. automethod:: from_pretrained
+
+    .. tab:: Bloom
+        .. autoclass:: bigdl.llm.transformers.BloomForCausalLM
+            :members:
+            :undoc-members:
+            :show-inheritance:
+            :exclude-members: GGML_Model, GGML_Module, HF_Class
+
+            .. automethod:: from_pretrained
+
+    .. tab:: Starcoder
+
+        .. autoclass:: bigdl.llm.transformers.StarcoderForCausalLM
+            :members:
+            :undoc-members:
+            :show-inheritance:
+            :exclude-members: GGML_Model, GGML_Module, HF_Class
+
+            .. automethod:: from_pretrained
 
 
diff --git a/python/llm/src/bigdl/llm/utils/lazy_load_torch.py b/python/llm/src/bigdl/llm/utils/lazy_load_torch.py
index 4d205b68..eda2d6de 100644
--- a/python/llm/src/bigdl/llm/utils/lazy_load_torch.py
+++ b/python/llm/src/bigdl/llm/utils/lazy_load_torch.py
@@ -46,7 +46,7 @@ from torch.serialization import StorageType
 import pickle
 import zipfile
 import io
-from typing import Dict, IO, Any, Callable
+from typing import Dict, IO, Any, Callable, List
 from dataclasses import dataclass
 from .common import invalidInputError
 
@@ -69,7 +69,7 @@ class LazyStorage:
 @dataclass
 class LazyTensor:
     _load: Callable[[], torch.Tensor]
-    shape: list[int]
+    shape: List[int]
     data_type: torch.dtype
     description: str
 