From 7132ef60813fc74179b6664fedf7a1b9cbbae948 Mon Sep 17 00:00:00 2001 From: SONG Ge <38711238+sgwhat@users.noreply.github.com> Date: Wed, 13 Sep 2023 10:42:33 +0800 Subject: [PATCH] [LLM Doc] Add optimize_model doc in transformers api (#8957) * add optimize in from_pretrained * add api doc for load_low_bit * update api docs following comments * update api docs * update * reord comments --- python/llm/src/bigdl/llm/transformers/model.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/python/llm/src/bigdl/llm/transformers/model.py b/python/llm/src/bigdl/llm/transformers/model.py index 18eeb693..92155626 100644 --- a/python/llm/src/bigdl/llm/transformers/model.py +++ b/python/llm/src/bigdl/llm/transformers/model.py @@ -54,13 +54,18 @@ class _BaseAutoModelClass: Load a model from a directory or the HF Hub. Use load_in_4bit or load_in_low_bit parameter the weight of model's linears can be loaded to low-bit format, like int4, int5 and int8. - Two new arguments are added to extend Hugging Face's from_pretrained method as follows: + Three new arguments are added to extend Hugging Face's from_pretrained method as follows: :param load_in_4bit: boolean value, True means load linear's weight to symmetric int 4. + Defaults to False. :param load_in_low_bit: str value, options are sym_int4, asym_int4, sym_int5, asym_int5 or sym_int8. sym_int4 means symmetric int 4, asym_int4 means asymmetric int 4, etc. Relevant low bit optimizations will be applied to the model. + :param optimize_model: boolean value, whether to further optimize the low-bit LLM model. + Defaults to True. + + :return: a model instance """ pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None) \ if len(args) == 0 else args[0] @@ -128,6 +133,15 @@ class _BaseAutoModelClass: pretrained_model_name_or_path, *model_args, **kwargs): + """ + Load a low-bit optimized model (including INT4, INT5 and INT8) from a saved checkpoint. 
+ + :param pretrained_model_name_or_path: str value, path to the optimized model checkpoint to load. + :param optimize_model: boolean value, whether to further optimize the low-bit LLM model. + Defaults to True. + + :return: a model instance + """ from transformers.modeling_utils import no_init_weights, get_state_dict_dtype from transformers.dynamic_module_utils import resolve_trust_remote_code, \ get_class_from_dynamic_module