[LLM Doc] Add optimize_model doc in transformers api (#8957)
* add optimize in from_pretrained
* add api doc for load_low_bit
* update api docs following comments
* update api docs
* update
* reorder comments
This commit is contained in:
parent c32c260ce2
commit 7132ef6081
1 changed file with 15 additions and 1 deletion
@@ -54,13 +54,18 @@ class _BaseAutoModelClass:
         Load a model from a directory or the HF Hub. Use load_in_4bit or load_in_low_bit parameter
         the weight of model's linears can be loaded to low-bit format, like int4, int5 and int8.
 
-        Two new arguments are added to extend Hugging Face's from_pretrained method as follows:
+        Three new arguments are added to extend Hugging Face's from_pretrained method as follows:
 
         :param load_in_4bit: boolean value, True means load linear's weight to symmetric int 4.
+                             Default to be False.
         :param load_in_low_bit: str value, options are sym_int4, asym_int4, sym_int5, asym_int5
                                 or sym_int8. sym_int4 means symmetric int 4, asym_int4 means
                                 asymmetric int 4, etc. Relevant low bit optimizations will
                                 be applied to the model.
+        :param optimize_model: boolean value, Whether to further optimize the low_bit llm model.
+                               Default to be True.
+
+        :return: a model instance
         """
         pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None) \
             if len(args) == 0 else args[0]
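For reference, a minimal usage sketch of the extended from_pretrained API documented above. The import path (bigdl.llm.transformers) and the model id are assumptions for illustration only; they are not part of this diff.

    # Illustrative only: import path and model id are assumed, not taken from this diff.
    from bigdl.llm.transformers import AutoModelForCausalLM

    # Load with symmetric int4 weights; optimize_model defaults to True, shown explicitly here.
    model = AutoModelForCausalLM.from_pretrained(
        "meta-llama/Llama-2-7b-chat-hf",
        load_in_4bit=True,
        optimize_model=True,
    )

    # Or pick an explicit low-bit format via load_in_low_bit
    # (sym_int4, asym_int4, sym_int5, asym_int5 or sym_int8).
    model_int8 = AutoModelForCausalLM.from_pretrained(
        "meta-llama/Llama-2-7b-chat-hf",
        load_in_low_bit="sym_int8",
    )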
@@ -128,6 +133,15 @@ class _BaseAutoModelClass:
                      pretrained_model_name_or_path,
                      *model_args,
                      **kwargs):
+        """
+        Load a low bit optimized model (including INT4, INT5 and INT8) from a saved ckpt.
+
+        :param pretrained_model_name_or_path: str value, Path to load the optimized model ckpt.
+        :param optimize_model: boolean value, Whether to further optimize the low_bit llm model.
+                               Default to be True.
+
+        :return: a model instance
+        """
         from transformers.modeling_utils import no_init_weights, get_state_dict_dtype
         from transformers.dynamic_module_utils import resolve_trust_remote_code, \
             get_class_from_dynamic_module
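The new load_low_bit docstring covers the reload path: a model that was already converted to a low-bit format and saved can be restored directly from that checkpoint instead of being re-quantized. A minimal sketch, again assuming the bigdl.llm.transformers import path and a hypothetical checkpoint directory:

    # Illustrative only: import path and checkpoint directory are assumed.
    from bigdl.llm.transformers import AutoModelForCausalLM

    # Reload a previously saved low-bit (e.g. sym_int4) checkpoint.
    model = AutoModelForCausalLM.load_low_bit(
        "./llama2-7b-sym-int4-ckpt",
        optimize_model=True,  # further optimize the low-bit model (default True)
    )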