From 7132ef60813fc74179b6664fedf7a1b9cbbae948 Mon Sep 17 00:00:00 2001
From: SONG Ge <38711238+sgwhat@users.noreply.github.com>
Date: Wed, 13 Sep 2023 10:42:33 +0800
Subject: [PATCH] [LLM Doc] Add optimize_model doc in transformers api (#8957)

* add optimize in from_pretrained

* add api doc for load_low_bit

* update api docs following comments

* update api docs

* update

* reord comments
---
 python/llm/src/bigdl/llm/transformers/model.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/python/llm/src/bigdl/llm/transformers/model.py b/python/llm/src/bigdl/llm/transformers/model.py
index 18eeb693..92155626 100644
--- a/python/llm/src/bigdl/llm/transformers/model.py
+++ b/python/llm/src/bigdl/llm/transformers/model.py
@@ -54,13 +54,18 @@ class _BaseAutoModelClass:
         Load a model from a directory or the HF Hub. Use load_in_4bit or load_in_low_bit parameter
         the weight of model's linears can be loaded to low-bit format, like int4, int5 and int8.
 
-        Two new arguments are added to extend Hugging Face's from_pretrained method as follows:
+        Three new arguments are added to extend Hugging Face's from_pretrained method as follows:
 
         :param load_in_4bit: boolean value, True means load linear's weight to symmetric int 4.
+                             Defaults to False.
         :param load_in_low_bit: str value, options are sym_int4, asym_int4, sym_int5, asym_int5
                                 or sym_int8. sym_int4 means symmetric int 4, asym_int4 means
                                 asymmetric int 4, etc. Relevant low bit optimizations will
                                 be applied to the model.
+        :param optimize_model: boolean value, whether to further optimize the low-bit LLM model.
+                               Defaults to True.
+
+        :return: a model instance
         """
         pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None) \
             if len(args) == 0 else args[0]
@@ -128,6 +133,15 @@ class _BaseAutoModelClass:
                      pretrained_model_name_or_path,
                      *model_args,
                      **kwargs):
+        """
+        Load a low-bit optimized model (INT4, INT5 or INT8) from a saved checkpoint.
+
+        :param pretrained_model_name_or_path: str value, path to the saved optimized model checkpoint.
+        :param optimize_model: boolean value, whether to further optimize the low-bit LLM model.
+                               Defaults to True.
+
+        :return: a model instance
+        """
         from transformers.modeling_utils import no_init_weights, get_state_dict_dtype
         from transformers.dynamic_module_utils import resolve_trust_remote_code, \
             get_class_from_dynamic_module