[LLM Doc] Add optimize_model doc in transformers api (#8957)
* add optimize in from_pretrained * add api doc for load_low_bit * update api docs following comments * update api docs * update * reord comments
This commit is contained in:
parent
c32c260ce2
commit
7132ef6081
1 changed file with 15 additions and 1 deletion
|
|
@ -54,13 +54,18 @@ class _BaseAutoModelClass:
|
|||
Load a model from a directory or the HF Hub. With the load_in_4bit or load_in_low_bit parameter,
|
||||
the weights of the model's linear layers can be loaded in a low-bit format, such as int4, int5 and int8.
|
||||
|
||||
Two new arguments are added to extend Hugging Face's from_pretrained method as follows:
|
||||
Three new arguments are added to extend Hugging Face's from_pretrained method as follows:
|
||||
|
||||
:param load_in_4bit: boolean value, True means loading the linear layers' weights as symmetric int4.
|
||||
Defaults to False.
|
||||
:param load_in_low_bit: str value, options are sym_int4, asym_int4, sym_int5, asym_int5
|
||||
or sym_int8. sym_int4 means symmetric int 4, asym_int4 means
|
||||
asymmetric int 4, etc. Relevant low bit optimizations will
|
||||
be applied to the model.
|
||||
:param optimize_model: boolean value, whether to further optimize the low-bit LLM model.
|
||||
Defaults to True.
|
||||
|
||||
:return: a model instance
|
||||
"""
|
||||
pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None) \
|
||||
if len(args) == 0 else args[0]
|
||||
|
|
@ -128,6 +133,15 @@ class _BaseAutoModelClass:
|
|||
pretrained_model_name_or_path,
|
||||
*model_args,
|
||||
**kwargs):
|
||||
"""
|
||||
Load a low-bit optimized model (INT4, INT5 or INT8) from a saved checkpoint.
|
||||
|
||||
:param pretrained_model_name_or_path: str value, path to the saved optimized model checkpoint.
|
||||
:param optimize_model: boolean value, whether to further optimize the low-bit LLM model.
|
||||
Defaults to True.
|
||||
|
||||
:return: a model instance
|
||||
"""
|
||||
from transformers.modeling_utils import no_init_weights, get_state_dict_dtype
|
||||
from transformers.dynamic_module_utils import resolve_trust_remote_code, \
|
||||
get_class_from_dynamic_module
|
||||
|
|
|
|||
Loading…
Reference in a new issue