[LLM Doc] Add optimize_model doc in transformers api (#8957)
* add optimize in from_pretrained
* add api doc for load_low_bit
* update api docs following comments
* update api docs
* update
* reorder comments
This commit is contained in:
parent c32c260ce2
commit 7132ef6081
1 changed file with 15 additions and 1 deletion
@@ -54,13 +54,18 @@ class _BaseAutoModelClass:
         Load a model from a directory or the HF Hub. Use load_in_4bit or load_in_low_bit parameter
         the weight of model's linears can be loaded to low-bit format, like int4, int5 and int8.
 
-        Two new arguments are added to extend Hugging Face's from_pretrained method as follows:
+        Three new arguments are added to extend Hugging Face's from_pretrained method as follows:
 
         :param load_in_4bit: boolean value, True means load linear's weight to symmetric int 4.
+                             Default to be False.
         :param load_in_low_bit: str value, options are sym_int4, asym_int4, sym_int5, asym_int5
                                 or sym_int8. sym_int4 means symmetric int 4, asym_int4 means
                                 asymmetric int 4, etc. Relevant low bit optimizations will
                                 be applied to the model.
+        :param optimize_model: boolean value, Whether to further optimize the low_bit llm model.
+                               Default to be True.
+
+        :return: a model instance
         """
         pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None) \
             if len(args) == 0 else args[0]
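For reference, a minimal usage sketch of the extended from_pretrained API documented above. The import path (bigdl.llm.transformers) and the model id are assumptions for illustration only; they are not part of this diff.

    # Illustrative only: import path and model id are assumed, not taken from this diff.
    from bigdl.llm.transformers import AutoModelForCausalLM

    # Load with symmetric int4 weights; optimize_model defaults to True, shown explicitly here.
    model = AutoModelForCausalLM.from_pretrained(
        "meta-llama/Llama-2-7b-chat-hf",
        load_in_4bit=True,
        optimize_model=True,
    )

    # Or pick an explicit low-bit format via load_in_low_bit
    # (sym_int4, asym_int4, sym_int5, asym_int5 or sym_int8).
    model_int8 = AutoModelForCausalLM.from_pretrained(
        "meta-llama/Llama-2-7b-chat-hf",
        load_in_low_bit="sym_int8",
    )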
@@ -128,6 +133,15 @@ class _BaseAutoModelClass:
                      pretrained_model_name_or_path,
                      *model_args,
                      **kwargs):
+        """
+        Load a low bit optimized model (including INT4, INT5 and INT8) from a saved ckpt.
+
+        :param pretrained_model_name_or_path: str value, Path to load the optimized model ckpt.
+        :param optimize_model: boolean value, Whether to further optimize the low_bit llm model.
+                               Default to be True.
+
+        :return: a model instance
+        """
         from transformers.modeling_utils import no_init_weights, get_state_dict_dtype
         from transformers.dynamic_module_utils import resolve_trust_remote_code, \
             get_class_from_dynamic_module
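The new load_low_bit docstring covers the reload path: a model that was already converted to a low-bit format and saved can be restored directly from that checkpoint instead of being re-quantized. A minimal sketch, again assuming the bigdl.llm.transformers import path and a hypothetical checkpoint directory:

    # Illustrative only: import path and checkpoint directory are assumed.
    from bigdl.llm.transformers import AutoModelForCausalLM

    # Reload a previously saved low-bit (e.g. sym_int4) checkpoint.
    model = AutoModelForCausalLM.load_low_bit(
        "./llama2-7b-sym-int4-ckpt",
        optimize_model=True,  # further optimize the low-bit model (default True)
    )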