expose modules_to_not_convert in optimize_model (#9180)

* expose modules_to_not_convert in optimize_model

* some fixes
This commit is contained in:
dingbaorong 2023-10-17 09:50:26 +08:00 committed by GitHub
parent 5ca8a851e9
commit af3b575c7e
2 changed files with 9 additions and 2 deletions

View file

@@ -192,7 +192,7 @@ def load_low_bit(model, model_path):
return model
def optimize_model(model, low_bit='sym_int4', optimize_llm=True):
def optimize_model(model, low_bit='sym_int4', optimize_llm=True, modules_to_not_convert=None):
"""
A method to optimize any pytorch model.
@@ -200,6 +200,8 @@ def optimize_model(model, low_bit='sym_int4', optimize_llm=True):
:param low_bit: Supported low-bit options are "sym_int4", "asym_int4", "sym_int5",
"asym_int5" or "sym_int8".
:param optimize_llm: Whether to further optimize llm model.
:param modules_to_not_convert: list of str value, modules (nn.Module) that are skipped
when conducting model optimizations. Default to be None.
:return: The optimized model.
@@ -222,7 +224,10 @@ def optimize_model(model, low_bit='sym_int4', optimize_llm=True):
"Expect model on device `cpu`, "
f"but got device type {model.device.type}")
qtype = ggml_tensor_qtype[low_bit]
model = ggml_convert_low_bit(model, qtype=qtype, optimize_model=optimize_llm)
model = ggml_convert_low_bit(model,
qtype=qtype,
optimize_model=optimize_llm,
modules_to_not_convert=modules_to_not_convert)
# add save_low_bit to pretrained model dynamically
import types
model._bigdl_config = dict()

View file

@@ -65,6 +65,8 @@ class _BaseAutoModelClass:
be applied to the model.
:param optimize_model: boolean value, Whether to further optimize the low_bit llm model.
Default to be True.
:param modules_to_not_convert: list of str value, modules (nn.Module) that are skipped when
conducting model optimizations. Default to be None.
:return: a model instance
"""