From af3b575c7e3d9f5be4c515009d5e3db47fd3d0dd Mon Sep 17 00:00:00 2001
From: dingbaorong
Date: Tue, 17 Oct 2023 09:50:26 +0800
Subject: [PATCH] expose modules_to_not_convert in optimize_model (#9180)

* expose modules_to_not_convert in optimize_model

* some fixes
---
 python/llm/src/bigdl/llm/optimize.py           | 9 +++++++--
 python/llm/src/bigdl/llm/transformers/model.py | 2 ++
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/python/llm/src/bigdl/llm/optimize.py b/python/llm/src/bigdl/llm/optimize.py
index b10b8acc..880629a2 100644
--- a/python/llm/src/bigdl/llm/optimize.py
+++ b/python/llm/src/bigdl/llm/optimize.py
@@ -192,7 +192,7 @@ def load_low_bit(model, model_path):
     return model
 
 
-def optimize_model(model, low_bit='sym_int4', optimize_llm=True):
+def optimize_model(model, low_bit='sym_int4', optimize_llm=True, modules_to_not_convert=None):
     """
     A method to optimize any pytorch model.
 
@@ -200,6 +200,8 @@ def optimize_model(model, low_bit='sym_int4', optimize_llm=True):
     :param low_bit: Supported low-bit options are "sym_int4", "asym_int4", "sym_int5", "asym_int5"
         or "sym_int8".
     :param optimize_llm: Whether to further optimize llm model.
+    :param modules_to_not_convert: list of str value, modules (nn.Module) that are skipped
+        when conducting model optimizations. Default to be None.
 
     :return: The optimized model.
 
@@ -222,7 +224,10 @@ def optimize_model(model, low_bit='sym_int4', optimize_llm=True):
                       "Expect model on device `cpu`, "
                       f"but got device type {model.device.type}")
     qtype = ggml_tensor_qtype[low_bit]
-    model = ggml_convert_low_bit(model, qtype=qtype, optimize_model=optimize_llm)
+    model = ggml_convert_low_bit(model,
+                                 qtype=qtype,
+                                 optimize_model=optimize_llm,
+                                 modules_to_not_convert=modules_to_not_convert)
     # add save_low_bit to pretrained model dynamically
     import types
     model._bigdl_config = dict()
diff --git a/python/llm/src/bigdl/llm/transformers/model.py b/python/llm/src/bigdl/llm/transformers/model.py
index 51b2f656..76dea7fc 100644
--- a/python/llm/src/bigdl/llm/transformers/model.py
+++ b/python/llm/src/bigdl/llm/transformers/model.py
@@ -65,6 +65,8 @@ class _BaseAutoModelClass:
             be applied to the model.
         :param optimize_model: boolean value, Whether to further optimize the low_bit llm model.
             Default to be True.
+        :param modules_to_not_convert: list of str value, modules (nn.Module) that are skipped when
+            conducting model optimizations. Default to be None.
 
         :return: a model instance
         """
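
Not part of the patch: a minimal usage sketch of the new modules_to_not_convert parameter. The
model checkpoint name and the "lm_head" module name below are illustrative assumptions, not
taken from this change; the import path follows the patched file python/llm/src/bigdl/llm/optimize.py.

    import torch
    from transformers import AutoModelForCausalLM
    from bigdl.llm.optimize import optimize_model

    # Load any PyTorch model on CPU; optimize_model() expects model.device.type == "cpu".
    model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf",
                                                 torch_dtype=torch.float16)

    # Quantize to sym_int4 while skipping the (hypothetical) "lm_head" module,
    # which is passed straight through to ggml_convert_low_bit() by this patch.
    model = optimize_model(model,
                           low_bit='sym_int4',
                           optimize_llm=True,
                           modules_to_not_convert=["lm_head"])

Leaving modules_to_not_convert as None (the default) preserves the previous behavior, where all
supported modules are converted to the requested low-bit format.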