From af3b575c7e3d9f5be4c515009d5e3db47fd3d0dd Mon Sep 17 00:00:00 2001
From: dingbaorong
Date: Tue, 17 Oct 2023 09:50:26 +0800
Subject: [PATCH] expose modules_to_not_convert in optimize_model (#9180)

* expose modules_to_not_convert in optimize_model

* some fixes
---
 python/llm/src/bigdl/llm/optimize.py           | 9 +++++++--
 python/llm/src/bigdl/llm/transformers/model.py | 2 ++
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/python/llm/src/bigdl/llm/optimize.py b/python/llm/src/bigdl/llm/optimize.py
index b10b8acc..880629a2 100644
--- a/python/llm/src/bigdl/llm/optimize.py
+++ b/python/llm/src/bigdl/llm/optimize.py
@@ -192,7 +192,7 @@ def load_low_bit(model, model_path):
     return model
 
 
-def optimize_model(model, low_bit='sym_int4', optimize_llm=True):
+def optimize_model(model, low_bit='sym_int4', optimize_llm=True, modules_to_not_convert=None):
     """
     A method to optimize any pytorch model.
 
@@ -200,6 +200,8 @@ def optimize_model(model, low_bit='sym_int4', optimize_llm=True):
     :param low_bit: Supported low-bit options are "sym_int4", "asym_int4", "sym_int5", "asym_int5"
         or "sym_int8".
     :param optimize_llm: Whether to further optimize llm model.
+    :param modules_to_not_convert: list of str value, modules (nn.Module) that are skipped
+        when conducting model optimizations. Default to be None.
 
     :return: The optimized model.
 
@@ -222,7 +224,10 @@ def optimize_model(model, low_bit='sym_int4', optimize_llm=True):
                       "Expect model on device `cpu`, "
                       f"but got device type {model.device.type}")
     qtype = ggml_tensor_qtype[low_bit]
-    model = ggml_convert_low_bit(model, qtype=qtype, optimize_model=optimize_llm)
+    model = ggml_convert_low_bit(model,
+                                 qtype=qtype,
+                                 optimize_model=optimize_llm,
+                                 modules_to_not_convert=modules_to_not_convert)
     # add save_low_bit to pretrained model dynamically
     import types
     model._bigdl_config = dict()
diff --git a/python/llm/src/bigdl/llm/transformers/model.py b/python/llm/src/bigdl/llm/transformers/model.py
index 51b2f656..76dea7fc 100644
--- a/python/llm/src/bigdl/llm/transformers/model.py
+++ b/python/llm/src/bigdl/llm/transformers/model.py
@@ -65,6 +65,8 @@ class _BaseAutoModelClass:
             be applied to the model.
         :param optimize_model: boolean value, Whether to further optimize the low_bit llm model.
             Default to be True.
+        :param modules_to_not_convert: list of str value, modules (nn.Module) that are skipped when
+            conducting model optimizations. Default to be None.
 
         :return: a model instance
         """
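
Not part of the patch: a minimal usage sketch of the new modules_to_not_convert parameter. The
model checkpoint name and the "lm_head" module name below are illustrative assumptions, not
taken from this change; the import path follows the patched file python/llm/src/bigdl/llm/optimize.py.

    import torch
    from transformers import AutoModelForCausalLM
    from bigdl.llm.optimize import optimize_model

    # Load any PyTorch model on CPU; optimize_model() expects model.device.type == "cpu".
    model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf",
                                                 torch_dtype=torch.float16)

    # Quantize to sym_int4 while skipping the (hypothetical) "lm_head" module,
    # which is passed straight through to ggml_convert_low_bit() by this patch.
    model = optimize_model(model,
                           low_bit='sym_int4',
                           optimize_llm=True,
                           modules_to_not_convert=["lm_head"])

Leaving modules_to_not_convert as None (the default) preserves the previous behavior, where all
supported modules are converted to the requested low-bit format.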