diff --git a/python/llm/src/bigdl/llm/transformers/convert.py b/python/llm/src/bigdl/llm/transformers/convert.py
index 5c973c6e..98078146 100644
--- a/python/llm/src/bigdl/llm/transformers/convert.py
+++ b/python/llm/src/bigdl/llm/transformers/convert.py
@@ -258,6 +258,8 @@ def _replace_with_low_bit_linear(model, qtype, modules_to_not_convert=None,
             # fp16 may generalize to other sizes later
             if new_linear is not None:
+                if not module.training:
+                    new_linear.eval()
                 model._modules[name] = new_linear
                 has_been_replaced = True
                 # Force requires grad to False to avoid unexpected errors
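
For context, a minimal sketch of why this two-line guard matters (the module names below are illustrative stand-ins, not the actual bigdl-llm classes): a freshly constructed `nn.Module` starts in training mode, so swapping a new module into an already-`eval()`'d model would otherwise silently flip that submodule back to training behavior. Copying the original module's mode, as the patch does, preserves the caller's intent.

```python
import torch.nn as nn

model = nn.Sequential(nn.Linear(4, 4))
model.eval()                      # user has put the model in inference mode

old = model._modules["0"]
new_linear = nn.Linear(4, 4)      # stand-in for the low-bit replacement layer
print(new_linear.training)        # True -- fresh modules default to training mode

if not old.training:              # mirror the original module's mode,
    new_linear.eval()             # as the patched convert.py now does
model._modules["0"] = new_linear
print(model._modules["0"].training)  # False -- eval mode is preserved
```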