Optimize transformer int4 memory footprint (#8579)

2023-07-21 11:22:13 +08:00 · 2023-07-21 11:22:13 +08:00 · feb3af0567
commit feb3af0567
parent 57e880f63a
1 changed file with 1 addition and 0 deletions
--- a/python/llm/src/bigdl/llm/transformers/convert.py
+++ b/python/llm/src/bigdl/llm/transformers/convert.py
@@ -61,6 +61,7 @@ def _replace_with_quant_linear(model, qtype, modules_to_not_convert=None,
                                        name,
                                        "cpu",
                                        torch.empty(*param.size(), dtype=torch.float32))
+    del model_state_dict

    for name, module in model.named_children():
        if current_key_name is None: