Optimize transformer int4 memory footprint (#8579)

This commit is contained in:
Yang Wang 2023-07-21 11:22:13 +08:00 committed by GitHub
parent 57e880f63a
commit feb3af0567

View file

@@ -61,6 +61,7 @@ def _replace_with_quant_linear(model, qtype, modules_to_not_convert=None,
name,
"cpu",
torch.empty(*param.size(), dtype=torch.float32))
del model_state_dict
for name, module in model.named_children():
if current_key_name is None: