Optimize transformer int4 memory footprint (#8579)
This commit is contained in:
parent
57e880f63a
commit
feb3af0567
1 changed file with 1 addition and 0 deletions
|
|
@@ -61,6 +61,7 @@ def _replace_with_quant_linear(model, qtype, modules_to_not_convert=None,
|
||||||
name,
|
name,
|
||||||
"cpu",
|
"cpu",
|
||||||
torch.empty(*param.size(), dtype=torch.float32))
|
torch.empty(*param.size(), dtype=torch.float32))
|
||||||
|
del model_state_dict
|
||||||
|
|
||||||
for name, module in model.named_children():
|
for name, module in model.named_children():
|
||||||
if current_key_name is None:
|
if current_key_name is None:
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue