diff --git a/python/llm/src/ipex_llm/transformers/npu_model.py b/python/llm/src/ipex_llm/transformers/npu_model.py
index 48df4d48..eeda4ae5 100644
--- a/python/llm/src/ipex_llm/transformers/npu_model.py
+++ b/python/llm/src/ipex_llm/transformers/npu_model.py
@@ -49,7 +49,6 @@ def save_low_bit(self, model_dir: str, *args, **kwargs):
     kwargs["safe_serialization"] = False
     self.save_pretrained(model_dir, *args, **kwargs)
     import json
-    import os
     # We conveniently save all the keys of the model to have them on hand,
     # so that when using 'low_cpumem load',
@@ -391,7 +390,6 @@ class _BaseAutoModelClass:
         if is_sharded:
             loaded_state_dict_keys = sharded_metadata["all_checkpoint_keys"]
         else:
-            import os
             import json
             with open(