--- model_adapter.py.old 2024-03-05 15:08:47.169275336 +0800 +++ model_adapter.py 2024-03-05 15:10:13.434703674 +0800 @@ -1690,15 +1690,17 @@ ) # NOTE: if you use the old version of model file, please remove the comments below # config.use_flash_attn = False - self.float_set(config, "fp16") + # self.float_set(config, "fp16") generation_config = GenerationConfig.from_pretrained( model_path, trust_remote_code=True ) + from ipex_llm.transformers import AutoModelForCausalLM model = AutoModelForCausalLM.from_pretrained( model_path, config=config, low_cpu_mem_usage=True, trust_remote_code=True, + load_in_4bit=True, **from_pretrained_kwargs, ).eval() if hasattr(model.config, "use_dynamic_ntk") and model.config.use_dynamic_ntk: