ipex-llm/docker/llm/serving/cpu/docker/model_adapter.py.patch
Wang, Jian4 9df70d95eb
Refactor bigdl.llm to ipex_llm (#24)
* Rename bigdl/llm to ipex_llm

* Remove python/llm/src/bigdl

* Change `from bigdl.llm` imports to `from ipex_llm`
2024-03-22 15:41:21 +08:00

21 lines
922 B
Diff

--- model_adapter.py.old 2024-03-05 15:08:47.169275336 +0800
+++ model_adapter.py 2024-03-05 15:10:13.434703674 +0800
@@ -1690,15 +1690,17 @@
)
# NOTE: if you use the old version of model file, please remove the comments below
# config.use_flash_attn = False
- self.float_set(config, "fp16")
+ # self.float_set(config, "fp16")
generation_config = GenerationConfig.from_pretrained(
model_path, trust_remote_code=True
)
+ from ipex_llm.transformers import AutoModelForCausalLM
model = AutoModelForCausalLM.from_pretrained(
model_path,
config=config,
low_cpu_mem_usage=True,
trust_remote_code=True,
+ load_in_4bit=True,
**from_pretrained_kwargs,
).eval()
if hasattr(model.config, "use_dynamic_ntk") and model.config.use_dynamic_ntk: