From 0918d3baca774d17a84ee4b1dd5ac7c2414edb57 Mon Sep 17 00:00:00 2001
From: Yuwen Hu <54161268+Oscilloscope98@users.noreply.github.com>
Date: Thu, 5 Dec 2024 19:19:58 +0800
Subject: [PATCH] [NPU] Fix hf generate with save/load generation config for
 Python (cpp backend) (#12509)

* Fix hf generate with save/load generation config

* Small fix

* Fix based on comments
---
 python/llm/src/ipex_llm/transformers/npu_model.py        | 9 +++++++++
 .../transformers/npu_pipeline_model/convert_pipeline.py  | 2 ++
 2 files changed, 11 insertions(+)

diff --git a/python/llm/src/ipex_llm/transformers/npu_model.py b/python/llm/src/ipex_llm/transformers/npu_model.py
index 9744e2f8..b2b08fe2 100644
--- a/python/llm/src/ipex_llm/transformers/npu_model.py
+++ b/python/llm/src/ipex_llm/transformers/npu_model.py
@@ -440,6 +440,15 @@ class _BaseAutoModelClass:
             model.kv_len = config_dict['kv_len']
             model.vocab_size = config_dict['vocab_size']
             model.logits_buffer = torch.empty(1, 1, model.vocab_size, dtype=torch.float32)
+            if model.can_generate():
+                try:
+                    model.generation_config = GenerationConfig.from_pretrained(
+                        pretrained_model_name_or_path,
+                        subfolder=subfolder,
+                        **kwargs,
+                    )
+                except (OSError, TypeError):
+                    pass
         except:
             invalidInputError(False, "Fail to InitLLMPipeline.")
 
diff --git a/python/llm/src/ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py b/python/llm/src/ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py
index 337736a7..3b223017 100644
--- a/python/llm/src/ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py
+++ b/python/llm/src/ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py
@@ -466,6 +466,8 @@ def convert_llm_for_deploy(model: torch.nn.Module,
                            "lm_head_low_bit": lm_head_low_bit}
         model.config.update(update_dict)
         model.config.save_pretrained(save_directory)
+        if model.can_generate():
+            model.generation_config.save_pretrained(save_directory)
 
     from .qwen import convert_qwen_layer, convert_fused_qwen_layer
     from .qwen import convert_lm_head_and_embedding
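
Editor's note: the sketch below is not part of the patch. It restates the
save/load round trip the two hunks implement, using only public
`transformers` APIs, so the pattern can be read outside the repository's
loading paths. The helper names and `save_directory` value are hypothetical;
in the patch this logic lives inside `load_low_bit` (npu_model.py) and
`convert_llm_for_deploy` (convert_pipeline.py).

from transformers import GenerationConfig


def save_generation_config(model, save_directory: str) -> None:
    # Mirror of the convert_pipeline.py hunk: persist the model's generation
    # defaults (generation_config.json) next to config.json at save time, so
    # generate() behaves the same after the converted model is reloaded.
    if model.can_generate():
        model.generation_config.save_pretrained(save_directory)


def load_generation_config(model, save_directory: str,
                           subfolder: str = "", **kwargs) -> None:
    # Mirror of the npu_model.py hunk: restore the saved generation config if
    # one exists on disk; otherwise keep the config the model already carries.
    if not model.can_generate():
        return
    try:
        model.generation_config = GenerationConfig.from_pretrained(
            save_directory,
            subfolder=subfolder,
            **kwargs,
        )
    except (OSError, TypeError):
        # OSError: no generation_config.json at this path;
        # TypeError: an unexpected kwarg reached from_pretrained.
        # Either way, fall back to the existing generation config, matching
        # how transformers itself handles a missing generation config.
        pass

The narrow except clause is the notable design choice: a missing
generation_config.json is expected for models saved before this change, so
loading degrades gracefully instead of failing the whole pipeline init.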