[NPU] Fix hf generate with save/load generation config for Python (cpp backend) (#12509)
* Fix hf generate with save/load generation config
* Small fix
* Fix based on comments
This commit is contained in:
parent 49ab8974fa
commit 0918d3baca
					 2 changed files with 11 additions and 0 deletions
				
			
		| 
						 | 
				
			
			@ -440,6 +440,15 @@ class _BaseAutoModelClass:
 | 
			
		|||
                model.kv_len = config_dict['kv_len']
 | 
			
		||||
                model.vocab_size = config_dict['vocab_size']
 | 
			
		||||
                model.logits_buffer = torch.empty(1, 1, model.vocab_size, dtype=torch.float32)
 | 
			
		||||
                if model.can_generate():
 | 
			
		||||
                    try:
 | 
			
		||||
                        model.generation_config = GenerationConfig.from_pretrained(
 | 
			
		||||
                            pretrained_model_name_or_path,
 | 
			
		||||
                            subfolder=subfolder,
 | 
			
		||||
                            **kwargs,
 | 
			
		||||
                        )
 | 
			
		||||
                    except (OSError, TypeError):
 | 
			
		||||
                        pass
 | 
			
		||||
            except:
 | 
			
		||||
                invalidInputError(False,
 | 
			
		||||
                                  "Fail to InitLLMPipeline.")
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -466,6 +466,8 @@ def convert_llm_for_deploy(model: torch.nn.Module,
 | 
			
		|||
                       "lm_head_low_bit": lm_head_low_bit}
 | 
			
		||||
        model.config.update(update_dict)
 | 
			
		||||
        model.config.save_pretrained(save_directory)
 | 
			
		||||
        if model.can_generate():
 | 
			
		||||
            model.generation_config.save_pretrained(save_directory)
 | 
			
		||||
 | 
			
		||||
        from .qwen import convert_qwen_layer, convert_fused_qwen_layer
 | 
			
		||||
        from .qwen import convert_lm_head_and_embedding
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue