Fix NPU LLM example save/load tokenizer (#12485)

parent 5fe766788e
commit 7082844f3f

11 changed files with 33 additions and 11 deletions
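Previously, each example loaded the tokenizer from the original model path on every run, so running against an already-saved low-bit model still required the original checkpoint. The fix saves the tokenizer next to the converted model on the first run and reloads it from the save directory afterwards. Below is a minimal sketch of the pattern, assuming the import path and argument names used by these examples; the real scripts pass additional NPU-specific arguments (the diff itself does not show the import block):

import argparse
import os

from transformers import AutoTokenizer
# Import path is an assumption based on ipex-llm's NPU examples.
from ipex_llm.transformers.npu_model import AutoModelForCausalLM

parser = argparse.ArgumentParser()
parser.add_argument("--repo-id-or-model-path", type=str, required=True)
parser.add_argument("--save-directory", type=str, required=True)
args = parser.parse_args()
model_path = args.repo_id_or_model_path

if not os.path.exists(args.save_directory):
    # First run: convert and save the model, then save the tokenizer into
    # the same directory (the two lines this commit adds to each example).
    model = AutoModelForCausalLM.from_pretrained(model_path,
                                                 trust_remote_code=True,
                                                 save_directory=args.save_directory)
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    tokenizer.save_pretrained(args.save_directory)
else:
    # Later runs: load both model and tokenizer from the save directory
    # instead of reading the tokenizer from the original model path.
    model = AutoModelForCausalLM.load_low_bit(args.save_directory,
                                              trust_remote_code=True)
    tokenizer = AutoTokenizer.from_pretrained(args.save_directory,
                                              trust_remote_code=True)

With this change the save directory is self-contained: reloading a converted model no longer depends on the original model files being present.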
@@ -79,6 +79,8 @@ if __name__ == "__main__":
                                                      transpose_value_cache=not args.disable_transpose_value_cache,
                                                      trust_remote_code=True,
                                                      save_directory=args.save_directory)
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.save_directory)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.save_directory,
@@ -90,8 +92,8 @@ if __name__ == "__main__":
             transpose_value_cache=not args.disable_transpose_value_cache,
             trust_remote_code=True
         )
+        tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
-    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
     if args.disable_streaming:
         streamer = None

@@ -78,6 +78,8 @@ if __name__ == "__main__":
                                                      attn_implementation="eager",
                                                      transpose_value_cache=not args.disable_transpose_value_cache,
                                                      save_directory=args.save_directory)
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.save_directory)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.save_directory,
@@ -88,8 +90,8 @@ if __name__ == "__main__":
             pipeline=True,
             transpose_value_cache=not args.disable_transpose_value_cache,
         )
+        tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
-    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
     if args.disable_streaming:
         streamer = None

@@ -84,6 +84,8 @@ if __name__ == "__main__":
                                                     attn_implementation="eager",
                                                     transpose_value_cache=not args.disable_transpose_value_cache,
                                                     save_directory=args.save_directory)
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.save_directory)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.save_directory,
@@ -94,8 +96,8 @@ if __name__ == "__main__":
             pipeline=True,
             transpose_value_cache=not args.disable_transpose_value_cache,
         )
+        tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
-    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
     if args.disable_streaming:
         streamer = None

@@ -66,6 +66,8 @@ if __name__ == "__main__":
                                                      transpose_value_cache=not args.disable_transpose_value_cache,
                                                      trust_remote_code=True,
                                                      save_directory=args.save_directory)
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.save_directory)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.save_directory,
@@ -77,8 +79,8 @@ if __name__ == "__main__":
             transpose_value_cache=not args.disable_transpose_value_cache,
             trust_remote_code=True
         )
+        tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
-    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
     if args.disable_streaming:
         streamer = None

@@ -70,6 +70,8 @@ if __name__ == "__main__":
                                                      mixed_precision=True,
                                                      trust_remote_code=True,
                                                      save_directory=args.save_directory)
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.save_directory)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.save_directory,
@@ -79,8 +81,8 @@ if __name__ == "__main__":
             max_prompt_len=args.max_prompt_len,
             pipeline=True,
             transpose_value_cache=not args.disable_transpose_value_cache)
+        tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
-    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
     if args.disable_streaming:
         streamer = None

@@ -79,6 +79,8 @@ if __name__ == "__main__":
             transpose_value_cache=not args.disable_transpose_value_cache,
             save_directory=args.save_directory
         )
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.save_directory)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.save_directory,
@@ -90,8 +92,8 @@ if __name__ == "__main__":
             transpose_value_cache=not args.disable_transpose_value_cache,
             trust_remote_code=True,
         )
+        tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
-    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
     DEFAULT_SYSTEM_PROMPT = """\
     """

@@ -43,7 +43,6 @@ if __name__ == '__main__':
     args = parser.parse_args()
     model_path = args.repo_id_or_model_path
 
-    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
     if not args.lowbit_path or not os.path.exists(args.lowbit_path):
         model = AutoModelForCausalLM.from_pretrained(
@@ -52,6 +51,8 @@ if __name__ == '__main__':
             load_in_low_bit=args.load_in_low_bit,
             attn_implementation="eager"
         )
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.lowbit_path)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.lowbit_path,
@@ -59,6 +60,7 @@ if __name__ == '__main__':
             bigdl_transformers_low_bit=args.load_in_low_bit,
             attn_implementation="eager"
         )
+        tokenizer = AutoTokenizer.from_pretrained(args.lowbit_path, trust_remote_code=True)
 
     print(model)
 

@@ -79,6 +79,8 @@ if __name__ == "__main__":
             transpose_value_cache=not args.disable_transpose_value_cache,
             save_directory=args.save_directory
         )
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.save_directory)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.save_directory,
@@ -89,8 +91,8 @@ if __name__ == "__main__":
             max_prompt_len=args.max_prompt_len,
             transpose_value_cache=not args.disable_transpose_value_cache,
         )
+        tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
-    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
     DEFAULT_SYSTEM_PROMPT = """\
     """

@@ -80,6 +80,8 @@ if __name__ == "__main__":
             transpose_value_cache=not args.disable_transpose_value_cache,
             save_directory=args.save_directory
         )
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.save_directory)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.save_directory,
@@ -90,8 +92,8 @@ if __name__ == "__main__":
             max_prompt_len=args.max_prompt_len,
             transpose_value_cache=not args.disable_transpose_value_cache,
         )
+        tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
-    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
     DEFAULT_SYSTEM_PROMPT = """\
     """

@@ -65,6 +65,8 @@ if __name__ == "__main__":
             transpose_value_cache=not args.disable_transpose_value_cache,
             save_directory=args.save_directory
         )
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.save_directory)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.save_directory,
@@ -76,7 +78,7 @@ if __name__ == "__main__":
             transpose_value_cache=not args.disable_transpose_value_cache,
             trust_remote_code=True,
         )
-    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
     print("-" * 80)
     print("done")

@@ -71,6 +71,8 @@ if __name__ == "__main__":
             quantization_group_size=args.quantization_group_size,
             save_directory=args.save_directory
         )
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.save_directory)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.save_directory,
@@ -81,8 +83,8 @@ if __name__ == "__main__":
             max_prompt_len=args.max_prompt_len,
             transpose_value_cache=not args.disable_transpose_value_cache,
         )
+        tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
-    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
     print("-" * 80)
     print("done")