Fix tokenizer save/load in NPU LLM examples (#12485)

Author: Jin, Qiao
Date:   2024-12-03 16:30:55 +08:00 (committed by GitHub)
Parent: 5fe766788e
Commit: 7082844f3f
11 changed files with 33 additions and 11 deletions
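Every file makes the same two-part change: when an example converts and saves the low-bit model, it now also saves the tokenizer into the same directory; when it later reloads the saved model, it reads the tokenizer from that directory instead of from the original checkpoint. A minimal sketch of the resulting pattern (paths and per-example keyword arguments are illustrative, not taken verbatim from any one file; from_pretrained's save_directory and load_low_bit are used as shown in the diffs below):

import os

from transformers import AutoTokenizer
# NPU-optimized model class from ipex-llm; per-example kwargs omitted
from ipex_llm.transformers.npu_model import AutoModelForCausalLM

model_path = "meta-llama/Llama-2-7b-chat-hf"  # hypothetical checkpoint
save_directory = "./model-npu-low-bit"        # hypothetical output dir

if not os.path.exists(save_directory):
    # First run: convert, then persist the model *and* the tokenizer together
    model = AutoModelForCausalLM.from_pretrained(model_path,
                                                 trust_remote_code=True,
                                                 save_directory=save_directory)
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    tokenizer.save_pretrained(save_directory)
else:
    # Later runs: both pieces come from save_directory alone, so the
    # original checkpoint is no longer needed on disk
    model = AutoModelForCausalLM.load_low_bit(save_directory,
                                              trust_remote_code=True)
    tokenizer = AutoTokenizer.from_pretrained(save_directory, trust_remote_code=True)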


@@ -79,6 +79,8 @@ if __name__ == "__main__":
             transpose_value_cache=not args.disable_transpose_value_cache,
             trust_remote_code=True,
             save_directory=args.save_directory)
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.save_directory)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.save_directory,
@@ -90,8 +92,8 @@ if __name__ == "__main__":
             transpose_value_cache=not args.disable_transpose_value_cache,
             trust_remote_code=True
         )
-        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
     if args.disable_streaming:
         streamer = None


@@ -78,6 +78,8 @@ if __name__ == "__main__":
             attn_implementation="eager",
             transpose_value_cache=not args.disable_transpose_value_cache,
             save_directory=args.save_directory)
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.save_directory)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.save_directory,
@@ -88,8 +90,8 @@ if __name__ == "__main__":
             pipeline=True,
             transpose_value_cache=not args.disable_transpose_value_cache,
         )
-        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
     if args.disable_streaming:
         streamer = None


@@ -84,6 +84,8 @@ if __name__ == "__main__":
             attn_implementation="eager",
             transpose_value_cache=not args.disable_transpose_value_cache,
             save_directory=args.save_directory)
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.save_directory)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.save_directory,
@@ -94,8 +96,8 @@ if __name__ == "__main__":
             pipeline=True,
             transpose_value_cache=not args.disable_transpose_value_cache,
         )
-        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
     if args.disable_streaming:
         streamer = None


@@ -66,6 +66,8 @@ if __name__ == "__main__":
             transpose_value_cache=not args.disable_transpose_value_cache,
             trust_remote_code=True,
             save_directory=args.save_directory)
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.save_directory)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.save_directory,
@@ -77,8 +79,8 @@ if __name__ == "__main__":
             transpose_value_cache=not args.disable_transpose_value_cache,
             trust_remote_code=True
         )
-        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
     if args.disable_streaming:
         streamer = None


@@ -70,6 +70,8 @@ if __name__ == "__main__":
             mixed_precision=True,
             trust_remote_code=True,
             save_directory=args.save_directory)
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.save_directory)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.save_directory,
@@ -79,8 +81,8 @@ if __name__ == "__main__":
             max_prompt_len=args.max_prompt_len,
             pipeline=True,
             transpose_value_cache=not args.disable_transpose_value_cache)
-        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
     if args.disable_streaming:
         streamer = None


@@ -79,6 +79,8 @@ if __name__ == "__main__":
             transpose_value_cache=not args.disable_transpose_value_cache,
             save_directory=args.save_directory
         )
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.save_directory)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.save_directory,
@@ -90,8 +92,8 @@ if __name__ == "__main__":
             transpose_value_cache=not args.disable_transpose_value_cache,
             trust_remote_code=True,
         )
-        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
     DEFAULT_SYSTEM_PROMPT = """\
     """


@@ -43,7 +43,6 @@ if __name__ == '__main__':
     args = parser.parse_args()
     model_path = args.repo_id_or_model_path
 
-    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
     if not args.lowbit_path or not os.path.exists(args.lowbit_path):
         model = AutoModelForCausalLM.from_pretrained(
@@ -52,6 +51,8 @@ if __name__ == '__main__':
             load_in_low_bit=args.load_in_low_bit,
             attn_implementation="eager"
         )
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.lowbit_path)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.lowbit_path,
@@ -59,6 +60,7 @@ if __name__ == '__main__':
             bigdl_transformers_low_bit=args.load_in_low_bit,
             attn_implementation="eager"
         )
+        tokenizer = AutoTokenizer.from_pretrained(args.lowbit_path, trust_remote_code=True)
 
     print(model)
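Unlike the other examples, this one used to create the tokenizer from model_path before the if/else, so even the load_low_bit branch still required the original checkpoint on disk. Moving the tokenizer load into each branch removes that dependency. A self-contained sketch of the corrected load branch (the directory and low-bit setting are hypothetical; the keyword arguments follow the diff above):

from transformers import AutoTokenizer
from ipex_llm.transformers.npu_model import AutoModelForCausalLM

lowbit_path = "./llama-low-bit"  # hypothetical; written by an earlier run

# Model and tokenizer are now both restored from the saved directory.
model = AutoModelForCausalLM.load_low_bit(
    lowbit_path,
    bigdl_transformers_low_bit="sym_int4",  # assumed value for args.load_in_low_bit
    attn_implementation="eager"
)
tokenizer = AutoTokenizer.from_pretrained(lowbit_path, trust_remote_code=True)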


@@ -79,6 +79,8 @@ if __name__ == "__main__":
             transpose_value_cache=not args.disable_transpose_value_cache,
             save_directory=args.save_directory
         )
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.save_directory)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.save_directory,
@@ -89,8 +91,8 @@ if __name__ == "__main__":
             max_prompt_len=args.max_prompt_len,
             transpose_value_cache=not args.disable_transpose_value_cache,
         )
-        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
     DEFAULT_SYSTEM_PROMPT = """\
     """


@@ -80,6 +80,8 @@ if __name__ == "__main__":
             transpose_value_cache=not args.disable_transpose_value_cache,
             save_directory=args.save_directory
         )
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.save_directory)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.save_directory,
@@ -90,8 +92,8 @@ if __name__ == "__main__":
             max_prompt_len=args.max_prompt_len,
             transpose_value_cache=not args.disable_transpose_value_cache,
         )
-        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
     DEFAULT_SYSTEM_PROMPT = """\
     """


@@ -65,6 +65,8 @@ if __name__ == "__main__":
             transpose_value_cache=not args.disable_transpose_value_cache,
             save_directory=args.save_directory
         )
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.save_directory)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.save_directory,
@@ -76,7 +78,7 @@ if __name__ == "__main__":
             transpose_value_cache=not args.disable_transpose_value_cache,
             trust_remote_code=True,
         )
-        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
     print("-" * 80)
     print("done")


@@ -71,6 +71,8 @@ if __name__ == "__main__":
             quantization_group_size=args.quantization_group_size,
             save_directory=args.save_directory
         )
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.save_directory)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.save_directory,
@@ -81,8 +83,8 @@ if __name__ == "__main__":
             max_prompt_len=args.max_prompt_len,
             transpose_value_cache=not args.disable_transpose_value_cache,
         )
-        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
     print("-" * 80)
     print("done")