Fix NPU LLM example save/load tokenizer (#12485)
parent 5fe766788e
commit 7082844f3f

11 changed files with 33 additions and 11 deletions
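All eleven files get the same two-part fix. In the first-run branch, where from_pretrained(..., save_directory=...) converts the model and saves it, the examples now also save the tokenizer into the same directory. In the reload branch, load_low_bit(...) is now paired with a tokenizer loaded from the save directory rather than from the original model_path. Previously the tokenizer was always fetched from model_path after the if/else, so a saved low-bit model could not be reloaded once the original checkpoint was unavailable. Below is a minimal sketch of the resulting pattern, assuming ipex-llm's NPU AutoModelForCausalLM API; the model id, paths, and low-bit setting are hypothetical stand-ins for the argparse values the real examples use.

# Minimal sketch of the pattern this commit applies (assumptions noted above).
import os
from transformers import AutoTokenizer
from ipex_llm.transformers.npu_model import AutoModelForCausalLM  # NPU variant

model_path = "meta-llama/Llama-2-7b-chat-hf"  # hypothetical checkpoint
save_directory = "./llama2-npu-lowbit"        # hypothetical save path

if not os.path.exists(save_directory):
    # First run: convert the model and persist BOTH the model and tokenizer.
    model = AutoModelForCausalLM.from_pretrained(model_path,
                                                 attn_implementation="eager",
                                                 load_in_low_bit="sym_int4",
                                                 trust_remote_code=True,
                                                 save_directory=save_directory)
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    tokenizer.save_pretrained(save_directory)
else:
    # Later runs: model AND tokenizer both come from save_directory, so the
    # original model_path no longer has to be present.
    model = AutoModelForCausalLM.load_low_bit(save_directory,
                                              attn_implementation="eager",
                                              trust_remote_code=True)
    tokenizer = AutoTokenizer.from_pretrained(save_directory, trust_remote_code=True)

The effect is that args.save_directory becomes the single source of truth after the first run: both branches end with a matching model/tokenizer pair.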
@@ -79,6 +79,8 @@ if __name__ == "__main__":
             transpose_value_cache=not args.disable_transpose_value_cache,
             trust_remote_code=True,
             save_directory=args.save_directory)
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.save_directory)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.save_directory,
@@ -90,8 +92,8 @@ if __name__ == "__main__":
             transpose_value_cache=not args.disable_transpose_value_cache,
             trust_remote_code=True
         )
+        tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
-    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
     if args.disable_streaming:
         streamer = None

@@ -78,6 +78,8 @@ if __name__ == "__main__":
             attn_implementation="eager",
             transpose_value_cache=not args.disable_transpose_value_cache,
             save_directory=args.save_directory)
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.save_directory)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.save_directory,
@@ -88,8 +90,8 @@ if __name__ == "__main__":
             pipeline=True,
             transpose_value_cache=not args.disable_transpose_value_cache,
         )
+        tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
-    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
     if args.disable_streaming:
         streamer = None

@@ -84,6 +84,8 @@ if __name__ == "__main__":
             attn_implementation="eager",
             transpose_value_cache=not args.disable_transpose_value_cache,
             save_directory=args.save_directory)
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.save_directory)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.save_directory,
@@ -94,8 +96,8 @@ if __name__ == "__main__":
             pipeline=True,
             transpose_value_cache=not args.disable_transpose_value_cache,
         )
+        tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
-    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
     if args.disable_streaming:
         streamer = None

@@ -66,6 +66,8 @@ if __name__ == "__main__":
             transpose_value_cache=not args.disable_transpose_value_cache,
             trust_remote_code=True,
             save_directory=args.save_directory)
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.save_directory)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.save_directory,
@@ -77,8 +79,8 @@ if __name__ == "__main__":
             transpose_value_cache=not args.disable_transpose_value_cache,
             trust_remote_code=True
         )
+        tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
-    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
     if args.disable_streaming:
         streamer = None

@@ -70,6 +70,8 @@ if __name__ == "__main__":
             mixed_precision=True,
             trust_remote_code=True,
             save_directory=args.save_directory)
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.save_directory)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.save_directory,
@@ -79,8 +81,8 @@ if __name__ == "__main__":
             max_prompt_len=args.max_prompt_len,
             pipeline=True,
             transpose_value_cache=not args.disable_transpose_value_cache)
+        tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
-    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
     if args.disable_streaming:
         streamer = None

@@ -79,6 +79,8 @@ if __name__ == "__main__":
             transpose_value_cache=not args.disable_transpose_value_cache,
             save_directory=args.save_directory
         )
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.save_directory)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.save_directory,
@@ -90,8 +92,8 @@ if __name__ == "__main__":
             transpose_value_cache=not args.disable_transpose_value_cache,
             trust_remote_code=True,
         )
+        tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
-    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
     DEFAULT_SYSTEM_PROMPT = """\
     """

@@ -43,7 +43,6 @@ if __name__ == '__main__':
     args = parser.parse_args()
     model_path = args.repo_id_or_model_path
 
-    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
     if not args.lowbit_path or not os.path.exists(args.lowbit_path):
         model = AutoModelForCausalLM.from_pretrained(
@@ -52,6 +51,8 @@ if __name__ == '__main__':
             load_in_low_bit=args.load_in_low_bit,
             attn_implementation="eager"
         )
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.lowbit_path)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.lowbit_path,
@@ -59,6 +60,7 @@ if __name__ == '__main__':
             bigdl_transformers_low_bit=args.load_in_low_bit,
             attn_implementation="eager"
         )
+        tokenizer = AutoTokenizer.from_pretrained(args.lowbit_path, trust_remote_code=True)
 
     print(model)
 

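The file above is the odd one out: it keys off args.lowbit_path and the older load_low_bit signature with bigdl_transformers_low_bit, but the tokenizer fix has the same shape. A sketch of that variant follows; the save_low_bit call is an assumption (the hunks above do not show how the model itself is persisted), and the import, paths, and low-bit setting are hypothetical.

# Sketch of the lowbit_path variant; argument names taken from the hunks above.
import os
from transformers import AutoTokenizer
from ipex_llm.transformers.npu_model import AutoModelForCausalLM  # assumed NPU variant

model_path = "meta-llama/Llama-2-7b-chat-hf"  # hypothetical checkpoint
lowbit_path = "./llama2-lowbit"               # hypothetical args.lowbit_path
load_in_low_bit = "sym_int4"                  # hypothetical args.load_in_low_bit

if not lowbit_path or not os.path.exists(lowbit_path):
    model = AutoModelForCausalLM.from_pretrained(model_path,
                                                 load_in_low_bit=load_in_low_bit,
                                                 attn_implementation="eager")
    model.save_low_bit(lowbit_path)  # assumed: the example persists the model nearby
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    tokenizer.save_pretrained(lowbit_path)  # added by this commit
else:
    model = AutoModelForCausalLM.load_low_bit(lowbit_path,
                                              bigdl_transformers_low_bit=load_in_low_bit,
                                              attn_implementation="eager")
    tokenizer = AutoTokenizer.from_pretrained(lowbit_path,  # added by this commit
                                              trust_remote_code=True)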
@@ -79,6 +79,8 @@ if __name__ == "__main__":
             transpose_value_cache=not args.disable_transpose_value_cache,
             save_directory=args.save_directory
         )
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.save_directory)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.save_directory,
@@ -89,8 +91,8 @@ if __name__ == "__main__":
             max_prompt_len=args.max_prompt_len,
             transpose_value_cache=not args.disable_transpose_value_cache,
         )
+        tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
-    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
     DEFAULT_SYSTEM_PROMPT = """\
     """

@@ -80,6 +80,8 @@ if __name__ == "__main__":
             transpose_value_cache=not args.disable_transpose_value_cache,
             save_directory=args.save_directory
         )
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.save_directory)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.save_directory,
@@ -90,8 +92,8 @@ if __name__ == "__main__":
             max_prompt_len=args.max_prompt_len,
             transpose_value_cache=not args.disable_transpose_value_cache,
         )
+        tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
-    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
     DEFAULT_SYSTEM_PROMPT = """\
     """

@@ -65,6 +65,8 @@ if __name__ == "__main__":
             transpose_value_cache=not args.disable_transpose_value_cache,
             save_directory=args.save_directory
         )
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.save_directory)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.save_directory,
@@ -76,7 +78,7 @@ if __name__ == "__main__":
             transpose_value_cache=not args.disable_transpose_value_cache,
             trust_remote_code=True,
         )
-    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+    tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
     print("-" * 80)
     print("done")

@@ -71,6 +71,8 @@ if __name__ == "__main__":
             quantization_group_size=args.quantization_group_size,
             save_directory=args.save_directory
         )
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        tokenizer.save_pretrained(args.save_directory)
     else:
         model = AutoModelForCausalLM.load_low_bit(
             args.save_directory,
@@ -81,8 +83,8 @@ if __name__ == "__main__":
             max_prompt_len=args.max_prompt_len,
             transpose_value_cache=not args.disable_transpose_value_cache,
         )
+        tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)
 
-    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
     print("-" * 80)
     print("done")
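After the first run of any of these examples, the save directory should contain the tokenizer files next to the low-bit model, which is what lets the reload branch work without the original checkpoint. A quick, hypothetical sanity check:

# Hypothetical check: tokenizer.save_pretrained writes these files alongside
# the low-bit model, so the reload branch no longer needs model_path.
import os

save_directory = "./llama2-npu-lowbit"  # hypothetical; matches the sketch above
for name in ("tokenizer_config.json", "special_tokens_map.json"):
    print(name, "->", os.path.exists(os.path.join(save_directory, name)))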