LLM: remove ipex.optimize for gpt-j (#10606)

* remove ipex.optimize

* fix

* fix
Ruonan Wang 2024-04-01 12:21:49 +08:00 committed by GitHub
parent 59058bb206
commit d6af4877dd
2 changed files with 0 additions and 19 deletions
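
For reference, a minimal sketch of the simplified GPU load path after this change, written against the ipex-llm transformers-style API. The package name, model id, and load kwargs here are illustrative assumptions, not lines from this commit.

import time

from transformers import AutoTokenizer
# Assumed import: the low-bit AutoModelForCausalLM wrapper shipped with ipex-llm / bigdl-llm.
from ipex_llm.transformers import AutoModelForCausalLM

model_path = "EleutherAI/gpt-j-6b"  # illustrative model id

st = time.perf_counter()
# Low-bit load; the GPTJForCausalLM check and the ipex.optimize() call are no longer applied.
model = AutoModelForCausalLM.from_pretrained(model_path,
                                             load_in_4bit=True,
                                             trust_remote_code=True,
                                             use_cache=True).eval()
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = model.to('xpu')
load_time = time.perf_counter() - st
print(">> loading of model costs {}s".format(load_time))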


@@ -427,9 +427,6 @@ def run_transformer_int4_gpu(repo_id,
trust_remote_code=True, use_cache=True).eval()
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = model.to('xpu')
- if isinstance(model, GPTJForCausalLM):
-     # For gpt-j model family, this optimization can provide a better performance.
-     model = ipex.optimize(model.eval(), inplace=True)
end = time.perf_counter()
load_time = end - st
print(">> loading of model costs {}s and {}GB".format(load_time, torch.xpu.memory.memory_reserved()/(1024**3)))
@@ -519,9 +516,6 @@ def run_optimize_model_gpu(repo_id,
model = optimize_model(model, low_bit=low_bit)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = model.to('xpu')
- if isinstance(model, GPTJForCausalLM):
-     # For gpt-j model family, this optimization can provide a better performance.
-     model = ipex.optimize(model.eval(), inplace=True)
end = time.perf_counter()
load_time = end - st
print(">> loading of model costs {}s".format(load_time))
@@ -594,9 +588,6 @@ def run_ipex_fp16_gpu(repo_id,
model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True, use_cache=True)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = model.half().to('xpu')
- if isinstance(model, GPTJForCausalLM):
-     # For gpt-j model family, this optimization can provide a better performance.
-     model = ipex.optimize(model.eval(), inplace=True)
end = time.perf_counter()
load_time = end - st
print(">> loading of model costs {}s".format(load_time))
@@ -852,9 +843,6 @@ def run_transformer_int4_gpu_win(repo_id,
trust_remote_code=True, use_cache=True, cpu_embedding=cpu_embedding).eval()
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = model.to('xpu')
- if isinstance(model, GPTJForCausalLM):
-     # For gpt-j model family, this optimization can provide a better performance.
-     model = ipex.optimize(model.eval(), inplace=True)
end = time.perf_counter()
load_time = end - st
print(">> loading of model costs {}s and {}GB".format(load_time, torch.xpu.memory.memory_reserved()/(1024**3)))
@@ -962,9 +950,6 @@ def run_transformer_int4_fp16_gpu_win(repo_id,
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = model.half()
model = model.to('xpu')
- if isinstance(model, GPTJForCausalLM):
-     # For gpt-j model family, this optimization can provide a better performance.
-     model = ipex.optimize(model.eval(), inplace=True)
end = time.perf_counter()
load_time = end - st
print(">> loading of model costs {}s and {}GB".format(load_time, torch.xpu.memory.memory_reserved()/(1024**3)))
@@ -1067,9 +1052,6 @@ def run_transformer_int4_loadlowbit_gpu_win(repo_id,
use_cache=True, cpu_embedding=cpu_embedding).eval()
tokenizer = AutoTokenizer.from_pretrained(model_path+'-'+low_bit, trust_remote_code=True)
model = model.to('xpu')
- if isinstance(model, GPTJForCausalLM):
-     # For gpt-j model family, this optimization can provide a better performance.
-     model = ipex.optimize(model.eval(), inplace=True)
end = time.perf_counter()
load_time = end - st
print(">> loading of model costs {}s and {}GB".format(load_time, torch.xpu.memory.memory_reserved()/(1024**3)))


@@ -46,7 +46,6 @@ if __name__ == '__main__':
trust_remote_code=True,
use_cache=True)
model = model.to('xpu')
- model = ipex.optimize(model.eval(), dtype="float16", inplace=True)
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_path,
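
For the second changed file, a minimal sketch of the remaining GPU load path, assuming a plain transformers-style load; the model id and imports are illustrative, and intel_extension_for_pytorch appears only because, in a typical IPEX XPU setup, it still needs to be imported to register the 'xpu' device.

import intel_extension_for_pytorch as ipex  # registers the 'xpu' device with PyTorch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = "EleutherAI/gpt-j-6b"  # illustrative model id

# Load and move to the Intel GPU; the ipex.optimize(..., dtype="float16") step is no longer applied.
model = AutoModelForCausalLM.from_pretrained(model_path,
                                             trust_remote_code=True,
                                             use_cache=True)
model = model.to('xpu')

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)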