change 5 pytorch/huggingface models to fp16 (#11894)

Jinhe authored 2024-08-22 16:12:09 +08:00, committed by GitHub
parent 5c4ed00593
commit 18662dca1c
7 changed files with 7 additions and 7 deletions

@@ -47,7 +47,7 @@ if __name__ == '__main__':
     optimize_model=False,
     trust_remote_code=True,
     use_cache=True)
-    model = model.to('xpu')
+    model = model.half().to('xpu')
     # Load tokenizer
     tokenizer = CodeLlamaTokenizer.from_pretrained(model_path,
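
For reference, the full pattern these example scripts follow, sketched with ipex-llm's transformers-style loader (the checkpoint path is a placeholder, and load_in_4bit is assumed from the sibling hunk below):

from ipex_llm.transformers import AutoModelForCausalLM
from transformers import CodeLlamaTokenizer

model_path = "codellama/CodeLlama-7b-hf"  # placeholder checkpoint

# Load with ipex-llm's low-bit weight compression; optimize_model=False
# matches the context lines in this hunk.
model = AutoModelForCausalLM.from_pretrained(model_path,
                                             load_in_4bit=True,
                                             optimize_model=False,
                                             trust_remote_code=True,
                                             use_cache=True)

# The one-line change in this commit: cast the remaining fp32 parameters
# to fp16 before moving the model to the Intel GPU ('xpu') device.
model = model.half().to('xpu')

tokenizer = CodeLlamaTokenizer.from_pretrained(model_path, trust_remote_code=True)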

@@ -47,7 +47,7 @@ if __name__ == '__main__':
     optimize_model=False,
     trust_remote_code=True,
     use_cache=True)
-    model = model.to('xpu')
+    model = model.half().to('xpu')
     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_path,

@@ -47,7 +47,7 @@ if __name__ == '__main__':
     load_in_4bit=True,
     trust_remote_code=True,
     use_cache=True)
-    model = model.to('xpu')
+    model = model.half().to('xpu')
     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_path,

@@ -50,7 +50,7 @@ if __name__ == '__main__':
     # This will allow the memory-intensive embedding layer to utilize the CPU instead of iGPU.
     model = optimize_model(model)
-    model = model.to('xpu')
+    model = model.half().to('xpu')
     # Load tokenizer
     tokenizer = CodeLlamaTokenizer.from_pretrained(model_path, trust_remote_code=True)
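
The optimize_model variant touched here follows the same idea, with the low-bit conversion applied after a stock Hugging Face load; a sketch assuming ipex-llm's optimize_model API (cpu_embedding=True reflects the iGPU recommendation quoted in the comment above):

from ipex_llm import optimize_model
from transformers import AutoModelForCausalLM, CodeLlamaTokenizer

model_path = "codellama/CodeLlama-7b-hf"  # placeholder checkpoint

# Load the stock Hugging Face model first ...
model = AutoModelForCausalLM.from_pretrained(model_path,
                                             trust_remote_code=True,
                                             use_cache=True)
# ... then apply ipex-llm's low-bit optimization. cpu_embedding=True keeps
# the memory-intensive embedding layer on the CPU, per the comment above.
model = optimize_model(model, cpu_embedding=True)

# The fp16 cast added by this commit, applied before the move to 'xpu'.
model = model.half().to('xpu')

tokenizer = CodeLlamaTokenizer.from_pretrained(model_path, trust_remote_code=True)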

@@ -46,7 +46,7 @@ if __name__ == '__main__':
     use_cache=True)
     model = optimize_model(model)
-    model = model.to('xpu')
+    model = model.half().to('xpu')
     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_path,

@@ -49,7 +49,7 @@ if __name__ == '__main__':
     # This will allow the memory-intensive embedding layer to utilize the CPU instead of iGPU.
     model = optimize_model(model)
-    model = model.to('xpu')
+    model = model.half().to('xpu')
     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

@@ -49,7 +49,7 @@ if __name__ == '__main__':
     # When running LLMs on Intel iGPUs for Windows users, we recommend setting `cpu_embedding=True` in the optimize_model function.
     # This will allow the memory-intensive embedding layer to utilize the CPU instead of iGPU.
     model = optimize_model(model)
-    model = model.to('xpu')
+    model = model.half().to('xpu')
     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
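
Continuing from either sketch above, a short usage example of a model prepared this way (prompt and generation settings are illustrative):

import torch

prompt = "def fibonacci(n):"  # illustrative prompt
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to('xpu')

with torch.inference_mode():
    # The fp16/low-bit weights already live on the xpu device,
    # so generation runs entirely on the Intel GPU.
    output = model.generate(input_ids, max_new_tokens=32)

print(tokenizer.decode(output[0], skip_special_tokens=True))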