change 5 pytorch/huggingface models to fp16 (#11894)

parent 5c4ed00593
commit 18662dca1c

7 changed files with 7 additions and 7 deletions
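Each of the seven diffs below makes the same one-line change: the example script now casts the model to half precision (fp16) with .half() before moving it to the Intel GPU ('xpu') device. A minimal sketch of the resulting load path, assuming the ipex-llm AutoModelForCausalLM wrapper used by these examples; the model id is a placeholder, not taken from the diff:

# Minimal sketch of the pattern this commit applies in each example script.
# Assumes the ipex-llm transformers wrapper; model_path is illustrative only.
from ipex_llm.transformers import AutoModelForCausalLM
from transformers import AutoTokenizer

model_path = "meta-llama/CodeLlama-7b-hf"  # placeholder model id

model = AutoModelForCausalLM.from_pretrained(model_path,
                                             load_in_4bit=True,
                                             trust_remote_code=True,
                                             use_cache=True)

# Before this commit: model = model.to('xpu')
# After: cast fp32 weights down to fp16 first, then move to the Intel GPU.
model = model.half().to('xpu')

tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)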
@@ -47,7 +47,7 @@ if __name__ == '__main__':
                                              optimize_model=False,
                                              trust_remote_code=True,
                                              use_cache=True)
-    model = model.to('xpu')
+    model = model.half().to('xpu')
 
     # Load tokenizer
     tokenizer = CodeLlamaTokenizer.from_pretrained(model_path,
@@ -47,7 +47,7 @@ if __name__ == '__main__':
                                              optimize_model=False,
                                              trust_remote_code=True,
                                              use_cache=True)
-    model = model.to('xpu')
+    model = model.half().to('xpu')
 
     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_path,
@@ -47,7 +47,7 @@ if __name__ == '__main__':
                                              load_in_4bit=True,
                                              trust_remote_code=True,
                                              use_cache=True)
-    model = model.to('xpu')
+    model = model.half().to('xpu')
 
     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_path,
@@ -50,7 +50,7 @@ if __name__ == '__main__':
     # This will allow the memory-intensive embedding layer to utilize the CPU instead of iGPU.
     model = optimize_model(model)
 
-    model = model.to('xpu')
+    model = model.half().to('xpu')
 
     # Load tokenizer
     tokenizer = CodeLlamaTokenizer.from_pretrained(model_path, trust_remote_code=True)
@@ -46,7 +46,7 @@ if __name__ == '__main__':
                                              use_cache=True)
     model = optimize_model(model)
 
-    model = model.to('xpu')
+    model = model.half().to('xpu')
 
     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_path,
@@ -49,7 +49,7 @@ if __name__ == '__main__':
     # This will allow the memory-intensive embedding layer to utilize the CPU instead of iGPU.
     model = optimize_model(model)
 
-    model = model.to('xpu')
+    model = model.half().to('xpu')
 
     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
@@ -49,7 +49,7 @@ if __name__ == '__main__':
     # When running LLMs on Intel iGPUs for Windows users, we recommend setting `cpu_embedding=True` in the optimize_model function.
     # This will allow the memory-intensive embedding layer to utilize the CPU instead of iGPU.
     model = optimize_model(model)
-    model = model.to('xpu')
+    model = model.half().to('xpu')
 
     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
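The hunks that call optimize_model carry a comment recommending cpu_embedding=True for Windows users on Intel iGPUs, so the memory-intensive embedding layer runs on the CPU. A sketch of that variant with the new fp16 cast, assuming ipex-llm's optimize_model accepts the cpu_embedding flag the comment refers to; the model id is again a placeholder:

# Sketch of the optimize_model variant, under the same assumptions as above.
from ipex_llm import optimize_model
from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = "Qwen/Qwen-7B-Chat"  # placeholder model id

model = AutoModelForCausalLM.from_pretrained(model_path,
                                             trust_remote_code=True,
                                             use_cache=True)

# cpu_embedding=True keeps the embedding layer on the CPU, as the in-diff
# comment recommends when running on an Intel iGPU under Windows.
model = optimize_model(model, cpu_embedding=True)

model = model.half().to('xpu')  # the one-line change this commit makes

tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)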