Modify example from fp32 to fp16 (#10528)

* Modify example from fp32 to fp16

* Remove Falcon from fp16 example for now

* Remove MPT from fp16 example
Keyan (Kyrie) Zhang, 2024-04-09 00:45:49 -07:00, committed by GitHub
parent 44922bb5c2
commit 1e27e08322
7 changed files with 7 additions and 7 deletions
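For context, the repeated one-line change below casts the parts of each 4-bit model that remain in fp32 to fp16 before moving it to the Intel GPU. A minimal sketch of the pattern, assuming ipex-llm's transformers-style wrapper and a placeholder model_path (neither the import nor the path appears in the diff):

from ipex_llm.transformers import AutoModelForCausalLM  # assumed import, not shown in the diff
from transformers import AutoTokenizer

model_path = "path/to/model"  # hypothetical placeholder

# Load with 4-bit weight quantization, as in the edited examples
model = AutoModelForCausalLM.from_pretrained(model_path,
                                             load_in_4bit=True,
                                             trust_remote_code=True,
                                             use_cache=True)

# The one-line change made in each file: cast the remaining fp32 tensors
# to fp16, then move the model to the Intel GPU ('xpu') device
model = model.half().to('xpu')

tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

Since load_in_4bit quantizes only the weights, .half() typically affects the modules left in fp32 (embeddings, layer norms, and similar), which is why a single line suffices in every example.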


@@ -51,7 +51,7 @@ if __name__ == '__main__':
     load_in_4bit=True,
     trust_remote_code=True,
     use_cache=True)
-    model = model.to('xpu')
+    model = model.half().to('xpu')

     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_path,


@@ -48,7 +48,7 @@ if __name__ == '__main__':
     optimize_model=True,
     trust_remote_code=True,
     use_cache=True)
-    model = model.to('xpu')
+    model = model.half().to('xpu')

     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_path,


@@ -48,7 +48,7 @@ if __name__ == '__main__':
     optimize_model=True,
     trust_remote_code=True,
     use_cache=True)
-    model = model.to('xpu')
+    model = model.half().to('xpu')

     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_path,


@@ -61,7 +61,7 @@ if __name__ == '__main__':
     optimize_model=True,
     trust_remote_code=True,
     use_cache=True)
-    model = model.to('xpu')
+    model = model.half().to('xpu')

     # Load tokenizer
     tokenizer = LlamaTokenizer.from_pretrained(model_path, trust_remote_code=True)


@@ -47,7 +47,7 @@ if __name__ == '__main__':
     optimize_model=True,
     trust_remote_code=True,
     use_cache=True)
-    model = model.to('xpu')
+    model = model.half().to('xpu')

     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)


@@ -54,7 +54,7 @@ if __name__ == '__main__':
     optimize_model=True,
     trust_remote_code=True,
     use_cache=True)
-    model = model.to('xpu')
+    model = model.half().to('xpu')

     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_path,


@@ -44,7 +44,7 @@ if __name__ == '__main__':
     model = AutoModelForCausalLM.from_pretrained(model_path,
     load_in_4bit=True,
     trust_remote_code=True)
-    model = model.to("xpu")
+    model = model.half().to("xpu")

     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_path,