Modify example from fp32 to fp16 (#10528)
* Modify example from fp32 to fp16
* Remove Falcon from fp16 example for now
* Remove MPT from fp16 example
parent 44922bb5c2
commit 1e27e08322

7 changed files with 7 additions and 7 deletions
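Each hunk below makes the same one-line change: the model is cast to half precision (fp16) with .half() before being moved to the XPU device, instead of running the non-quantized parts in fp32. A minimal sketch of the resulting pattern, assuming the ipex-llm style AutoModelForCausalLM wrapper these examples use (the import path and model_path are assumptions for illustration, not taken from this diff):

# Hypothetical sketch of the pattern this commit applies.
# Assumes the ipex-llm transformers wrapper; older releases exposed
# the same class under bigdl.llm.transformers.
from ipex_llm.transformers import AutoModelForCausalLM
from transformers import AutoTokenizer

model_path = "meta-llama/Llama-2-7b-chat-hf"  # placeholder model

# Load with 4-bit weight quantization, as in the example scripts
model = AutoModelForCausalLM.from_pretrained(model_path,
                                             load_in_4bit=True,
                                             trust_remote_code=True,
                                             use_cache=True)

# Before this commit the examples moved the model as-is (fp32 parts stay fp32):
#     model = model.to('xpu')
# After: cast the remaining fp32 parts to fp16 first, then move to XPU.
model = model.half().to('xpu')

tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

Per the commit message, the Falcon and MPT examples were left out of the fp16 change for now.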
@@ -51,7 +51,7 @@ if __name__ == '__main__':
                                                  load_in_4bit=True,
                                                  trust_remote_code=True,
                                                  use_cache=True)
-    model = model.to('xpu')
+    model = model.half().to('xpu')
 
     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_path,

@@ -48,7 +48,7 @@ if __name__ == '__main__':
                                       optimize_model=True,
                                       trust_remote_code=True,
                                       use_cache=True)
-    model = model.to('xpu')
+    model = model.half().to('xpu')
 
     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_path,

@@ -48,7 +48,7 @@ if __name__ == '__main__':
                                       optimize_model=True,
                                       trust_remote_code=True,
                                       use_cache=True)
-    model = model.to('xpu')
+    model = model.half().to('xpu')
 
     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_path,

@@ -61,7 +61,7 @@ if __name__ == '__main__':
                                                 optimize_model=True,
                                                 trust_remote_code=True,
                                                 use_cache=True)
-    model = model.to('xpu')
+    model = model.half().to('xpu')
 
     # Load tokenizer
     tokenizer = LlamaTokenizer.from_pretrained(model_path, trust_remote_code=True)

@@ -47,7 +47,7 @@ if __name__ == '__main__':
                                                 optimize_model=True,
                                                 trust_remote_code=True,
                                                 use_cache=True)
-    model = model.to('xpu')
+    model = model.half().to('xpu')
 
     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

@@ -54,7 +54,7 @@ if __name__ == '__main__':
                                                 optimize_model=True,
                                                 trust_remote_code=True,
                                                 use_cache=True)
-    model = model.to('xpu')
+    model = model.half().to('xpu')
 
     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_path,

@@ -44,7 +44,7 @@ if __name__ == '__main__':
     model = AutoModelForCausalLM.from_pretrained(model_path,
                                                  load_in_4bit=True,
                                                  trust_remote_code=True)
-    model = model.to("xpu")
+    model = model.half().to("xpu")
 
     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_path,