change 5 pytorch/huggingface models to fp16 (#11894)
parent 5c4ed00593
commit 18662dca1c

7 changed files with 7 additions and 7 deletions

@@ -47,7 +47,7 @@ if __name__ == '__main__':
                                                  optimize_model=False,
                                                  trust_remote_code=True,
                                                  use_cache=True)
-    model = model.to('xpu')
+    model = model.half().to('xpu')
 
     # Load tokenizer
     tokenizer = CodeLlamaTokenizer.from_pretrained(model_path,
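
Every hunk in this commit makes the same one-line change: cast the model to fp16 with `.half()` before moving it to the Intel GPU ('xpu') device. For context, a minimal sketch of the low-bit loading pattern these examples follow, assuming the ipex-llm `AutoModelForCausalLM` wrapper and an XPU-enabled PyTorch build; the model id is a hypothetical placeholder, and the `load_in_4bit=True` flag is assumed from the neighboring hunks:

# Sketch only: assumes ipex-llm and intel-extension-for-pytorch are installed.
import torch
from transformers import CodeLlamaTokenizer
from ipex_llm.transformers import AutoModelForCausalLM

model_path = "codellama/CodeLlama-7b-hf"  # hypothetical placeholder

# Load with 4-bit weight quantization; flags match the diff context above.
model = AutoModelForCausalLM.from_pretrained(model_path,
                                             load_in_4bit=True,
                                             optimize_model=False,
                                             trust_remote_code=True,
                                             use_cache=True)

# The change in this commit: cast the remaining fp32 tensors to fp16
# before moving the model to the Intel GPU.
model = model.half().to('xpu')

# Load tokenizer
tokenizer = CodeLlamaTokenizer.from_pretrained(model_path, trust_remote_code=True)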

@@ -47,7 +47,7 @@ if __name__ == '__main__':
                                                  optimize_model=False,
                                                  trust_remote_code=True,
                                                  use_cache=True)
-    model = model.to('xpu')
+    model = model.half().to('xpu')
 
     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_path,

@@ -47,7 +47,7 @@ if __name__ == '__main__':
                                                  load_in_4bit=True,
                                                  trust_remote_code=True,
                                                  use_cache=True)
-    model = model.to('xpu')
+    model = model.half().to('xpu')
 
     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_path,

@@ -50,7 +50,7 @@ if __name__ == '__main__':
     # This will allow the memory-intensive embedding layer to utilize the CPU instead of iGPU.
     model = optimize_model(model)
 
-    model = model.to('xpu')
+    model = model.half().to('xpu')
 
     # Load tokenizer
     tokenizer = CodeLlamaTokenizer.from_pretrained(model_path, trust_remote_code=True)

@@ -46,7 +46,7 @@ if __name__ == '__main__':
                                                  use_cache=True)
     model = optimize_model(model)
 
-    model = model.to('xpu')
+    model = model.half().to('xpu')
 
     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_path,

@@ -49,7 +49,7 @@ if __name__ == '__main__':
     # This will allow the memory-intensive embedding layer to utilize the CPU instead of iGPU.
     model = optimize_model(model)
 
-    model = model.to('xpu')
+    model = model.half().to('xpu')
 
     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

@@ -49,7 +49,7 @@ if __name__ == '__main__':
     # When running LLMs on Intel iGPUs for Windows users, we recommend setting `cpu_embedding=True` in the optimize_model function.
     # This will allow the memory-intensive embedding layer to utilize the CPU instead of iGPU.
     model = optimize_model(model)
-    model = model.to('xpu')
+    model = model.half().to('xpu')
 
     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
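
The hunks above that call `optimize_model(model)` take the other loading path: a stock Hugging Face model is optimized in place, and the fp16 cast is the same one-line change. A sketch of that flow, assuming ipex-llm's `optimize_model` helper; the `cpu_embedding=True` flag is the one the in-file comments recommend for Intel iGPUs on Windows, and the model id is a hypothetical placeholder:

# Sketch only: assumes ipex-llm and intel-extension-for-pytorch are installed.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from ipex_llm import optimize_model

model_path = "meta-llama/Llama-2-7b-chat-hf"  # hypothetical placeholder

model = AutoModelForCausalLM.from_pretrained(model_path,
                                             trust_remote_code=True,
                                             use_cache=True)

# Apply ipex-llm's optimizations in place. On Intel iGPUs (Windows),
# cpu_embedding=True keeps the memory-intensive embedding layer on the CPU,
# as the comments in the diff recommend.
model = optimize_model(model, cpu_embedding=True)

# The change in this commit: cast to fp16 before moving to the Intel GPU.
model = model.half().to('xpu')

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)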