change 5 pytorch/huggingface models to fp16 (#11894)

parent 5c4ed00593
commit 18662dca1c

7 changed files with 7 additions and 7 deletions
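All seven files receive the same one-line edit: cast the model's floating-point weights to fp16 before moving them onto the Intel GPU. In PyTorch, `Module.half()` converts floating-point parameters and buffers to `torch.float16`, so the transfer ships half-size weights to the `xpu` device:

# Before: the model is moved to the Intel GPU in its load-time dtype
model = model.to('xpu')

# After: floating-point parameters and buffers are cast to torch.float16
# first, roughly halving their memory footprint on the device
model = model.half().to('xpu')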
@@ -47,7 +47,7 @@ if __name__ == '__main__':
                                                  optimize_model=False,
                                                  trust_remote_code=True,
                                                  use_cache=True)
-    model = model.to('xpu')
+    model = model.half().to('xpu')
 
     # Load tokenizer
     tokenizer = CodeLlamaTokenizer.from_pretrained(model_path,

@@ -47,7 +47,7 @@ if __name__ == '__main__':
                                                  optimize_model=False,
                                                  trust_remote_code=True,
                                                  use_cache=True)
-    model = model.to('xpu')
+    model = model.half().to('xpu')
 
     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_path,

@@ -47,7 +47,7 @@ if __name__ == '__main__':
                                                  load_in_4bit=True,
                                                  trust_remote_code=True,
                                                  use_cache=True)
-    model = model.to('xpu')
+    model = model.half().to('xpu')
 
     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_path,
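The three hunks above share one shape: the model comes straight out of the library's from_pretrained wrapper and is moved to the GPU immediately after loading. A minimal sketch of the surrounding script, assuming the ipex_llm.transformers import path and a placeholder model_path (neither appears in the diff):

# Minimal sketch of the load-then-move pattern; the import path and
# model_path value are assumptions. Only the from_pretrained kwargs and
# the device-transfer line come from the diff.
from transformers import AutoTokenizer
from ipex_llm.transformers import AutoModelForCausalLM  # assumed import path

model_path = "path/to/model"  # placeholder

model = AutoModelForCausalLM.from_pretrained(model_path,
                                             load_in_4bit=True,
                                             trust_remote_code=True,
                                             use_cache=True)
# The change applied by this commit: model.half() casts the parameters
# still held in floating point to torch.float16 (integer buffers used by
# 4-bit quantization are not floating point and stay as-is)
model = model.half().to('xpu')

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_path,
                                          trust_remote_code=True)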
@@ -50,7 +50,7 @@ if __name__ == '__main__':
     # This will allow the memory-intensive embedding layer to utilize the CPU instead of iGPU.
     model = optimize_model(model)
 
-    model = model.to('xpu')
+    model = model.half().to('xpu')
 
     # Load tokenizer
     tokenizer = CodeLlamaTokenizer.from_pretrained(model_path, trust_remote_code=True)

@@ -46,7 +46,7 @@ if __name__ == '__main__':
                                                  use_cache=True)
     model = optimize_model(model)
 
-    model = model.to('xpu')
+    model = model.half().to('xpu')
 
     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_path,

@@ -49,7 +49,7 @@ if __name__ == '__main__':
     # This will allow the memory-intensive embedding layer to utilize the CPU instead of iGPU.
     model = optimize_model(model)
 
-    model = model.to('xpu')
+    model = model.half().to('xpu')
 
     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

@@ -49,7 +49,7 @@ if __name__ == '__main__':
     # When running LLMs on Intel iGPUs for Windows users, we recommend setting `cpu_embedding=True` in the optimize_model function.
     # This will allow the memory-intensive embedding layer to utilize the CPU instead of iGPU.
     model = optimize_model(model)
-    model = model.to('xpu')
+    model = model.half().to('xpu')
 
     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
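The remaining hunks wrap the loaded model with optimize_model() before the device transfer. A sketch of that variant, again with assumed import paths and a hypothetical generation step to show that inputs must live on the same xpu device as the fp16 model:

# Sketch of the optimize_model pattern; import paths, model_path, and the
# generate() smoke test are assumptions, not part of the diff.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from ipex_llm import optimize_model  # assumed import path

model_path = "path/to/model"  # placeholder

model = AutoModelForCausalLM.from_pretrained(model_path,
                                             trust_remote_code=True,
                                             use_cache=True)
# When running LLMs on Intel iGPUs for Windows users, we recommend setting
# `cpu_embedding=True` in the optimize_model function. This will allow the
# memory-intensive embedding layer to utilize the CPU instead of iGPU.
model = optimize_model(model)

model = model.half().to('xpu')  # the fp16 change applied by this commit

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# Hypothetical smoke test: move the prompt to the same 'xpu' device
with torch.inference_mode():
    input_ids = tokenizer("def fib(n):", return_tensors="pt").input_ids.to('xpu')
    output = model.generate(input_ids, max_new_tokens=32)
print(tokenizer.decode(output[0], skip_special_tokens=True))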