Add half precision for fastchat models (#11130)
This commit is contained in:
		
							parent
							
								
									65f4212f89
								
							
						
					
					
						commit
						7f772c5a4f
					
				
					 1 changed file with 2 additions and 0 deletions
				
			
		| 
						 | 
				
			
			@ -91,6 +91,8 @@ def load_model(
 | 
			
		|||
 | 
			
		||||
    if device == "xpu":
 | 
			
		||||
        import intel_extension_for_pytorch as ipex
 | 
			
		||||
        print("Convert model to half precision...")
 | 
			
		||||
        model = model.half()
 | 
			
		||||
        model = model.to('xpu')
 | 
			
		||||
 | 
			
		||||
    return model, tokenizer
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue