Update optimize_model=True in llama2 chatglm2 arc examples (#8878)
* Add optimize_model=True in llama2 and chatglm2 examples
* Add IPEX optimize in the GPT-J example
This commit is contained in:
		
							parent
							
								
									5e58f698cd
								
							
						
					
					
						commit
						74a2c2ddf5
					
				
					 4 changed files with 3 additions and 2 deletions
				
			
		| 
						 | 
				
			
			@ -44,7 +44,7 @@ if __name__ == '__main__':
 | 
			
		|||
    # which convert the relevant layers in the model into INT4 format
 | 
			
		||||
    model = AutoModel.from_pretrained(model_path,
 | 
			
		||||
                                      load_in_4bit=True,
 | 
			
		||||
                                      optimize_model=False,
 | 
			
		||||
                                      optimize_model=True,
 | 
			
		||||
                                      trust_remote_code=True)
 | 
			
		||||
    model = model.to('xpu')
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -45,6 +45,7 @@ if __name__ == '__main__':
 | 
			
		|||
                                                 optimize_model=False,
 | 
			
		||||
                                                 trust_remote_code=True)
 | 
			
		||||
    model = model.to('xpu')
 | 
			
		||||
    model = ipex.optimize(model.eval(), dtype="float16", inplace=True)
 | 
			
		||||
 | 
			
		||||
    # Load tokenizer
 | 
			
		||||
    tokenizer = AutoTokenizer.from_pretrained(model_path,
 | 
			
		||||
| 
						 | 
				
			
			@ -57,7 +57,7 @@ if __name__ == '__main__':
 | 
			
		|||
    # which convert the relevant layers in the model into INT4 format
 | 
			
		||||
    model = AutoModelForCausalLM.from_pretrained(model_path,
 | 
			
		||||
                                                 load_in_4bit=True,
 | 
			
		||||
                                                 optimize_model=False,
 | 
			
		||||
                                                 optimize_model=True,
 | 
			
		||||
                                                 trust_remote_code=True)
 | 
			
		||||
    model = model.to('xpu')
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue