Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								c44c6dc43a 
								
							 
						 
						
							
							
								
								LLM: add chatglm3 examples ( #9305 )  
							
							 
							
							
							
						 
						
							2023-11-01 09:50:05 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								06447a3ef6 
								
							 
						 
						
							
							
								
								add malloc and intel openmp to llm deps ( #9322 )  
							
							 
							
							
							
						 
						
							2023-11-01 09:47:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cheen Hau, 俊豪 
								
							 
						 
						
							
							
							
							
								
							
							
								d638b93dfe 
								
							 
						 
						
							
							
								
								Add test script and workflow for qlora fine-tuning ( #9295 )  
							
							 
							
							... 
							
							
							
							* Add test script and workflow for qlora fine-tuning
* Test fix export model
* Download dataset
* Fix export model issue
* Reduce number of training steps
* Rename script
* Correction 
							
						 
						
							2023-11-01 09:39:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								d383ee8efb 
								
							 
						 
						
							
							
								
								LLM: update QLoRA example about accelerate version( #9314 )  
							
							 
							
							
							
						 
						
							2023-10-31 13:54:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cheen Hau, 俊豪 
								
							 
						 
						
							
							
							
							
								
							
							
								cee9eaf542 
								
							 
						 
						
							
							
								
								[LLM] Fix llm arc ut oom ( #9300 )  
							
							 
							
							... 
							
							
							
							* Move model to cpu after testing so that gpu memory is deallocated
* Add code comment
---------
Co-authored-by: sgwhat <ge.song@intel.com> 
							
						 
						
							2023-10-30 14:38:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									dingbaorong 
								
							 
						 
						
							
							
							
							
								
							
							
								ee5becdd61 
								
							 
						 
						
							
							
								
								use coco image in Qwen-VL ( #9298 )  
							
							 
							
							... 
							
							
							
							* use coco image
* add output
* address yuwen's comments 
							
						 
						
							2023-10-30 14:32:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								163d033616 
								
							 
						 
						
							
							
								
								Support qlora in CPU ( #9233 )  
							
							 
							
							... 
							
							
							
							* support qlora in CPU
* revert example
* fix style 
							
						 
						
							2023-10-27 14:01:15 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								8838707009 
								
							 
						 
						
							
							
								
								Add deepspeed autotp example readme ( #9289 )  
							
							 
							
							... 
							
							
							
							* Add deepspeed autotp example readme
* change word 
							
						 
						
							2023-10-27 13:04:38 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									dingbaorong 
								
							 
						 
						
							
							
							
							
								
							
							
								f053688cad 
								
							 
						 
						
							
							
								
								add cpu example of LLaVA ( #9269 )  
							
							 
							
							... 
							
							
							
							* add LLaVA cpu example
* Small text updates
* update link
---------
Co-authored-by: Yuwen Hu <yuwen.hu@intel.com> 
							
						 
						
							2023-10-27 18:59:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zheng, Yi 
								
							 
						 
						
							
							
							
							
								
							
							
								7f2ad182fd 
								
							 
						 
						
							
							
								
								Minor Fixes of README ( #9294 )  
							
							 
							
							
							
						 
						
							2023-10-27 18:25:46 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zheng, Yi 
								
							 
						 
						
							
							
							
							
								
							
							
								1bff54a378 
								
							 
						 
						
							
							
								
								Display demo.jpg in the README.md of HuggingFace Transformers Agent ( #9293 )  
							
							 
							
							... 
							
							
							
							* Display demo.jpg
* remove demo.jpg 
							
						 
						
							2023-10-27 18:00:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zheng, Yi 
								
							 
						 
						
							
							
							
							
								
							
							
								a4a1dec064 
								
							 
						 
						
							
							
								
								Add a cpu example of HuggingFace Transformers Agent (use vicuna-7b-v1.5) ( #9284 )  
							
							 
							
							... 
							
							
							
							* Add examples of HF Agent
* Modify folder structure and add link of demo.jpg
* Fixes of readme
* Merge applications and Applications 
							
						 
						
							2023-10-27 17:14:12 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guoqiong Song 
								
							 
						 
						
							
							
							
							
								
							
							
								aa319de5e8 
								
							 
						 
						
							
							
								
								Add streaming-llm using llama2 on CPU ( #9265 )  
							
							 
							
							... 
							
							
							
							Enable streaming-llm to let model take infinite inputs, tested on desktop and SPR10 
							
						 
						
							2023-10-27 01:30:39 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cheen Hau, 俊豪 
								
							 
						 
						
							
							
							
							
								
							
							
								6c9ae420a5 
								
							 
						 
						
							
							
								
								Add regression test for optimize_model on gpu ( #9268 )  
							
							 
							
							... 
							
							
							
							* Add MPT model to transformer API test
* Add regression test for optimize_model on gpu.
---------
Co-authored-by: sgwhat <ge.song@intel.com> 
							
						 
						
							2023-10-27 09:23:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								44b5fcc190 
								
							 
						 
						
							
							
								
								LLM: fix pretraining_tp argument issue. ( #9281 )  
							
							 
							
							
							
						 
						
							2023-10-26 18:43:58 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								6b2a32eba2 
								
							 
						 
						
							
							
								
								LLM: add missing function for PyTorch InternLM model ( #9285 )  
							
							 
							
							
							
						 
						
							2023-10-26 18:05:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								f879c48f98 
								
							 
						 
						
							
							
								
								fp8 convert use ggml code ( #9277 )  
							
							 
							
							
							
						 
						
							2023-10-26 17:03:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								e2264e8845 
								
							 
						 
						
							
							
								
								Support arc fp4 ( #9266 )  
							
							 
							
							... 
							
							
							
							* support arc fp4
* fix style
* fix style 
							
						 
						
							2023-10-25 15:42:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cheen Hau, 俊豪 
								
							 
						 
						
							
							
							
							
								
							
							
								ab40607b87 
								
							 
						 
						
							
							
								
								Enable unit test workflow on Arc ( #9213 )  
							
							 
							
							... 
							
							
							
							* Add gpu workflow and a transformers API inference test
* Set device-specific env variables in script instead of workflow
* Fix status message
---------
Co-authored-by: sgwhat <ge.song@intel.com> 
							
						 
						
							2023-10-25 15:17:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								160a1e5ee7 
								
							 
						 
						
							
							
								
								[WIP] Add UT for Mistral Optimized Model ( #9248 )  
							
							 
							
							... 
							
							
							
							* add ut for mistral model
* update
* fix model path
* upgrade transformers version for mistral model
* refactor correctness ut for mistral model
* refactor mistral correctness ut
* revert test_optimize_model back
* remove mistral from test_optimize_model
* add to revert transformers version back to 4.31.0 
							
						 
						
							2023-10-25 15:14:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								067c7e8098 
								
							 
						 
						
							
							
								
								Support deepspeed AutoTP ( #9230 )  
							
							 
							
							... 
							
							
							
							* Support deepspeed
* add test script
* refactor convert
* refine example
* refine
* refine example
* fix style
* refine example and adapt to latest ipex
* fix style 
							
						 
						
							2023-10-24 23:46:28 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yining Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								a6a8afc47e 
								
							 
						 
						
							
							
								
								Add qwen vl CPU example  ( #9221 )  
							
							 
							
							... 
							
							
							
							* eee
* add examples on CPU and GPU
* fix
* fix
* optimize model examples
* add Qwen-VL-Chat CPU example
* Add Qwen-VL CPU example
* fix optimize problem
* fix error
* Have updated, benchmark fix removed from this PR
* add generate API example
* Change formats in qwen-vl example
* Add CPU transformer int4 example for qwen-vl
* fix repo-id problem and add Readme
* change picture url
* Remove unnecessary file
---------
Co-authored-by: Yuwen Hu <yuwen.hu@intel.com> 
							
						 
						
							2023-10-25 13:22:12 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								f597a9d4f5 
								
							 
						 
						
							
							
								
								LLM: update perf test configuration ( #9264 )  
							
							 
							
							
							
						 
						
							2023-10-25 12:35:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								770ac70b00 
								
							 
						 
						
							
							
								
								LLM: add low_bit option in benchmark scripts ( #9257 )  
							
							 
							
							
							
						 
						
							2023-10-25 10:27:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								ec9195da42 
								
							 
						 
						
							
							
								
								LLM: using html to visualize the perf result for Arc ( #9228 )  
							
							 
							
							... 
							
							
							
							* LLM: using html to visualize the perf result for Arc
* deploy the html file
* add python license
* resolve some comments 
							
						 
						
							2023-10-24 18:05:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								90162264a3 
								
							 
						 
						
							
							
								
								LLM: replace torch.float32 with auto type ( #9261 )  
							
							 
							
							
							
						 
						
							2023-10-24 17:12:13 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								bd5215d75b 
								
							 
						 
						
							
							
								
								[LLM] Reimplement chatglm fuse rms optimization ( #9260 )  
							
							 
							
							... 
							
							
							
							* re-implement chatglm rope rms
* update 
							
						 
						
							2023-10-24 16:35:12 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									dingbaorong 
								
							 
						 
						
							
							
							
							
								
							
							
								5a2ce421af 
								
							 
						 
						
							
							
								
								add cpu and gpu examples of flan-t5 ( #9171 )  
							
							 
							
							... 
							
							
							
							* add cpu and gpu examples of flan-t5
* address yuwen's comments
* Add explanation of why we add modules to not convert
* Refine prompt and add a translation example
* Add an empty line at the end of files
* add examples of flan-t5 using optimize_model api
* address bin's comments
* address binbin's comments
* add flan-t5 in readme 
							
						 
						
							2023-10-24 15:24:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yining Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								4a19f50d16 
								
							 
						 
						
							
							
								
								phi-1_5 CPU and GPU examples ( #9173 )  
							
							 
							
							... 
							
							
							
							* eee
* add examples on CPU and GPU
* fix
* fix
* optimize model examples
* have updated
* Warmup and configs added
* Update two tables 
							
						 
						
							2023-10-24 15:08:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								bfc1e2d733 
								
							 
						 
						
							
							
								
								add fused rms optimization for chatglm model ( #9256 )  
							
							 
							
							
							
						 
						
							2023-10-24 14:40:58 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								b15656229e 
								
							 
						 
						
							
							
								
								LLM: fix benchmark issue ( #9255 )  
							
							 
							
							
							
						 
						
							2023-10-24 14:15:05 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
							
							
								
							
							
								f37547249d 
								
							 
						 
						
							
							
								
								Refine README/CICD ( #9253 )  
							
							 
							
							
							
						 
						
							2023-10-24 12:56:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								db37edae8a 
								
							 
						 
						
							
							
								
								LLM: update langchain api document page ( #9222 )  
							
							 
							
							
							
						 
						
							2023-10-24 10:13:41 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								0c5055d38c 
								
							 
						 
						
							
							
								
								add position_ids and fuse embedding for falcon ( #9242 )  
							
							 
							
							... 
							
							
							
							* add position_ids for falcon
* add cpu
* add cpu
* add license 
							
						 
						
							2023-10-24 09:58:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								c14a61681b 
								
							 
						 
						
							
							
								
								Add load low-bit in model-serving to reduce EPC ( #9239 )  
							
							 
							
							... 
							
							
							
							* init load low-bit
* fix
* fix 
							
						 
						
							2023-10-23 11:28:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								0383306688 
								
							 
						 
						
							
							
								
								Add arc fp8 support ( #9232 )  
							
							 
							
							... 
							
							
							
							* add fp8 support
* add log
* fix style 
							
						 
						
							2023-10-20 17:15:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								118249b011 
								
							 
						 
						
							
							
								
								support transformers 4.34+ for llama ( #9229 )  
							
							 
							
							
							
						 
						
							2023-10-19 22:36:30 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chen, Zhentao 
								
							 
						 
						
							
							
							
							
								
							
							
								5850241423 
								
							 
						 
						
							
							
								
								correct Readme GPU example and API docstring ( #9225 )  
							
							 
							
							... 
							
							
							
							* update readme to correct GPU usage
* update from_pretrained supported low bit options
* fix style check 
							
						 
						
							2023-10-19 16:08:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								f87f67ee1c 
								
							 
						 
						
							
							
								
								LLM: arc perf test for some popular models ( #9188 )  
							
							 
							
							
							
						 
						
							2023-10-19 15:56:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								b0ddde0410 
								
							 
						 
						
							
							
								
								Fix removing convert dtype bug ( #9216 )  
							
							 
							
							... 
							
							
							
							* Fix removing convert dtype bug
* fix style 
							
						 
						
							2023-10-18 11:24:22 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								942d6418e7 
								
							 
						 
						
							
							
								
								LLM: fix chatglm kv cache ( #9215 )  
							
							 
							
							
							
						 
						
							2023-10-18 19:09:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								0765f94770 
								
							 
						 
						
							
							
								
								[LLM] Optimize kv_cache for mistral model family ( #9189 )  
							
							 
							
							... 
							
							
							
							* add kv_cache optimization for mistral model
* kv_cache optimize for mistral
* update style
* update 
							
						 
						
							2023-10-18 15:13:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								3555ebc148 
								
							 
						 
						
							
							
								
								LLM: fix wrong length in gptj kv_cache optimization ( #9210 )  
							
							 
							
							... 
							
							
							
							* fix wrong length in gptj kv cache
* update 
							
						 
						
							2023-10-18 14:59:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shengsheng Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								6dad8d16df 
								
							 
						 
						
							
							
								
								optimize NormHead for Baichuan2 ( #9205 )  
							
							 
							
							... 
							
							
							
							* optimize NormHead for Baichuan2
* fix ut and change name
* rename functions 
							
						 
						
							2023-10-18 14:05:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								a3b664ed03 
								
							 
						 
						
							
							
								
								LLM: add GPU More-Data-Types and Save/Load example ( #9199 )  
							
							 
							
							
							
						 
						
							2023-10-18 13:13:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								b9194c5786 
								
							 
						 
						
							
							
								
								LLM: skip some model tests using certain api ( #9163 )  
							
							 
							
							... 
							
							
							
							* LLM: Skip some model tests using certain api
* initialize variable named result 
							
						 
						
							2023-10-18 09:39:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								09815f7064 
								
							 
						 
						
							
							
								
								LLM: fix RMSNorm optimization of Baichuan2-13B/Baichuan-13B ( #9204 )  
							
							 
							
							... 
							
							
							
							* fix rmsnorm of baichuan2-13B
* update baichuan1-13B too
* fix style 
							
						 
						
							2023-10-17 18:40:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								d7ce78edf0 
								
							 
						 
						
							
							
								
								LLM: fix portable zip README image link ( #9201 )  
							
							 
							
							... 
							
							
							
							* LLM: fix portable zip readme img link
* LLM: make README first image center align 
							
						 
						
							2023-10-17 16:38:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cheen Hau, 俊豪 
								
							 
						 
						
							
							
							
							
								
							
							
								66c2e45634 
								
							 
						 
						
							
							
								
								Add unit tests for optimized model correctness ( #9151 )  
							
							 
							
							... 
							
							
							
							* Add test to check correctness of optimized model
* Refactor optimized model test
* Use models in llm-unit-test
* Use AutoTokenizer for bloom
* Print out each passed test
* Remove unused tokenizer from import 
							
						 
						
							2023-10-17 14:46:41 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								d946bd7c55 
								
							 
						 
						
							
							
								
								LLM: add CPU More-Data-Types and Save-Load examples ( #9179 )  
							
							 
							
							
							
						 
						
							2023-10-17 14:38:52 +08:00