Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								1aa0c623ce 
								
							 
						 
						
							
							
								
								disable fused layer norm on UHD ( #10130 )  
							
							 
							
							
							
						 
						
							2024-02-08 10:20:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								a8450fc300 
								
							 
						 
						
							
							
								
								[LLM] Support MLP optimization for Qwen1.5 ( #10123 )  
							
							 
							
							
							
						 
						
							2024-02-08 09:15:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								81ed65fbe7 
								
							 
						 
						
							
							
								
								[LLM] Add qwen1.5-7B in iGPU perf ( #10127 )  
							
							 
							
							... 
							
							
							
							* Add qwen1.5 test config yaml with transformers 4.37.0
* Update for yaml file 
							
						 
						
							2024-02-07 22:31:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								0fcfbfaf6f 
								
							 
						 
						
							
							
								
								LLM: add rwkv5 eagle GPU HF example ( #10122 )  
							
							 
							
							... 
							
							
							
							* LLM: add rwkv5 eagle example
* fix
* fix link 
							
						 
						
							2024-02-07 16:58:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								925f82107e 
								
							 
						 
						
							
							
								
								LLM: support models hosted by modelscope ( #10106 )  
							
							 
							
							
							
						 
						
							2024-02-07 16:46:36 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								c1ec3d8921 
								
							 
						 
						
							
							
								
								LLM: update FAQ about too many open files ( #10119 )  
							
							 
							
							
							
						 
						
							2024-02-07 15:02:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Keyan (Kyrie) Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								2e80701f58 
								
							 
						 
						
							
							
								
								Unit test on final logits and the logits of the last attention layer ( #10093 )  
							
							 
							
							... 
							
							
							
							* Add unit test on final logits and attention
* Add unit test on final logits and attention
* Modify unit test on final logits and attention 
							
						 
						
							2024-02-07 14:25:36 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuxuan Xia 
								
							 
						 
						
							
							
							
							
								
							
							
								3832eb0ce0 
								
							 
						 
						
							
							
								
								Add ChatGLM C-Eval Evaluator ( #10095 )  
							
							 
							
							... 
							
							
							
							* Add ChatGLM ceval evaluator
* Modify ChatGLM Evaluator Reference 
							
						 
						
							2024-02-07 11:27:06 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								63050c954d 
								
							 
						 
						
							
							
								
								fix ( #10117 )  
							
							 
							
							
							
						 
						
							2024-02-07 11:05:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								d3d2ee1b63 
								
							 
						 
						
							
							
								
								LLM: add speech T5 GPU example ( #10090 )  
							
							 
							
							... 
							
							
							
							* add speech t5 example
* fix
* fix 
							
						 
						
							2024-02-07 10:50:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								2f4c754759 
								
							 
						 
						
							
							
								
								LLM: add bark gpu example ( #10091 )  
							
							 
							
							... 
							
							
							
							* add bark gpu example
* fix
* fix license
* add bark
* add example
* fix
* another way 
							
						 
						
							2024-02-07 10:47:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
							
							
								
							
							
								8953acd7d6 
								
							 
						 
						
							
							
								
								[LLM] Fix log condition for BIGDL_OPT_IPEX ( #10115 )  
							
							 
							
							... 
							
							
							
							Fix log condition for BIGDL_OPT_IPEX 
							
						 
						
							2024-02-07 10:27:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								0eccb94d75 
								
							 
						 
						
							
							
								
								remove text-generation-webui from bigdl repo ( #10107 )  
							
							 
							
							
							
						 
						
							2024-02-06 17:46:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ovo233 
								
							 
						 
						
							
							
							
							
								
							
							
								2aaa21c41d 
								
							 
						 
						
							
							
								
								LLM: Update ppl tests ( #10092 )  
							
							 
							
							... 
							
							
							
							* update ppl tests
* use load_dataset api
* add exception handling
* add language argument
* address comments 
							
						 
						
							2024-02-06 17:31:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								3a46b57253 
								
							 
						 
						
							
							
								
								[LLM] Add RWKV4 HF GPU Example ( #10105 )  
							
							 
							
							... 
							
							
							
							* Add GPU HF example for RWKV 4
* Add link to rwkv4
* fix 
							
						 
						
							2024-02-06 16:30:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								518ef95abc 
								
							 
						 
						
							
							
								
								Small fix for Nonetype error ( #10104 )  
							
							 
							
							
							
						 
						
							2024-02-06 14:58:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								d61f4905ac 
								
							 
						 
						
							
							
								
								LLM: 2bit quantization initial support ( #10042 )  
							
							 
							
							... 
							
							
							
							* basis quantize support
* fix new module name
* small update
* and mixed int4 with iq2_xxs
* remove print
* code refactor
* fix style
* meet code review 
							
						 
						
							2024-02-06 14:58:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									dingbaorong 
								
							 
						 
						
							
							
							
							
								
							
							
								36c9442c6d 
								
							 
						 
						
							
							
								
								Arc Stable version test ( #10087 )  
							
							 
							
							... 
							
							
							
							* add batch_size in stable version test
* add batch_size in excludes
* add excludes for batch_size
* fix ci
* triger regression test
* fix xpu version
* disable ci
* address kai's comment
---------
Co-authored-by: Ariadne <wyn2000330@126.com> 
							
						 
						
							2024-02-06 10:23:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jiao Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								33b9e7744d 
								
							 
						 
						
							
							
								
								fix dimension ( #10097 )  
							
							 
							
							
							
						 
						
							2024-02-05 15:07:38 -08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								4b02ff188b 
								
							 
						 
						
							
							
								
								[WebUI] Add prompt format and stopping words for Qwen ( #10066 )  
							
							 
							
							... 
							
							
							
							* add prompt format and stopping_words for qwen mdoel
* performance optimization
* optimize
* update
* meet comments 
							
						 
						
							2024-02-05 18:23:13 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								0aecd8637b 
								
							 
						 
						
							
							
								
								LLM: small fix for the html script ( #10094 )  
							
							 
							
							
							
						 
						
							2024-02-05 17:27:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhicun 
								
							 
						 
						
							
							
							
							
								
							
							
								7d2be7994f 
								
							 
						 
						
							
							
								
								add phixtral and optimize phi-moe ( #10052 )  
							
							 
							
							
							
						 
						
							2024-02-05 11:12:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhicun 
								
							 
						 
						
							
							
							
							
								
							
							
								676d6923f2 
								
							 
						 
						
							
							
								
								LLM: modify transformersembeddings.embed() in langchain ( #10051 )  
							
							 
							
							
							
						 
						
							2024-02-05 10:42:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								ad050107b3 
								
							 
						 
						
							
							
								
								LLM: fix mpt load_low_bit issue ( #10075 )  
							
							 
							
							... 
							
							
							
							* fix
* retry
* retry 
							
						 
						
							2024-02-05 10:17:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								9050991e4e 
								
							 
						 
						
							
							
								
								fix gradio check issue temply ( #10082 )  
							
							 
							
							
							
						 
						
							2024-02-04 16:46:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								c2e562d037 
								
							 
						 
						
							
							
								
								LLM: add batch_size to the csv and html ( #10080 )  
							
							 
							
							... 
							
							
							
							* LLM: add batch_size to the csv and html
* small fix 
							
						 
						
							2024-02-04 16:35:44 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								7e49fbc5dd 
								
							 
						 
						
							
							
								
								LLM: make finetuning examples more common for other models ( #10078 )  
							
							 
							
							
							
						 
						
							2024-02-04 16:03:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								90f004b80b 
								
							 
						 
						
							
							
								
								remove benchmarkwrapper form deepspeed example ( #10079 )  
							
							 
							
							
							
						 
						
							2024-02-04 15:42:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								8e33cb0f38 
								
							 
						 
						
							
							
								
								LLM: support speecht5_tts ( #10077 )  
							
							 
							
							... 
							
							
							
							* support speecht5_tts
* fix 
							
						 
						
							2024-02-04 13:26:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									ivy-lv11 
								
							 
						 
						
							
							
							
							
								
							
							
								428b7105f6 
								
							 
						 
						
							
							
								
								Add HF and PyTorch example InternLM2 ( #10061 )  
							
							 
							
							
							
						 
						
							2024-02-04 10:25:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								77be19bb97 
								
							 
						 
						
							
							
								
								LLM: Support gpt-j in speculative decoding ( #10067 )  
							
							 
							
							... 
							
							
							
							* gptj
* support gptj in speculative decoding
* fix
* update readme
* small fix 
							
						 
						
							2024-02-02 14:54:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								19183ef476 
								
							 
						 
						
							
							
								
								[WebUI] Reset bigdl-llm loader options with default value ( #10064 )  
							
							 
							
							... 
							
							
							
							* reset bigdl-llm loader options with default value
* remove options which maybe complex for naive users 
							
						 
						
							2024-02-01 15:45:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								6e0f1a1e92 
								
							 
						 
						
							
							
								
								use apply_rotary_pos_emb_cache_freq_xpu in mixtral ( #10060 )  
							
							 
							
							... 
							
							
							
							* use apply_rotary_pos_emb_cache_freq_xpu in mixtral
* fix style 
							
						 
						
							2024-02-01 15:40:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								aae20d728e 
								
							 
						 
						
							
							
								
								LLM: Add initial DPO finetuning example ( #10021 )  
							
							 
							
							
							
						 
						
							2024-02-01 14:18:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								601024f418 
								
							 
						 
						
							
							
								
								Mistral CPU example of speculative decoding ( #10024 )  
							
							 
							
							... 
							
							
							
							* Mistral CPU example of speculative decoding
* update transformres version
* update example
* Update README.md 
							
						 
						
							2024-02-01 10:52:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								968e70544d 
								
							 
						 
						
							
							
								
								Enable IPEX Mistral in Speculative ( #10059 )  
							
							 
							
							
							
						 
						
							2024-02-01 10:48:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								3ca03d4e97 
								
							 
						 
						
							
							
								
								Add deepmind sample into bigdl-llm speculative decoding ( #10041 )  
							
							 
							
							... 
							
							
							
							* migrate deepmind sample
* update
* meet comments
* fix style
* fix style 
							
						 
						
							2024-02-01 09:57:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								d2d3f6b091 
								
							 
						 
						
							
							
								
								LLM: ensure the result of daily arc perf test ( #10016 )  
							
							 
							
							... 
							
							
							
							* ensure the result of daily arc perf test
* small fix
* small fix
* small fix
* small fix
* small fix
* small fix
* small fix
* small fix
* small fix
* small fix
* concat more csvs
* small fix
* revert some files 
							
						 
						
							2024-01-31 18:26:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								9724939499 
								
							 
						 
						
							
							
								
								temporarily disable bloom 2k input ( #10056 )  
							
							 
							
							
							
						 
						
							2024-01-31 17:49:12 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								8c8fc148c9 
								
							 
						 
						
							
							
								
								LLM: add rwkv 5 ( #10048 )  
							
							 
							
							
							
						 
						
							2024-01-31 15:54:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								a9018a0e95 
								
							 
						 
						
							
							
								
								LLM: modify the GPU example for redpajama model ( #10044 )  
							
							 
							
							... 
							
							
							
							* LLM: modify the GPU example for redpajama model
* small fix 
							
						 
						
							2024-01-31 14:32:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuxuan Xia 
								
							 
						 
						
							
							
							
							
								
							
							
								95636cad97 
								
							 
						 
						
							
							
								
								Add AutoGen CPU and XPU Example ( #9980 )  
							
							 
							
							... 
							
							
							
							* Add AutoGen example
* Adjust AutoGen README
* Adjust AutoGen README
* Change AutoGen README
* Change AutoGen README 
							
						 
						
							2024-01-31 11:31:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								7284edd9b7 
								
							 
						 
						
							
							
								
								Vicuna CPU example of speculative decoding ( #10018 )  
							
							 
							
							... 
							
							
							
							* Vicuna CPU example of speculative decoding
* Update speculative.py
* Update README.md
* add requirements for ipex
* Update README.md
* Update speculative.py
* Update speculative.py 
							
						 
						
							2024-01-31 11:23:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								7e5cd42a5c 
								
							 
						 
						
							
							
								
								LLM : Update optimize ipex bf16 ( #10038 )  
							
							 
							
							... 
							
							
							
							* use 4.35.2 and remove
* update rmsnorm
* remove
* remove
* update python style
* update
* update python style
* update
* fix style
* update
* remove whitespace 
							
						 
						
							2024-01-31 10:59:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								fb53b994f8 
								
							 
						 
						
							
							
								
								LLM : Add llama ipex optimized ( #10046 )  
							
							 
							
							... 
							
							
							
							* init ipex
* remove padding 
							
						 
						
							2024-01-31 10:38:46 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								3685622f29 
								
							 
						 
						
							
							
								
								LLM: fix llama 4.36 forward( #10047 )  
							
							 
							
							
							
						 
						
							2024-01-31 10:31:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								53a5140eff 
								
							 
						 
						
							
							
								
								Optimize rwkv v5 rest token again ( #10043 )  
							
							 
							
							
							
						 
						
							2024-01-31 10:01:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								b1ff28ceb6 
								
							 
						 
						
							
							
								
								LLama2 CPU example of speculative decoding ( #9962 )  
							
							 
							
							... 
							
							
							
							* LLama2 example of speculative decoding
* add docs
* Update speculative.py
* Update README.md
* Update README.md
* Update speculative.py
* remove autocast 
							
						 
						
							2024-01-31 09:45:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								0fcad6ce14 
								
							 
						 
						
							
							
								
								LLM: add gpu example for redpajama models ( #10040 )  
							
							 
							
							
							
						 
						
							2024-01-30 19:39:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
							
							
								
							
							
								9978089796 
								
							 
						 
						
							
							
								
								[LLM] Enable BIGDL_OPT_IPEX in speculative baichuan2 13b example  ( #10028 )  
							
							 
							
							... 
							
							
							
							Enable BIGDL_OPT_IPEX in speculative baichuan2 13b example 
							
						 
						
							2024-01-30 17:11:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ovo233 
								
							 
						 
						
							
							
							
							
								
							
							
								226f398c2a 
								
							 
						 
						
							
							
								
								fix ppl test errors ( #10036 )  
							
							 
							
							
							
						 
						
							2024-01-30 16:26:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								13e61738c5 
								
							 
						 
						
							
							
								
								hide detail memory for each token in benchmark_utils.py ( #10037 )  
							
							 
							
							
							
						 
						
							2024-01-30 16:04:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								6b63ba23d1 
								
							 
						 
						
							
							
								
								LLM: add full module name during convert ( #10035 )  
							
							 
							
							
							
						 
						
							2024-01-30 14:43:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								7dfa6dbe46 
								
							 
						 
						
							
							
								
								add rwkv time shift optimization ( #10032 )  
							
							 
							
							
							
						 
						
							2024-01-30 14:10:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
							
							
								
							
							
								f57d0fda8b 
								
							 
						 
						
							
							
								
								[LLM] Use IPEX Optimization for Self Speculative Decoding ( #9997 )  
							
							 
							
							... 
							
							
							
							Use IPEX Optimization for Self Speculative Decoding 
							
						 
						
							2024-01-30 09:11:06 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								ccf8f613fb 
								
							 
						 
						
							
							
								
								LLM: update fp16 Linear on ARC/FLEX ( #10023 )  
							
							 
							
							
							
						 
						
							2024-01-29 18:25:26 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
							
							
								
							
							
								824c8029d7 
								
							 
						 
						
							
							
								
								Fix "local variable 'model' referenced before assignment" ( #10022 )  
							
							 
							
							
							
						 
						
							2024-01-29 16:18:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								cc3f122f6a 
								
							 
						 
						
							
							
								
								Baichuan2 CPU example of speculative decoding ( #10003 )  
							
							 
							
							... 
							
							
							
							* Baichuan2 CPU example of speculative decoding
* Update generate.py
* Update README.md
* Update generate.py
* Update generate.py
* Update generate.py
* fix default model
* fix wrong chinese coding
* Update generate.py
* update prompt
* update sample outputs
* baichuan 7b needs transformers==4.31.0
* rename example file's name 
							
						 
						
							2024-01-29 14:21:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
							
							
								
							
							
								f37e4702bc 
								
							 
						 
						
							
							
								
								[LLM] Use IPEX Optimization for BF16 Model ( #9988 )  
							
							 
							
							... 
							
							
							
							Use IPEX Optimization for BF16 Model by env BIGDL_OPT_IPEX=true 
							
						 
						
							2024-01-29 11:28:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								440cfe18ed 
								
							 
						 
						
							
							
								
								LLM: GPU Example Updates for Windows ( #9992 )  
							
							 
							
							... 
							
							
							
							* modify aquila
* modify aquila2
* add baichuan
* modify baichuan2
* modify blue-lm
* modify chatglm3
* modify chinese-llama2
* modify codellama
* modify distil-whisper
* modify dolly-v1
* modify dolly-v2
* modify falcon
* modify flan-t5
* modify gpt-j
* modify internlm
* modify llama2
* modify mistral
* modify mixtral
* modify mpt
* modify phi-1_5
* modify qwen
* modify qwen-vl
* modify replit
* modify solar
* modify starcoder
* modify vicuna
* modify voiceassistant
* modify whisper
* modify yi
* modify aquila2
* modify baichuan
* modify baichuan2
* modify blue-lm
* modify chatglm2
* modify chatglm3
* modify codellama
* modify distil-whisper
* modify dolly-v1
* modify dolly-v2
* modify flan-t5
* modify llama2
* modify llava
* modify mistral
* modify mixtral
* modify phi-1_5
* modify qwen-vl
* modify replit
* modify solar
* modify starcoder
* modify yi
* correct the comments
* remove cpu_embedding in code for whisper and distil-whisper
* remove comment
* remove cpu_embedding for voice assistant
* revert modify voice assistant
* modify for voice assistant
* add comment for voice assistant
* fix comments
* fix comments 
							
						 
						
							2024-01-29 11:25:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								c6d4f91777 
								
							 
						 
						
							
							
								
								[LLM] Add UTs of load_low_bit for transformers-style API ( #10001 )  
							
							 
							
							... 
							
							
							
							* Add uts for transformers api load_low_bit generation
* Small fixes
* Remove replit-code for CPU tests due to current load_low_bit issue on MPT
* Small change
* Small reorganization to llm unit tests on CPU
* Small fixes 
							
						 
						
							2024-01-29 10:18:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								d720554d43 
								
							 
						 
						
							
							
								
								simplify quantize kv cache api ( #10011 )  
							
							 
							
							
							
						 
						
							2024-01-29 09:23:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								a3322e2a6c 
								
							 
						 
						
							
							
								
								add fp8 e5 to use_xmx ( #10015 )  
							
							 
							
							
							
						 
						
							2024-01-26 18:29:46 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Qiyuan Gong 
								
							 
						 
						
							
							
							
							
								
							
							
								9e18ea187f 
								
							 
						 
						
							
							
								
								[LLM] Avoid KV Cache OOM when seq len is larger than 1 ( #10006 )  
							
							 
							
							... 
							
							
							
							* Avoid OOM during multi-round streaming chat with kv cache
* For llama like kv cache, i.e., [bs, n_head, seq_len, head_dim], use is_enough_kv_cache_room_4_31.
* Other models need to compare kv cache size with kv_len. 
							
						 
						
							2024-01-26 17:30:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								e5ae6f2c13 
								
							 
						 
						
							
							
								
								LLM: fix truncation logic of past_key_values in chatglm multi turn chat ( #10007 )  
							
							 
							
							... 
							
							
							
							* Avoid frequently truncating past_key_values when its length is larger than required. 
							
						 
						
							2024-01-26 16:56:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								1eaaace2dc 
								
							 
						 
						
							
							
								
								Update perf test all-in-one config for batch_size arg ( #10012 )  
							
							 
							
							
							
						 
						
							2024-01-26 16:46:36 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								7952bbc919 
								
							 
						 
						
							
							
								
								add conf batch_size to run_model ( #10010 )  
							
							 
							
							
							
						 
						
							2024-01-26 15:48:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								421e7cee80 
								
							 
						 
						
							
							
								
								[LLM] Add Text_Generation_WebUI Support ( #9884 )  
							
							 
							
							... 
							
							
							
							* initially add text_generation_webui support
* add env requirements install
* add necessary dependencies
* update for starting webui
* update shared and noted to place models
* update heading of part3
* meet comments
* add copyright license
* remove extensions
* convert tutorial to windows side
* add warm-up to optimize performance 
							
						 
						
							2024-01-26 15:12:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								f0da0c131b 
								
							 
						 
						
							
							
								
								Disable llama2 optimize model true or false test for now in Arc UTs ( #10008 )  
							
							 
							
							
							
						 
						
							2024-01-26 14:42:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								a00efa0564 
								
							 
						 
						
							
							
								
								LLM: add mlp & qkv fusion for FP16 Llama-7B ( #9932 )  
							
							 
							
							... 
							
							
							
							* add mlp fusion for llama
* add mlp fusion
* fix style
* update
* add mm_qkv_out
* fix style
* update
* meet code review
* meet code review 
							
						 
						
							2024-01-26 11:50:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								98ea3459e5 
								
							 
						 
						
							
							
								
								LLM : Fix llama draft_model dtype error ( #10005 )  
							
							 
							
							... 
							
							
							
							* fix llama draft_model dtype error
* update 
							
						 
						
							2024-01-26 10:59:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								aae1870096 
								
							 
						 
						
							
							
								
								fix qwen kv cache length ( #9998 )  
							
							 
							
							
							
						 
						
							2024-01-26 10:15:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chen, Zhentao 
								
							 
						 
						
							
							
							
							
								
							
							
								762adc4f9d 
								
							 
						 
						
							
							
								
								Reformat summary table ( #9942 )  
							
							 
							
							... 
							
							
							
							* reformat the table
* refactor the file
* read result.json only 
							
						 
						
							2024-01-25 23:49:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								171fb2d185 
								
							 
						 
						
							
							
								
								LLM: reorganize GPU finetuning examples ( #9952 )  
							
							 
							
							
							
						 
						
							2024-01-25 19:02:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								24b34b6e46 
								
							 
						 
						
							
							
								
								change xmx condition ( #10000 )  
							
							 
							
							
							
						 
						
							2024-01-25 17:48:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ziteng Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								8b08ad408b 
								
							 
						 
						
							
							
								
								Add batch_size in all_in_one ( #9999 )  
							
							 
							
							... 
							
							
							
							Add batch_size in all_in_one, except run_native_int4 
							
						 
						
							2024-01-25 17:43:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								093e6f8f73 
								
							 
						 
						
							
							
								
								LLM: Add qwen CPU speculative example ( #9985 )  
							
							 
							
							... 
							
							
							
							* init from gpu
* update for cpu
* update
* update
* fix xpu readme
* update
* update example prompt
* update prompt and add 72b
* update
* update 
							
						 
						
							2024-01-25 17:01:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								bf65548d29 
								
							 
						 
						
							
							
								
								Add quantize kv cache support for chatglm2/3 ( #9996 )  
							
							 
							
							
							
						 
						
							2024-01-25 16:55:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chen, Zhentao 
								
							 
						 
						
							
							
							
							
								
							
							
								86055d76d5 
								
							 
						 
						
							
							
								
								fix optimize_model not working ( #9995 )  
							
							 
							
							
							
						 
						
							2024-01-25 16:39:05 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								9bff84e6fd 
								
							 
						 
						
							
							
								
								LLM: Convert draft_model kv_cache from bf16 to fp32 ( #9964 )  
							
							 
							
							... 
							
							
							
							* convert bf16 to fp32
* update
* change when init
* init first and cut off after
* init and exchange
* update python type
* update
* fix bug
* update
* update 
							
						 
						
							2024-01-25 11:20:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								99ff6cf048 
								
							 
						 
						
							
							
								
								Update gpu spec decoding baichuan2 example dependency ( #9990 )  
							
							 
							
							... 
							
							
							
							* add dependency
* update
* update 
							
						 
						
							2024-01-25 11:05:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								27338540c3 
								
							 
						 
						
							
							
								
								Fix repetition_penalty not activated issue ( #9989 )  
							
							 
							
							
							
						 
						
							2024-01-25 10:40:41 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jason Dai 
								
							 
						 
						
							
							
							
							
								
							
							
								3bc3d0bbcd 
								
							 
						 
						
							
							
								
								Update self-speculative readme ( #9986 )  
							
							 
							
							
							
						 
						
							2024-01-24 22:37:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								b27e5a27b9 
								
							 
						 
						
							
							
								
								Remove the check for meta device in _replace_with_low_bit_linear ( #9984 )  
							
							 
							
							
							
						 
						
							2024-01-24 18:15:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								d4f65a6033 
								
							 
						 
						
							
							
								
								LLM: add mistral speculative example ( #9976 )  
							
							 
							
							... 
							
							
							
							* add mistral example
* update 
							
						 
						
							2024-01-24 17:35:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								b176cad75a 
								
							 
						 
						
							
							
								
								LLM: Add baichuan2 gpu spec example ( #9973 )  
							
							 
							
							... 
							
							
							
							* add baichuan2 gpu spec example
* update readme & example
* remove print
* fix typo
* meet comments
* revert
* update 
							
						 
						
							2024-01-24 16:40:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jinyi Wan 
								
							 
						 
						
							
							
							
							
								
							
							
								ec2d9de0ea 
								
							 
						 
						
							
							
								
								Fix README.md for solar ( #9957 )  
							
							 
							
							
							
						 
						
							2024-01-24 15:50:54 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Mingyu Wei 
								
							 
						 
						
							
							
							
							
								
							
							
								bc9cff51a8 
								
							 
						 
						
							
							
								
								LLM GPU Example Update for Windows Support ( #9902 )  
							
							 
							
							... 
							
							
							
							* Update README in LLM GPU Examples
* Update reference of Intel GPU
* add cpu_embedding=True in comment
* small fixes
* update GPU/README.md and add explanation for cpu_embedding=True
* address comments
* fix small typos
* add backtick for cpu_embedding=True
* remove extra backtick in the doc
* add period mark
* update readme 
							
						 
						
							2024-01-24 13:42:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chen, Zhentao 
								
							 
						 
						
							
							
							
							
								
							
							
								e0db44dcb6 
								
							 
						 
						
							
							
								
								fix unexpected keyword argument 'device'  ( #9982 )  
							
							 
							
							... 
							
							
							
							* add device for chatglm3 only
* add comment for this change
* fix style
* fix style
* fix style again..
* finally fixed style 
							
						 
						
							2024-01-24 13:20:46 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Mingyu Wei 
								
							 
						 
						
							
							
							
							
								
							
							
								50a851e3b3 
								
							 
						 
						
							
							
								
								LLM: separate arc ut for disable XMX ( #9953 )  
							
							 
							
							... 
							
							
							
							* separate test_optimize_model api with disabled xmx
* delete test_optimize_model in test_transformers_api.py
* set env variable in .sh/ put back test_optimize_model
* unset env variable
* remove env setting in .py
* address errors in action
* remove import ipex
* lower tolerance 
							
						 
						
							2024-01-23 19:04:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								8d28aa8e2b 
								
							 
						 
						
							
							
								
								[LLM] Fix the model.device problem when cpu_embedding=True ( #9971 )  
							
							 
							
							... 
							
							
							
							* Overwrite the device attribute for CPUPinnedParam
* Expose cpu_embedding=True for Linux users
* Fix python style 
							
						 
						
							2024-01-23 18:51:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								f82782cd3b 
								
							 
						 
						
							
							
								
								fix starcoder ( #9975 )  
							
							 
							
							
							
						 
						
							2024-01-23 17:24:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								be5836bee1 
								
							 
						 
						
							
							
								
								LLM: fix outlier value ( #9945 )  
							
							 
							
							... 
							
							
							
							* fix outlier value
* small fix 
							
						 
						
							2024-01-23 17:04:13 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								2c8a9aaf0d 
								
							 
						 
						
							
							
								
								fix qwen causal mask when quantize_kv_cache=True ( #9968 )  
							
							 
							
							
							
						 
						
							2024-01-23 16:34:05 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								5aa4b32c1b 
								
							 
						 
						
							
							
								
								LLM: Add qwen spec gpu example ( #9965 )  
							
							 
							
							... 
							
							
							
							* add qwen spec gpu example
* update readme
---------
Co-authored-by: rnwang04 <ruonan1.wang@intel.com> 
							
						 
						
							2024-01-23 15:59:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								36c665667d 
								
							 
						 
						
							
							
								
								Add logits processor & qwen eos stop in speculative decoding ( #9963 )  
							
							 
							
							... 
							
							
							
							* add logits processor & qwen eos
* fix style
* fix
* fix
* fix style
* fix style
* support transformers 4.31
* fix style
* fix style
---------
Co-authored-by: rnwang04 <ruonan1.wang@intel.com> 
							
						 
						
							2024-01-23 15:57:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								60b35db1f1 
								
							 
						 
						
							
							
								
								LLM: add chatglm3 speculative decoding example ( #9966 )  
							
							 
							
							... 
							
							
							
							* add chatglm3 example
* update
* fix 
							
						 
						
							2024-01-23 15:54:12 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								da4687c917 
								
							 
						 
						
							
							
								
								fix fp16 ( #9970 )  
							
							 
							
							
							
						 
						
							2024-01-23 15:53:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chen, Zhentao 
								
							 
						 
						
							
							
							
							
								
							
							
								301425e377 
								
							 
						 
						
							
							
								
								harness tests on pvc multiple xpus ( #9908 )  
							
							 
							
							... 
							
							
							
							* add run_multi_llb.py
* update readme
* add job hint 
							
						 
						
							2024-01-23 13:20:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								27b19106f3 
								
							 
						 
						
							
							
								
								LLM: add readme for speculative decoding gpu examples ( #9961 )  
							
							 
							
							... 
							
							
							
							* add readme
* add readme
* meet code review 
							
						 
						
							2024-01-23 12:54:19 +08:00