Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								8f45e22072 
								
							 
						 
						
							
							
								
								fix llama2 ( #10710 )  
							
							 
							
							
							
						 
						
							2024-04-09 17:28:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e438f941f2 
								
							 
						 
						
							
							
								
								disable rwkv5 fp16 ( #10699 )  
							
							 
							
							
							
						 
						
							2024-04-09 16:42:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								6a32216269 
								
							 
						 
						
							
							
								
								LLM: add llama2 8k input example. ( #10696 )  
							
							 
							
							... 
							
							
							
							* LLM: add llama2-32K example.
* refactor name.
* fix comments.
* add IPEX_LLM_LOW_MEM notes and update sample output. 
							
						 
						
							2024-04-09 16:02:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wenjing Margaret Mao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								289cc99cd6 
								
							 
						 
						
							
							
								
								Update README.md ( #10700 )  
							
							 
							
							... 
							
							
							
							Edit "summarize the results" 
							
						 
						
							2024-04-09 16:01:12 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wenjing Margaret Mao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d3116de0db 
								
							 
						 
						
							
							
								
								Update README.md ( #10701 )  
							
							 
							
							... 
							
							
							
							edit "summarize the results" 
							
						 
						
							2024-04-09 15:50:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chen, Zhentao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d59e0cce5c 
								
							 
						 
						
							
							
								
								Migrate harness to ipexllm ( #10703 )  
							
							 
							
							... 
							
							
							
							* migrate to ipexlm
* fix workflow
* fix run_multi
* fix precision map
* rename ipexlm to ipexllm
* rename bigdl to ipex  in comments 
							
						 
						
							2024-04-09 15:48:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Keyan (Kyrie) Zhang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1e27e08322 
								
							 
						 
						
							
							
								
								Modify example from fp32 to fp16 ( #10528 )  
							
							 
							
							... 
							
							
							
							* Modify example from fp32 to fp16
* Remove Falcon from fp16 example for now
* Remove MPT from fp16 example 
							
						 
						
							2024-04-09 15:45:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								44922bb5c2 
								
							 
						 
						
							
							
								
								LLM: support baichuan2-13b using AutoTP ( #10691 )  
							
							 
							
							
							
						 
						
							2024-04-09 14:06:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c7422712fc 
								
							 
						 
						
							
							
								
								mistral 4.36 use fp16 sdp ( #10704 )  
							
							 
							
							
							
						 
						
							2024-04-09 13:50:33 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ovo233 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								dcb2038aad 
								
							 
						 
						
							
							
								
								Enable optimization for sentence_transformers ( #10679 )  
							
							 
							
							... 
							
							
							
							* enable optimization for sentence_transformers
* fix python style check failure 
							
						 
						
							2024-04-09 12:33:46 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								5a1f446d3c 
								
							 
						 
						
							
							
								
								support fp8 in xetla ( #10555 )  
							
							 
							
							... 
							
							
							
							* support fp8 in xetla
* change name
* adjust model file
* support convert back to cpu
* factor
* fix bug
* fix style 
							
						 
						
							2024-04-08 13:22:09 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									jenniew 
								
							 
						 
						
							
							
							
							
								
							
							
								591bae092c 
								
							 
						 
						
							
							
								
								combine english and chinese, remove nan  
							
							 
							
							
							
						 
						
							2024-04-08 19:37:51 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7c43ac0164 
								
							 
						 
						
							
							
								
								LLM: optimize llama native sdp for split qkv tensor ( #10693 )  
							
							 
							
							... 
							
							
							
							* LLM: optimize llama native sdp for split qkv tensor.
* fix block real size.
* fix comment.
* fix style.
* refactor. 
							
						 
						
							2024-04-08 17:48:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1274cba79b 
								
							 
						 
						
							
							
								
								stablelm fp8 kv cache ( #10672 )  
							
							 
							
							... 
							
							
							
							* stablelm fp8 kvcache
* update
* fix
* change to fp8 matmul
* fix style
* fix
* fix
* meet code review
* add comment 
							
						 
						
							2024-04-08 15:16:46 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								65127622aa 
								
							 
						 
						
							
							
								
								fix UT threshold ( #10689 )  
							
							 
							
							
							
						 
						
							2024-04-08 14:58:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c0cd238e40 
								
							 
						 
						
							
							
								
								LLM: support llama2 8k input with w4a16. ( #10677 )  
							
							 
							
							... 
							
							
							
							* LLM: support llama2 8k input with w4a16.
* fix comment and style.
* fix style.
* fix comments and split tensor to quantized attention forward.
* fix style.
* refactor name.
* fix style.
* fix style.
* fix style.
* refactor checker name.
* refactor native sdp split qkv tensor name.
* fix style.
* fix comment rename variables.
* fix co-existence of intermediate results. 
							
						 
						
							2024-04-08 11:43:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhicun 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								321bc69307 
								
							 
						 
						
							
							
								
								Fix llamaindex ut ( #10673 )  
							
							 
							
							... 
							
							
							
							* fix llamaindex ut
* add GPU ut 
							
						 
						
							2024-04-08 09:47:51 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									yb-peng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								2d88bb9b4b 
								
							 
						 
						
							
							
								
								add test api transformer_int4_fp16_gpu ( #10627 )  
							
							 
							
							... 
							
							
							
							* add test api transformer_int4_fp16_gpu
* update config.yaml and README.md in all-in-one
* modify run.py in all-in-one
* re-order test-api
* re-order test-api in config
* modify README.md in all-in-one
* modify README.md in all-in-one
* modify config.yaml
---------
Co-authored-by: pengyb2001 <arda@arda-arc21.sh.intel.com>
Co-authored-by: ivy-lv11 <zhicunlv@gmail.com> 
							
						 
						
							2024-04-07 15:47:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								47cabe8fcc 
								
							 
						 
						
							
							
								
								LLM: Fix no return_last_logit running bigdl_ipex chatglm3 ( #10678 )  
							
							 
							
							... 
							
							
							
							* fix no return_last_logits
* update only for chatglm 
							
						 
						
							2024-04-07 15:27:58 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9ad4b29697 
								
							 
						 
						
							
							
								
								LLM: CPU benchmark using tcmalloc ( #10675 )  
							
							 
							
							
							
						 
						
							2024-04-07 14:17:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d9a1153b4e 
								
							 
						 
						
							
							
								
								LLM: upgrade deepspeed in AutoTP on GPU ( #10647 )  
							
							 
							
							
							
						 
						
							2024-04-07 14:05:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								56dfcb2ade 
								
							 
						 
						
							
							
								
								Migrate portable zip to ipex-llm ( #10617 )  
							
							 
							
							... 
							
							
							
							* change portable zip prompt to ipex-llm
* fix chat with ui
* add no proxy 
							
						 
						
							2024-04-07 13:58:58 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhicun 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9d8ba64c0d 
								
							 
						 
						
							
							
								
								Llamaindex: add tokenizer_id and support chat ( #10590 )  
							
							 
							
							... 
							
							
							
							* add tokenizer_id
* fix
* modify
* add from_model_id and from_model_id_low_bit
* fix typo and add comment
* fix python code style
---------
Co-authored-by: pengyb2001 <284261055@qq.com> 
							
						 
						
							2024-04-07 13:51:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								10ee786920 
								
							 
						 
						
							
							
								
								Replace with IPEX-LLM in example comments ( #10671 )  
							
							 
							
							... 
							
							
							
							* Replace with IPEX-LLM in example comments
* More replacement
* revert some changes 
							
						 
						
							2024-04-07 13:29:51 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								08018a18df 
								
							 
						 
						
							
							
								
								Remove not-imported MistralConfig ( #10670 )  
							
							 
							
							
							
						 
						
							2024-04-07 10:32:05 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1a9b8204a4 
								
							 
						 
						
							
							
								
								LLM: support int4 fp16 chatglm2-6b 8k input. ( #10648 )  
							
							 
							
							
							
						 
						
							2024-04-07 09:39:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jiao Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								69bdbf5806 
								
							 
						 
						
							
							
								
								Fix vllm print error message issue ( #10664 )  
							
							 
							
							... 
							
							
							
							* update chatglm readme
* Add condition to invalidInputError
* update
* update
* style 
							
						 
						
							2024-04-05 15:08:13 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jason Dai 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								29d97e4678 
								
							 
						 
						
							
							
								
								Update readme ( #10665 )  
							
							 
							
							
							
						 
						
							2024-04-05 18:01:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								4c3e493b2d 
								
							 
						 
						
							
							
								
								fix stablelm2 1.6b ( #10656 )  
							
							 
							
							... 
							
							
							
							* fix stablelm2 1.6b
* meet code review 
							
						 
						
							2024-04-03 22:15:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								cc8b3be11c 
								
							 
						 
						
							
							
								
								Add GPU and CPU example for stablelm-zephyr-3b ( #10643 )  
							
							 
							
							... 
							
							
							
							* Add example for StableLM
* fix
* add to readme 
							
						 
						
							2024-04-03 16:28:31 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								6000241b10 
								
							 
						 
						
							
							
								
								Add Deepspeed Example of FLEX Mistral ( #10640 )  
							
							 
							
							
							
						 
						
							2024-04-03 16:04:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d18dbfb097 
								
							 
						 
						
							
							
								
								update spr perf test ( #10644 )  
							
							 
							
							
							
						 
						
							2024-04-03 15:53:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								702e686901 
								
							 
						 
						
							
							
								
								optimize starcoder normal kv cache ( #10642 )  
							
							 
							
							
							
						 
						
							2024-04-03 15:27:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								3a9ab8f1ae 
								
							 
						 
						
							
							
								
								fix stablelm logits diff ( #10636 )  
							
							 
							
							... 
							
							
							
							* fix logits diff
* Small fixes
---------
Co-authored-by: Yuwen Hu <yuwen.hu@intel.com> 
							
						 
						
							2024-04-03 15:08:12 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhicun 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b827f534d5 
								
							 
						 
						
							
							
								
								Add tokenizer_id in Langchain ( #10588 )  
							
							 
							
							... 
							
							
							
							* fix low-bit
* fix
* fix style
---------
Co-authored-by: arda <arda@arda-arc12.sh.intel.com> 
							
						 
						
							2024-04-03 14:25:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhicun 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f6fef09933 
								
							 
						 
						
							
							
								
								fix prompt format for llama-2 in langchain ( #10637 )  
							
							 
							
							
							
						 
						
							2024-04-03 14:17:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jiao Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								330d4b4f4b 
								
							 
						 
						
							
							
								
								update readme ( #10631 )  
							
							 
							
							
							
						 
						
							2024-04-02 23:08:02 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c875b3c858 
								
							 
						 
						
							
							
								
								Add seq len check for llama softmax upcast to fp32 ( #10629 )  
							
							 
							
							
							
						 
						
							2024-04-03 12:05:13 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jiao Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								4431134ec5 
								
							 
						 
						
							
							
								
								update readme ( #10632 )  
							
							 
							
							
							
						 
						
							2024-04-02 19:54:30 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jiao Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								23e33a0ca1 
								
							 
						 
						
							
							
								
								Fix qwen-vl style ( #10633 )  
							
							 
							
							... 
							
							
							
							* update
* update 
							
						 
						
							2024-04-02 18:41:38 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								2bbd8a1548 
								
							 
						 
						
							
							
								
								LLM: fix llama2 FP16 & bs>1 & autotp on PVC and ARC ( #10611 )  
							
							 
							
							
							
						 
						
							2024-04-03 09:28:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jiao Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								654dc5ba57 
								
							 
						 
						
							
							
								
								Fix Qwen-VL example problem ( #10582 )  
							
							 
							
							... 
							
							
							
							* update
* update
* update
* update 
							
						 
						
							2024-04-02 12:17:30 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								fd384ddfb8 
								
							 
						 
						
							
							
								
								Optimize StableLM ( #10619 )  
							
							 
							
							... 
							
							
							
							* Initial commit for stablelm optimizations
* Small style fix
* add dependency
* Add mlp optimizations
* Small fix
* add attention forward
* Remove quantize kv for now as head_dim=80
* Add merged qkv
* fix license
* Python style fix
---------
Co-authored-by: qiuxin2012 <qiuxin2012cs@gmail.com> 
							
						 
						
							2024-04-02 18:58:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								27be448920 
								
							 
						 
						
							
							
								
								LLM: add cpu_embedding and peak memory record for deepspeed autotp script ( #10621 )  
							
							 
							
							
							
						 
						
							2024-04-02 17:32:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ba8cc6bd68 
								
							 
						 
						
							
							
								
								optimize starcoder2-3b ( #10625 )  
							
							 
							
							
							
						 
						
							2024-04-02 17:16:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								a10f5a1b8d 
								
							 
						 
						
							
							
								
								add python style check ( #10620 )  
							
							 
							
							... 
							
							
							
							* add python style check
* fix style checks
* update runner
* add ipex-llm-finetune-qlora-cpu-k8s to manually_build workflow
* update tag to 2.1.0-SNAPSHOT 
							
						 
						
							2024-04-02 16:17:56 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								58b57177e3 
								
							 
						 
						
							
							
								
								LLM: support bigdl quantize kv cache env and add warning. ( #10623 )  
							
							 
							
							... 
							
							
							
							* LLM: support bigdl quantize kv cache env and add warning.
* fix style.
* fix comments. 
							
						 
						
							2024-04-02 15:41:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0a95c556a1 
								
							 
						 
						
							
							
								
								Fix starcoder first token perf ( #10612 )  
							
							 
							
							... 
							
							
							
							* add bias check
* update 
							
						 
						
							2024-04-02 09:21:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e567956121 
								
							 
						 
						
							
							
								
								LLM: add memory optimization for llama. ( #10592 )  
							
							 
							
							... 
							
							
							
							* add initial memory optimization.
* fix logic.
* fix logic,
* remove env var check in mlp split. 
							
						 
						
							2024-04-02 09:07:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Keyan (Kyrie) Zhang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								01f491757a 
								
							 
						 
						
							
							
								
								Modify the link in Langchain-upstream ut ( #10608 )  
							
							 
							
							... 
							
							
							
							* Modify the link in Langchain-upstream ut
* fix langchain-upstream ut 
							
						 
						
							2024-04-01 17:03:40 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								bfc1caa5e5 
								
							 
						 
						
							
							
								
								LLM: support iq1s for llama2-70b-hf ( #10596 )  
							
							 
							
							
							
						 
						
							2024-04-01 13:13:13 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d6af4877dd 
								
							 
						 
						
							
							
								
								LLM: remove ipex.optimize for gpt-j ( #10606 )  
							
							 
							
							... 
							
							
							
							* remove ipex.optimize
* fix
* fix 
							
						 
						
							2024-04-01 12:21:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								437a349dd6 
								
							 
						 
						
							
							
								
								fix rwkv with pip installer ( #10591 )  
							
							 
							
							
							
						 
						
							2024-03-29 17:56:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9a83f21b86 
								
							 
						 
						
							
							
								
								LLM: check user env ( #10580 )  
							
							 
							
							... 
							
							
							
							* LLM: check user env
* small fix
* small fix
* small fix 
							
						 
						
							2024-03-29 17:19:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Keyan (Kyrie) Zhang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								848fa04dd6 
								
							 
						 
						
							
							
								
								Fix typo in Baichuan2 example ( #10589 )  
							
							 
							
							
							
						 
						
							2024-03-29 13:31:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0136fad1d4 
								
							 
						 
						
							
							
								
								LLM: support iq1_s ( #10564 )  
							
							 
							
							... 
							
							
							
							* init version
* update utils
* remove unused code 
							
						 
						
							2024-03-29 09:43:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Qiyuan Gong 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f4537798c1 
								
							 
						 
						
							
							
								
								Enable kv cache quantization by default for flex when 1 < batch <= 8 ( #10584 )  
							
							 
							
							... 
							
							
							
							* Enable kv cache quantization by default for flex when 1 < batch <= 8.
* Change up bound from <8 to <=8. 
							
						 
						
							2024-03-29 09:43:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b44f7adbad 
								
							 
						 
						
							
							
								
								LLM: Disable esimd sdp for PVC GPU when batch size>1 ( #10579 )  
							
							 
							
							... 
							
							
							
							* llm: disable esimd sdp for pvc bz>1.
* fix logic.
* fix: avoid call get device name twice. 
							
						 
						
							2024-03-28 22:55:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								5963239b46 
								
							 
						 
						
							
							
								
								Fix qwen's position_ids not enough ( #10572 )  
							
							 
							
							... 
							
							
							
							* fix position_ids
* fix position_ids 
							
						 
						
							2024-03-28 17:05:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									ZehuaCao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								52a2135d83 
								
							 
						 
						
							
							
								
								Replace ipex with ipex-llm ( #10554 )  
							
							 
							
							... 
							
							
							
							* fix ipex with ipex_llm
* fix ipex with ipex_llm
* update
* update
* update
* update
* update
* update
* update
* update 
							
						 
						
							2024-03-28 13:54:40 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cheen Hau, 俊豪 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1c5eb14128 
								
							 
						 
						
							
							
								
								Update pip install to use --extra-index-url for ipex package ( #10557 )  
							
							 
							
							... 
							
							
							
							* Change to 'pip install .. --extra-index-url' for readthedocs
* Change to 'pip install .. --extra-index-url' for examples
* Change to 'pip install .. --extra-index-url' for remaining files
* Fix URL for ipex
* Add links for ipex US and CN servers
* Update ipex cpu url
* remove readme
* Update for github actions
* Update for dockerfiles 
							
						 
						
							2024-03-28 09:56:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								92dfed77be 
								
							 
						 
						
							
							
								
								LLM: fix abnormal output of fp16 deepspeed autotp ( #10558 )  
							
							 
							
							
							
						 
						
							2024-03-28 09:35:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jason Dai 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c450c85489 
								
							 
						 
						
							
							
								
								Delete llm/readme.md ( #10569 )  
							
							 
							
							
							
						 
						
							2024-03-27 20:06:40 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								51d34ca68e 
								
							 
						 
						
							
							
								
								Fix wrong import in speculative ( #10562 )  
							
							 
							
							
							
						 
						
							2024-03-27 18:21:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cheen Hau, 俊豪 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f239bc329b 
								
							 
						 
						
							
							
								
								Specify oneAPI minor version in documentation ( #10561 )  
							
							 
							
							
							
						 
						
							2024-03-27 17:58:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								fbeb10c796 
								
							 
						 
						
							
							
								
								LLM: Set different env based on different Linux kernels ( #10566 )  
							
							 
							
							
							
						 
						
							2024-03-27 17:56:33 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									hxsz1997 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d86477f14d 
								
							 
						 
						
							
							
								
								Remove native_int4 in LangChain examples ( #10510 )  
							
							 
							
							... 
							
							
							
							* rebase the modify to ipex-llm
* modify the typo 
							
						 
						
							2024-03-27 17:48:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								04baac5a2e 
								
							 
						 
						
							
							
								
								Fix fastchat top_k ( #10560 )  
							
							 
							
							... 
							
							
							
							* fix -1 top_k
* fix
* done 
							
						 
						
							2024-03-27 16:01:58 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								fc8c7904f0 
								
							 
						 
						
							
							
								
								LLM: fix torch_dtype setting of apply fp16 optimization through optimize_model ( #10556 )  
							
							 
							
							
							
						 
						
							2024-03-27 14:18:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ea4bc450c4 
								
							 
						 
						
							
							
								
								LLM: add esimd sdp for pvc ( #10543 )  
							
							 
							
							... 
							
							
							
							* add esimd sdp for pvc
* update
* fix
* fix batch 
							
						 
						
							2024-03-26 19:04:40 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b78289a595 
								
							 
						 
						
							
							
								
								Remove ipex-llm dependency in readme ( #10544 )  
							
							 
							
							
							
						 
						
							2024-03-26 18:25:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								11550d3f25 
								
							 
						 
						
							
							
								
								LLM: Add length check for IPEX-CPU speculative decoding  ( #10529 )  
							
							 
							
							... 
							
							
							
							Add length check for IPEX-CPU speculative decoding. 
							
						 
						
							2024-03-26 17:47:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								a3b007f3b1 
								
							 
						 
						
							
							
								
								[Serving] Fix fastchat breaks ( #10548 )  
							
							 
							
							... 
							
							
							
							* fix fastchat
* fix doc 
							
						 
						
							2024-03-26 17:03:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								69a28d6b4c 
								
							 
						 
						
							
							
								
								fix chatglm ( #10540 )  
							
							 
							
							
							
						 
						
							2024-03-26 16:01:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c563b41491 
								
							 
						 
						
							
							
								
								add nightly_build workflow ( #10533 )  
							
							 
							
							... 
							
							
							
							* add nightly_build workflow
* add create-job-status-badge action
* update
* update
* update
* update setup.py
* release
* revert 
							
						 
						
							2024-03-26 12:47:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0a3e4e788f 
								
							 
						 
						
							
							
								
								LLM: fix mistral hidden_size setting for deepspeed autotp ( #10527 )  
							
							 
							
							
							
						 
						
							2024-03-26 10:55:44 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1dd40b429c 
								
							 
						 
						
							
							
								
								enable fp4 fused mlp and qkv ( #10531 )  
							
							 
							
							... 
							
							
							
							* enable fp4 fused mlp and qkv
* update qwen
* update qwen2 
							
						 
						
							2024-03-26 08:34:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								16b2ef49c6 
								
							 
						 
						
							
							
								
								Update_document by heyang ( #30 )  
							
							 
							
							
							
						 
						
							2024-03-25 10:06:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								a1048ca7f6 
								
							 
						 
						
							
							
								
								Update setup.py and add new actions and add compatible mode ( #25 )  
							
							 
							
							... 
							
							
							
							* update setup.py
* add new action
* add compatible mode 
							
						 
						
							2024-03-22 15:44:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9df70d95eb 
								
							 
						 
						
							
							
								
								Refactor bigdl.llm to  ipex_llm ( #24 )  
							
							 
							
							... 
							
							
							
							* Rename bigdl/llm to ipex_llm
* rm python/llm/src/bigdl
* from bigdl.llm to from ipex_llm 
							
						 
						
							2024-03-22 15:41:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								cc5806f4bc 
								
							 
						 
						
							
							
								
								LLM: add save/load example for hf-transformers ( #10432 )  
							
							 
							
							
							
						 
						
							2024-03-22 13:57:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								34d0a9328c 
								
							 
						 
						
							
							
								
								LLM: Speed-up mixtral in pipeline parallel inference ( #10472 )  
							
							 
							
							... 
							
							
							
							* speed-up mixtral
* fix style 
							
						 
						
							2024-03-22 11:06:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								b9d4280892 
								
							 
						 
						
							
							
								
								LLM: fix baichuan7b quantize kv abnormal output. ( #10504 )  
							
							 
							
							... 
							
							
							
							* fix abnormal output.
* fix style.
* fix style. 
							
						 
						
							2024-03-22 10:00:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								f0f317b6cf 
								
							 
						 
						
							
							
								
								fix a typo in yuan ( #10503 )  
							
							 
							
							
							
						 
						
							2024-03-22 09:40:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
							
							
								
							
							
								3a3756b51d 
								
							 
						 
						
							
							
								
								Add FastChat bigdl_worker ( #10493 )  
							
							 
							
							... 
							
							
							
							* done
* fix format
* add licence
* done
* fix doc
* refactor folder
* add license 
							
						 
						
							2024-03-21 18:35:05 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								dba7ddaab3 
								
							 
						 
						
							
							
								
								add sdp fp8 for qwen llama436 baichuan mistral baichuan2 ( #10485 )  
							
							 
							
							... 
							
							
							
							* add sdp fp8
* fix style
* fix qwen
* fix baichuan 13
* revert baichuan 13b and baichuan2-13b
* fix style
* update 
							
						 
						
							2024-03-21 17:23:05 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								30f111cd32 
								
							 
						 
						
							
							
								
								lm_head empty_cache for more models ( #10490 )  
							
							 
							
							... 
							
							
							
							* modify constraint
* fix style 
							
						 
						
							2024-03-21 17:11:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								1579ee4421 
								
							 
						 
						
							
							
								
								[LLM] Add nightly igpu perf test for INT4+FP16 1024-128 ( #10496 )  
							
							 
							
							
							
						 
						
							2024-03-21 16:07:06 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								2958ca49c0 
								
							 
						 
						
							
							
								
								LLM: add patching function for llm finetuning ( #10247 )  
							
							 
							
							
							
						 
						
							2024-03-21 16:01:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhicun 
								
							 
						 
						
							
							
							
							
								
							
							
								5b97fdb87b 
								
							 
						 
						
							
							
								
								update deepseek example readme ( #10420 )  
							
							 
							
							... 
							
							
							
							* update readme
* update
* update readme 
							
						 
						
							2024-03-21 15:21:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									hxsz1997 
								
							 
						 
						
							
							
							
							
								
							
							
								a5f35757a4 
								
							 
						 
						
							
							
								
								Migrate langchain rag cpu example to gpu ( #10450 )  
							
							 
							
							... 
							
							
							
							* add langchain rag on gpu
* add rag example in readme
* add trust_remote_code in TransformersEmbeddings.from_model_id
* add trust_remote_code in TransformersEmbeddings.from_model_id in cpu 
							
						 
						
							2024-03-21 15:20:46 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								85ef3f1d99 
								
							 
						 
						
							
							
								
								LLM: add empty cache in deepspeed autotp benchmark script ( #10488 )  
							
							 
							
							
							
						 
						
							2024-03-21 10:51:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
							
							
								
							
							
								5a5fd5af5b 
								
							 
						 
						
							
							
								
								LLM: Add speculative benchmark on CPU/XPU ( #10464 )  
							
							 
							
							... 
							
							
							
							Add speculative benchmark on CPU/XPU. 
							
						 
						
							2024-03-21 09:51:06 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								28c315a5b9 
								
							 
						 
						
							
							
								
								LLM: fix deepspeed error of finetuning on xpu ( #10484 )  
							
							 
							
							
							
						 
						
							2024-03-21 09:46:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								021d77fd22 
								
							 
						 
						
							
							
								
								Remove softmax upcast fp32 in llama ( #10481 )  
							
							 
							
							... 
							
							
							
							* update
* fix style 
							
						 
						
							2024-03-20 18:17:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								cfdf8ad496 
								
							 
						 
						
							
							
								
								Fix modules_not_to_convert argument ( #10483 )  
							
							 
							
							
							
						 
						
							2024-03-20 17:47:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
							
							
								
							
							
								cbe24cc7e6 
								
							 
						 
						
							
							
								
								LLM: Enable BigDL IPEX Int8 ( #10480 )  
							
							 
							
							... 
							
							
							
							Enable BigDL IPEX Int8 
							
						 
						
							2024-03-20 15:59:54 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									ZehuaCao 
								
							 
						 
						
							
							
							
							
								
							
							
								1d062e24db 
								
							 
						 
						
							
							
								
								Update serving doc ( #10475 )  
							
							 
							
							... 
							
							
							
							* update serving doc
* add tob
* update
* update
* update
* update vllm worker 
							
						 
						
							2024-03-20 14:44:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								4581e4f17f 
								
							 
						 
						
							
							
								
								LLM: fix whisper model missing config. ( #10473 )  
							
							 
							
							... 
							
							
							
							* fix whisper model missing config.
* fix style.
* fix style.
* style. 
							
						 
						
							2024-03-20 14:22:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								e41d556436 
								
							 
						 
						
							
							
								
								LLM: change fp16 benchmark to model.half ( #10477 )  
							
							 
							
							... 
							
							
							
							* LLM: change fp16 benchmark to model.half
* fix 
							
						 
						
							2024-03-20 13:38:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								749bedaf1e 
								
							 
						 
						
							
							
								
								fix rwkv v5 fp16 ( #10474 )  
							
							 
							
							
							
						 
						
							2024-03-20 13:15:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								72bcc27da9 
								
							 
						 
						
							
							
								
								[LLM] Add TransformersBgeEmbeddings class in bigdl.llm.langchain.embeddings ( #10459 )  
							
							 
							
							... 
							
							
							
							* Add TransformersBgeEmbeddings class in bigdl.llm.langchain.embeddings
* Small fixes 
							
						 
						
							2024-03-19 18:04:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								463a86cd5d 
								
							 
						 
						
							
							
								
								LLM: fix qwen-vl interpolation gpu abnormal results. ( #10457 )  
							
							 
							
							... 
							
							
							
							* fix qwen-vl interpolation gpu abnormal results.
* fix style.
* update qwen-vl gpu example.
* fix comment and update example.
* fix style. 
							
						 
						
							2024-03-19 16:59:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								e9055c32f9 
								
							 
						 
						
							
							
								
								LLM: fix fp16 mem record in benchmark ( #10461 )  
							
							 
							
							... 
							
							
							
							* LLM: fix fp16 mem record in benchmark
* change style 
							
						 
						
							2024-03-19 16:17:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jiao Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								f3fefdc9ce 
								
							 
						 
						
							
							
								
								fix pad_token_id issue ( #10425 )  
							
							 
							
							
							
						 
						
							2024-03-18 23:30:28 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuxuan Xia 
								
							 
						 
						
							
							
							
							
								
							
							
								74e7490fda 
								
							 
						 
						
							
							
								
								Fix Baichuan2 prompt format ( #10334 )  
							
							 
							
							... 
							
							
							
							* Fix Baichuan2 prompt format
* Fix Baichuan2 README
* Change baichuan2 prompt info
* Change baichuan2 prompt info 
							
						 
						
							2024-03-19 12:48:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								0451103a43 
								
							 
						 
						
							
							
								
								LLM: add int4+fp16 benchmark script for windows benchmarking ( #10449 )  
							
							 
							
							... 
							
							
							
							* LLM: add fp16 for benchmark script
* remove transformer_int4_fp16_loadlowbit_gpu_win 
							
						 
						
							2024-03-19 11:11:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								bbd749dceb 
								
							 
						 
						
							
							
								
								qwen2 fp8 cache ( #10446 )  
							
							 
							
							... 
							
							
							
							* qwen2 fp8 cache
* fix style check 
							
						 
						
							2024-03-19 08:32:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								9e763b049c 
								
							 
						 
						
							
							
								
								Support running pipeline parallel inference by vertically partitioning model to different devices ( #10392 )  
							
							 
							
							... 
							
							
							
							* support pipeline parallel inference
* fix logging
* remove benchmark file
* fix
* need to warmup twice
* support qwen and qwen2
* fix lint
* remove genxir
* refine 
							
						 
						
							2024-03-18 13:04:45 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								66b4bb5c5d 
								
							 
						 
						
							
							
								
								LLM: update setup to provide cpp for windows ( #10448 )  
							
							 
							
							
							
						 
						
							2024-03-18 18:20:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
							
							
								
							
							
								dbdeaddd6a 
								
							 
						 
						
							
							
								
								LLM: Fix log condition for BIGDL_OPT_IPEX ( #10441 )  
							
							 
							
							... 
							
							
							
							remove log for BIGDL_OPT_IPEX 
							
						 
						
							2024-03-18 16:03:51 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								1de13ea578 
								
							 
						 
						
							
							
								
								LLM: remove CPU english_quotes dataset and update docker example ( #10399 )  
							
							 
							
							... 
							
							
							
							* update dataset
* update readme
* update docker cpu
* update xpu docker 
							
						 
						
							2024-03-18 10:45:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								399843faf0 
								
							 
						 
						
							
							
								
								Baichuan 7b fp16 sdp and qwen2 pvc sdp ( #10435 )  
							
							 
							
							... 
							
							
							
							* add baichuan sdp
* update
* baichuan2
* fix
* fix style
* revert 13b
* revert 
							
						 
						
							2024-03-18 10:15:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jiao Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								5ab52ef5b5 
								
							 
						 
						
							
							
								
								update ( #10424 )  
							
							 
							
							
							
						 
						
							2024-03-15 09:24:26 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								bd64488b2a 
								
							 
						 
						
							
							
								
								add mask support for llama/chatglm fp8 sdp ( #10433 )  
							
							 
							
							... 
							
							
							
							* add mask support for fp8 sdp
* fix chatglm2 dtype
* update 
							
						 
						
							2024-03-15 17:36:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Keyan (Kyrie) Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								444b11af22 
								
							 
						 
						
							
							
								
								Add LangChain upstream ut test for ipynb ( #10387 )  
							
							 
							
							... 
							
							
							
							* Add LangChain upstream ut test for ipynb
* Integrate unit test for LangChain upstream ut and ipynb into one file
* Modify file name
* Remove LangChain version update in unit test
* Move Langchain upstream ut job to arc
* Modify path in .yml file
* Modify path in llm_unit_tests.yml
* Avoid create directory repeatedly 
							
						 
						
							2024-03-15 16:31:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								ca372f6dab 
								
							 
						 
						
							
							
								
								LLM: add save/load example for ModelScope ( #10397 )  
							
							 
							
							... 
							
							
							
							* LLM: add sl example for modelscope
* fix according to comments
* move file 
							
						 
						
							2024-03-15 15:17:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								24473e331a 
								
							 
						 
						
							
							
								
								Qwen2 fp16 sdp ( #10427 )  
							
							 
							
							... 
							
							
							
							* qwen2 sdp and refine
* update
* update
* fix style
* remove use_flash_attention 
							
						 
						
							2024-03-15 13:12:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								1315150e64 
								
							 
						 
						
							
							
								
								Add baichuan2-13b 1k to arc nightly perf ( #10406 )  
							
							 
							
							
							
						 
						
							2024-03-15 10:29:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								b036205be2 
								
							 
						 
						
							
							
								
								LLM: add fp8 sdp for chatglm2/3 ( #10411 )  
							
							 
							
							... 
							
							
							
							* add fp8 sdp for chatglm2
* fix style 
							
						 
						
							2024-03-15 09:38:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								fe8976a00f 
								
							 
						 
						
							
							
								
								LLM: Support gguf models use low_bit and fix no json( #10408 )  
							
							 
							
							... 
							
							
							
							* support others model use low_bit
* update readme
* update to add *.json 
							
						 
						
							2024-03-15 09:34:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								cda38f85a9 
								
							 
						 
						
							
							
								
								Qwen fp16 sdp ( #10401 )  
							
							 
							
							... 
							
							
							
							* qwen sdp
* fix
* update
* update
* update sdp
* update
* fix style check
* add to origin type 
							
						 
						
							2024-03-15 08:51:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									dingbaorong 
								
							 
						 
						
							
							
							
							
								
							
							
								1c0f7ed3fa 
								
							 
						 
						
							
							
								
								add xpu support ( #10419 )  
							
							 
							
							
							
						 
						
							2024-03-14 17:13:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								7d29765092 
								
							 
						 
						
							
							
								
								refactor qwen2 forward to enable XPU ( #10409 )  
							
							 
							
							... 
							
							
							
							* refactor qwen2 forward to enable XPU
* Update qwen2.py 
							
						 
						
							2024-03-14 11:03:05 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuxuan Xia 
								
							 
						 
						
							
							
							
							
								
							
							
								f36224aac4 
								
							 
						 
						
							
							
								
								Fix ceval run.sh ( #10410 )  
							
							 
							
							
							
						 
						
							2024-03-14 10:57:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									ZehuaCao 
								
							 
						 
						
							
							
							
							
								
							
							
								f66329e35d 
								
							 
						 
						
							
							
								
								Fix multiple get_enable_ipex function error ( #10400 )  
							
							 
							
							... 
							
							
							
							* fix multiple get_enable_ipex function error
* remove get_enable_ipex_low_bit function 
							
						 
						
							2024-03-14 10:14:13 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								76e30d8ec8 
								
							 
						 
						
							
							
								
								Empty cache for lm_head ( #10317 )  
							
							 
							
							... 
							
							
							
							* empty cache
* add comments 
							
						 
						
							2024-03-13 20:31:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								2be8bbd236 
								
							 
						 
						
							
							
								
								LLM: add cpp option in setup.py ( #10403 )  
							
							 
							
							... 
							
							
							
							* add llama_cpp option
* meet code review 
							
						 
						
							2024-03-13 20:12:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ovo233 
								
							 
						 
						
							
							
							
							
								
							
							
								0dbce53464 
								
							 
						 
						
							
							
								
								LLM: Add decoder/layernorm unit tests ( #10211 )  
							
							 
							
							... 
							
							
							
							* add decoder/layernorm unit tests
* update tests
* delete decoder tests
* address comments
* remove none type check
* restore nonetype checks
* delete nonetype checks; add decoder tests for Llama
* add gc
* deal with tuple output 
							
						 
						
							2024-03-13 19:41:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								06a851afa9 
								
							 
						 
						
							
							
								
								support new baichuan model ( #10404 )  
							
							 
							
							
							
						 
						
							2024-03-13 17:45:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuxuan Xia 
								
							 
						 
						
							
							
							
							
								
							
							
								a90e9b6ec2 
								
							 
						 
						
							
							
								
								Fix C-Eval Workflow ( #10359 )  
							
							 
							
							... 
							
							
							
							* Fix Baichuan2 prompt format
* Fix ceval workflow errors
* Fix ceval workflow error
* Fix ceval error
* Fix ceval error
* Test ceval
* Fix ceval
* Fix ceval
* Fix ceval
* Fix ceval
* Fix ceval
* Fix ceval
* Fix ceval
* Fix ceval
* Fix ceval
* Fix ceval
* Fix ceval
* Fix ceval
* Add ceval dependency test
* Fix ceval
* Fix ceval
* Test full ceval
* Test full ceval
* Fix ceval
* Fix ceval 
							
						 
						
							2024-03-13 17:23:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								b268baafd6 
								
							 
						 
						
							
							
								
								use fp8 sdp in llama ( #10396 )  
							
							 
							
							
							
						 
						
							2024-03-13 16:45:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
							
							
								
							
							
								60043a3ae8 
								
							 
						 
						
							
							
								
								LLM: Support Baichuan2-13b in BigDL-vLLM ( #10398 )  
							
							 
							
							... 
							
							
							
							Support Baichuan2-13b in BigDL-vLLM. 
							
						 
						
							2024-03-13 16:21:06 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
							
							
								
							
							
								e10de2c42d 
								
							 
						 
						
							
							
								
								[Fix] LLM: Fix condition check error for speculative decoding on CPU ( #10402 )  
							
							 
							
							... 
							
							
							
							Fix condition check error for speculative decoding on CPU 
							
						 
						
							2024-03-13 16:05:06 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Keyan (Kyrie) Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								f158b49835 
								
							 
						 
						
							
							
								
								[LLM] Recover arc ut test for Falcon ( #10385 )  
							
							 
							
							
							
						 
						
							2024-03-13 13:31:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								d72c0fad0d 
								
							 
						 
						
							
							
								
								Qwen2 SDPA forward on CPU ( #10395 )  
							
							 
							
							... 
							
							
							
							* Fix Qwen1.5 CPU forward
* Update convert.py
* Update qwen2.py 
							
						 
						
							2024-03-13 13:10:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								ca58a69b97 
								
							 
						 
						
							
							
								
								fix arc rms norm UT ( #10394 )  
							
							 
							
							
							
						 
						
							2024-03-13 13:09:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								0193f29411 
								
							 
						 
						
							
							
								
								LLM : Enable  gguf float16 and Yuan2 model ( #10372 )  
							
							 
							
							... 
							
							
							
							* enable float16
* add yuan files
* enable yuan
* enable set low_bit on yuan2
* update
* update license
* update generate
* update readme
* update python style
* update 
							
						 
						
							2024-03-13 10:19:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								f5d65203c0 
								
							 
						 
						
							
							
								
								First token lm_head optimization ( #10318 )  
							
							 
							
							... 
							
							
							
							* add lm head linear
* update
* address comments and fix style
* address comment 
							
						 
						
							2024-03-13 10:11:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Keyan (Kyrie) Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								7cf01e6ec8 
								
							 
						 
						
							
							
								
								Add LangChain upstream ut test ( #10349 )  
							
							 
							
							... 
							
							
							
							* Add LangChain upstream ut test
* Add LangChain upstream ut test
* Specify version numbers in yml script
* Correct langchain-community version 
							
						 
						
							2024-03-13 09:52:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								28c4a8cf5c 
								
							 
						 
						
							
							
								
								Qwen fused qkv ( #10368 )  
							
							 
							
							... 
							
							
							
							* fused qkv + rope for qwen
* quantized kv cache
* fix
* update qwen
* fixed quantized qkv
* fix
* meet code review
* update split
* convert.py
* extend when not enough kv
* fix 
							
						 
						
							2024-03-12 17:39:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								741c2bf1df 
								
							 
						 
						
							
							
								
								use new rms norm ( #10384 )  
							
							 
							
							
							
						 
						
							2024-03-12 17:29:51 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
							
							
								
							
							
								0ded0b4b13 
								
							 
						 
						
							
							
								
								LLM: Enable BigDL IPEX optimization for int4 ( #10319 )  
							
							 
							
							... 
							
							
							
							Enable BigDL IPEX optimization for int4 
							
						 
						
							2024-03-12 17:08:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								5d7e044dbc 
								
							 
						 
						
							
							
								
								LLM: add low bit option in deepspeed autotp example ( #10382 )  
							
							 
							
							
							
						 
						
							2024-03-12 17:07:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								df3bcc0e65 
								
							 
						 
						
							
							
								
								LLM: remove english_quotes dataset ( #10370 )  
							
							 
							
							
							
						 
						
							2024-03-12 16:57:40 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								df2b84f7de 
								
							 
						 
						
							
							
								
								Enable kv cache on arc batch ( #10308 )  
							
							 
							
							
							
						 
						
							2024-03-12 16:46:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Lilac09 
								
							 
						 
						
							
							
							
							
								
							
							
								5809a3f5fe 
								
							 
						 
						
							
							
								
								Add run-hbm.sh & add user guide for spr and hbm ( #10357 )  
							
							 
							
							... 
							
							
							
							* add run-hbm.sh
* add spr and hbm guide
* only support quad mode
* only support quad mode
* update special cases
* update special cases 
							
						 
						
							2024-03-12 16:15:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								5d996a5caf 
								
							 
						 
						
							
							
								
								LLM: add benchmark script for deepspeed autotp on gpu ( #10380 )  
							
							 
							
							
							
						 
						
							2024-03-12 15:19:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Keyan (Kyrie) Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								f9c144dc4c 
								
							 
						 
						
							
							
								
								Fix final logits ut failure ( #10377 )  
							
							 
							
							... 
							
							
							
							* Fix final logits ut failure
* Fix final logits ut failure
* Remove Falcon from completion test for now
* Remove Falcon from unit test for now 
							
						 
						
							2024-03-12 14:34:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
							
							
								
							
							
								cc4148636d 
								
							 
						 
						
							
							
								
								[FastChat-integration] Add initial implementation for loader ( #10323 )  
							
							 
							
							... 
							
							
							
							* add initial implementation for loader
* add test method for model_loader
* data
* Refine 
							
						 
						
							2024-03-12 10:54:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								17bdb1a60b 
								
							 
						 
						
							
							
								
								LLM: add whisper models into nightly test ( #10193 )  
							
							 
							
							... 
							
							
							
							* LLM: add whisper models into nightly test
* small fix
* small fix
* add more whisper models
* test all cases
* test specific cases
* collect the csv
* store the result
* to html
* small fix
* small test
* test all cases
* modify whisper_csv_to_html 
							
						 
						
							2024-03-11 20:00:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								dbcfc5c2fa 
								
							 
						 
						
							
							
								
								LLM: fix error of 'AI-ModelScope/phi-2' hosted by ModelScope hub ( #10364 )  
							
							 
							
							
							
						 
						
							2024-03-11 16:19:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								fe27a6971c 
								
							 
						 
						
							
							
								
								LLM: update modelscope version ( #10367 )  
							
							 
							
							
							
						 
						
							2024-03-11 16:18:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chen, Zhentao 
								
							 
						 
						
							
							
							
							
								
							
							
								a425eaabfc 
								
							 
						 
						
							
							
								
								fix from_pretrained when device_map=None ( #10361 )  
							
							 
							
							... 
							
							
							
							* pr trigger
* fix error when device_map=None
* fix device_map=None 
							
						 
						
							2024-03-11 16:06:12 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								d7b765fd3f 
								
							 
						 
						
							
							
								
								serving xpu memory opt ( #10358 )  
							
							 
							
							
							
						 
						
							2024-03-11 15:21:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								be29833b2b 
								
							 
						 
						
							
							
								
								LLM: fix qwen2 ( #10356 )  
							
							 
							
							
							
						 
						
							2024-03-11 09:29:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhicun 
								
							 
						 
						
							
							
							
							
								
							
							
								9026c08633 
								
							 
						 
						
							
							
								
								Fix llamaindex AutoTokenizer bug ( #10345 )  
							
							 
							
							... 
							
							
							
							* fix tokenizer
* fix AutoTokenizer bug
* modify code style 
							
						 
						
							2024-03-08 16:24:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhicun 
								
							 
						 
						
							
							
							
							
								
							
							
								2a10b53d73 
								
							 
						 
						
							
							
								
								rename docqa.py->rag.py ( #10353 )  
							
							 
							
							
							
						 
						
							2024-03-08 16:07:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Keyan (Kyrie) Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								f1825d7408 
								
							 
						 
						
							
							
								
								Add RMSNorm unit test ( #10190 )  
							
							 
							
							
							
						 
						
							2024-03-08 15:51:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shengsheng Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								370c52090c 
								
							 
						 
						
							
							
								
								Langchain readme ( #10348 )  
							
							 
							
							... 
							
							
							
							* update langchain readme
* update readme
* create new README
* Update README_nativeint4.md 
							
						 
						
							2024-03-08 14:57:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Keyan (Kyrie) Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								7a621a4db0 
								
							 
						 
						
							
							
								
								Fix device_map bug by raising an error when using device_map=xpu ( #10340 )  
							
							 
							
							... 
							
							
							
							* Fix device_map bug by raising an error when using device_map=xpu
* Fix sync error
* Fix python style
* Use invalidInputError instead of invalidOperationError 
							
						 
						
							2024-03-08 13:38:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								1ac193ba02 
								
							 
						 
						
							
							
								
								add rope theta argument ( #10343 )  
							
							 
							
							
							
						 
						
							2024-03-07 17:27:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuxuan Xia 
								
							 
						 
						
							
							
							
							
								
							
							
								0c8d3c9830 
								
							 
						 
						
							
							
								
								Add C-Eval HTML report ( #10294 )  
							
							 
							
							... 
							
							
							
							* Add C-Eval HTML report
* Fix C-Eval workflow pr trigger path
* Fix C-Eval workflow typos
* Add permissions to C-Eval workflow
* Fix C-Eval workflow typo
* Add pandas dependency
* Fix C-Eval workflow typo 
							
						 
						
							2024-03-07 16:44:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								496d18ab6d 
								
							 
						 
						
							
							
								
								LLM: add quantize kv cache support for baichuan 7b and 13b. ( #10330 )  
							
							 
							
							... 
							
							
							
							* add quantize kv cache for baichuan 7b and 13b.
* fix typo.
* fix.
* fix style.
* fix style. 
							
						 
						
							2024-03-07 16:17:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									hxsz1997 
								
							 
						 
						
							
							
							
							
								
							
							
								b7db21414e 
								
							 
						 
						
							
							
								
								Update llamaindex ut ( #10338 )  
							
							 
							
							... 
							
							
							
							* add test_llamaindex of gpu
* add llamaindex gpu tests bash
* add llamaindex cpu tests bash
* update name of Run LLM langchain GPU test
* import llama_index in llamaindex gpu ut
* update the dependency of test_llamaindex
* add Run LLM llamaindex GPU test
* modify import dependency of llamaindex cpu test
* add Run LLM llamaindex test
* update llama_model_path
* delete unused model path
* add LLAMA2_7B_ORIGIN_PATH in llamaindex cpu test 
							
						 
						
							2024-03-07 10:06:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									ZehuaCao 
								
							 
						 
						
							
							
							
							
								
							
							
								267de7abc3 
								
							 
						 
						
							
							
								
								fix fschat DEP version error ( #10325 )  
							
							 
							
							
							
						 
						
							2024-03-06 16:15:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								9ea499ca68 
								
							 
						 
						
							
							
								
								Optimize speculative decoding PVC memory usage ( #10329 )  
							
							 
							
							... 
							
							
							
							* optimize memory
* update
* update
* update
* support other models
* update
* fix style 
							
						 
						
							2024-03-06 09:54:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									dingbaorong 
								
							 
						 
						
							
							
							
							
								
							
							
								cc796848ea 
								
							 
						 
						
							
							
								
								fix typos ( #10274 )  
							
							 
							
							... 
							
							
							
							Co-authored-by: Ariadne <wyn2000330@126.com> 
							
						 
						
							2024-03-05 18:38:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									hxsz1997 
								
							 
						 
						
							
							
							
							
								
							
							
								af11c53473 
								
							 
						 
						
							
							
								
								Add the installation step of postgresql and pgvector on windows in LlamaIndex GPU  support ( #10328 )  
							
							 
							
							... 
							
							
							
							* add the installation of postgresql and pgvector of windows
* fix some format 
							
						 
						
							2024-03-05 18:31:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								0011ff9f64 
								
							 
						 
						
							
							
								
								optimize bge large performance ( #10324 )  
							
							 
							
							
							
						 
						
							2024-03-05 17:06:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
							
							
								
							
							
								178eea5009 
								
							 
						 
						
							
							
								
								upload bigdl-llm wheel to sourceforge for backup ( #10321 )  
							
							 
							
							... 
							
							
							
							* test: upload to sourceforge
* update scripts
* revert 
							
						 
						
							2024-03-05 16:36:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								30d009bca7 
								
							 
						 
						
							
							
								
								LLM: support quantized kv cache for Mistral in transformers >=4.36.0 ( #10326 )  
							
							 
							
							... 
							
							
							
							* support quantize kv for mistral in transformers 4.36
* update mistral support.
* fix style. 
							
						 
						
							2024-03-05 16:23:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									dingbaorong 
								
							 
						 
						
							
							
							
							
								
							
							
								1e6f0c6f1a 
								
							 
						 
						
							
							
								
								Add llamaindex gpu example ( #10314 )  
							
							 
							
							... 
							
							
							
							* add llamaindex example
* fix core dump
* refine readme
* add trouble shooting
* refine readme
---------
Co-authored-by: Ariadne <wyn2000330@126.com> 
							
						 
						
							2024-03-05 13:36:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									dingbaorong 
								
							 
						 
						
							
							
							
							
								
							
							
								fc7f10cd12 
								
							 
						 
						
							
							
								
								add langchain gpu example ( #10277 )  
							
							 
							
							... 
							
							
							
							* first draft
* fix
* add readme for transformer_int4_gpu
* fix doc
* check device_map
* add arc ut test
* fix ut test
* fix langchain ut
* Refine README
* fix gpu mem too high
* fix ut test
---------
Co-authored-by: Ariadne <wyn2000330@126.com> 
							
						 
						
							2024-03-05 13:33:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								5dbbe1a826 
								
							 
						 
						
							
							
								
								[LLM] Support for new arc ut runner ( #10311 )  
							
							 
							
							... 
							
							
							
							* Support for new arc ut runner
* Comment unnecessary OMP_NUM_THREADS related settings for arc uts 
							
						 
						
							2024-03-04 18:42:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								d45e577d8c 
								
							 
						 
						
							
							
								
								[LLM] Test load_low_bit in iGPU perf test on Windows ( #10313 )  
							
							 
							
							
							
						 
						
							2024-03-04 18:03:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								fd81d66047 
								
							 
						 
						
							
							
								
								LLM: Compress some models to save space ( #10315 )  
							
							 
							
							... 
							
							
							
							* LLM: compress some models to save space
* add deleted comments 
							
						 
						
							2024-03-04 17:53:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
							
							
								
							
							
								bab2ee5f9e 
								
							 
						 
						
							
							
								
								update nightly spr perf test ( #10178 )  
							
							 
							
							... 
							
							
							
							* update nightly spr perf test
* update
* update runner label
* update
* update
* update folder
* revert 
							
						 
						
							2024-03-04 13:46:33 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								ab9fc2485f 
								
							 
						 
						
							
							
								
								LLM: add quantize kv support for llama transformer 4.36 ( #10298 )  
							
							 
							
							... 
							
							
							
							* add quantize kv support for llama transformer 4.36
* fix style.
* fix style. 
							
						 
						
							2024-03-04 10:33:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								58208a5883 
								
							 
						 
						
							
							
								
								Update FAQ document. ( #10300 )  
							
							 
							
							... 
							
							
							
							* Update install_gpu.md
* Update resolve_error.md
* Update README.md
* Update resolve_error.md
* Update README.md
* Update resolve_error.md 
							
						 
						
							2024-03-04 08:35:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								27d9a14989 
								
							 
						 
						
							
							
								
								[LLM] all-on-one update: memory optimize and streaming output ( #10302 )  
							
							 
							
							... 
							
							
							
							* Memory saving for continuous in-out pair run and add support for streaming output on MTL iGPU
* Small fix
* Small fix
* Add things back 
							
						 
						
							2024-03-01 18:02:30 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								0ab40917fb 
								
							 
						 
						
							
							
								
								[LLM] Split merged_qk to separated q/k linear ( #10299 )  
							
							 
							
							... 
							
							
							
							* modify merge_qk_linear to separated q/k linear
* update 
							
						 
						
							2024-03-01 16:48:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								f4d7dbcde2 
								
							 
						 
						
							
							
								
								use fused qkv forward in qwen2 ( #10185 )  
							
							 
							
							... 
							
							
							
							* use fused qkv forward in qwen2
* support both
* fix style
* fix rope
* remove print
* fix style
* clean up 
							
						 
						
							2024-03-01 16:46:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								509e206de0 
								
							 
						 
						
							
							
								
								update doc about gemma random and unreadable output. ( #10297 )  
							
							 
							
							... 
							
							
							
							* Update install_gpu.md
* Update README.md
* Update README.md 
							
						 
						
							2024-03-01 15:41:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								beb9433cec 
								
							 
						 
						
							
							
								
								LLM: Reduce speculative _ipex_optimize_model memory use ( #10281 )  
							
							 
							
							... 
							
							
							
							* use tpp
* update ipex 
							
						 
						
							2024-03-01 13:48:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								f0ff0eebe1 
								
							 
						 
						
							
							
								
								[LLM] Support quantize kv cache for Baichuan2 7B ( #10280 )  
							
							 
							
							... 
							
							
							
							* Add quantized kv cache framework for Baichuan2 7B
* Support quantize kv cache for baichuan2
* Small fix
* Fix python style 
							
						 
						
							2024-03-01 13:35:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								273de341d7 
								
							 
						 
						
							
							
								
								hot-fix silu error import ( #10292 )  
							
							 
							
							
							
						 
						
							2024-03-01 10:11:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shengsheng Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								bcfad555df 
								
							 
						 
						
							
							
								
								revise llamaindex readme ( #10283 )  
							
							 
							
							
							
						 
						
							2024-02-29 17:19:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								232273a1b5 
								
							 
						 
						
							
							
								
								Enable Gemma fused mlp + Gelu ( #10276 )  
							
							 
							
							... 
							
							
							
							* update llama mlp forward
* add all
* fix style check
* split
* update
* update
* update
* fix style 
							
						 
						
							2024-02-29 16:53:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
							
							
								
							
							
								2d930bdca8 
								
							 
						 
						
							
							
								
								Add vLLM bf16 support ( #10278 )  
							
							 
							
							... 
							
							
							
							* add argument load_in_low_bit
* add docs
* modify gpu doc
* done
---------
Co-authored-by: ivy-lv11 <lvzc@lamda.nju.edu.cn> 
							
						 
						
							2024-02-29 16:33:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								13b0bc9075 
								
							 
						 
						
							
							
								
								[LLM] Add quantize_kv optimization for yuan2 model ( #10243 )  
							
							 
							
							... 
							
							
							
							* add initial quantize_kv support for yuan2 model
* fix yuan2 quantize_kv generation
* apply fp16 conv layer optimizations
* disable mlp for quantize_kv 
							
						 
						
							2024-02-29 16:33:26 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhicun 
								
							 
						 
						
							
							
							
							
								
							
							
								4e6cc424f1 
								
							 
						 
						
							
							
								
								Add LlamaIndex RAG ( #10263 )  
							
							 
							
							... 
							
							
							
							* run demo
* format code
* add llamaindex
* add custom LLM with bigdl
* update
* add readme
* begin ut
* add unit test
* add license
* add license
* revised
* update
* modify docs
* remove data folder
* update
* modify prompt
* fixed
* fixed
* fixed 
							
						 
						
							2024-02-29 15:21:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								5d7243067c 
								
							 
						 
						
							
							
								
								LLM: add Baichuan2-13B-Chat 2048-256 to MTL perf ( #10273 )  
							
							 
							
							
							
						 
						
							2024-02-29 13:48:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								a9fd20b6ba 
								
							 
						 
						
							
							
								
								LLM: Update qkv fusion for GGUF-IQ2 ( #10271 )  
							
							 
							
							... 
							
							
							
							* first commit
* update mistral
* fix transformers==4.36.0
* fix
* disable qk for mixtral now
* fix style 
							
						 
						
							2024-02-29 12:49:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jiao Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								6fb65bb9d2 
								
							 
						 
						
							
							
								
								fix in transformers 4.36 ( #10150 )  
							
							 
							
							
							
						 
						
							2024-02-28 18:43:01 -08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shengsheng Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								43dac97e03 
								
							 
						 
						
							
							
								
								Update README.md ( #10260 )  
							
							 
							
							
							
						 
						
							2024-02-29 10:41:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								4b08bc1417 
								
							 
						 
						
							
							
								
								LLM: relax batch check of flash attention by double check attention mask ( #10270 )  
							
							 
							
							... 
							
							
							
							* relax batch check
* fix
* fix style 
							
						 
						
							2024-02-29 09:39:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								07f36fbfcc 
								
							 
						 
						
							
							
								
								Fix gptj failed to extend ( #10269 )  
							
							 
							
							
							
						 
						
							2024-02-29 09:39:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								cccb02dad1 
								
							 
						 
						
							
							
								
								fix baichuan2 13b 2k input ( #10267 )  
							
							 
							
							
							
						 
						
							2024-02-28 17:20:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								7244fd1ba5 
								
							 
						 
						
							
							
								
								Fix Arc StarCoder wrong query_shape when input is long ( #10268 )  
							
							 
							
							... 
							
							
							
							* Fix Arc StarCoder wrong query_shape when input is long
* Update gptbigcode.py 
							
						 
						
							2024-02-28 17:07:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								a4de3095f3 
								
							 
						 
						
							
							
								
								LLM: Support quantize kv cache in mistral. ( #10261 )  
							
							 
							
							... 
							
							
							
							* init
* update quantize kv. 
							
						 
						
							2024-02-28 14:08:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shengsheng Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								db0d129226 
								
							 
						 
						
							
							
								
								Revert "Add rwkv example ( #9432 )" ( #10264 )  
							
							 
							
							... 
							
							
							
							This reverts commit 6930422b42 . 
							
						 
						
							2024-02-28 11:48:31 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yining Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								6930422b42 
								
							 
						 
						
							
							
								
								Add rwkv example ( #9432 )  
							
							 
							
							... 
							
							
							
							* codeshell fix wrong urls
* restart runner
* add RWKV CPU & GPU example (rwkv-4-world-7b)
* restart runner
* update submodule
* fix runner
* runner-test
---------
Co-authored-by: Shengsheng Huang <shengsheng.huang@intel.com> 
							
						 
						
							2024-02-28 11:41:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Keyan (Kyrie) Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								59861f73e5 
								
							 
						 
						
							
							
								
								Add Deepseek-6.7B ( #9991 )  
							
							 
							
							... 
							
							
							
							* Add new example Deepseek
* Add new example Deepseek
* Add new example Deepseek
* Add new example Deepseek
* Add new example Deepseek
* modify deepseek
* modify deepseek
* Add verified model in README
* Turn cpu_embedding=True in Deepseek example
---------
Co-authored-by: Shengsheng Huang <shengsheng.huang@intel.com> 
							
						 
						
							2024-02-28 11:36:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuxuan Xia 
								
							 
						 
						
							
							
							
							
								
							
							
								2524273198 
								
							 
						 
						
							
							
								
								Update AutoGen README ( #10255 )  
							
							 
							
							... 
							
							
							
							* Update AutoGen README
* Fix AutoGen README typos
* Update AutoGen README
* Update AutoGen README 
							
						 
						
							2024-02-28 11:34:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zheng, Yi 
								
							 
						 
						
							
							
							
							
								
							
							
								2347f611cf 
								
							 
						 
						
							
							
								
								Add cpu and gpu examples of Mamba ( #9797 )  
							
							 
							
							... 
							
							
							
							* Add mamba cpu example
* Add mamba gpu example
* Use a smaller model as the example
* minor fixes
---------
Co-authored-by: Shengsheng Huang <shengsheng.huang@intel.com> 
							
						 
						
							2024-02-28 11:33:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								937e1f7c74 
								
							 
						 
						
							
							
								
								rebase ( #9104 )  
							
							 
							
							... 
							
							
							
							Co-authored-by: leonardozcm <leonardozcm@gmail.com> 
							
						 
						
							2024-02-28 11:18:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									JunX 
								
							 
						 
						
							
							
							
							
								
							
							
								4833067489 
								
							 
						 
						
							
							
								
								fix GPU example link in README.md ( #9533 )  
							
							 
							
							... 
							
							
							
							* fix GPU example link in README.md
* fix GPU links in llm README.md 
							
						 
						
							2024-02-28 11:13:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhicun 
								
							 
						 
						
							
							
							
							
								
							
							
								308e637d0d 
								
							 
						 
						
							
							
								
								Add DeepSeek-MoE-16B-Chat ( #10155 )  
							
							 
							
							... 
							
							
							
							* dsmoe-hf add
* add dsmoe pytorch
* update README
* modify comment
* remove GPU example
* update model name
* format code 
							
						 
						
							2024-02-28 10:12:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guoqiong Song 
								
							 
						 
						
							
							
							
							
								
							
							
								f4a2e32106 
								
							 
						 
						
							
							
								
								Stream llm example for both GPU and CPU ( #9390 )  
							
							 
							
							
							
						 
						
							2024-02-27 15:54:47 -08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								c581c6db30 
								
							 
						 
						
							
							
								
								draft mmint4 ( #10031 )  
							
							 
							
							... 
							
							
							
							change to llm.cpp
support transposed format
revert
implement qkv fuse
fix style
change to vertically pack
change to enable_xetla
fix mlp_fusion_check
remove comments
address comments
add some comments
fix style 
							
						 
						
							2024-02-27 14:55:16 -08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									hxsz1997 
								
							 
						 
						
							
							
							
							
								
							
							
								cba61a2909 
								
							 
						 
						
							
							
								
								Add html report of ppl ( #10218 )  
							
							 
							
							... 
							
							
							
							* remove include and language option, select the corresponding dataset based on the model name in Run
* change the nightly test time
* change the nightly test time of harness and ppl
* save the ppl result to json file
* generate csv file and print table result
* generate html
* modify the way to get parent folder
* update html in parent folder
* add llm-ppl-summary and llm-ppl-summary-html
* modify echo single result
* remove download fp16.csv
* change model name of PR
* move ppl nightly related files to llm/test folder
* reformat
* separate make_table from make_table_and_csv.py
* separate make_csv from make_table_and_csv.py
* update llm-ppl-html
* remove comment
* add Download fp16.results 
							
						 
						
							2024-02-27 17:37:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhicun 
								
							 
						 
						
							
							
							
							
								
							
							
								6d60982746 
								
							 
						 
						
							
							
								
								Env script: add license ( #10257 )  
							
							 
							
							... 
							
							
							
							* env script
* update README.md
* modify README
* modify cpu info output
* add env-check.sh
* add env-check.bat
* add windows
* modify bat
* add license 
							
						 
						
							2024-02-27 15:29:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								b4fa4ab46f 
								
							 
						 
						
							
							
								
								optimize yuan 2.0 again ( #10252 )  
							
							 
							
							
							
						 
						
							2024-02-27 14:51:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhicun 
								
							 
						 
						
							
							
							
							
								
							
							
								03b9c4930a 
								
							 
						 
						
							
							
								
								UX: Script to print env info ( #10088 )  
							
							 
							
							... 
							
							
							
							* env script
* update README.md
* modify README
* modify cpu info output
* add env-check.sh
* add env-check.bat
* add windows
* modify bat 
							
						 
						
							2024-02-27 14:45:36 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Keyan (Kyrie) Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								843fe546b0 
								
							 
						 
						
							
							
								
								Add CPU and GPU examples for DeciLM-7B ( #9867 )  
							
							 
							
							... 
							
							
							
							* Add cpu and gpu examples for DeciLM-7B
* Add cpu and gpu examples for DeciLM-7B
* Add DeciLM-7B to README table
* modify deciLM
* modify deciLM
* modify deciLM
* Add verified model in README
* Add cpu_embedding=True 
							
						 
						
							2024-02-27 13:15:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								38ae4b372f 
								
							 
						 
						
							
							
								
								Add yuan2-2b to win igpu perf test ( #10250 )  
							
							 
							
							
							
						 
						
							2024-02-27 11:08:33 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								36a9e88104 
								
							 
						 
						
							
							
								
								Speculative Starcoder on CPU ( #10138 )  
							
							 
							
							... 
							
							
							
							* Speculative Starcoder on CPU
* enable kv-cache pre-allocation
* refine codes
* refine
* fix style
* fix style
* fix style
* refine
* refine
* Update speculative.py
* Update gptbigcode.py
* fix style
* Update speculative.py
* enable mixed-datatype layernorm on top of torch API
* adaptive dtype
* Update README.md 
							
						 
						
							2024-02-27 09:57:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								a47989c860 
								
							 
						 
						
							
							
								
								optimize yuan 2.0 performance ( #10244 )  
							
							 
							
							
							
						 
						
							2024-02-26 17:20:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								6c74b99a28 
								
							 
						 
						
							
							
								
								LLM: Update qwen readme ( #10245 )  
							
							 
							
							
							
						 
						
							2024-02-26 17:03:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									hxsz1997 
								
							 
						 
						
							
							
							
							
								
							
							
								15ad2fd72e 
								
							 
						 
						
							
							
								
								Merge pull request  #10226  from zhentaocc/fix_harness  
							
							 
							
							... 
							
							
							
							Fix harness 
							
						 
						
							2024-02-26 16:49:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								f9b75f900b 
								
							 
						 
						
							
							
								
								LLM: Enable qwen target_model ipex ( #10232 )  
							
							 
							
							... 
							
							
							
							* change order
* enable qwen ipex
* update qwen example
* update
* fix style
* update 
							
						 
						
							2024-02-26 16:41:12 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								3e6d188553 
								
							 
						 
						
							
							
								
								LLM: add baichuan2-13b to mtl perf ( #10238 )  
							
							 
							
							
							
						 
						
							2024-02-26 15:55:56 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								e38e29511c 
								
							 
						 
						
							
							
								
								[LLM] Yuan2 MLP and Rotary optimization ( #10231 )  
							
							 
							
							... 
							
							
							
							* Add optimization for rotary embedding
* Add mlp fused optimization
* Python style fix
* Fix rotary embedding due to logits difference
* Small fix 
							
						 
						
							2024-02-26 15:10:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ziteng Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								ea23afc8ec 
								
							 
						 
						
							
							
								
								[LLM]update ipex part in mistral example readme ( #10239 )  
							
							 
							
							... 
							
							
							
							* update ipex part in mistral example readme 
							
						 
						
							2024-02-26 14:35:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								df2f3885ba 
								
							 
						 
						
							
							
								
								[LLM] Enable kv_cache and forward_qkv optimizations for yuan2 ( #10225 )  
							
							 
							
							... 
							
							
							
							* add init kv_cache support for yuan2
* add forward qkv in yuan 
							
						 
						
							2024-02-26 11:29:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
							
							
								
							
							
								85a99e13e8 
								
							 
						 
						
							
							
								
								LLM: Fix ChatGLM3 Speculative Example ( #10236 )  
							
							 
							
							... 
							
							
							
							Fix ChatGLM3 Speculative Example. 
							
						 
						
							2024-02-26 10:57:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chen, Zhentao 
								
							 
						 
						
							
							
							
							
								
							
							
								213ef06691 
								
							 
						 
						
							
							
								
								fix readme  
							
							 
							
							
							
						 
						
							2024-02-24 00:38:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								28513f3978 
								
							 
						 
						
							
							
								
								LLM: support fp16 embedding & add mlp fusion for iq2_xxs ( #10219 )  
							
							 
							
							... 
							
							
							
							* add fp16 embed
* small fixes
* fix style
* fix style
* fix comment 
							
						 
						
							2024-02-23 17:26:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								eeecd9fc08 
								
							 
						 
						
							
							
								
								Python style fix ( #10230 )  
							
							 
							
							
							
						 
						
							2024-02-23 17:21:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								e511bbd8f1 
								
							 
						 
						
							
							
								
								[LLM] Add basic optimization framework for Yuan2 ( #10227 )  
							
							 
							
							... 
							
							
							
							* Add basic optimization framework for Yuan2
* Small fix
* Python style fix
* Small fix
* Small fix 
							
						 
						
							2024-02-23 17:05:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								8ef5482da2 
								
							 
						 
						
							
							
								
								update Gemma readme ( #10229 )  
							
							 
							
							... 
							
							
							
							* Update README.md
* Update README.md
* Update README.md
* Update README.md 
							
						 
						
							2024-02-23 16:57:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chen, Zhentao 
								
							 
						 
						
							
							
							
							
								
							
							
								6fe5344fa6 
								
							 
						 
						
							
							
								
								separate make_csv from the file  
							
							 
							
							
							
						 
						
							2024-02-23 16:33:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chen, Zhentao 
								
							 
						 
						
							
							
							
							
								
							
							
								bfa98666a6 
								
							 
						 
						
							
							
								
								fall back to make_table.py  
							
							 
							
							
							
						 
						
							2024-02-23 16:33:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								19260492c7 
								
							 
						 
						
							
							
								
								LLM: fix action/installation error of mpmath ( #10223 )  
							
							 
							
							... 
							
							
							
							* fix
* test
* fix
* update 
							
						 
						
							2024-02-23 16:14:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								aabfc06977 
								
							 
						 
						
							
							
								
								add gemma example ( #10224 )  
							
							 
							
							... 
							
							
							
							* add gemma gpu example
* Update README.md
* add cpu example
* Update README.md
* Update README.md
* Update generate.py
* Update generate.py 
							
						 
						
							2024-02-23 15:20:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									yb-peng 
								
							 
						 
						
							
							
							
							
								
							
							
								a2c1675546 
								
							 
						 
						
							
							
								
								Add CPU and GPU examples for Yuan2-2B-hf ( #9946 )  
							
							 
							
							... 
							
							
							
							* Add a new CPU example of Yuan2-2B-hf
* Add a new CPU generate.py of Yuan2-2B-hf example
* Add a new GPU example of Yuan2-2B-hf
* Add Yuan2 to README table
* In CPU example: 1. Use English as default prompt; 2. Provide modified files in yuan2-2B-instruct
* In GPU example: 1. Use English as default prompt; 2. Provide modified files
* GPU example:update README
* update Yuan2-2B-hf in README table
* Add CPU example for Yuan2-2B in Pytorch-Models
* Add GPU example for Yuan2-2B in Pytorch-Models
* Add license in generate.py; Modify README
* In GPU Add license in generate.py; Modify README
* In CPU yuan2 modify README
* In GPU yuan2 modify README
* In CPU yuan2 modify README
* In GPU example, updated the readme for Windows GPU supports
* In GPU torch example, updated the readme for Windows GPU supports
* GPU hf example README modified
* GPU example README modified 
							
						 
						
							2024-02-23 14:09:30 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									yb-peng 
								
							 
						 
						
							
							
							
							
								
							
							
								f1f4094a09 
								
							 
						 
						
							
							
								
								Add CPU and GPU examples of phi-2 ( #10014 )  
							
							 
							
							... 
							
							
							
							* Add CPU and GPU examples of phi-2
* In GPU hf example, updated the readme for Windows GPU supports
* In GPU torch example, updated the readme for Windows GPU supports
* update the table in BigDL/README.md
* update the table in BigDL/python/llm/README.md 
							
						 
						
							2024-02-23 14:05:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chen, Zhentao 
								
							 
						 
						
							
							
							
							
								
							
							
								f315c7f93a 
								
							 
						 
						
							
							
								
								Move harness nightly related files to llm/test folder ( #10209 )  
							
							 
							
							... 
							
							
							
							* move harness nightly files to test folder
* change workflow file path accordingly
* use arc01 when pr
* fix path
* fix fp16 csv path 
							
						 
						
							2024-02-23 11:12:36 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								30795bdfbc 
								
							 
						 
						
							
							
								
								Gemma optimization: rms_norm, kv_cache, fused_rope, fused_rope+qkv ( #10212 )  
							
							 
							
							... 
							
							
							
							* gemma optimization
* update
* update
* fix style
* meet code review 
							
						 
						
							2024-02-23 10:07:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guoqiong Song 
								
							 
						 
						
							
							
							
							
								
							
							
								63681af97e 
								
							 
						 
						
							
							
								
								falcon for transformers 4.36 ( #9960 )  
							
							 
							
							... 
							
							
							
							* falcon for transformers 4.36 
							
						 
						
							2024-02-22 17:04:40 -08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jason Dai 
								
							 
						 
						
							
							
							
							
								
							
							
								84d5f40936 
								
							 
						 
						
							
							
								
								Update README.md ( #10213 )  
							
							 
							
							
							
						 
						
							2024-02-22 17:22:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								ce5840a8b7 
								
							 
						 
						
							
							
								
								GPT-J rope optimization on xpu ( #10182 )  
							
							 
							
							... 
							
							
							
							* optimize
* update
* fix style & move use_fuse_rope
* add ipex version check
* fix style
* update
* fix style
* meet comments
* address comments
* fix style 
							
						 
						
							2024-02-22 16:25:12 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
							
							
								
							
							
								f445217d02 
								
							 
						 
						
							
							
								
								LLM: Update IPEX to 2.2.0+cpu and Refactor for _ipex_optimize ( #10189 )  
							
							 
							
							... 
							
							
							
							Update IPEX to 2.2.0+cpu and refactor for _ipex_optimize. 
							
						 
						
							2024-02-22 16:01:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								c876d9b5ca 
								
							 
						 
						
							
							
								
								Support for MPT rotary embedding ( #10208 )  
							
							 
							
							
							
						 
						
							2024-02-22 15:16:31 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								5e1fee5e05 
								
							 
						 
						
							
							
								
								LLM: add GGUF-IQ2 examples ( #10207 )  
							
							 
							
							... 
							
							
							
							* add iq2 examples
* small fix
* meet code review
* fix
* meet review
* small fix 
							
						 
						
							2024-02-22 14:18:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								21de2613ce 
								
							 
						 
						
							
							
								
								[LLM] Add model loading time record for all-in-one benchmark ( #10201 )  
							
							 
							
							... 
							
							
							
							* Add model loading time record in csv for all-in-one benchmark
* Small fix
* Small fix to number after . 
							
						 
						
							2024-02-22 13:57:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ovo233 
								
							 
						 
						
							
							
							
							
								
							
							
								60e11b6739 
								
							 
						 
						
							
							
								
								LLM: Add mlp layer unit tests ( #10200 )  
							
							 
							
							... 
							
							
							
							* add mlp layer unit tests
* add download baichuan-13b
* exclude llama for now
* install additional packages
* rename bash file
* switch to Baichuan2
* delete attention related code
* fix name errors in yml file 
							
						 
						
							2024-02-22 13:44:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								ca1166a0e5 
								
							 
						 
						
							
							
								
								[LLM] Add quantize kv_cache for Baichuan2-13B ( #10203 )  
							
							 
							
							... 
							
							
							
							* add quantize kv_cache for baichuan2-13b
* style fix 
							
						 
						
							2024-02-22 13:43:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								34ee1aa91f 
								
							 
						 
						
							
							
								
								LLM: add esimd sdp support for chatglm3 ( #10205 )  
							
							 
							
							... 
							
							
							
							* add esimd sdp support
* fix style 
							
						 
						
							2024-02-22 13:37:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuxuan Xia 
								
							 
						 
						
							
							
							
							
								
							
							
								7cbc2429a6 
								
							 
						 
						
							
							
								
								Fix C-Eval ChatGLM loading issue ( #10206 )  
							
							 
							
							... 
							
							
							
							* Add c-eval workflow and modify running files
* Modify the chatglm evaluator file
* Modify the ceval workflow for triggering test
* Modify the ceval workflow file
* Modify the ceval workflow file
* Modify ceval workflow
* Adjust the ceval dataset download
* Add ceval workflow dependencies
* Modify ceval workflow dataset download
* Add ceval test dependencies
* Add ceval test dependencies
* Correct the result print
* Fix the nightly test trigger time
* Fix ChatGLM loading issue 
							
						 
						
							2024-02-22 10:00:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								94cb16fe40 
								
							 
						 
						
							
							
								
								[LLM] Small updates to Win GPU Install Doc ( #10199 )  
							
							 
							
							... 
							
							
							
							* Make Offline installer as default for win gpu doc for oneAPI
* Small other fixes 
							
						 
						
							2024-02-21 17:58:40 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								9975b029c5 
								
							 
						 
						
							
							
								
								LLM: add qlora finetuning example using trl.SFTTrainer ( #10183 )  
							
							 
							
							
							
						 
						
							2024-02-21 16:40:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								f7c96b19ef 
								
							 
						 
						
							
							
								
								LLM: support iq2 for mixtral ( #10191 )  
							
							 
							
							... 
							
							
							
							* support name mapping for mixtral
* support mixtral mixed quantization
* fix style
* fix 
							
						 
						
							2024-02-21 16:00:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									yb-peng 
								
							 
						 
						
							
							
							
							
								
							
							
								b1a97b71a9 
								
							 
						 
						
							
							
								
								Harness eval: Add is_last parameter and fix logical operator in highlight_vals ( #10192 )  
							
							 
							
							... 
							
							
							
							* Add is_last parameter and fix logical operator in highlight_vals
* Add script to update HTML files in parent folder
* Add running update_html_in_parent_folder.py in summarize step
* Add licence info
* Remove update_html_in_parent_folder.py in Summarize the results for pull request 
							
						 
						
							2024-02-21 14:45:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhicun 
								
							 
						 
						
							
							
							
							
								
							
							
								c7e839e66c 
								
							 
						 
						
							
							
								
								Add Qwen1.5-7B-Chat ( #10113 )  
							
							 
							
							... 
							
							
							
							* add Qwen1.5-7B-Chat
* modify Qwen1.5 example
* update README
* update prompt format
* update folder name and example README
* add Chinese prompt sample output
* update link in README
* correct the link
* update transformer version 
							
						 
						
							2024-02-21 13:29:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								56ad781f2f 
								
							 
						 
						
							
							
								
								qwen2 cpu fix ( #10187 )  
							
							 
							
							
							
						 
						
							2024-02-21 11:23:51 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chen, Zhentao 
								
							 
						 
						
							
							
							
							
								
							
							
								39d37bd042 
								
							 
						 
						
							
							
								
								upgrade harness package version in workflow ( #10188 )  
							
							 
							
							... 
							
							
							
							* upgrade harness
* update readme 
							
						 
						
							2024-02-21 11:21:30 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								001c13243e 
								
							 
						 
						
							
							
								
								[LLM] Add support for low_low_bit benchmark on Windows GPU ( #10167 )  
							
							 
							
							... 
							
							
							
							* Add support for low_low_bit performance test on Windows GPU
* Small fix
* Small fix
* Save memory during converting model process
* Drop the results for first time when loading in low bit on mtl igpu for better performance
* Small fix 
							
						 
						
							2024-02-21 10:51:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ziteng Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								276ef0e885 
								
							 
						 
						
							
							
								
								Speculative Ziya on CPU ( #10160 )  
							
							 
							
							... 
							
							
							
							* Speculative Ziya on CPU
* Without part of Accelerate with BIGDL_OPT_IPEX 
							
						 
						
							2024-02-21 10:30:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								4fbf449c2d 
								
							 
						 
						
							
							
								
								for rwkv4 ( #10179 )  
							
							 
							
							
							
						 
						
							2024-02-21 10:11:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									yb-peng 
								
							 
						 
						
							
							
							
							
								
							
							
								de3dc609ee 
								
							 
						 
						
							
							
								
								Modify harness evaluation workflow ( #10174 )  
							
							 
							
							... 
							
							
							
							* Modify table head in harness
* Specify the file path of fp16.csv
* change run to run nightly and run pr to debug
* Modify the way to get fp16.csv to downloading from github
* Change the method to calculate diff in html table
* Change the method to calculate diff in html table
* Re-arrange job order
* Re-arrange job order
* Change limit
* Change fp16.csv  path
* Change highlight rules
* Change limit 
							
						 
						
							2024-02-20 18:55:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								3288acb8de 
								
							 
						 
						
							
							
								
								LLM : Support embedding quantization (only q2k now) ( #10170 )  
							
							 
							
							... 
							
							
							
							* basic logic added
* basic support
* support save&load, update mixed strategy
* fix style
* use int8 for lm_head
* add check for xpu 
							
						 
						
							2024-02-20 16:56:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									hxsz1997 
								
							 
						 
						
							
							
							
							
								
							
							
								6e10d98a8d 
								
							 
						 
						
							
							
								
								Fix some typos ( #10175 )  
							
							 
							
							... 
							
							
							
							* add llm-ppl workflow
* update the DATASET_DIR
* test multiple precisions
* modify nightly test
* match the updated ppl code
* add matrix.include
* fix the include error
* update the include
* add more model
* update the precision of include
* update nightly time and add more models
* fix the workflow_dispatch description, change default model of pr and modify the env
* modify workflow_dispatch language options
* modify options
* modify language options
* modify workflow_dispatch type
* modify type
* modify the type of language
* change seq_len type
* fix some typos
* revert changes to stress_test.txt 
							
						 
						
							2024-02-20 14:14:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhicun 
								
							 
						 
						
							
							
							
							
								
							
							
								add3899311 
								
							 
						 
						
							
							
								
								Add ziya CPU example ( #10114 )  
							
							 
							
							... 
							
							
							
							* ziya on CPU
* add README for ziya
* specify use_cache
* add arc CPU
* update prompt format
* update link
* add comments to emphasize use_cache
* update pip cmd 
							
						 
						
							2024-02-20 13:59:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								2bb96c775c 
								
							 
						 
						
							
							
								
								LLM: fix device setting during saving optimized model ( #10154 )  
							
							 
							
							
							
						 
						
							2024-02-20 09:52:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								1f6d5b9f30 
								
							 
						 
						
							
							
								
								enable fused rmsnorm and rope qwen2 ( #10163 )  
							
							 
							
							... 
							
							
							
							* qwen2
* change convert
* cleanup 
							
						 
						
							2024-02-20 08:33:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									yb-peng 
								
							 
						 
						
							
							
							
							
								
							
							
								e31210ba00 
								
							 
						 
						
							
							
								
								Modify html table style and add fp16.csv in harness ( #10169 )  
							
							 
							
							... 
							
							
							
							* Specify the version of pandas in harness evaluation workflow
* Specify the version of pandas in harness evaluation workflow
* Modify html table style and add fp16.csv in harness
* Modify comments 
							
						 
						
							2024-02-19 18:13:40 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								6c09aed90d 
								
							 
						 
						
							
							
								
								LLM: add qwen_1.5_7b model for arc perf test ( #10166 )  
							
							 
							
							... 
							
							
							
							* LLM: add qwen_1.5_7b model for arc perf test
* small fix
* revert some codes 
							
						 
						
							2024-02-19 17:21:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuxuan Xia 
								
							 
						 
						
							
							
							
							
								
							
							
								209122559a 
								
							 
						 
						
							
							
								
								Add Ceval workflow and modify the result printing ( #10140 )  
							
							 
							
							... 
							
							
							
							* Add c-eval workflow and modify running files
* Modify the chatglm evaluator file
* Modify the ceval workflow for triggering test
* Modify the ceval workflow file
* Modify the ceval workflow file
* Modify ceval workflow
* Adjust the ceval dataset download
* Add ceval workflow dependencies
* Modify ceval workflow dataset download
* Add ceval test dependencies
* Add ceval test dependencies
* Correct the result print 
							
						 
						
							2024-02-19 17:06:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								f8730e8dc1 
								
							 
						 
						
							
							
								
								Skip rescale rwkv linear when load_low_bit ( #10164 )  
							
							 
							
							... 
							
							
							
							* rwkv_ld 
							
						 
						
							2024-02-19 15:56:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								3e2af5ec0a 
								
							 
						 
						
							
							
								
								Fix IPEX Baichuan Speculative ( #10162 )  
							
							 
							
							... 
							
							
							
							* Fix IPEX Baichuan Speculative
* compatible with 13B
* Update speculative.py 
							
						 
						
							2024-02-19 15:27:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								23c91cdce6 
								
							 
						 
						
							
							
								
								[LLM] Add min_step_draft in speculative decoding ( #10142 )  
							
							 
							
							... 
							
							
							
							* Fix gptj kvcache & position id
* Add min_draft_tokens in speculative decoding
* fix style
* update 
							
						 
						
							2024-02-19 14:31:41 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chen, Zhentao 
								
							 
						 
						
							
							
							
							
								
							
							
								14ba2c5135 
								
							 
						 
						
							
							
								
								Harness: remove deprecated files ( #10165 )  
							
							 
							
							
							
						 
						
							2024-02-19 14:27:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								d3591383d5 
								
							 
						 
						
							
							
								
								LLM : Add CPU chatglm3 speculative example ( #10004 )  
							
							 
							
							... 
							
							
							
							* init chatglm
* update
* update 
							
						 
						
							2024-02-19 13:38:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								f2417e083c 
								
							 
						 
						
							
							
								
								LLM: enable chatglm3-6b target_model ipex ( #10085 )  
							
							 
							
							... 
							
							
							
							* init
* always make causal_mask
* not return last tensor
* update
* optimize_model = False
* enable optimized=False
* enable optimized_model=true
* speed_up ipex target_model
* remove if True
* use group_size
* update python style
* update
* update 
							
						 
						
							2024-02-19 13:38:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								177273c1a4 
								
							 
						 
						
							
							
								
								IPEX Speculative Support for Baichuan2 7B ( #10112 )  
							
							 
							
							... 
							
							
							
							* IPEX Speculative Support for Baichuan2 7B
* fix license problems
* refine 
							
						 
						
							2024-02-19 09:12:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								1508d6b089 
								
							 
						 
						
							
							
								
								Fix gptj kvcache & position id ( #10141 )  
							
							 
							
							
							
						 
						
							2024-02-18 10:02:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									yb-peng 
								
							 
						 
						
							
							
							
							
								
							
							
								b4dc33def6 
								
							 
						 
						
							
							
								
								In harness-evaluation workflow, add statistical tables ( #10118 )  
							
							 
							
							... 
							
							
							
							* change storage
* fix typo
* change label
* change label to arc03
* change needs in the last step
* add generate csv in harness/make_table_results.py
* modify needs in the last job
* add csv to html
* fix path issue in llm-harness-summary-nightly
* modify output_path
* modify args in make_table_results.py
* modify make table command in summary
* change pr env label
* remove irrelevant code in summary; add set output path step; add limit in harness run
* re-organize code structure
* modify limit in run harness
* modify csv_to_html input path
* modify needs in summary-nightly 
							
						 
						
							2024-02-08 19:01:05 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								4d33aac7f9 
								
							 
						 
						
							
							
								
								quick fix qwen2 fp8 kv cache ( #10135 )  
							
							 
							
							
							
						 
						
							2024-02-08 17:04:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								39d90839aa 
								
							 
						 
						
							
							
								
								LLM: add quantize kv cache for llama. ( #10086 )  
							
							 
							
							... 
							
							
							
							* feat: add quantize kv cache for llama.
* fix style.
* add quantized attention forward function.
* revert style.
* fix style.
* fix style.
* update quantized kv cache and add quantize_qkv
* fix style.
* fix style.
* optimize quantize kv cache.
* fix style. 
							
						 
						
							2024-02-08 16:49:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								d848efe17c 
								
							 
						 
						
							
							
								
								add quantize kv cache support for qwen2 ( #10134 )  
							
							 
							
							
							
						 
						
							2024-02-08 16:17:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								3f79128ed7 
								
							 
						 
						
							
							
								
								[LLM] Enable kv_cache optimization for Qwen2 on transformers-v4.37.0 ( #10131 )  
							
							 
							
							... 
							
							
							
							* add support for kv_cache optimization on transformers-v4.37.0
* enable attention forward
* style fix
* disable rotary for now 
							
						 
						
							2024-02-08 14:20:26 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								063dc145ac 
								
							 
						 
						
							
							
								
								LLM: basic support for q2k ( #10132 )  
							
							 
							
							... 
							
							
							
							* basic support for q2k
* fix style 
							
						 
						
							2024-02-08 13:52:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								11fe5a87ec 
								
							 
						 
						
							
							
								
								LLM: add Modelscope model example ( #10126 )  
							
							 
							
							
							
						 
						
							2024-02-08 11:18:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								0cf6a12691 
								
							 
						 
						
							
							
								
								LLM: add default torch_dtype for fp16. ( #10124 )  
							
							 
							
							... 
							
							
							
							* set default torch_dtype for fp16.
* fix style.
* bug fix.
* update bug fix. 
							
						 
						
							2024-02-08 10:24:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								1aa0c623ce 
								
							 
						 
						
							
							
								
								disable fused layer norm on UHD ( #10130 )  
							
							 
							
							
							
						 
						
							2024-02-08 10:20:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								a8450fc300 
								
							 
						 
						
							
							
								
								[LLM] Support MLP optimization for Qwen1.5 ( #10123 )  
							
							 
							
							
							
						 
						
							2024-02-08 09:15:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								81ed65fbe7 
								
							 
						 
						
							
							
								
								[LLM] Add qwen1.5-7B in iGPU perf ( #10127 )  
							
							 
							
							... 
							
							
							
							* Add qwen1.5 test config yaml with transformers 4.37.0
* Update for yaml file 
							
						 
						
							2024-02-07 22:31:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								0fcfbfaf6f 
								
							 
						 
						
							
							
								
								LLM: add rwkv5 eagle GPU HF example ( #10122 )  
							
							 
							
							... 
							
							
							
							* LLM: add rwkv5 eagle example
* fix
* fix link 
							
						 
						
							2024-02-07 16:58:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								925f82107e 
								
							 
						 
						
							
							
								
								LLM: support models hosted by modelscope ( #10106 )  
							
							 
							
							
							
						 
						
							2024-02-07 16:46:36 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								c1ec3d8921 
								
							 
						 
						
							
							
								
								LLM: update FAQ about too many open files ( #10119 )  
							
							 
							
							
							
						 
						
							2024-02-07 15:02:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Keyan (Kyrie) Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								2e80701f58 
								
							 
						 
						
							
							
								
								Unit test on final logits and the logits of the last attention layer ( #10093 )  
							
							 
							
							... 
							
							
							
							* Add unit test on final logits and attention
* Add unit test on final logits and attention
* Modify unit test on final logits and attention 
							
						 
						
							2024-02-07 14:25:36 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuxuan Xia 
								
							 
						 
						
							
							
							
							
								
							
							
								3832eb0ce0 
								
							 
						 
						
							
							
								
								Add ChatGLM C-Eval Evaluator ( #10095 )  
							
							 
							
							... 
							
							
							
							* Add ChatGLM ceval evaluator
* Modify ChatGLM Evaluator Reference 
							
						 
						
							2024-02-07 11:27:06 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								63050c954d 
								
							 
						 
						
							
							
								
								fix ( #10117 )  
							
							 
							
							
							
						 
						
							2024-02-07 11:05:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								d3d2ee1b63 
								
							 
						 
						
							
							
								
								LLM: add speech T5 GPU example ( #10090 )  
							
							 
							
							... 
							
							
							
							* add speech t5 example
* fix
* fix 
							
						 
						
							2024-02-07 10:50:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								2f4c754759 
								
							 
						 
						
							
							
								
								LLM: add bark gpu example ( #10091 )  
							
							 
							
							... 
							
							
							
							* add bark gpu example
* fix
* fix license
* add bark
* add example
* fix
* another way 
							
						 
						
							2024-02-07 10:47:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
							
							
								
							
							
								8953acd7d6 
								
							 
						 
						
							
							
								
								[LLM] Fix log condition for BIGDL_OPT_IPEX ( #10115 )  
							
							 
							
							... 
							
							
							
							Fix log condition for BIGDL_OPT_IPEX 
							
						 
						
							2024-02-07 10:27:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								0eccb94d75 
								
							 
						 
						
							
							
								
								remove text-generation-webui from bigdl repo ( #10107 )  
							
							 
							
							
							
						 
						
							2024-02-06 17:46:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ovo233 
								
							 
						 
						
							
							
							
							
								
							
							
								2aaa21c41d 
								
							 
						 
						
							
							
								
								LLM: Update ppl tests ( #10092 )  
							
							 
							
							... 
							
							
							
							* update ppl tests
* use load_dataset api
* add exception handling
* add language argument
* address comments 
							
						 
						
							2024-02-06 17:31:48 +08:00