Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								39d90839aa 
								
							 
						 
						
							
							
								
								LLM: add quantize kv cache for llama. ( #10086 )  
							
							 
							
							... 
							
							
							
							* feat: add quantize kv cache for llama.
* fix style.
* add quantized attention forward function.
* revert style.
* fix style.
* fix style.
* update quantized kv cache and add quantize_qkv
* fix style.
* fix style.
* optimize quantize kv cache.
* fix style. 
							
						 
						
							2024-02-08 16:49:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								d848efe17c 
								
							 
						 
						
							
							
								
								add quantize kv cache support for qwen2 ( #10134 )  
							
							 
							
							
							
						 
						
							2024-02-08 16:17:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								3f79128ed7 
								
							 
						 
						
							
							
								
								[LLM] Enable kv_cache optimization for Qwen2 on transformers-v4.37.0 ( #10131 )  
							
							 
							
							... 
							
							
							
							* add support for kv_cache optimization on transformers-v4.37.0
* enable attention forward
* style fix
* disable rotary for now 
							
						 
						
							2024-02-08 14:20:26 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								063dc145ac 
								
							 
						 
						
							
							
								
								LLM: basic support for q2k ( #10132 )  
							
							 
							
							... 
							
							
							
							* basic support for q2k
* fix style 
							
						 
						
							2024-02-08 13:52:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								11fe5a87ec 
								
							 
						 
						
							
							
								
								LLM: add Modelscope model example ( #10126 )  
							
							 
							
							
							
						 
						
							2024-02-08 11:18:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								0cf6a12691 
								
							 
						 
						
							
							
								
								LLM: add default torch_dtype for fp16. ( #10124 )  
							
							 
							
							... 
							
							
							
							* set default torch_dtype for fp16.
* fix style.
* bug fix.
* update bug fix. 
							
						 
						
							2024-02-08 10:24:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								1aa0c623ce 
								
							 
						 
						
							
							
								
								disable fused layer norm on UHD ( #10130 )  
							
							 
							
							
							
						 
						
							2024-02-08 10:20:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								a8450fc300 
								
							 
						 
						
							
							
								
								[LLM] Support MLP optimization for Qwen1.5 ( #10123 )  
							
							 
							
							
							
						 
						
							2024-02-08 09:15:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								81ed65fbe7 
								
							 
						 
						
							
							
								
								[LLM] Add qwen1.5-7B in iGPU perf ( #10127 )  
							
							 
							
							... 
							
							
							
							* Add qwen1.5 test config yaml with transformers 4.37.0
* Update for yaml file 
							
						 
						
							2024-02-07 22:31:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cheen Hau, 俊豪 
								
							 
						 
						
							
							
							
							
								
							
							
								a7f9a13f6e 
								
							 
						 
						
							
							
								
								Enhance gpu doc with PIP install oneAPI ( #10109 )  
							
							 
							
							... 
							
							
							
							* Add pip install oneapi instructions
* Fixes
* Add instruction for oneapi2023
* Runtime config
* Fixes
* Remove "Currently, oneAPI installed with .. "
* Add pip package version for oneAPI 2024
* Reviewer comments
* Fix errors 
							
						 
						
							2024-02-07 21:14:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									hxsz1997 
								
							 
						 
						
							
							
							
							
								
							
							
								b4c327ea78 
								
							 
						 
						
							
							
								
								Llm ppl workflow bug fix ( #10128 )  
							
							 
							
							... 
							
							
							
							* add llm-ppl workflow
* update the DATASET_DIR
* test multiple precisions
* modify nightly test
* match the updated ppl code
* add matrix.include
* fix the include error
* update the include
* add more model
* update the precision of include
* update nightly time and add more models
* fix the workflow_dispatch description, change default model of pr and modify the env
* modify workflow_dispatch language options
* modify options
* modify language options
* modify workflow_dispatch type
* modify type
* modify the type of language
* change seq_len type 
							
						 
						
							2024-02-07 18:48:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									hxsz1997 
								
							 
						 
						
							
							
							
							
								
							
							
								76bd792ff1 
								
							 
						 
						
							
							
								
								Fix llm ppl workflow workflow_dispatch bugs ( #10125 )  
							
							 
							
							... 
							
							
							
							* add llm-ppl workflow
* update the DATASET_DIR
* test multiple precisions
* modify nightly test
* match the updated ppl code
* add matrix.include
* fix the include error
* update the include
* add more model
* update the precision of include
* update nightly time and add more models
* fix the workflow_dispatch description, change default model of pr and modify the env
* modify workflow_dispatch language options
* modify options
* modify language options 
							
						 
						
							2024-02-07 17:41:44 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								0fcfbfaf6f 
								
							 
						 
						
							
							
								
								LLM: add rwkv5 eagle GPU HF example ( #10122 )  
							
							 
							
							... 
							
							
							
							* LLM: add rwkv5 eagle example
* fix
* fix link 
							
						 
						
							2024-02-07 16:58:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
							
							
								
							
							
								9f5a86f9db 
								
							 
						 
						
							
							
								
								fix OpenSSF Token-Permissions issues ( #10121 )  
							
							 
							
							... 
							
							
							
							Co-authored-by: Your Name <Your Email> 
							
						 
						
							2024-02-07 16:51:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								925f82107e 
								
							 
						 
						
							
							
								
								LLM: support models hosted by modelscope ( #10106 )  
							
							 
							
							
							
						 
						
							2024-02-07 16:46:36 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									hxsz1997 
								
							 
						 
						
							
							
							
							
								
							
							
								1710ecb990 
								
							 
						 
						
							
							
								
								Add llm-ppl workflow ( #10074 )  
							
							 
							
							... 
							
							
							
							* add llm-ppl workflow
* update the DATASET_DIR
* test multiple precisions
* modify nightly test
* match the updated ppl code
* add matrix.include
* fix the include error
* update the include
* add more model
* update the precision of include
* update nightly time and add more models
* fix the workflow_dispatch description, change default model of pr and modify the env 
							
						 
						
							2024-02-07 16:29:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								c1ec3d8921 
								
							 
						 
						
							
							
								
								LLM: update FAQ about too many open files ( #10119 )  
							
							 
							
							
							
						 
						
							2024-02-07 15:02:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Keyan (Kyrie) Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								2e80701f58 
								
							 
						 
						
							
							
								
								Unit test on final logits and the logits of the last attention layer ( #10093 )  
							
							 
							
							... 
							
							
							
							* Add unit test on final logits and attention
* Add unit test on final logits and attention
* Modify unit test on final logits and attention 
							
						 
						
							2024-02-07 14:25:36 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuxuan Xia 
								
							 
						 
						
							
							
							
							
								
							
							
								3832eb0ce0 
								
							 
						 
						
							
							
								
								Add ChatGLM C-Eval Evaluator ( #10095 )  
							
							 
							
							... 
							
							
							
							* Add ChatGLM ceval evaluator
* Modify ChatGLM Evaluator Reference 
							
						 
						
							2024-02-07 11:27:06 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
							
							
								
							
							
								5e9710cec4 
								
							 
						 
						
							
							
								
								Update threshold for cpu stable version tests ( #10108 )  
							
							 
							
							... 
							
							
							
							* update threshold
* update
* test
* update
* update
* revert
* revert
---------
Co-authored-by: Your Name <Your Email> 
							
						 
						
							2024-02-07 11:21:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								63050c954d 
								
							 
						 
						
							
							
								
								fix ( #10117 )  
							
							 
							
							
							
						 
						
							2024-02-07 11:05:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								d3d2ee1b63 
								
							 
						 
						
							
							
								
								LLM: add speech T5 GPU example ( #10090 )  
							
							 
							
							... 
							
							
							
							* add speech t5 example
* fix
* fix 
							
						 
						
							2024-02-07 10:50:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								2f4c754759 
								
							 
						 
						
							
							
								
								LLM: add bark gpu example ( #10091 )  
							
							 
							
							... 
							
							
							
							* add bark gpu example
* fix
* fix license
* add bark
* add example
* fix
* another way 
							
						 
						
							2024-02-07 10:47:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
							
							
								
							
							
								8953acd7d6 
								
							 
						 
						
							
							
								
								[LLM] Fix log condition for BIGDL_OPT_IPEX ( #10115 )  
							
							 
							
							... 
							
							
							
							Fix log condition for BIGDL_OPT_IPEX 
							
						 
						
							2024-02-07 10:27:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									yb-peng 
								
							 
						 
						
							
							
							
							
								
							
							
								3f60e9df89 
								
							 
						 
						
							
							
								
								Merge pull request  #10101  from pengyb2001/eval_stat  
							
							 
							
							... 
							
							
							
							Modify harness evaluation workflow 
							
						 
						
							2024-02-07 00:02:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									pengyb2001 
								
							 
						 
						
							
							
							
							
								
							
							
								f63eba6c5a 
								
							 
						 
						
							
							
								
								change pr test machine  
							
							 
							
							
							
						 
						
							2024-02-06 23:35:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									pengyb2001 
								
							 
						 
						
							
							
							
							
								
							
							
								e627727b4b 
								
							 
						 
						
							
							
								
								change download path  
							
							 
							
							
							
						 
						
							2024-02-06 21:12:51 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									pengyb2001 
								
							 
						 
						
							
							
							
							
								
							
							
								2c4e610743 
								
							 
						 
						
							
							
								
								remove irrelevant code  
							
							 
							
							
							
						 
						
							2024-02-06 20:12:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jason Dai 
								
							 
						 
						
							
							
							
							
								
							
							
								e2233dddef 
								
							 
						 
						
							
							
								
								Update README ( #10111 )  
							
							 
							
							
							
						 
						
							2024-02-06 19:29:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								0eccb94d75 
								
							 
						 
						
							
							
								
								remove text-generation-webui from bigdl repo ( #10107 )  
							
							 
							
							
							
						 
						
							2024-02-06 17:46:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ovo233 
								
							 
						 
						
							
							
							
							
								
							
							
								2aaa21c41d 
								
							 
						 
						
							
							
								
								LLM: Update ppl tests ( #10092 )  
							
							 
							
							... 
							
							
							
							* update ppl tests
* use load_dataset api
* add exception handling
* add language argument
* address comments 
							
						 
						
							2024-02-06 17:31:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								3a46b57253 
								
							 
						 
						
							
							
								
								[LLM] Add RWKV4 HF GPU Example ( #10105 )  
							
							 
							
							... 
							
							
							
							* Add GPU HF example for RWKV 4
* Add link to rwkv4
* fix 
							
						 
						
							2024-02-06 16:30:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								518ef95abc 
								
							 
						 
						
							
							
								
								Small fix for Nonetype error ( #10104 )  
							
							 
							
							
							
						 
						
							2024-02-06 14:58:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								d61f4905ac 
								
							 
						 
						
							
							
								
								LLM: 2bit quantization initial support ( #10042 )  
							
							 
							
							... 
							
							
							
							* basis quantize support
* fix new module name
* small update
* and mixed int4 with iq2_xxs
* remove print
* code refactor
* fix style
* meet code review 
							
						 
						
							2024-02-06 14:58:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									pengyb2001 
								
							 
						 
						
							
							
							
							
								
							
							
								d11ef0d117 
								
							 
						 
						
							
							
								
								remove retry in llm install part  
							
							 
							
							
							
						 
						
							2024-02-06 14:25:26 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									pengyb2001 
								
							 
						 
						
							
							
							
							
								
							
							
								94723bb0b1 
								
							 
						 
						
							
							
								
								add retry in run llm install part;test arc05 with llama2  
							
							 
							
							
							
						 
						
							2024-02-06 14:09:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									pengyb2001 
								
							 
						 
						
							
							
							
							
								
							
							
								2c75b5b981 
								
							 
						 
						
							
							
								
								remove mistral in pr job  
							
							 
							
							
							
						 
						
							2024-02-06 13:51:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									pengyb2001 
								
							 
						 
						
							
							
							
							
								
							
							
								5edefe7d8e 
								
							 
						 
						
							
							
								
								remove nightly summary job  
							
							 
							
							
							
						 
						
							2024-02-06 13:50:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jason Dai 
								
							 
						 
						
							
							
							
							
								
							
							
								f440cb4fba 
								
							 
						 
						
							
							
								
								Update Self-Speculative Decoding Readme ( #10102 )  
							
							 
							
							
							
						 
						
							2024-02-06 12:59:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									pengyb2001 
								
							 
						 
						
							
							
							
							
								
							
							
								bc92dbf7be 
								
							 
						 
						
							
							
								
								remove stableml;change schedule;change storage method  
							
							 
							
							
							
						 
						
							2024-02-06 11:20:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									dingbaorong 
								
							 
						 
						
							
							
							
							
								
							
							
								36c9442c6d 
								
							 
						 
						
							
							
								
								Arc Stable version test ( #10087 )  
							
							 
							
							... 
							
							
							
							* add batch_size in stable version test
* add batch_size in excludes
* add excludes for batch_size
* fix ci
* trigger regression test
* fix xpu version
* disable ci
* address kai's comment
---------
Co-authored-by: Ariadne <wyn2000330@126.com> 
							
						 
						
							2024-02-06 10:23:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jiao Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								33b9e7744d 
								
							 
						 
						
							
							
								
								fix dimension ( #10097 )  
							
							 
							
							
							
						 
						
							2024-02-05 15:07:38 -08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								4b02ff188b 
								
							 
						 
						
							
							
								
								[WebUI] Add prompt format and stopping words for Qwen ( #10066 )  
							
							 
							
							... 
							
							
							
							* add prompt format and stopping_words for qwen model
* performance optimization
* optimize
* update
* meet comments 
							
						 
						
							2024-02-05 18:23:13 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								0aecd8637b 
								
							 
						 
						
							
							
								
								LLM: small fix for the html script ( #10094 )  
							
							 
							
							
							
						 
						
							2024-02-05 17:27:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhicun 
								
							 
						 
						
							
							
							
							
								
							
							
								7d2be7994f 
								
							 
						 
						
							
							
								
								add phixtral and optimize phi-moe ( #10052 )  
							
							 
							
							
							
						 
						
							2024-02-05 11:12:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhicun 
								
							 
						 
						
							
							
							
							
								
							
							
								676d6923f2 
								
							 
						 
						
							
							
								
								LLM: modify transformersembeddings.embed() in langchain ( #10051 )  
							
							 
							
							
							
						 
						
							2024-02-05 10:42:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								ad050107b3 
								
							 
						 
						
							
							
								
								LLM: fix mpt load_low_bit issue ( #10075 )  
							
							 
							
							... 
							
							
							
							* fix
* retry
* retry 
							
						 
						
							2024-02-05 10:17:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Lilac09 
								
							 
						 
						
							
							
							
							
								
							
							
								f8dcaff7f4 
								
							 
						 
						
							
							
								
								use default python ( #10070 )  
							
							 
							
							
							
						 
						
							2024-02-05 09:06:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								9050991e4e 
								
							 
						 
						
							
							
								
								fix gradio check issue temporarily ( #10082 )  
							
							 
							
							
							
						 
						
							2024-02-04 16:46:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								c2e562d037 
								
							 
						 
						
							
							
								
								LLM: add batch_size to the csv and html ( #10080 )  
							
							 
							
							... 
							
							
							
							* LLM: add batch_size to the csv and html
* small fix 
							
						 
						
							2024-02-04 16:35:44 +08:00