Chen, Zhentao 
								
							 
						 
						
							
							
							
							
								
							
							
								a8c866c32b 
								
							 
						 
						
							
							
								
								add ppl benchmark ( #9914 )  
							
							 
							
							... 
							
							
							
							* add ppl benchmark
* add license
* add readme
* add dataset argument
* add dataset usage
* fixed low bit args
* correct result
* fix terminal display
* fix ppl update
* enable fp16 fp32 bf16
* format the desc
* fix model_kwargs
* add more readme 
							
						 
						
							2024-01-18 17:54:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								100e0a87e5 
								
							 
						 
						
							
							
								
								LLM: add compressed chatglm3 model ( #9892 )  
							
							 
							
							... 
							
							
							
							* LLM: add compressed chatglm3 model
* small fix
* revert github action 
							
						 
						
							2024-01-18 17:48:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								9e2ac5291b 
								
							 
						 
						
							
							
								
								Add rwkv v4 back for igpu perf test 32-512 ( #9938 )  
							
							 
							
							
							
						 
						
							2024-01-18 17:15:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								7bbb98abb6 
								
							 
						 
						
							
							
								
								Disable fused layer norm when using XMX to fix mpt UT ( #9933 )  
							
							 
							
							
							
						 
						
							2024-01-18 16:22:12 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								1fc9dfa265 
								
							 
						 
						
							
							
								
								LLM: Update for  Qwen n tokens inputs ( #9931 )  
							
							 
							
							... 
							
							
							
							* update for n tokens inputs
* update style
* update 
							
						 
						
							2024-01-18 15:56:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								5184f400f9 
								
							 
						 
						
							
							
								
								Fix Mixtral GGUF Wrong Output Issue ( #9930 )  
							
							 
							
							... 
							
							
							
							* Fix Mixtral GGUF Wrong Output Issue
* fix style
* fix style 
							
						 
						
							2024-01-18 14:11:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								453df868c9 
								
							 
						 
						
							
							
								
								add rwkv v5 attention kernel ( #9927 )  
							
							 
							
							
							
						 
						
							2024-01-18 10:16:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								054952f82f 
								
							 
						 
						
							
							
								
								LLM: Fix rope of chatglm3 to support speculative decoding on CPU ( #9926 )  
							
							 
							
							
							
						 
						
							2024-01-18 09:28:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ziteng Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								18cd1f1432 
								
							 
						 
						
							
							
								
								[LLM]Solve the problem of calling bmm operator in BF16Linear ( #9924 )  
							
							 
							
							... 
							
							
							
							* Solve the problem of calling bmm operator in BF16Linear 
							
						 
						
							2024-01-17 18:08:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								98b86f83d4 
								
							 
						 
						
							
							
								
								Support fast rope for training ( #9745 )  
							
							 
							
							... 
							
							
							
							* init
* init
* fix style
* add test and fix
* address comment
* update
* merge upstream main 
							
						 
						
							2024-01-17 15:51:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								0c498a7b64 
								
							 
						 
						
							
							
								
								Add llama2-13b to igpu perf test ( #9920 )  
							
							 
							
							
							
						 
						
							2024-01-17 14:58:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								b059a32fff 
								
							 
						 
						
							
							
								
								LLM: add benchmark api for bigdl-llm fp16 on GPU ( #9919 )  
							
							 
							
							... 
							
							
							
							* add bmk for bigdl fp16
* fix 
							
						 
						
							2024-01-17 14:24:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								427f75000b 
								
							 
						 
						
							
							
								
								LLM: fix sdp of chatglm3 ( #9917 )  
							
							 
							
							... 
							
							
							
							* fix
* fix
* fix 
							
						 
						
							2024-01-17 13:37:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								94767da7cf 
								
							 
						 
						
							
							
								
								optimize rwkv v4 first token performance ( #9912 )  
							
							 
							
							
							
						 
						
							2024-01-17 09:27:41 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								511cbcf773 
								
							 
						 
						
							
							
								
								LLM: add Ceval benchmark test. ( #9872 )  
							
							 
							
							... 
							
							
							
							* init ceval benchmark test.
* upload dataset.
* add other tests.
* add qwen evaluator.
* fix qwen evaluator style.
* fix qwen evaluator style.
* update qwen evaluator.
* add llama evaluator.
* update eval
* fix typo.
* fix
* fix typo.
* fix llama evaluator.
* fix bug.
* fix style.
* delete dataset.
* fix style.
* fix style.
* add README.md and fix typo.
* fix comments.
* remove run scripts 
							
						 
						
							2024-01-16 19:14:26 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
							
							
								
							
							
								b909c5c9c2 
								
							 
						 
						
							
							
								
								GGUF load memory optimization ( #9913 )  
							
							 
							
							... 
							
							
							
							* block-wise
* convert linear for module
* revert
* Fix PEP8 checks Error 
							
						 
						
							2024-01-16 18:54:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								8643b62521 
								
							 
						 
						
							
							
								
								[LLM] Support longer context in iGPU perf tests (2048-256)  ( #9910 )  
							
							 
							
							
							
						 
						
							2024-01-16 17:48:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								dee32f7d15 
								
							 
						 
						
							
							
								
								copy fused rms norm's result to avoid <unk> ( #9909 )  
							
							 
							
							
							
						 
						
							2024-01-16 16:54:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								8d7326ae03 
								
							 
						 
						
							
							
								
								LLM: fix chatglm3 sdp to support speculative decoding ( #9900 )  
							
							 
							
							... 
							
							
							
							* fix chatglm3
* fix
* update
* meet code review
* fix 
							
						 
						
							2024-01-16 11:29:13 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
							
							
								
							
							
								9f34da7cdb 
								
							 
						 
						
							
							
								
								Update PVC XMX condition ( #9901 )  
							
							 
							
							... 
							
							
							
							* update pvc xmx condition
* update condition
* update condition 
							
						 
						
							2024-01-15 15:42:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								6637860ddf 
								
							 
						 
						
							
							
								
								change xmx condition ( #9896 )  
							
							 
							
							
							
						 
						
							2024-01-12 19:51:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								0e69bfe6b0 
								
							 
						 
						
							
							
								
								LLM: fix the performance drop of starcoder ( #9889 )  
							
							 
							
							... 
							
							
							
							* LLM: fix the performance drop of starcoder
* small fix
* small fix 
							
						 
						
							2024-01-12 09:14:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								d9cf55bce9 
								
							 
						 
						
							
							
								
								LLM: fix MLP check of mixtral ( #9891 )  
							
							 
							
							
							
						 
						
							2024-01-11 18:01:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ziteng Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								4f4ce73f31 
								
							 
						 
						
							
							
								
								[LLM] Add transformer_autocast_bf16 into all-in-one ( #9890 )  
							
							 
							
							... 
							
							
							
							* Add transformer_autocast_bf16 into all-in-one 
							
						 
						
							2024-01-11 17:51:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ziteng Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								4af88a67b9 
								
							 
						 
						
							
							
								
								support chatglm3 with bf16 ( #9888 )  
							
							 
							
							... 
							
							
							
							* support chatglm3 with bigdl-bf16 
							
						 
						
							2024-01-11 16:45:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								0aef35a965 
								
							 
						 
						
							
							
								
								[LLM] Improve LLM doc regarding windows gpu related info ( #9880 )  
							
							 
							
							... 
							
							
							
							* Improve runtime configuration for windows
* Add python 310/311 supports for wheel downloading
* Add troubleshooting for windows gpu
* Remove manually import ipex due to auto importer
* Add info regarding cpu_embedding=True on iGPU
* More info for Windows users
* Small updates to API docs
* Python style fix
* Remove tip for loading from saved optimize_model for now
* Updated based on comments
* Update win info for multi-intel gpus selection
* Small fix
* Small fix 
							
						 
						
							2024-01-11 14:37:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jinyi Wan 
								
							 
						 
						
							
							
							
							
								
							
							
								07485eff5a 
								
							 
						 
						
							
							
								
								Add SOLAR-10.7B to README ( #9869 )  
							
							 
							
							
							
						 
						
							2024-01-11 14:28:41 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								33fd1f9c76 
								
							 
						 
						
							
							
								
								LLM: fix input length logic for run_transformer_int4_gpu ( #9864 )  
							
							 
							
							... 
							
							
							
							* LLM: fix input length logic for run_transformer_int4_gpu
* small fix
* small fix
* small fix 
							
						 
						
							2024-01-10 18:20:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								53531ae4ee 
								
							 
						 
						
							
							
								
								LLM: support qkv fusion for fp8e5 ( #9878 )  
							
							 
							
							... 
							
							
							
							* update
* add mistral
* meet code review 
							
						 
						
							2024-01-10 17:50:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Lilac09 
								
							 
						 
						
							
							
							
							
								
							
							
								cb32b985ec 
								
							 
						 
						
							
							
								
								add mistral and chatglm support to vllm ( #9879 )  
							
							 
							
							... 
							
							
							
							* add mistral and chatglm support to vllm
* add mistral and chatglm support to vllm 
							
						 
						
							2024-01-10 15:38:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									ZehuaCao 
								
							 
						 
						
							
							
							
							
								
							
							
								e76d984164 
								
							 
						 
						
							
							
								
								[LLM] Support llm-awq vicuna-7b-1.5 on arc ( #9874 )  
							
							 
							
							... 
							
							
							
							* support llm-awq vicuna-7b-1.5 on arc
* support llm-awq vicuna-7b-1.5 on arc 
							
						 
						
							2024-01-10 14:28:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								3e05c9e11b 
								
							 
						 
						
							
							
								
								LLM: update esimd sdp kernel ( #9871 )  
							
							 
							
							
							
						 
						
							2024-01-09 18:10:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								023679459e 
								
							 
						 
						
							
							
								
								[LLM] Small fixes for finetune related examples and UTs ( #9870 )  
							
							 
							
							
							
						 
						
							2024-01-09 18:05:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cheen Hau, 俊豪 
								
							 
						 
						
							
							
							
							
								
							
							
								b2aa267f50 
								
							 
						 
						
							
							
								
								Enhance LLM GPU installation document ( #9828 )  
							
							 
							
							... 
							
							
							
							* Improve gpu install doc
* Add troubleshooting - setvars.sh not done properly.
* Further improvements
* 2024.x.x -> 2024.0
* Fixes
* Fix Install BigDL-LLM From Wheel : bigdl-llm[xpu_2.0]
* Remove "export USE_XETLA=OFF" for Max GPU 
							
						 
						
							2024-01-09 16:30:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								23fc888abe 
								
							 
						 
						
							
							
								
								Update llm gpu xpu default related info to PyTorch 2.1 ( #9866 )  
							
							 
							
							
							
						 
						
							2024-01-09 15:38:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								36496d60ac 
								
							 
						 
						
							
							
								
								only use quantize kv cache on MTL ( #9862 )  
							
							 
							
							
							
						 
						
							2024-01-09 13:24:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									ZehuaCao 
								
							 
						 
						
							
							
							
							
								
							
							
								146076bdb5 
								
							 
						 
						
							
							
								
								Support llm-awq backend ( #9856 )  
							
							 
							
							... 
							
							
							
							* Support for LLM-AWQ Backend
* fix
* Update README.md
* Add awqconfig
* modify init
* update
* support llm-awq
* fix style
* fix style
* update
* fix AwqBackendPackingMethod not found error
* fix style
* update README
* fix style
---------
Co-authored-by: Uxito-Ada <414416158@qq.com>
Co-authored-by: Heyang Sun <60865256+Uxito-Ada@users.noreply.github.com>
Co-authored-by: cyita <yitastudy@gmail.com> 
							
						 
						
							2024-01-09 13:07:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								fea6f16057 
								
							 
						 
						
							
							
								
								LLM: add mlp fusion for fp8e5 and update related check ( #9860 )  
							
							 
							
							... 
							
							
							
							* update mlp fusion
* fix style
* update 
							
						 
						
							2024-01-09 09:56:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								294fd32787 
								
							 
						 
						
							
							
								
								LLM: update DeepSpeed AutoTP example with GPU memory optimization ( #9823 )  
							
							 
							
							
							
						 
						
							2024-01-09 09:22:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								5ba1dc38d4 
								
							 
						 
						
							
							
								
								[LLM] Change default Linux GPU install option to PyTorch 2.1 ( #9858 )  
							
							 
							
							... 
							
							
							
							* Update default xpu to ipex 2.1
* Update related install ut support correspondingly
* Add arc ut tests for both ipex 2.0 and 2.1
* Small fix
* Disable ipex 2.1 test for now as oneapi 2024.0 has not been installed on the test machine
* Update document for default PyTorch 2.1
* Small fix
* Small fix
* Small doc fixes
* Small fixes 
							
						 
						
							2024-01-08 17:16:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Mingyu Wei 
								
							 
						 
						
							
							
							
							
								
							
							
								ed81baa35e 
								
							 
						 
						
							
							
								
								LLM: Use default typing-extension in LangChain examples ( #9857 )  
							
							 
							
							... 
							
							
							
							* remove typing extension downgrade in readme; minor fixes of code
* fix typos in README
* change default question of docqa.py 
							
						 
						
							2024-01-08 16:50:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jiao Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								3b6372ab12 
								
							 
						 
						
							
							
								
								Fix Llama transformers 4.36 support ( #9852 )  
							
							 
							
							... 
							
							
							
							* support 4.36
* style
* update
* update
* update
* fix merge
* update 
							
						 
						
							2024-01-08 00:32:23 -08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chen, Zhentao 
								
							 
						 
						
							
							
							
							
								
							
							
								1b585b0d40 
								
							 
						 
						
							
							
								
								set fp8 default as e5m2 ( #9859 )  
							
							 
							
							
							
						 
						
							2024-01-08 15:53:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								dc995006cc 
								
							 
						 
						
							
							
								
								LLM: add flash attention for mistral / mixtral ( #9846 )  
							
							 
							
							... 
							
							
							
							* add flash attention for mistral
* update
* add flash attn for mixtral
* fix style 
							
						 
						
							2024-01-08 09:51:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								afaa871144 
								
							 
						 
						
							
							
								
								[LLM] support quantize kv cache to fp8 ( #9812 )  
							
							 
							
							
							
						 
						
							2024-01-08 09:28:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jiao Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								248ae7fad2 
								
							 
						 
						
							
							
								
								LLama optimize_model to support transformers 4.36 ( #9818 )  
							
							 
							
							... 
							
							
							
							* support 4.36
* style
* update
* update
* update 
							
						 
						
							2024-01-05 11:30:18 -08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								a60bda3324 
								
							 
						 
						
							
							
								
								LLM: update check for deepspeed ( #9838 )  
							
							 
							
							
							
						 
						
							2024-01-05 16:44:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								16433dd959 
								
							 
						 
						
							
							
								
								LLM: fix first token judgement of flash attention ( #9841 )  
							
							 
							
							... 
							
							
							
							* fix flash attention
* meet code review
* fix 
							
						 
						
							2024-01-05 13:49:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								f919f5792a 
								
							 
						 
						
							
							
								
								fix kv cache out of bound ( #9827 )  
							
							 
							
							
							
						 
						
							2024-01-05 12:38:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								5df31db773 
								
							 
						 
						
							
							
								
								LLM: fix accuracy issue of chatglm3 ( #9830 )  
							
							 
							
							... 
							
							
							
							* add attn mask for first token
* fix
* fix
* change attn calculation
* fix
* fix
* fix style
* fix style 
							
						 
						
							2024-01-05 10:52:05 +08:00