binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								2958ca49c0 
								
							 
						 
						
							
							
								
								LLM: add patching function for llm finetuning ( #10247 )  
							
							 
							
							
							
						 
						
							2024-03-21 16:01:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhicun 
								
							 
						 
						
							
							
							
							
								
							
							
								5b97fdb87b 
								
							 
						 
						
							
							
								
								update deepseek example readme ( #10420 )  
							
							 
							
							... 
							
							
							
							* update readme
* update
* update readme 
							
						 
						
							2024-03-21 15:21:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									hxsz1997 
								
							 
						 
						
							
							
							
							
								
							
							
								a5f35757a4 
								
							 
						 
						
							
							
								
								Migrate langchain rag cpu example to gpu ( #10450 )  
							
							 
							
							... 
							
							
							
							* add langchain rag on gpu
* add rag example in readme
* add trust_remote_code in TransformersEmbeddings.from_model_id
* add trust_remote_code in TransformersEmbeddings.from_model_id in cpu 
							
						 
						
							2024-03-21 15:20:46 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								85ef3f1d99 
								
							 
						 
						
							
							
								
								LLM: add empty cache in deepspeed autotp benchmark script ( #10488 )  
							
							 
							
							
							
						 
						
							2024-03-21 10:51:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
							
							
								
							
							
								5a5fd5af5b 
								
							 
						 
						
							
							
								
								LLM: Add speculative benchmark on CPU/XPU ( #10464 )  
							
							 
							
							... 
							
							
							
							Add speculative benchmark on CPU/XPU. 
							
						 
						
							2024-03-21 09:51:06 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								28c315a5b9 
								
							 
						 
						
							
							
								
								LLM: fix deepspeed error of finetuning on xpu ( #10484 )  
							
							 
							
							
							
						 
						
							2024-03-21 09:46:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								021d77fd22 
								
							 
						 
						
							
							
								
								Remove softmax upcast fp32 in llama ( #10481 )  
							
							 
							
							... 
							
							
							
							* update
* fix style 
							
						 
						
							2024-03-20 18:17:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								cfdf8ad496 
								
							 
						 
						
							
							
								
								Fix modules_not_to_convert argument ( #10483 )  
							
							 
							
							
							
						 
						
							2024-03-20 17:47:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
							
							
								
							
							
								cbe24cc7e6 
								
							 
						 
						
							
							
								
								LLM: Enable BigDL IPEX Int8 ( #10480 )  
							
							 
							
							... 
							
							
							
							Enable BigDL IPEX Int8 
							
						 
						
							2024-03-20 15:59:54 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									ZehuaCao 
								
							 
						 
						
							
							
							
							
								
							
							
								1d062e24db 
								
							 
						 
						
							
							
								
								Update serving doc ( #10475 )  
							
							 
							
							... 
							
							
							
							* update serving doc
* add tob
* update
* update
* update
* update vllm worker 
							
						 
						
							2024-03-20 14:44:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								4581e4f17f 
								
							 
						 
						
							
							
								
								LLM: fix whiper model missing config. ( #10473 )  
							
							 
							
							... 
							
							
							
							* fix whiper model missing config.
* fix style.
* fix style.
* style. 
							
						 
						
							2024-03-20 14:22:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								e41d556436 
								
							 
						 
						
							
							
								
								LLM: change fp16 benchmark to model.half ( #10477 )  
							
							 
							
							... 
							
							
							
							* LLM: change fp16 benchmark to model.half
* fix 
							
						 
						
							2024-03-20 13:38:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								749bedaf1e 
								
							 
						 
						
							
							
								
								fix rwkv v5 fp16 ( #10474 )  
							
							 
							
							
							
						 
						
							2024-03-20 13:15:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								72bcc27da9 
								
							 
						 
						
							
							
								
								[LLM] Add TransformersBgeEmbeddings class in bigdl.llm.langchain.embeddings ( #10459 )  
							
							 
							
							... 
							
							
							
							* Add TransformersBgeEmbeddings class in bigdl.llm.langchain.embeddings
* Small fixes 
							
						 
						
							2024-03-19 18:04:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								463a86cd5d 
								
							 
						 
						
							
							
								
								LLM: fix qwen-vl interpolation gpu abnormal results. ( #10457 )  
							
							 
							
							... 
							
							
							
							* fix qwen-vl interpolation gpu abnormal results.
* fix style.
* update qwen-vl gpu example.
* fix comment and update example.
* fix style. 
							
						 
						
							2024-03-19 16:59:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								e9055c32f9 
								
							 
						 
						
							
							
								
								LLM: fix fp16 mem record in benchmark ( #10461 )  
							
							 
							
							... 
							
							
							
							* LLM: fix fp16 mem record in benchmark
* change style 
							
						 
						
							2024-03-19 16:17:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jiao Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								f3fefdc9ce 
								
							 
						 
						
							
							
								
								fix pad_token_id issue ( #10425 )  
							
							 
							
							
							
						 
						
							2024-03-18 23:30:28 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuxuan Xia 
								
							 
						 
						
							
							
							
							
								
							
							
								74e7490fda 
								
							 
						 
						
							
							
								
								Fix Baichuan2 prompt format ( #10334 )  
							
							 
							
							... 
							
							
							
							* Fix Baichuan2 prompt format
* Fix Baichuan2 README
* Change baichuan2 prompt info
* Change baichuan2 prompt info 
							
						 
						
							2024-03-19 12:48:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								0451103a43 
								
							 
						 
						
							
							
								
								LLM: add int4+fp16 benchmark script for windows benchmarking ( #10449 )  
							
							 
							
							... 
							
							
							
							* LLM: add fp16 for benchmark script
* remove transformer_int4_fp16_loadlowbit_gpu_win 
							
						 
						
							2024-03-19 11:11:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								bbd749dceb 
								
							 
						 
						
							
							
								
								qwen2 fp8 cache ( #10446 )  
							
							 
							
							... 
							
							
							
							* qwen2 fp8 cache
* fix style check 
							
						 
						
							2024-03-19 08:32:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								9e763b049c 
								
							 
						 
						
							
							
								
								Support running pipeline parallel inference by vertically partitioning model to different devices ( #10392 )  
							
							 
							
							... 
							
							
							
							* support pipeline parallel inference
* fix logging
* remove benchmark file
* fic
* need to warmup twice
* support qwen and qwen2
* fix lint
* remove genxir
* refine 
							
						 
						
							2024-03-18 13:04:45 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								66b4bb5c5d 
								
							 
						 
						
							
							
								
								LLM: update setup to provide cpp for windows ( #10448 )  
							
							 
							
							
							
						 
						
							2024-03-18 18:20:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
							
							
								
							
							
								dbdeaddd6a 
								
							 
						 
						
							
							
								
								LLM: Fix log condition for BIGDL_OPT_IPEX ( #10441 )  
							
							 
							
							... 
							
							
							
							remove log for BIGDL_OPT_IPEX 
							
						 
						
							2024-03-18 16:03:51 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								1de13ea578 
								
							 
						 
						
							
							
								
								LLM: remove CPU english_quotes dataset and update docker example ( #10399 )  
							
							 
							
							... 
							
							
							
							* update dataset
* update readme
* update docker cpu
* update xpu docker 
							
						 
						
							2024-03-18 10:45:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								399843faf0 
								
							 
						 
						
							
							
								
								Baichuan 7b fp16 sdp and qwen2 pvc sdp ( #10435 )  
							
							 
							
							... 
							
							
							
							* add baichuan sdp
* update
* baichuan2
* fix
* fix style
* revert 13b
* revert 
							
						 
						
							2024-03-18 10:15:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jiao Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								5ab52ef5b5 
								
							 
						 
						
							
							
								
								update ( #10424 )  
							
							 
							
							
							
						 
						
							2024-03-15 09:24:26 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								bd64488b2a 
								
							 
						 
						
							
							
								
								add mask support for llama/chatglm fp8 sdp ( #10433 )  
							
							 
							
							... 
							
							
							
							* add mask support for fp8 sdp
* fix chatglm2 dtype
* update 
							
						 
						
							2024-03-15 17:36:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Keyan (Kyrie) Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								444b11af22 
								
							 
						 
						
							
							
								
								Add LangChain upstream ut test for ipynb ( #10387 )  
							
							 
							
							... 
							
							
							
							* Add LangChain upstream ut test for ipynb
* Integrate unit test for LangChain upstream ut and ipynb into one file
* Modify file name
* Remove LangChain version update in unit test
* Move Langchain upstream ut job to arc
* Modify path in .yml file
* Modify path in llm_unit_tests.yml
* Avoid create directory repeatedly 
							
						 
						
							2024-03-15 16:31:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								ca372f6dab 
								
							 
						 
						
							
							
								
								LLM: add save/load example for ModelScope ( #10397 )  
							
							 
							
							... 
							
							
							
							* LLM: add sl example for modelscope
* fix according to comments
* move file 
							
						 
						
							2024-03-15 15:17:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								24473e331a 
								
							 
						 
						
							
							
								
								Qwen2 fp16 sdp ( #10427 )  
							
							 
							
							... 
							
							
							
							* qwen2 sdp and refine
* update
* update
* fix style
* remove use_flash_attention 
							
						 
						
							2024-03-15 13:12:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								1315150e64 
								
							 
						 
						
							
							
								
								Add baichuan2-13b 1k to arc nightly perf ( #10406 )  
							
							 
							
							
							
						 
						
							2024-03-15 10:29:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								b036205be2 
								
							 
						 
						
							
							
								
								LLM: add fp8 sdp for chatglm2/3 ( #10411 )  
							
							 
							
							... 
							
							
							
							* add fp8 sdp for chatglm2
* fix style 
							
						 
						
							2024-03-15 09:38:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								fe8976a00f 
								
							 
						 
						
							
							
								
								LLM: Support gguf models use low_bit and fix no json( #10408 )  
							
							 
							
							... 
							
							
							
							* support others model use low_bit
* update readme
* update to add *.json 
							
						 
						
							2024-03-15 09:34:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								cda38f85a9 
								
							 
						 
						
							
							
								
								Qwen fp16 sdp ( #10401 )  
							
							 
							
							... 
							
							
							
							* qwen sdp
* fix
* update
* update
* update sdp
* update
* fix style check
* add to origin type 
							
						 
						
							2024-03-15 08:51:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									dingbaorong 
								
							 
						 
						
							
							
							
							
								
							
							
								1c0f7ed3fa 
								
							 
						 
						
							
							
								
								add xpu support ( #10419 )  
							
							 
							
							
							
						 
						
							2024-03-14 17:13:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								7d29765092 
								
							 
						 
						
							
							
								
								refactor qwen2 forward to enable XPU ( #10409 )  
							
							 
							
							... 
							
							
							
							* refactor awen2 forward to enable XPU
* Update qwen2.py 
							
						 
						
							2024-03-14 11:03:05 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuxuan Xia 
								
							 
						 
						
							
							
							
							
								
							
							
								f36224aac4 
								
							 
						 
						
							
							
								
								Fix ceval run.sh ( #10410 )  
							
							 
							
							
							
						 
						
							2024-03-14 10:57:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									ZehuaCao 
								
							 
						 
						
							
							
							
							
								
							
							
								f66329e35d 
								
							 
						 
						
							
							
								
								Fix multiple get_enable_ipex function error ( #10400 )  
							
							 
							
							... 
							
							
							
							* fix multiple get_enable_ipex function error
* remove get_enable_ipex_low_bit function 
							
						 
						
							2024-03-14 10:14:13 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								76e30d8ec8 
								
							 
						 
						
							
							
								
								Empty cache for lm_head ( #10317 )  
							
							 
							
							... 
							
							
							
							* empty cache
* add comments 
							
						 
						
							2024-03-13 20:31:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								2be8bbd236 
								
							 
						 
						
							
							
								
								LLM: add cpp option in setup.py ( #10403 )  
							
							 
							
							... 
							
							
							
							* add llama_cpp option
* meet code review 
							
						 
						
							2024-03-13 20:12:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ovo233 
								
							 
						 
						
							
							
							
							
								
							
							
								0dbce53464 
								
							 
						 
						
							
							
								
								LLM: Add decoder/layernorm unit tests ( #10211 )  
							
							 
							
							... 
							
							
							
							* add decoder/layernorm unit tests
* update tests
* delete decoder tests
* address comments
* remove none type check
* restore nonetype checks
* delete nonetype checks; add decoder tests for Llama
* add gc
* deal with tuple output 
							
						 
						
							2024-03-13 19:41:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								06a851afa9 
								
							 
						 
						
							
							
								
								support new baichuan model ( #10404 )  
							
							 
							
							
							
						 
						
							2024-03-13 17:45:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuxuan Xia 
								
							 
						 
						
							
							
							
							
								
							
							
								a90e9b6ec2 
								
							 
						 
						
							
							
								
								Fix C-Eval Workflow ( #10359 )  
							
							 
							
							... 
							
							
							
							* Fix Baichuan2 prompt format
* Fix ceval workflow errors
* Fix ceval workflow error
* Fix ceval error
* Fix ceval error
* Test ceval
* Fix ceval
* Fix ceval
* Fix ceval
* Fix ceval
* Fix ceval
* Fix ceval
* Fix ceval
* Fix ceval
* Fix ceval
* Fix ceval
* Fix ceval
* Fix ceval
* Add ceval dependency test
* Fix ceval
* Fix ceval
* Test full ceval
* Test full ceval
* Fix ceval
* Fix ceval 
							
						 
						
							2024-03-13 17:23:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								b268baafd6 
								
							 
						 
						
							
							
								
								use fp8 sdp in llama ( #10396 )  
							
							 
							
							
							
						 
						
							2024-03-13 16:45:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
							
							
								
							
							
								60043a3ae8 
								
							 
						 
						
							
							
								
								LLM: Support Baichuan2-13b in BigDL-vLLM ( #10398 )  
							
							 
							
							... 
							
							
							
							Support Baichuan2-13b in BigDL-vLLM. 
							
						 
						
							2024-03-13 16:21:06 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
							
							
								
							
							
								e10de2c42d 
								
							 
						 
						
							
							
								
								[Fix] LLM: Fix condition check error for speculative decoding on CPU ( #10402 )  
							
							 
							
							... 
							
							
							
							Fix condition check error for speculative decoding on CPU 
							
						 
						
							2024-03-13 16:05:06 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Keyan (Kyrie) Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								f158b49835 
								
							 
						 
						
							
							
								
								[LLM] Recover arc ut test for Falcon ( #10385 )  
							
							 
							
							
							
						 
						
							2024-03-13 13:31:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								d72c0fad0d 
								
							 
						 
						
							
							
								
								Qwen2 SDPA forward on CPU ( #10395 )  
							
							 
							
							... 
							
							
							
							* Fix Qwen1.5 CPU forward
* Update convert.py
* Update qwen2.py 
							
						 
						
							2024-03-13 13:10:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								ca58a69b97 
								
							 
						 
						
							
							
								
								fix arc rms norm UT ( #10394 )  
							
							 
							
							
							
						 
						
							2024-03-13 13:09:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								0193f29411 
								
							 
						 
						
							
							
								
								LLM : Enable  gguf float16 and Yuan2 model ( #10372 )  
							
							 
							
							... 
							
							
							
							* enable float16
* add yun files
* enable yun
* enable set low_bit on yuan2
* update
* update license
* update generate
* update readme
* update python style
* update 
							
						 
						
							2024-03-13 10:19:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								f5d65203c0 
								
							 
						 
						
							
							
								
								First token lm_head optimization ( #10318 )  
							
							 
							
							... 
							
							
							
							* add lm head linear
* update
* address comments and fix style
* address comment 
							
						 
						
							2024-03-13 10:11:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Keyan (Kyrie) Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								7cf01e6ec8 
								
							 
						 
						
							
							
								
								Add LangChain upstream ut test ( #10349 )  
							
							 
							
							... 
							
							
							
							* Add LangChain upstream ut test
* Add LangChain upstream ut test
* Specify version numbers in yml script
* Correct langchain-community version 
							
						 
						
							2024-03-13 09:52:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								28c4a8cf5c 
								
							 
						 
						
							
							
								
								Qwen fused qkv ( #10368 )  
							
							 
							
							... 
							
							
							
							* fused qkv + rope for qwen
* quantized kv cache
* fix
* update qwen
* fixed quantized qkv
* fix
* meet code review
* update split
* convert.py
* extend when no enough kv
* fix 
							
						 
						
							2024-03-12 17:39:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								741c2bf1df 
								
							 
						 
						
							
							
								
								use new rms norm ( #10384 )  
							
							 
							
							
							
						 
						
							2024-03-12 17:29:51 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
							
							
								
							
							
								0ded0b4b13 
								
							 
						 
						
							
							
								
								LLM: Enable BigDL IPEX optimization for int4 ( #10319 )  
							
							 
							
							... 
							
							
							
							Enable BigDL IPEX optimization for int4 
							
						 
						
							2024-03-12 17:08:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								5d7e044dbc 
								
							 
						 
						
							
							
								
								LLM: add low bit option in deepspeed autotp example ( #10382 )  
							
							 
							
							
							
						 
						
							2024-03-12 17:07:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								df3bcc0e65 
								
							 
						 
						
							
							
								
								LLM: remove english_quotes dataset ( #10370 )  
							
							 
							
							
							
						 
						
							2024-03-12 16:57:40 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								df2b84f7de 
								
							 
						 
						
							
							
								
								Enable kv cache on arc batch ( #10308 )  
							
							 
							
							
							
						 
						
							2024-03-12 16:46:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Lilac09 
								
							 
						 
						
							
							
							
							
								
							
							
								5809a3f5fe 
								
							 
						 
						
							
							
								
								Add run-hbm.sh & add user guide for spr and hbm ( #10357 )  
							
							 
							
							... 
							
							
							
							* add run-hbm.sh
* add spr and hbm guide
* only support quad mode
* only support quad mode
* update special cases
* update special cases 
							
						 
						
							2024-03-12 16:15:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								5d996a5caf 
								
							 
						 
						
							
							
								
								LLM: add benchmark script for deepspeed autotp on gpu ( #10380 )  
							
							 
							
							
							
						 
						
							2024-03-12 15:19:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Keyan (Kyrie) Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								f9c144dc4c 
								
							 
						 
						
							
							
								
								Fix final logits ut failure ( #10377 )  
							
							 
							
							... 
							
							
							
							* Fix final logits ut failure
* Fix final logits ut failure
* Remove Falcon from completion test for now
* Remove Falcon from unit test for now 
							
						 
						
							2024-03-12 14:34:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
							
							
								
							
							
								cc4148636d 
								
							 
						 
						
							
							
								
								[FastChat-integration] Add initial implementation for loader ( #10323 )  
							
							 
							
							... 
							
							
							
							* add initial implementation for loader
* add test method for model_loader
* data
* Refine 
							
						 
						
							2024-03-12 10:54:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								17bdb1a60b 
								
							 
						 
						
							
							
								
								LLM: add whisper models into nightly test ( #10193 )  
							
							 
							
							... 
							
							
							
							* LLM: add whisper models into nightly test
* small fix
* small fix
* add more whisper models
* test all cases
* test specific cases
* collect the csv
* store the resut
* to html
* small fix
* small test
* test all cases
* modify whisper_csv_to_html 
							
						 
						
							2024-03-11 20:00:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								dbcfc5c2fa 
								
							 
						 
						
							
							
								
								LLM: fix error of 'AI-ModelScope/phi-2' hosted by ModelScope hub ( #10364 )  
							
							 
							
							
							
						 
						
							2024-03-11 16:19:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								fe27a6971c 
								
							 
						 
						
							
							
								
								LLM: update modelscope version ( #10367 )  
							
							 
							
							
							
						 
						
							2024-03-11 16:18:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chen, Zhentao 
								
							 
						 
						
							
							
							
							
								
							
							
								a425eaabfc 
								
							 
						 
						
							
							
								
								fix from_pretrained when device_map=None ( #10361 )  
							
							 
							
							... 
							
							
							
							* pr trigger
* fix error when device_map=None
* fix device_map=None 
							
						 
						
							2024-03-11 16:06:12 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								d7b765fd3f 
								
							 
						 
						
							
							
								
								serving xpu memory opt ( #10358 )  
							
							 
							
							
							
						 
						
							2024-03-11 15:21:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								be29833b2b 
								
							 
						 
						
							
							
								
								LLM: fix qwen2 ( #10356 )  
							
							 
							
							
							
						 
						
							2024-03-11 09:29:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhicun 
								
							 
						 
						
							
							
							
							
								
							
							
								9026c08633 
								
							 
						 
						
							
							
								
								Fix llamaindex AutoTokenizer bug ( #10345 )  
							
							 
							
							... 
							
							
							
							* fix tokenizer
* fix AutoTokenizer bug
* modify code style 
							
						 
						
							2024-03-08 16:24:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhicun 
								
							 
						 
						
							
							
							
							
								
							
							
								2a10b53d73 
								
							 
						 
						
							
							
								
								rename docqa.py->rag.py ( #10353 )  
							
							 
							
							
							
						 
						
							2024-03-08 16:07:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Keyan (Kyrie) Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								f1825d7408 
								
							 
						 
						
							
							
								
								Add RMSNorm unit test ( #10190 )  
							
							 
							
							
							
						 
						
							2024-03-08 15:51:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shengsheng Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								370c52090c 
								
							 
						 
						
							
							
								
								Langchain readme ( #10348 )  
							
							 
							
							... 
							
							
							
							* update langchain readme
* update readme
* create new README
* Update README_nativeint4.md 
							
						 
						
							2024-03-08 14:57:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Keyan (Kyrie) Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								7a621a4db0 
								
							 
						 
						
							
							
								
								Fix device_map bug by raise an error when using device_map=xpu ( #10340 )  
							
							 
							
							... 
							
							
							
							* Fix device_map bug by raise an error when using device_map=xpu
* Fix sync error
* Fix python style
* Use invalidInputError instead of invalidOperationError 
							
						 
						
							2024-03-08 13:38:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								1ac193ba02 
								
							 
						 
						
							
							
								
								add rope theta argument ( #10343 )  
							
							 
							
							
							
						 
						
							2024-03-07 17:27:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuxuan Xia 
								
							 
						 
						
							
							
							
							
								
							
							
								0c8d3c9830 
								
							 
						 
						
							
							
								
								Add C-Eval HTML report ( #10294 )  
							
							 
							
							... 
							
							
							
							* Add C-Eval HTML report
* Fix C-Eval workflow pr trigger path
* Fix C-Eval workflow typos
* Add permissions to C-Eval workflow
* Fix C-Eval workflow typo
* Add pandas dependency
* Fix C-Eval workflow typo 
							
						 
						
							2024-03-07 16:44:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								496d18ab6d 
								
							 
						 
						
							
							
								
								LLM: add quantize kv cache support for baichuan 7b and 13b. ( #10330 )  
							
							 
							
							... 
							
							
							
							* add quantize kv cache for baichuan 7b and 13b.
* fix typo.
* fix.
* fix style.
* fix style. 
							
						 
						
							2024-03-07 16:17:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									hxsz1997 
								
							 
						 
						
							
							
							
							
								
							
							
								b7db21414e 
								
							 
						 
						
							
							
								
								Update llamaindex ut ( #10338 )  
							
							 
							
							... 
							
							
							
							* add test_llamaindex of gpu
* add llamaindex gpu tests bash
* add llamaindex cpu tests bash
* update name of Run LLM langchain GPU test
* import llama_index in llamaindex gpu ut
* update the dependency of test_llamaindex
* add Run LLM llamaindex GPU test
* modify import dependency of llamaindex cpu test
* add Run LLM llamaindex test
* update llama_model_path
* delete unused model path
* add LLAMA2_7B_ORIGIN_PATH in llamaindex cpu test 
							
						 
						
							2024-03-07 10:06:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									ZehuaCao 
								
							 
						 
						
							
							
							
							
								
							
							
								267de7abc3 
								
							 
						 
						
							
							
								
								fix fschat DEP version error ( #10325 )  
							
							 
							
							
							
						 
						
							2024-03-06 16:15:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								9ea499ca68 
								
							 
						 
						
							
							
								
								Optimize speculative decoding PVC memory usage ( #10329 )  
							
							 
							
							... 
							
							
							
							* optimize memory
* update
* update
* update
* support other models
* update
* fix style 
							
						 
						
							2024-03-06 09:54:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									dingbaorong 
								
							 
						 
						
							
							
							
							
								
							
							
								cc796848ea 
								
							 
						 
						
							
							
								
								fix typos ( #10274 )  
							
							 
							
							... 
							
							
							
							Co-authored-by: Ariadne <wyn2000330@126.com> 
							
						 
						
							2024-03-05 18:38:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									hxsz1997 
								
							 
						 
						
							
							
							
							
								
							
							
								af11c53473 
								
							 
						 
						
							
							
								
								Add the installation step of postgresql and pgvector on windows in LlamaIndex GPU  support ( #10328 )  
							
							 
							
							... 
							
							
							
							* add the installation of postgresql and pgvector of windows
* fix some format 
							
						 
						
							2024-03-05 18:31:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								0011ff9f64 
								
							 
						 
						
							
							
								
								optimize bge large performance ( #10324 )  
							
							 
							
							
							
						 
						
							2024-03-05 17:06:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
							
							
								
							
							
								178eea5009 
								
							 
						 
						
							
							
								
								upload bigdl-llm wheel to sourceforge for backup ( #10321 )  
							
							 
							
							... 
							
							
							
							* test: upload to sourceforge
* update scripts
* revert 
							
						 
						
							2024-03-05 16:36:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								30d009bca7 
								
							 
						 
						
							
							
								
								LLM: support quantized kv cache for Mistral in transformers >=4.36.0 ( #10326 )  
							
							 
							
							... 
							
							
							
							* support quantize kv for mistral in transformers 4.36
* update mistral support.
* fix style. 
							
						 
						
							2024-03-05 16:23:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									dingbaorong 
								
							 
						 
						
							
							
							
							
								
							
							
								1e6f0c6f1a 
								
							 
						 
						
							
							
								
								Add llamaindex gpu example ( #10314 )  
							
							 
							
							... 
							
							
							
							* add llamaindex example
* fix core dump
* refine readme
* add trouble shooting
* refine readme
---------
Co-authored-by: Ariadne <wyn2000330@126.com> 
							
						 
						
							2024-03-05 13:36:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									dingbaorong 
								
							 
						 
						
							
							
							
							
								
							
							
								fc7f10cd12 
								
							 
						 
						
							
							
								
								add langchain gpu example ( #10277 )  
							
							 
							
							... 
							
							
							
							* first draft
* fix
* add readme for transformer_int4_gpu
* fix doc
* check device_map
* add arc ut test
* fix ut test
* fix langchain ut
* Refine README
* fix gpu mem too high
* fix ut test
---------
Co-authored-by: Ariadne <wyn2000330@126.com> 
							
						 
						
							2024-03-05 13:33:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								5dbbe1a826 
								
							 
						 
						
							
							
								
								[LLM] Support for new arc ut runner ( #10311 )  
							
							 
							
							... 
							
							
							
							* Support for new arc ut runner
* Comment unnecessary OMP_NUM_THREADS related settings for arc uts 
							
						 
						
							2024-03-04 18:42:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								d45e577d8c 
								
							 
						 
						
							
							
								
								[LLM] Test load_low_bit in iGPU perf test on Windows ( #10313 )  
							
							 
							
							
							
						 
						
							2024-03-04 18:03:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								fd81d66047 
								
							 
						 
						
							
							
								
								LLM: Compress some models to save space ( #10315 )  
							
							 
							
							... 
							
							
							
							* LLM: compress some models to save space
* add deleted comments 
							
						 
						
							2024-03-04 17:53:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
							
							
								
							
							
								bab2ee5f9e 
								
							 
						 
						
							
							
								
								update nightly spr perf test ( #10178 )  
							
							 
							
							... 
							
							
							
							* update nightly spr perf test
* update
* update runner lable
* update
* update
* update folder
* revert 
							
						 
						
							2024-03-04 13:46:33 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								ab9fc2485f 
								
							 
						 
						
							
							
								
								LLM: add quantize kv support for llama transformer 4.36 ( #10298 )  
							
							 
							
							... 
							
							
							
							* add quantize kv support for llama transformer 4.36
* fix style.
* fix style. 
							
						 
						
							2024-03-04 10:33:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								58208a5883 
								
							 
						 
						
							
							
								
								Update FAQ document. ( #10300 )  
							
							 
							
							... 
							
							
							
							* Update install_gpu.md
* Update resolve_error.md
* Update README.md
* Update resolve_error.md
* Update README.md
* Update resolve_error.md 
							
						 
						
							2024-03-04 08:35:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								27d9a14989 
								
							 
						 
						
							
							
								
								[LLM] all-on-one update: memory optimize and streaming output ( #10302 )  
							
							 
							
							... 
							
							
							
							* Memory saving for continous in-out pair run and add support for streaming output on MTL iGPU
* Small fix
* Small fix
* Add things back 
							
						 
						
							2024-03-01 18:02:30 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								0ab40917fb 
								
							 
						 
						
							
							
								
								[LLM] Split merged_qk to separated q/k linear ( #10299 )  
							
							 
							
							... 
							
							
							
							* modify merge_qk_linear to separated q/k linear
* update 
							
						 
						
							2024-03-01 16:48:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								f4d7dbcde2 
								
							 
						 
						
							
							
								
								use fused qkv forward in qwen2 ( #10185 )  
							
							 
							
							... 
							
							
							
							* use fused qkv forward in qwen2
* support both
* fix style
* fix rope
* remove pring
* fix style
* clean up 
							
						 
						
							2024-03-01 16:46:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								509e206de0 
								
							 
						 
						
							
							
								
								update doc about gemma random and unreadable output. ( #10297 )  
							
							 
							
							... 
							
							
							
							* Update install_gpu.md
* Update README.md
* Update README.md 
							
						 
						
							2024-03-01 15:41:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								beb9433cec 
								
							 
						 
						
							
							
								
								LLM: Reduce speculative _ipex_optimize_model memory use ( #10281 )  
							
							 
							
							... 
							
							
							
							* use tpp
* update ipex 
							
						 
						
							2024-03-01 13:48:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								f0ff0eebe1 
								
							 
						 
						
							
							
								
								[LLM] Support quantize kv cache for Baichuan2 7B ( #10280 )  
							
							 
							
							... 
							
							
							
							* Add quatized kv cache framework for Baichuan2 7B
* Support quantize kv cache for baichuan2
* Small fix
* Fix python style 
							
						 
						
							2024-03-01 13:35:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								273de341d7 
								
							 
						 
						
							
							
								
								hot-fix silu error import ( #10292 )  
							
							 
							
							
							
						 
						
							2024-03-01 10:11:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shengsheng Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								bcfad555df 
								
							 
						 
						
							
							
								
								revise llamaindex readme ( #10283 )  
							
							 
							
							
							
						 
						
							2024-02-29 17:19:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								232273a1b5 
								
							 
						 
						
							
							
								
								Enable Gemma fused mlp + Gelu ( #10276 )  
							
							 
							
							... 
							
							
							
							* update llama mlp forward
* add all
* fix style check
* split
* update
* update
* update
* fix style 
							
						 
						
							2024-02-29 16:53:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
							
							
								
							
							
								2d930bdca8 
								
							 
						 
						
							
							
								
								Add vLLM bf16 support ( #10278 )  
							
							 
							
							... 
							
							
							
							* add argument load_in_low_bit
* add docs
* modify gpu doc
* done
---------
Co-authored-by: ivy-lv11 <lvzc@lamda.nju.edu.cn> 
							
						 
						
							2024-02-29 16:33:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								13b0bc9075 
								
							 
						 
						
							
							
								
								[LLM] Add quantize_kv optimization for yuan2 model ( #10243 )  
							
							 
							
							... 
							
							
							
							* add initial quantize_kv support for yuan2 model
* fix yuan2 quantize_kv generation
* apply fp16 conv layer optimizations
* disable mlp for quantize_kv 
							
						 
						
							2024-02-29 16:33:26 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhicun 
								
							 
						 
						
							
							
							
							
								
							
							
								4e6cc424f1 
								
							 
						 
						
							
							
								
								Add LlamaIndex RAG ( #10263 )  
							
							 
							
							... 
							
							
							
							* run demo
* format code
* add llamaindex
* add custom LLM with bigdl
* update
* add readme
* begin ut
* add unit test
* add license
* add license
* revised
* update
* modify docs
* remove data folder
* update
* modify prompt
* fixed
* fixed
* fixed 
							
						 
						
							2024-02-29 15:21:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								5d7243067c 
								
							 
						 
						
							
							
								
								LLM: add Baichuan2-13B-Chat 2048-256 to MTL perf ( #10273 )  
							
							 
							
							
							
						 
						
							2024-02-29 13:48:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								a9fd20b6ba 
								
							 
						 
						
							
							
								
								LLM: Update qkv fusion for GGUF-IQ2 ( #10271 )  
							
							 
							
							... 
							
							
							
							* first commit
* update mistral
* fix transformers==4.36.0
* fix
* disable qk for mixtral now
* fix style 
							
						 
						
							2024-02-29 12:49:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jiao Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								6fb65bb9d2 
								
							 
						 
						
							
							
								
								fix in transformers 4.36 ( #10150 )  
							
							 
							
							
							
						 
						
							2024-02-28 18:43:01 -08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shengsheng Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								43dac97e03 
								
							 
						 
						
							
							
								
								Update README.md ( #10260 )  
							
							 
							
							
							
						 
						
							2024-02-29 10:41:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								4b08bc1417 
								
							 
						 
						
							
							
								
								LLM: relax batch check of flash atttention by double check attention mask ( #10270 )  
							
							 
							
							... 
							
							
							
							* relax batch check
* fix
* fix style 
							
						 
						
							2024-02-29 09:39:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								07f36fbfcc 
								
							 
						 
						
							
							
								
								Fix gptj failed to extend ( #10269 )  
							
							 
							
							
							
						 
						
							2024-02-29 09:39:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								cccb02dad1 
								
							 
						 
						
							
							
								
								fix baichuan2 13b 2k input ( #10267 )  
							
							 
							
							
							
						 
						
							2024-02-28 17:20:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								7244fd1ba5 
								
							 
						 
						
							
							
								
								Fix Arc StarCoder wrong query_shape when input is long ( #10268 )  
							
							 
							
							... 
							
							
							
							* Fix Arc StarCoder wrong query_shape when input is long
* Update gptbigcode.py 
							
						 
						
							2024-02-28 17:07:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								a4de3095f3 
								
							 
						 
						
							
							
								
								LLM: Support quantize kv cache in mistral. ( #10261 )  
							
							 
							
							... 
							
							
							
							* init
* update quantize kv. 
							
						 
						
							2024-02-28 14:08:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shengsheng Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								db0d129226 
								
							 
						 
						
							
							
								
								Revert "Add rwkv example ( #9432 )" ( #10264 )  
							
							 
							
							... 
							
							
							
							This reverts commit 6930422b42 . 
							
						 
						
							2024-02-28 11:48:31 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yining Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								6930422b42 
								
							 
						 
						
							
							
								
								Add rwkv example ( #9432 )  
							
							 
							
							... 
							
							
							
							* codeshell fix wrong urls
* restart runner
* add RWKV CPU & GPU example (rwkv-4-world-7b)
* restart runner
* update submodule
* fix runner
* runner-test
---------
Co-authored-by: Shengsheng Huang <shengsheng.huang@intel.com> 
							
						 
						
							2024-02-28 11:41:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Keyan (Kyrie) Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								59861f73e5 
								
							 
						 
						
							
							
								
								Add Deepseek-6.7B ( #9991 )  
							
							 
							
							... 
							
							
							
							* Add new example Deepseek
* Add new example Deepseek
* Add new example Deepseek
* Add new example Deepseek
* Add new example Deepseek
* modify deepseek
* modify deepseek
* Add verified model in README
* Turn cpu_embedding=True in Deepseek example
---------
Co-authored-by: Shengsheng Huang <shengsheng.huang@intel.com> 
							
						 
						
							2024-02-28 11:36:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuxuan Xia 
								
							 
						 
						
							
							
							
							
								
							
							
								2524273198 
								
							 
						 
						
							
							
								
								Update AutoGen README ( #10255 )  
							
							 
							
							... 
							
							
							
							* Update AutoGen README
* Fix AutoGen README typos
* Update AutoGen README
* Update AutoGen README 
							
						 
						
							2024-02-28 11:34:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zheng, Yi 
								
							 
						 
						
							
							
							
							
								
							
							
								2347f611cf 
								
							 
						 
						
							
							
								
								Add cpu and gpu examples of Mamba ( #9797 )  
							
							 
							
							... 
							
							
							
							* Add mamba cpu example
* Add mamba gpu example
* Use a smaller model as the example
* minor fixes
---------
Co-authored-by: Shengsheng Huang <shengsheng.huang@intel.com> 
							
						 
						
							2024-02-28 11:33:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								937e1f7c74 
								
							 
						 
						
							
							
								
								rebase ( #9104 )  
							
							 
							
							... 
							
							
							
							Co-authored-by: leonardozcm <leonardozcm@gmail.com> 
							
						 
						
							2024-02-28 11:18:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									JunX 
								
							 
						 
						
							
							
							
							
								
							
							
								4833067489 
								
							 
						 
						
							
							
								
								fix GPU example link in README.md ( #9533 )  
							
							 
							
							... 
							
							
							
							* fix GPU example link in README.md
* fix GPU links in llm README.md 
							
						 
						
							2024-02-28 11:13:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhicun 
								
							 
						 
						
							
							
							
							
								
							
							
								308e637d0d 
								
							 
						 
						
							
							
								
								Add DeepSeek-MoE-16B-Chat ( #10155 )  
							
							 
							
							... 
							
							
							
							* dsmoe-hf add
* add dsmoe pytorch
* update README
* modify comment
* remove GPU example
* update model name
* format code 
							
						 
						
							2024-02-28 10:12:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guoqiong Song 
								
							 
						 
						
							
							
							
							
								
							
							
								f4a2e32106 
								
							 
						 
						
							
							
								
								Stream llm example for both GPU and CPU ( #9390 )  
							
							 
							
							
							
						 
						
							2024-02-27 15:54:47 -08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								c581c6db30 
								
							 
						 
						
							
							
								
								draft mmint4 ( #10031 )  
							
							 
							
							... 
							
							
							
							change to llm.cpp
support transposed format
revert
implement qkv fuse
fix style
change to vertically pack
change to enable_xetla
fix mlp_fusion_check
remove comments
address comments
add some comments
fix style 
							
						 
						
							2024-02-27 14:55:16 -08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									hxsz1997 
								
							 
						 
						
							
							
							
							
								
							
							
								cba61a2909 
								
							 
						 
						
							
							
								
								Add html report of ppl ( #10218 )  
							
							 
							
							... 
							
							
							
							* remove include and language option, select the corresponding dataset based on the model name in Run
* change the nightly test time
* change the nightly test time of harness and ppl
* save the ppl result to json file
* generate csv file and print table result
* generate html
* modify the way to get parent folder
* update html in parent folder
* add llm-ppl-summary and llm-ppl-summary-html
* modify echo single result
* remove download fp16.csv
* change model name of PR
* move ppl nightly related files to llm/test folder
* reformat
* seperate make_table from make_table_and_csv.py → * separate make_table from make_table_and_csv.py
* separate make_csv from make_table_and_csv.py
* update llm-ppl-html
* remove comment
* add Download fp16.results 
							
						 
						
							2024-02-27 17:37:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhicun 
								
							 
						 
						
							
							
							
							
								
							
							
								6d60982746 
								
							 
						 
						
							
							
								
								Env script: add license ( #10257 )  
							
							 
							
							... 
							
							
							
							* env script
* update README.md
* modify README
* modify cpu info output
* add env-check.sh
* add env-check.bat
* add windows
* modify bat
* add license 
							
						 
						
							2024-02-27 15:29:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								b4fa4ab46f 
								
							 
						 
						
							
							
								
								optimize yuan 2.0 again ( #10252 )  
							
							 
							
							
							
						 
						
							2024-02-27 14:51:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhicun 
								
							 
						 
						
							
							
							
							
								
							
							
								03b9c4930a 
								
							 
						 
						
							
							
								
								UX: Script to print env info ( #10088 )  
							
							 
							
							... 
							
							
							
							* env script
* update README.md
* modify README
* modify cpu info output
* add env-check.sh
* add env-check.bat
* add windows
* modify bat 
							
						 
						
							2024-02-27 14:45:36 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Keyan (Kyrie) Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								843fe546b0 
								
							 
						 
						
							
							
								
								Add CPU and GPU examples for DeciLM-7B ( #9867 )  
							
							 
							
							... 
							
							
							
							* Add cpu and gpu examples for DeciLM-7B
* Add cpu and gpu examples for DeciLM-7B
* Add DeciLM-7B to README table
* modify deciLM
* modify deciLM
* modify deciLM
* Add verified model in README
* Add cpu_embedding=True 
							
						 
						
							2024-02-27 13:15:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								38ae4b372f 
								
							 
						 
						
							
							
								
								Add yuan2-2b to win igpu perf test ( #10250 )  
							
							 
							
							
							
						 
						
							2024-02-27 11:08:33 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								36a9e88104 
								
							 
						 
						
							
							
								
								Speculative Starcoder on CPU ( #10138 )  
							
							 
							
							... 
							
							
							
							* Speculative Starcoder on CPU
* enable kv-cache pre-allocation
* refine codes
* refine
* fix style
* fix style
* fix style
* refine
* refine
* Update speculative.py
* Update gptbigcode.py
* fix style
* Update speculative.py
* enable mixed-datatype layernorm on top of torch API
* adaptive dtype
* Update README.md 
							
						 
						
							2024-02-27 09:57:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								a47989c860 
								
							 
						 
						
							
							
								
								optimize yuan 2.0 performance ( #10244 )  
							
							 
							
							
							
						 
						
							2024-02-26 17:20:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								6c74b99a28 
								
							 
						 
						
							
							
								
								LLM: Update qwen readme ( #10245 )  
							
							 
							
							
							
						 
						
							2024-02-26 17:03:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									hxsz1997 
								
							 
						 
						
							
							
							
							
								
							
							
								15ad2fd72e 
								
							 
						 
						
							
							
								
								Merge pull request  #10226  from zhentaocc/fix_harness  
							
							 
							
							... 
							
							
							
							Fix harness 
							
						 
						
							2024-02-26 16:49:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								f9b75f900b 
								
							 
						 
						
							
							
								
								LLM: Enable qwen target_model ipex ( #10232 )  
							
							 
							
							... 
							
							
							
							* change order
* enable qwen ipex
* update qwen example
* update
* fix style
* update 
							
						 
						
							2024-02-26 16:41:12 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								3e6d188553 
								
							 
						 
						
							
							
								
								LLM: add baichuan2-13b to mtl perf ( #10238 )  
							
							 
							
							
							
						 
						
							2024-02-26 15:55:56 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								e38e29511c 
								
							 
						 
						
							
							
								
								[LLM] Yuan2 MLP and Rotary optimization ( #10231 )  
							
							 
							
							... 
							
							
							
							* Add optimization for rotary embedding
* Add mlp fused optimization
* Python style fix
* Fix rotary embedding due to logits difference
* Small fix 
							
						 
						
							2024-02-26 15:10:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ziteng Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								ea23afc8ec 
								
							 
						 
						
							
							
								
								[LLM]update ipex part in mistral example readme ( #10239 )  
							
							 
							
							... 
							
							
							
							* update ipex part in mistral example readme 
							
						 
						
							2024-02-26 14:35:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								df2f3885ba 
								
							 
						 
						
							
							
								
								[LLM] Enable kv_cache and forward_qkv optimizations for yuan2 ( #10225 )  
							
							 
							
							... 
							
							
							
							* add init kv_cache support for yuan2
* add forward qkv in yuan 
							
						 
						
							2024-02-26 11:29:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
							
							
								
							
							
								85a99e13e8 
								
							 
						 
						
							
							
								
								LLM: Fix ChatGLM3 Speculative Example ( #10236 )  
							
							 
							
							... 
							
							
							
							Fix ChatGLM3 Speculative Example. 
							
						 
						
							2024-02-26 10:57:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chen, Zhentao 
								
							 
						 
						
							
							
							
							
								
							
							
								213ef06691 
								
							 
						 
						
							
							
								
								fix readme  
							
							 
							
							
							
						 
						
							2024-02-24 00:38:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								28513f3978 
								
							 
						 
						
							
							
								
								LLM: support fp16 embedding & add mlp fusion for iq2_xxs ( #10219 )  
							
							 
							
							... 
							
							
							
							* add fp16 embed
* small fixes
* fix style
* fix style
* fix comment 
							
						 
						
							2024-02-23 17:26:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								eeecd9fc08 
								
							 
						 
						
							
							
								
								Python style fix ( #10230 )  
							
							 
							
							
							
						 
						
							2024-02-23 17:21:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								e511bbd8f1 
								
							 
						 
						
							
							
								
								[LLM] Add basic optimization framework for Yuan2 ( #10227 )  
							
							 
							
							... 
							
							
							
							* Add basic optimization framework for Yuan2
* Small fix
* Python style fix
* Small fix
* Small fix 
							
						 
						
							2024-02-23 17:05:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								8ef5482da2 
								
							 
						 
						
							
							
								
								update Gemma readme ( #10229 )  
							
							 
							
							... 
							
							
							
							* Update README.md
* Update README.md
* Update README.md
* Update README.md 
							
						 
						
							2024-02-23 16:57:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chen, Zhentao 
								
							 
						 
						
							
							
							
							
								
							
							
								6fe5344fa6 
								
							 
						 
						
							
							
								
								separate make_csv from the file  
							
							 
							
							
							
						 
						
							2024-02-23 16:33:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chen, Zhentao 
								
							 
						 
						
							
							
							
							
								
							
							
								bfa98666a6 
								
							 
						 
						
							
							
								
								fall back to make_table.py  
							
							 
							
							
							
						 
						
							2024-02-23 16:33:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								19260492c7 
								
							 
						 
						
							
							
								
								LLM: fix action/installation error of mpmath ( #10223 )  
							
							 
							
							... 
							
							
							
							* fix
* test
* fix
* update 
							
						 
						
							2024-02-23 16:14:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								aabfc06977 
								
							 
						 
						
							
							
								
								add gemma example ( #10224 )  
							
							 
							
							... 
							
							
							
							* add gemma gpu example
* Update README.md
* add cpu example
* Update README.md
* Update README.md
* Update generate.py
* Update generate.py 
							
						 
						
							2024-02-23 15:20:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									yb-peng 
								
							 
						 
						
							
							
							
							
								
							
							
								a2c1675546 
								
							 
						 
						
							
							
								
								Add CPU and GPU examples for Yuan2-2B-hf ( #9946 )  
							
							 
							
							... 
							
							
							
							* Add a new CPU example of Yuan2-2B-hf
* Add a new CPU generate.py of Yuan2-2B-hf example
* Add a new GPU example of Yuan2-2B-hf
* Add Yuan2 to README table
* In CPU example:1.Use English as default prompt; 2.Provide modified files in yuan2-2B-instruct
* In GPU example:1.Use English as default prompt;2.Provide modified files
* GPU example:update README
* update Yuan2-2B-hf in README table
* Add CPU example for Yuan2-2B in Pytorch-Models
* Add GPU example for Yuan2-2B in Pytorch-Models
* Add license in generate.py; Modify README
* In GPU Add license in generate.py; Modify README
* In CPU yuan2 modify README
* In GPU yuan2 modify README
* In CPU yuan2 modify README
* In GPU example, updated the readme for Windows GPU supports
* In GPU torch example, updated the readme for Windows GPU supports
* GPU hf example README modified
* GPU example README modified 
							
						 
						
							2024-02-23 14:09:30 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									yb-peng 
								
							 
						 
						
							
							
							
							
								
							
							
								f1f4094a09 
								
							 
						 
						
							
							
								
								Add CPU and GPU examples of phi-2 ( #10014 )  
							
							 
							
							... 
							
							
							
							* Add CPU and GPU examples of phi-2
* In GPU hf example, updated the readme for Windows GPU supports
* In GPU torch example, updated the readme for Windows GPU supports
* update the table in BigDL/README.md
* update the table in BigDL/python/llm/README.md 
							
						 
						
							2024-02-23 14:05:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chen, Zhentao 
								
							 
						 
						
							
							
							
							
								
							
							
								f315c7f93a 
								
							 
						 
						
							
							
								
								Move harness nightly related files to llm/test folder ( #10209 )  
							
							 
							
							... 
							
							
							
							* move harness nightly files to test folder
* change workflow file path accordingly
* use arc01 when pr
* fix path
* fix fp16 csv path 
							
						 
						
							2024-02-23 11:12:36 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								30795bdfbc 
								
							 
						 
						
							
							
								
								Gemma optimization: rms_norm, kv_cache, fused_rope, fused_rope+qkv ( #10212 )  
							
							 
							
							... 
							
							
							
							* gemma optimization
* update
* update
* fix style
* meet code review 
							
						 
						
							2024-02-23 10:07:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guoqiong Song 
								
							 
						 
						
							
							
							
							
								
							
							
								63681af97e 
								
							 
						 
						
							
							
								
								falcon for transformers 4.36 ( #9960 )  
							
							 
							
							... 
							
							
							
							* falcon for transformers 4.36 
							
						 
						
							2024-02-22 17:04:40 -08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jason Dai 
								
							 
						 
						
							
							
							
							
								
							
							
								84d5f40936 
								
							 
						 
						
							
							
								
								Update README.md ( #10213 )  
							
							 
							
							
							
						 
						
							2024-02-22 17:22:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								ce5840a8b7 
								
							 
						 
						
							
							
								
								GPT-J rope optimization on xpu ( #10182 )  
							
							 
							
							... 
							
							
							
							* optimize
* update
* fix style & move use_fuse_rope
* add ipex version check
* fix style
* update
* fix style
* meet comments
* address comments
* fix style 
							
						 
						
							2024-02-22 16:25:12 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
							
							
								
							
							
								f445217d02 
								
							 
						 
						
							
							
								
								LLM: Update IPEX to 2.2.0+cpu and Refactor for _ipex_optimize ( #10189 )  
							
							 
							
							... 
							
							
							
							Update IPEX to 2.2.0+cpu and refactor for _ipex_optimize. 
							
						 
						
							2024-02-22 16:01:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								c876d9b5ca 
								
							 
						 
						
							
							
								
								Support for MPT rotary embedding ( #10208 )  
							
							 
							
							
							
						 
						
							2024-02-22 15:16:31 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								5e1fee5e05 
								
							 
						 
						
							
							
								
								LLM: add GGUF-IQ2 examples ( #10207 )  
							
							 
							
							... 
							
							
							
							* add iq2 examples
* small fix
* meet code review
* fix
* meet review
* small fix 
							
						 
						
							2024-02-22 14:18:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								21de2613ce 
								
							 
						 
						
							
							
								
								[LLM] Add model loading time record for all-in-one benchmark ( #10201 )  
							
							 
							
							... 
							
							
							
							* Add model loading time record in csv for all-in-one benchmark
* Small fix
* Small fix to number after . 
							
						 
						
							2024-02-22 13:57:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ovo233 
								
							 
						 
						
							
							
							
							
								
							
							
								60e11b6739 
								
							 
						 
						
							
							
								
								LLM: Add mlp layer unit tests ( #10200 )  
							
							 
							
							... 
							
							
							
							* add mlp layer unit tests
* add download baichuan-13b
* exclude llama for now
* install additional packages
* rename bash file
* switch to Baichuan2
* delete attention related code
* fix name errors in yml file 
							
						 
						
							2024-02-22 13:44:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								ca1166a0e5 
								
							 
						 
						
							
							
								
								[LLM] Add quantize kv_cache for Baichuan2-13B ( #10203 )  
							
							 
							
							... 
							
							
							
							* add quantize kv_cache for baichuan2-13b
* style fix 
							
						 
						
							2024-02-22 13:43:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								34ee1aa91f 
								
							 
						 
						
							
							
								
								LLM: add esimd sdp support for chatglm3 ( #10205 )  
							
							 
							
							... 
							
							
							
							* add esimd sdp support
* fix style 
							
						 
						
							2024-02-22 13:37:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuxuan Xia 
								
							 
						 
						
							
							
							
							
								
							
							
								7cbc2429a6 
								
							 
						 
						
							
							
								
								Fix C-Eval ChatGLM loading issue ( #10206 )  
							
							 
							
							... 
							
							
							
							* Add c-eval workflow and modify running files
* Modify the chatglm evaluator file
* Modify the ceval workflow for triggering test
* Modify the ceval workflow file
* Modify the ceval workflow file
* Modify ceval workflow
* Adjust the ceval dataset download
* Add ceval workflow dependencies
* Modify ceval workflow dataset download
* Add ceval test dependencies
* Add ceval test dependencies
* Correct the result print
* Fix the nightly test trigger time
* Fix ChatGLM loading issue 
							
						 
						
							2024-02-22 10:00:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								94cb16fe40 
								
							 
						 
						
							
							
								
								[LLM] Small updates to Win GPU Install Doc ( #10199 )  
							
							 
							
							... 
							
							
							
							* Make Offline installer as default for win gpu doc for oneAPI
* Small other fixes 
							
						 
						
							2024-02-21 17:58:40 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								9975b029c5 
								
							 
						 
						
							
							
								
								LLM: add qlora finetuning example using trl.SFTTrainer ( #10183 )  
							
							 
							
							
							
						 
						
							2024-02-21 16:40:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								f7c96b19ef 
								
							 
						 
						
							
							
								
								LLM: support iq2 for mixtral ( #10191 )  
							
							 
							
							... 
							
							
							
							* support name mapping for mixtral
* support mixtral mixed quantization
* fix style
* fix 
							
						 
						
							2024-02-21 16:00:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									yb-peng 
								
							 
						 
						
							
							
							
							
								
							
							
								b1a97b71a9 
								
							 
						 
						
							
							
								
								Harness eval: Add is_last parameter and fix logical operator in highlight_vals ( #10192 )  
							
							 
							
							... 
							
							
							
							* Add is_last parameter and fix logical operator in highlight_vals
* Add script to update HTML files in parent folder
* Add running update_html_in_parent_folder.py in summarize step
* Add licence info
* Remove update_html_in_parent_folder.py in Summarize the results for pull request 
							
						 
						
							2024-02-21 14:45:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhicun 
								
							 
						 
						
							
							
							
							
								
							
							
								c7e839e66c 
								
							 
						 
						
							
							
								
								Add Qwen1.5-7B-Chat ( #10113 )  
							
							 
							
							... 
							
							
							
							* add Qwen1.5-7B-Chat
* modify Qwen1.5 example
* update README
* update prompt format
* update folder name and example README
* add Chinese prompt sample output
* update link in README
* correct the link
* update transformer version 
							
						 
						
							2024-02-21 13:29:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								56ad781f2f 
								
							 
						 
						
							
							
								
								qwen2 cpu fix ( #10187 )  
							
							 
							
							
							
						 
						
							2024-02-21 11:23:51 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chen, Zhentao 
								
							 
						 
						
							
							
							
							
								
							
							
								39d37bd042 
								
							 
						 
						
							
							
								
								upgrade harness package version in workflow ( #10188 )  
							
							 
							
							... 
							
							
							
							* upgrade harness
* update readme 
							
						 
						
							2024-02-21 11:21:30 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								001c13243e 
								
							 
						 
						
							
							
								
								[LLM] Add support for low_low_bit benchmark on Windows GPU ( #10167 )  
							
							 
							
							... 
							
							
							
							* Add support for low_low_bit performance test on Windows GPU
* Small fix
* Small fix
* Save memory during converting model process
* Drop the results for first time when loading in low bit on mtl igpu for better performance
* Small fix 
							
						 
						
							2024-02-21 10:51:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ziteng Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								276ef0e885 
								
							 
						 
						
							
							
								
								Speculative Ziya on CPU ( #10160 )  
							
							 
							
							... 
							
							
							
							* Speculative Ziya on CPU
* Without part of Accelerate with BIGDL_OPT_IPEX 
							
						 
						
							2024-02-21 10:30:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								4fbf449c2d 
								
							 
						 
						
							
							
								
								for rwkv4 ( #10179 )  
							
							 
							
							
							
						 
						
							2024-02-21 10:11:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									yb-peng 
								
							 
						 
						
							
							
							
							
								
							
							
								de3dc609ee 
								
							 
						 
						
							
							
								
								Modify harness evaluation workflow ( #10174 )  
							
							 
							
							... 
							
							
							
							* Modify table head in harness
* Specify the file path of fp16.csv
* change run to run nightly and run pr to debug
* Modify the way to get fp16.csv to downloading from github
* Change the method to calculate diff in html table
* Change the method to calculate diff in html table
* Re-arrange job order
* Re-arrange job order
* Change limit
* Change fp16.csv  path
* Change highlight rules
* Change limit 
							
						 
						
							2024-02-20 18:55:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								3288acb8de 
								
							 
						 
						
							
							
								
								LLM : Support embedding quantization (only q2k now) ( #10170 )  
							
							 
							
							... 
							
							
							
							* basic logic added
* basic support
* support save&load, update mixed strategy
* fix style
* use int8 for lm_head
* add check for xpu 
							
						 
						
							2024-02-20 16:56:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									hxsz1997 
								
							 
						 
						
							
							
							
							
								
							
							
								6e10d98a8d 
								
							 
						 
						
							
							
								
								Fix some typos ( #10175 )  
							
							 
							
							... 
							
							
							
							* add llm-ppl workflow
* update the DATASET_DIR
* test multiple precisions
* modify nightly test
* match the updated ppl code
* add matrix.include
* fix the include error
* update the include
* add more model
* update the precision of include
* update nightly time and add more models
* fix the workflow_dispatch description, change default model of pr and modify the env
* modify workflow_dispatch language options
* modify options
* modify language options
* modeify workflow_dispatch type
* modify type
* modify the type of language
* change seq_len type
* fix some typos
* revert changes to stress_test.txt 
							
						 
						
							2024-02-20 14:14:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhicun 
								
							 
						 
						
							
							
							
							
								
							
							
								add3899311 
								
							 
						 
						
							
							
								
								Add ziya CPU example ( #10114 )  
							
							 
							
							... 
							
							
							
							* ziya on CPU
* add README for ziya
* specify use_cache
* add arc CPU
* update prompt format
* update link
* add comments to emphasize use_cache
* update pip cmd 
							
						 
						
							2024-02-20 13:59:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								2bb96c775c 
								
							 
						 
						
							
							
								
								LLM: fix device setting during saving optimized model ( #10154 )  
							
							 
							
							
							
						 
						
							2024-02-20 09:52:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								1f6d5b9f30 
								
							 
						 
						
							
							
								
								enable fused rmsnorm and rope qwen2 ( #10163 )  
							
							 
							
							... 
							
							
							
							* qwen2
* change convert
* cleanup 
							
						 
						
							2024-02-20 08:33:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									yb-peng 
								
							 
						 
						
							
							
							
							
								
							
							
								e31210ba00 
								
							 
						 
						
							
							
								
								Modify html table style and add fp16.csv in harness ( #10169 )  
							
							 
							
							... 
							
							
							
							* Specify the version of pandas in harness evaluation workflow
* Specify the version of pandas in harness evaluation workflow
* Modify html table style and add fp16.csv in harness
* Modify comments 
							
						 
						
							2024-02-19 18:13:40 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								6c09aed90d 
								
							 
						 
						
							
							
								
								LLM: add qwen_1.5_7b model for arc perf test ( #10166 )  
							
							 
							
							... 
							
							
							
							* LLM: add qwen_1.5_7b model for arc perf test
* small fix
* revert some codes 
							
						 
						
							2024-02-19 17:21:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuxuan Xia 
								
							 
						 
						
							
							
							
							
								
							
							
								209122559a 
								
							 
						 
						
							
							
								
								Add Ceval workflow and modify the result printing ( #10140 )  
							
							 
							
							... 
							
							
							
							* Add c-eval workflow and modify running files
* Modify the chatglm evaluator file
* Modify the ceval workflow for triggering test
* Modify the ceval workflow file
* Modify the ceval workflow file
* Modify ceval workflow
* Adjust the ceval dataset download
* Add ceval workflow dependencies
* Modify ceval workflow dataset download
* Add ceval test dependencies
* Add ceval test dependencies
* Correct the result print 
							
						 
						
							2024-02-19 17:06:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								f8730e8dc1 
								
							 
						 
						
							
							
								
								Skip rescale rwkv linear when load_low_bit ( #10164 )  
							
							 
							
							... 
							
							
							
							* rwkv_ld 
							
						 
						
							2024-02-19 15:56:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								3e2af5ec0a 
								
							 
						 
						
							
							
								
								Fix IPEX Baichuan Speculative ( #10162 )  
							
							 
							
							... 
							
							
							
							* Fix IPEX Baichuan Speculative
* compatible with 13B
* Update speculative.py 
							
						 
						
							2024-02-19 15:27:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								23c91cdce6 
								
							 
						 
						
							
							
								
								[LLM] Add min_step_draft in speculative decoding ( #10142 )  
							
							 
							
							... 
							
							
							
							* Fix gptj kvcache & position id
* Add min_draft_tokens in speculative decoding
* fix style
* update 
							
						 
						
							2024-02-19 14:31:41 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chen, Zhentao 
								
							 
						 
						
							
							
							
							
								
							
							
								14ba2c5135 
								
							 
						 
						
							
							
								
								Harness: remove deprecated files ( #10165 )  
							
							 
							
							
							
						 
						
							2024-02-19 14:27:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								d3591383d5 
								
							 
						 
						
							
							
								
								LLM : Add CPU chatglm3 speculative example ( #10004 )  
							
							 
							
							... 
							
							
							
							* init chatglm
* update
* update 
							
						 
						
							2024-02-19 13:38:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								f2417e083c 
								
							 
						 
						
							
							
								
								LLM: enable chatglm3-6b target_model ipex ( #10085 )  
							
							 
							
							... 
							
							
							
							* init
* always make casual_mask
* not return last tensor
* update
* optimize_model = False
* enable optimized=False
* enable optimized_model=true
* speed_up ipex target_model
* remove if True
* use group_size
* update python style
* update
* update 
							
						 
						
							2024-02-19 13:38:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								177273c1a4 
								
							 
						 
						
							
							
								
								IPEX Speculative Support for Baichuan2 7B ( #10112 )  
							
							 
							
							... 
							
							
							
							* IPEX Speculative Support for Baichuan2 7B
* fix license problems
* refine 
							
						 
						
							2024-02-19 09:12:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								1508d6b089 
								
							 
						 
						
							
							
								
								Fix gptj kvcache & position id ( #10141 )  
							
							 
							
							
							
						 
						
							2024-02-18 10:02:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									yb-peng 
								
							 
						 
						
							
							
							
							
								
							
							
								b4dc33def6 
								
							 
						 
						
							
							
								
								In harness-evaluation workflow, add statistical tables ( #10118 )  
							
							 
							
							... 
							
							
							
							* chnage storage
* fix typo
* change label
* change label to arc03
* change needs in the last step
* add generate csv in harness/make_table_results.py
* modify needs in the last job
* add csv to html
* mfix path issue in llm-harness-summary-nightly
* modify output_path
* modify args in make_table_results.py
* modify make table command in summary
* change pr env label
* remove irrelevant code in summary; add set output path step; add limit in harness run
* re-organize code structure
* modify limit in run harness
* modify csv_to_html input path
* modify needs in summary-nightly 
							
						 
						
							2024-02-08 19:01:05 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								4d33aac7f9 
								
							 
						 
						
							
							
								
								quick fix qwen2 fp8 kv cache ( #10135 )  
							
							 
							
							
							
						 
						
							2024-02-08 17:04:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								39d90839aa 
								
							 
						 
						
							
							
								
								LLM: add quantize kv cache for llama. ( #10086 )  
							
							 
							
							... 
							
							
							
							* feat: add quantize kv cache for llama.
* fix style.
* add quantized attention forward function.
* revert style.
* fix style.
* fix style.
* update quantized kv cache and add quantize_qkv
* fix style.
* fix style.
* optimize quantize kv cache.
* fix style. 
							
						 
						
							2024-02-08 16:49:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								d848efe17c 
								
							 
						 
						
							
							
								
								add quantize kv cache support for qwen2 ( #10134 )  
							
							 
							
							
							
						 
						
							2024-02-08 16:17:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								3f79128ed7 
								
							 
						 
						
							
							
								
								[LLM] Enable kv_cache optimization for Qwen2 on transformers-v4.37.0 ( #10131 )  
							
							 
							
							... 
							
							
							
							* add support for kv_cache optimization on transformers-v4.37.0
* enable attention forward
* style fix
* disable rotary for now 
							
						 
						
							2024-02-08 14:20:26 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								063dc145ac 
								
							 
						 
						
							
							
								
								LLM: basic support for q2k ( #10132 )  
							
							 
							
							... 
							
							
							
							* basic support for q2k
* fix style 
							
						 
						
							2024-02-08 13:52:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								11fe5a87ec 
								
							 
						 
						
							
							
								
								LLM: add Modelscope model example ( #10126 )  
							
							 
							
							
							
						 
						
							2024-02-08 11:18:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								0cf6a12691 
								
							 
						 
						
							
							
								
								LLM: add default torch_dtype for fp16. ( #10124 )  
							
							 
							
							... 
							
							
							
							* set default torch_dtype for fp16.
* fix style.
* bug fix.
* update bug fix. 
							
						 
						
							2024-02-08 10:24:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								1aa0c623ce 
								
							 
						 
						
							
							
								
								disable fused layer norm on UHD ( #10130 )  
							
							 
							
							
							
						 
						
							2024-02-08 10:20:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								a8450fc300 
								
							 
						 
						
							
							
								
								[LLM] Support MLP optimization for Qwen1.5 ( #10123 )  
							
							 
							
							
							
						 
						
							2024-02-08 09:15:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								81ed65fbe7 
								
							 
						 
						
							
							
								
								[LLM] Add qwen1.5-7B in iGPU perf ( #10127 )  
							
							 
							
							... 
							
							
							
							* Add qwen1.5 test config yaml with transformers 4.37.0
* Update for yaml file 
							
						 
						
							2024-02-07 22:31:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								0fcfbfaf6f 
								
							 
						 
						
							
							
								
								LLM: add rwkv5 eagle GPU HF example ( #10122 )  
							
							 
							
							... 
							
							
							
							* LLM: add rwkv5 eagle example
* fix
* fix link 
							
						 
						
							2024-02-07 16:58:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								925f82107e 
								
							 
						 
						
							
							
								
								LLM: support models hosted by modelscope ( #10106 )  
							
							 
							
							
							
						 
						
							2024-02-07 16:46:36 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								c1ec3d8921 
								
							 
						 
						
							
							
								
								LLM: update FAQ about too many open files ( #10119 )  
							
							 
							
							
							
						 
						
							2024-02-07 15:02:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Keyan (Kyrie) Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								2e80701f58 
								
							 
						 
						
							
							
								
								Unit test on final logits and the logits of the last attention layer ( #10093 )  
							
							 
							
							... 
							
							
							
							* Add unit test on final logits and attention
* Add unit test on final logits and attention
* Modify unit test on final logits and attention 
							
						 
						
							2024-02-07 14:25:36 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuxuan Xia 
								
							 
						 
						
							
							
							
							
								
							
							
								3832eb0ce0 
								
							 
						 
						
							
							
								
								Add ChatGLM C-Eval Evaluator ( #10095 )  
							
							 
							
							... 
							
							
							
							* Add ChatGLM ceval evaluator
* Modify ChatGLM Evaluator Reference 
							
						 
						
							2024-02-07 11:27:06 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								63050c954d 
								
							 
						 
						
							
							
								
								fix ( #10117 )  
							
							 
							
							
							
						 
						
							2024-02-07 11:05:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								d3d2ee1b63 
								
							 
						 
						
							
							
								
								LLM: add speech T5 GPU example ( #10090 )  
							
							 
							
							... 
							
							
							
							* add speech t5 example
* fix
* fix 
							
						 
						
							2024-02-07 10:50:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								2f4c754759 
								
							 
						 
						
							
							
								
								LLM: add bark gpu example ( #10091 )  
							
							 
							
							... 
							
							
							
							* add bark gpu example
* fix
* fix license
* add bark
* add example
* fix
* another way 
							
						 
						
							2024-02-07 10:47:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
							
							
								
							
							
								8953acd7d6 
								
							 
						 
						
							
							
								
								[LLM] Fix log condition for BIGDL_OPT_IPEX ( #10115 )  
							
							 
							
							... 
							
							
							
							Fix log condition for BIGDL_OPT_IPEX 
							
						 
						
							2024-02-07 10:27:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								0eccb94d75 
								
							 
						 
						
							
							
								
								remove text-generation-webui from bigdl repo ( #10107 )  
							
							 
							
							
							
						 
						
							2024-02-06 17:46:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ovo233 
								
							 
						 
						
							
							
							
							
								
							
							
								2aaa21c41d 
								
							 
						 
						
							
							
								
								LLM: Update ppl tests ( #10092 )  
							
							 
							
							... 
							
							
							
							* update ppl tests
* use load_dataset api
* add exception handling
* add language argument
* address comments 
							
						 
						
							2024-02-06 17:31:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								3a46b57253 
								
							 
						 
						
							
							
								
								[LLM] Add RWKV4 HF GPU Example ( #10105 )  
							
							 
							
							... 
							
							
							
							* Add GPU HF example for RWKV 4
* Add link to rwkv4
* fix 
							
						 
						
							2024-02-06 16:30:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								518ef95abc 
								
							 
						 
						
							
							
								
								Small fix for Nonetype error ( #10104 )  
							
							 
							
							
							
						 
						
							2024-02-06 14:58:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								d61f4905ac 
								
							 
						 
						
							
							
								
								LLM: 2bit quantization initial support ( #10042 )  
							
							 
							
							... 
							
							
							
							* basis quantize support
* fix new module name
* small update
* and mixed int4 with iq2_xxs
* remove print
* code refactor
* fix style
* meet code review 
							
						 
						
							2024-02-06 14:58:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									dingbaorong 
								
							 
						 
						
							
							
							
							
								
							
							
								36c9442c6d 
								
							 
						 
						
							
							
								
								Arc Stable version test ( #10087 )  
							
							 
							
							... 
							
							
							
							* add batch_size in stable version test
* add batch_size in excludes
* add excludes for batch_size
* fix ci
* triger regression test
* fix xpu version
* disable ci
* address kai's comment
---------
Co-authored-by: Ariadne <wyn2000330@126.com> 
							
						 
						
							2024-02-06 10:23:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jiao Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								33b9e7744d 
								
							 
						 
						
							
							
								
								fix dimension ( #10097 )  
							
							 
							
							
							
						 
						
							2024-02-05 15:07:38 -08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								4b02ff188b 
								
							 
						 
						
							
							
								
								[WebUI] Add prompt format and stopping words for Qwen ( #10066 )  
							
							 
							
							... 
							
							
							
							* add prompt format and stopping_words for qwen mdoel
* performance optimization
* optimize
* update
* meet comments 
							
						 
						
							2024-02-05 18:23:13 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								0aecd8637b 
								
							 
						 
						
							
							
								
								LLM: small fix for the html script ( #10094 )  
							
							 
							
							
							
						 
						
							2024-02-05 17:27:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhicun 
								
							 
						 
						
							
							
							
							
								
							
							
								7d2be7994f 
								
							 
						 
						
							
							
								
								add phixtral and optimize phi-moe ( #10052 )  
							
							 
							
							
							
						 
						
							2024-02-05 11:12:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhicun 
								
							 
						 
						
							
							
							
							
								
							
							
								676d6923f2 
								
							 
						 
						
							
							
								
								LLM: modify transformersembeddings.embed() in langchain ( #10051 )  
							
							 
							
							
							
						 
						
							2024-02-05 10:42:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								ad050107b3 
								
							 
						 
						
							
							
								
								LLM: fix mpt load_low_bit issue ( #10075 )  
							
							 
							
							... 
							
							
							
							* fix
* retry
* retry 
							
						 
						
							2024-02-05 10:17:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								9050991e4e 
								
							 
						 
						
							
							
								
								fix gradio check issue temply ( #10082 )  
							
							 
							
							
							
						 
						
							2024-02-04 16:46:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								c2e562d037 
								
							 
						 
						
							
							
								
								LLM: add batch_size to the csv and html ( #10080 )  
							
							 
							
							... 
							
							
							
							* LLM: add batch_size to the csv and html
* small fix 
							
						 
						
							2024-02-04 16:35:44 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								7e49fbc5dd 
								
							 
						 
						
							
							
								
								LLM: make finetuning examples more common for other models ( #10078 )  
							
							 
							
							
							
						 
						
							2024-02-04 16:03:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								90f004b80b 
								
							 
						 
						
							
							
								
								remove benchmarkwrapper form deepspeed example ( #10079 )  
							
							 
							
							
							
						 
						
							2024-02-04 15:42:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								8e33cb0f38 
								
							 
						 
						
							
							
								
								LLM: support speecht5_tts ( #10077 )  
							
							 
							
							... 
							
							
							
							* support speecht5_tts
* fix 
							
						 
						
							2024-02-04 13:26:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									ivy-lv11 
								
							 
						 
						
							
							
							
							
								
							
							
								428b7105f6 
								
							 
						 
						
							
							
								
								Add HF and PyTorch example InternLM2 ( #10061 )  
							
							 
							
							
							
						 
						
							2024-02-04 10:25:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								77be19bb97 
								
							 
						 
						
							
							
								
								LLM: Support gpt-j in speculative decoding ( #10067 )  
							
							 
							
							... 
							
							
							
							* gptj
* support gptj in speculative decoding
* fix
* update readme
* small fix 
							
						 
						
							2024-02-02 14:54:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								19183ef476 
								
							 
						 
						
							
							
								
								[WebUI] Reset bigdl-llm loader options with default value ( #10064 )  
							
							 
							
							... 
							
							
							
							* reset bigdl-llm loader options with default value
* remove options which maybe complex for naive users 
							
						 
						
							2024-02-01 15:45:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								6e0f1a1e92 
								
							 
						 
						
							
							
								
								use apply_rotary_pos_emb_cache_freq_xpu in mixtral ( #10060 )  
							
							 
							
							... 
							
							
							
							* use apply_rotary_pos_emb_cache_freq_xpu in mixtral
* fix style 
							
						 
						
							2024-02-01 15:40:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								aae20d728e 
								
							 
						 
						
							
							
								
								LLM: Add initial DPO finetuning example ( #10021 )  
							
							 
							
							
							
						 
						
							2024-02-01 14:18:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								601024f418 
								
							 
						 
						
							
							
								
								Mistral CPU example of speculative decoding ( #10024 )  
							
							 
							
							... 
							
							
							
							* Mistral CPU example of speculative decoding
* update transformres version
* update example
* Update README.md 
							
						 
						
							2024-02-01 10:52:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								968e70544d 
								
							 
						 
						
							
							
								
								Enable IPEX Mistral in Speculative ( #10059 )  
							
							 
							
							
							
						 
						
							2024-02-01 10:48:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								3ca03d4e97 
								
							 
						 
						
							
							
								
								Add deepmind sample into bigdl-llm speculative decoding ( #10041 )  
							
							 
							
							... 
							
							
							
							* migrate deepmind sample
* update
* meet comments
* fix style
* fix style 
							
						 
						
							2024-02-01 09:57:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								d2d3f6b091 
								
							 
						 
						
							
							
								
								LLM: ensure the result of daily arc perf test ( #10016 )  
							
							 
							
							... 
							
							
							
							* ensure the result of daily arc perf test
* small fix
* small fix
* small fix
* small fix
* small fix
* small fix
* small fix
* small fix
* small fix
* small fix
* concat more csvs
* small fix
* revert some files 
							
						 
						
							2024-01-31 18:26:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								9724939499 
								
							 
						 
						
							
							
								
								temporarily disable bloom 2k input ( #10056 )  
							
							 
							
							
							
						 
						
							2024-01-31 17:49:12 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								8c8fc148c9 
								
							 
						 
						
							
							
								
								LLM: add rwkv 5 ( #10048 )  
							
							 
							
							
							
						 
						
							2024-01-31 15:54:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								a9018a0e95 
								
							 
						 
						
							
							
								
								LLM: modify the GPU example for redpajama model ( #10044 )  
							
							 
							
							... 
							
							
							
							* LLM: modify the GPU example for redpajama model
* small fix 
							
						 
						
							2024-01-31 14:32:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuxuan Xia 
								
							 
						 
						
							
							
							
							
								
							
							
								95636cad97 
								
							 
						 
						
							
							
								
								Add AutoGen CPU and XPU Example ( #9980 )  
							
							 
							
							... 
							
							
							
							* Add AutoGen example
* Adjust AutoGen README
* Adjust AutoGen README
* Change AutoGen README
* Change AutoGen README 
							
						 
						
							2024-01-31 11:31:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								7284edd9b7 
								
							 
						 
						
							
							
								
								Vicuna CPU example of speculative decoding ( #10018 )  
							
							 
							
							... 
							
							
							
							* Vicuna CPU example of speculative decoding
* Update speculative.py
* Update README.md
* add requirements for ipex
* Update README.md
* Update speculative.py
* Update speculative.py 
							
						 
						
							2024-01-31 11:23:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								7e5cd42a5c 
								
							 
						 
						
							
							
								
								LLM : Update optimize ipex bf16 ( #10038 )  
							
							 
							
							... 
							
							
							
							* use 4.35.2 and remove
* update rmsnorm
* remove
* remove
* update python style
* update
* update python style
* update
* fix style
* update
* remove whitespace 
							
						 
						
							2024-01-31 10:59:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								fb53b994f8 
								
							 
						 
						
							
							
								
								LLM : Add llama ipex optimized ( #10046 )  
							
							 
							
							... 
							
							
							
							* init ipex
* remove padding 
							
						 
						
							2024-01-31 10:38:46 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								3685622f29 
								
							 
						 
						
							
							
								
								LLM: fix llama 4.36 forward( #10047 )  
							
							 
							
							
							
						 
						
							2024-01-31 10:31:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								53a5140eff 
								
							 
						 
						
							
							
								
								Optimize rwkv v5 rest token again ( #10043 )  
							
							 
							
							
							
						 
						
							2024-01-31 10:01:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								b1ff28ceb6 
								
							 
						 
						
							
							
								
								LLama2 CPU example of speculative decoding ( #9962 )  
							
							 
							
							... 
							
							
							
							* LLama2 example of speculative decoding
* add docs
* Update speculative.py
* Update README.md
* Update README.md
* Update speculative.py
* remove autocast 
							
						 
						
							2024-01-31 09:45:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								0fcad6ce14 
								
							 
						 
						
							
							
								
								LLM: add gpu example for redpajama models ( #10040 )  
							
							 
							
							
							
						 
						
							2024-01-30 19:39:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
							
							
								
							
							
								9978089796 
								
							 
						 
						
							
							
								
								[LLM] Enable BIGDL_OPT_IPEX in speculative baichuan2 13b example  ( #10028 )  
							
							 
							
							... 
							
							
							
							Enable BIGDL_OPT_IPEX in speculative baichuan2 13b example 
							
						 
						
							2024-01-30 17:11:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ovo233 
								
							 
						 
						
							
							
							
							
								
							
							
								226f398c2a 
								
							 
						 
						
							
							
								
								fix ppl test errors ( #10036 )  
							
							 
							
							
							
						 
						
							2024-01-30 16:26:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								13e61738c5 
								
							 
						 
						
							
							
								
								hide detail memory for each token in benchmark_utils.py ( #10037 )  
							
							 
							
							
							
						 
						
							2024-01-30 16:04:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								6b63ba23d1 
								
							 
						 
						
							
							
								
								LLM: add full module name during convert ( #10035 )  
							
							 
							
							
							
						 
						
							2024-01-30 14:43:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								7dfa6dbe46 
								
							 
						 
						
							
							
								
								add rwkv time shift optimization ( #10032 )  
							
							 
							
							
							
						 
						
							2024-01-30 14:10:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
							
							
								
							
							
								f57d0fda8b 
								
							 
						 
						
							
							
								
								[LLM] Use IPEX Optimization for Self Speculative Decoding ( #9997 )  
							
							 
							
							... 
							
							
							
							Use IPEX Optimization for Self Speculative Decoding 
							
						 
						
							2024-01-30 09:11:06 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								ccf8f613fb 
								
							 
						 
						
							
							
								
								LLM: update fp16 Linear on ARC/FLEX ( #10023 )  
							
							 
							
							
							
						 
						
							2024-01-29 18:25:26 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
							
							
								
							
							
								824c8029d7 
								
							 
						 
						
							
							
								
								Fix "local variable 'model' referenced before assignment" ( #10022 )  
							
							 
							
							
							
						 
						
							2024-01-29 16:18:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								cc3f122f6a 
								
							 
						 
						
							
							
								
								Baichuan2 CPU example of speculative decoding ( #10003 )  
							
							 
							
							... 
							
							
							
							* Baichuan2 CPU example of speculative decoding
* Update generate.py
* Update README.md
* Update generate.py
* Update generate.py
* Update generate.py
* fix default model
* fix wrong chinese coding
* Update generate.py
* update prompt
* update sample outputs
* baichuan 7b needs transformers==4.31.0
* rename example file's name 
							
						 
						
							2024-01-29 14:21:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
							
							
								
							
							
								f37e4702bc 
								
							 
						 
						
							
							
								
								[LLM] Use IPEX Optimization for BF16 Model ( #9988 )  
							
							 
							
							... 
							
							
							
							Use IPEX Optimization for BF16 Model by env BIGDL_OPT_IPEX=true 
							
						 
						
							2024-01-29 11:28:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								440cfe18ed 
								
							 
						 
						
							
							
								
								LLM: GPU Example Updates for Windows ( #9992 )  
							
							 
							
							... 
							
							
							
							* modify aquila
* modify aquila2
* add baichuan
* modify baichuan2
* modify blue-lm
* modify chatglm3
* modify chinese-llama2
* modiy codellama
* modify distil-whisper
* modify dolly-v1
* modify dolly-v2
* modify falcon
* modify flan-t5
* modify gpt-j
* modify internlm
* modify llama2
* modify mistral
* modify mixtral
* modify mpt
* modify phi-1_5
* modify qwen
* modify qwen-vl
* modify replit
* modify solar
* modify starcoder
* modify vicuna
* modify voiceassistant
* modify whisper
* modify yi
* modify aquila2
* modify baichuan
* modify baichuan2
* modify blue-lm
* modify chatglm2
* modify chatglm3
* modify codellama
* modify distil-whisper
* modify dolly-v1
* modify dolly-v2
* modify flan-t5
* modify llama2
* modify llava
* modify mistral
* modify mixtral
* modify phi-1_5
* modify qwen-vl
* modify replit
* modify solar
* modify starcoder
* modify yi
* correct the comments
* remove cpu_embedding in code for whisper and distil-whisper
* remove comment
* remove cpu_embedding for voice assistant
* revert modify voice assistant
* modify for voice assistant
* add comment for voice assistant
* fix comments
* fix comments 
							
						 
						
							2024-01-29 11:25:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								c6d4f91777 
								
							 
						 
						
							
							
								
								[LLM] Add UTs of load_low_bit for transformers-style API ( #10001 )  
							
							 
							
							... 
							
							
							
							* Add uts for transformers api load_low_bit generation
* Small fixes
* Remove replit-code for CPU tests due to current load_low_bit issue on MPT
* Small change
* Small reorganization to llm unit tests on CPU
* Small fixes 
							
						 
						
							2024-01-29 10:18:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								d720554d43 
								
							 
						 
						
							
							
								
								simplify quantize kv cache api ( #10011 )  
							
							 
							
							
							
						 
						
							2024-01-29 09:23:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								a3322e2a6c 
								
							 
						 
						
							
							
								
								add fp8 e5 to use_xmx ( #10015 )  
							
							 
							
							
							
						 
						
							2024-01-26 18:29:46 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Qiyuan Gong 
								
							 
						 
						
							
							
							
							
								
							
							
								9e18ea187f 
								
							 
						 
						
							
							
								
								[LLM] Avoid KV Cache OOM when seq len is larger than 1 ( #10006 )  
							
							 
							
							... 
							
							
							
							* Avoid OOM during muti-round streaming chat with kv cache
* For llama like kv cache, i.e., [bs, n_head, seq_len, head_dim], use is_enough_kv_cache_room_4_31.
* Other models need to compare kv cache size with kv_len. 
							
						 
						
							2024-01-26 17:30:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								e5ae6f2c13 
								
							 
						 
						
							
							
								
								LLM: fix truncation logic of past_key_values in chatglm multi turn chat ( #10007 )  
							
							 
							
							... 
							
							
							
							* Avoid frequently truncating past_key_values  when its length is larger than required. 
							
						 
						
							2024-01-26 16:56:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								1eaaace2dc 
								
							 
						 
						
							
							
								
								Update perf test all-in-one config for batch_size arg ( #10012 )  
							
							 
							
							
							
						 
						
							2024-01-26 16:46:36 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								7952bbc919 
								
							 
						 
						
							
							
								
								add conf batch_size to run_model ( #10010 )  
							
							 
							
							
							
						 
						
							2024-01-26 15:48:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								421e7cee80 
								
							 
						 
						
							
							
								
								[LLM] Add Text_Generation_WebUI Support ( #9884 )  
							
							 
							
							... 
							
							
							
							* initially add text_generation_webui support
* add env requirements install
* add necessary dependencies
* update for starting webui
* update shared and noted to place models
* update heading of part3
* meet comments
* add copyright license
* remove extensions
* convert tutorial to windows side
* add warm-up to optimize performance 
							
						 
						
							2024-01-26 15:12:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								f0da0c131b 
								
							 
						 
						
							
							
								
								Disable llama2 optimize model true or false test for now in Arc UTs ( #10008 )  
							
							 
							
							
							
						 
						
							2024-01-26 14:42:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								a00efa0564 
								
							 
						 
						
							
							
								
								LLM: add mlp & qkv fusion for FP16 Llama-7B ( #9932 )  
							
							 
							
							... 
							
							
							
							* add mlp fusion for llama
* add mlp fusion
* fix style
* update
* add mm_qkv_out
* fix style
* update
* meet code review
* meet code review 
							
						 
						
							2024-01-26 11:50:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								98ea3459e5 
								
							 
						 
						
							
							
								
								LLM : Fix llama draft_model dtype error ( #10005 )  
							
							 
							
							... 
							
							
							
							* fix llama draft_model dtype error
* updat 
							
						 
						
							2024-01-26 10:59:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								aae1870096 
								
							 
						 
						
							
							
								
								fix qwen kv cache length ( #9998 )  
							
							 
							
							
							
						 
						
							2024-01-26 10:15:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chen, Zhentao 
								
							 
						 
						
							
							
							
							
								
							
							
								762adc4f9d 
								
							 
						 
						
							
							
								
								Reformat summary table ( #9942 )  
							
							 
							
							... 
							
							
							
							* reformat the table
* refactor the file
* read result.json only 
							
						 
						
							2024-01-25 23:49:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								171fb2d185 
								
							 
						 
						
							
							
								
								LLM: reorganize GPU finetuning examples ( #9952 )  
							
							 
							
							
							
						 
						
							2024-01-25 19:02:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								24b34b6e46 
								
							 
						 
						
							
							
								
								change xmx condition ( #10000 )  
							
							 
							
							
							
						 
						
							2024-01-25 17:48:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ziteng Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								8b08ad408b 
								
							 
						 
						
							
							
								
								Add batch_size in all_in_one ( #9999 )  
							
							 
							
							... 
							
							
							
							Add batch_size in all_in_one, except run_native_int4 
							
						 
						
							2024-01-25 17:43:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								093e6f8f73 
								
							 
						 
						
							
							
								
								LLM: Add qwen CPU speculative example ( #9985 )  
							
							 
							
							... 
							
							
							
							* init from gpu
* update for cpu
* update
* update
* fix xpu readme
* update
* update example prompt
* update prompt and add 72b
* update
* update 
							
						 
						
							2024-01-25 17:01:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								bf65548d29 
								
							 
						 
						
							
							
								
								Add quantize kv cache support for chaglm2/3 ( #9996 )  
							
							 
							
							
							
						 
						
							2024-01-25 16:55:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chen, Zhentao 
								
							 
						 
						
							
							
							
							
								
							
							
								86055d76d5 
								
							 
						 
						
							
							
								
								fix optimize_model not working ( #9995 )  
							
							 
							
							
							
						 
						
							2024-01-25 16:39:05 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								9bff84e6fd 
								
							 
						 
						
							
							
								
								LLM: Convert draft_model kv_cache from bf16 to fp32 ( #9964 )  
							
							 
							
							... 
							
							
							
							* convert bf16 to fp32
* update
* change when init
* init first and cut off after
* init and exchange
* update python type
* update
* fix bug
* update
* update 
							
						 
						
							2024-01-25 11:20:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								99ff6cf048 
								
							 
						 
						
							
							
								
								Update gpu spec decoding baichuan2 example dependency ( #9990 )  
							
							 
							
							... 
							
							
							
							* add dependency
* update
* update 
							
						 
						
							2024-01-25 11:05:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								27338540c3 
								
							 
						 
						
							
							
								
								Fix repetition_penalty not activated issue ( #9989 )  
							
							 
							
							
							
						 
						
							2024-01-25 10:40:41 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jason Dai 
								
							 
						 
						
							
							
							
							
								
							
							
								3bc3d0bbcd 
								
							 
						 
						
							
							
								
								Update self-speculative readme ( #9986 )  
							
							 
							
							
							
						 
						
							2024-01-24 22:37:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								b27e5a27b9 
								
							 
						 
						
							
							
								
								Remove the check for meta device in _replace_with_low_bit_linear ( #9984 )  
							
							 
							
							
							
						 
						
							2024-01-24 18:15:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								d4f65a6033 
								
							 
						 
						
							
							
								
								LLM: add mistral speculative example ( #9976 )  
							
							 
							
							... 
							
							
							
							* add mistral example
* update 
							
						 
						
							2024-01-24 17:35:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								b176cad75a 
								
							 
						 
						
							
							
								
								LLM: Add baichuan2 gpu spec example ( #9973 )  
							
							 
							
							... 
							
							
							
							* add baichuan2 gpu spec example
* update readme & example
* remove print
* fix typo
* meet comments
* revert
* update 
							
						 
						
							2024-01-24 16:40:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jinyi Wan 
								
							 
						 
						
							
							
							
							
								
							
							
								ec2d9de0ea 
								
							 
						 
						
							
							
								
								Fix README.md for solar ( #9957 )  
							
							 
							
							
							
						 
						
							2024-01-24 15:50:54 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Mingyu Wei 
								
							 
						 
						
							
							
							
							
								
							
							
								bc9cff51a8 
								
							 
						 
						
							
							
								
								LLM GPU Example Update for Windows Support ( #9902 )  
							
							 
							
							... 
							
							
							
							* Update README in LLM GPU Examples
* Update reference of Intel GPU
* add cpu_embedding=True in comment
* small fixes
* update GPU/README.md and add explanation for cpu_embedding=True
* address comments
* fix small typos
* add backtick for cpu_embedding=True
* remove extra backtick in the doc
* add period mark
* update readme 
							
						 
						
							2024-01-24 13:42:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chen, Zhentao 
								
							 
						 
						
							
							
							
							
								
							
							
								e0db44dcb6 
								
							 
						 
						
							
							
								
								fix unexpected keyword argument 'device'  ( #9982 )  
							
							 
							
							... 
							
							
							
							* add device for chatglm3 only
* add comment for this change
* fix style
* fix style
* fix style again..
* finally fixed style 
							
						 
						
							2024-01-24 13:20:46 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Mingyu Wei 
								
							 
						 
						
							
							
							
							
								
							
							
								50a851e3b3 
								
							 
						 
						
							
							
								
								LLM: separate arc ut for disable XMX ( #9953 )  
							
							 
							
							... 
							
							
							
							* separate test_optimize_model api with disabled xmx
* delete test_optimize_model in test_transformers_api.py
* set env variable in .sh/ put back test_optimize_model
* unset env variable
* remove env setting in .py
* address errors in action
* remove import ipex
* lower tolerance 
							
						 
						
							2024-01-23 19:04:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								8d28aa8e2b 
								
							 
						 
						
							
							
								
								[LLM] Fix the model.device problem when cpu_embedding=True ( #9971 )  
							
							 
							
							... 
							
							
							
							* Overwrite the device attribute for CPUPinnedParam
* Expose cpu_embedding=True for Linux users
* Fix python style 
							
						 
						
							2024-01-23 18:51:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								f82782cd3b 
								
							 
						 
						
							
							
								
								fix starcoder ( #9975 )  
							
							 
							
							
							
						 
						
							2024-01-23 17:24:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								be5836bee1 
								
							 
						 
						
							
							
								
								LLM: fix outlier value ( #9945 )  
							
							 
							
							... 
							
							
							
							* fix outlier value
* small fix 
							
						 
						
							2024-01-23 17:04:13 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								2c8a9aaf0d 
								
							 
						 
						
							
							
								
								fix qwen causal mask when quantize_kv_cache=True ( #9968 )  
							
							 
							
							
							
						 
						
							2024-01-23 16:34:05 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								5aa4b32c1b 
								
							 
						 
						
							
							
								
								LLM: Add qwen spec gpu example ( #9965 )  
							
							 
							
							... 
							
							
							
							* add qwen spec gpu example
* update readme
---------
Co-authored-by: rnwang04 <ruonan1.wang@intel.com> 
							
						 
						
							2024-01-23 15:59:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								36c665667d 
								
							 
						 
						
							
							
								
								Add logits processor & qwen eos stop in speculative decoding ( #9963 )  
							
							 
							
							... 
							
							
							
							* add logits processor & qwen eos
* fix style
* fix
* fix
* fix style
* fix style
* support transformers 4.31
* fix style
* fix style
---------
Co-authored-by: rnwang04 <ruonan1.wang@intel.com> 
							
						 
						
							2024-01-23 15:57:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								60b35db1f1 
								
							 
						 
						
							
							
								
								LLM: add chatglm3 speculative decoding example ( #9966 )  
							
							 
							
							... 
							
							
							
							* add chatglm3 example
* update
* fix 
							
						 
						
							2024-01-23 15:54:12 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								da4687c917 
								
							 
						 
						
							
							
								
								fix fp16 ( #9970 )  
							
							 
							
							
							
						 
						
							2024-01-23 15:53:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chen, Zhentao 
								
							 
						 
						
							
							
							
							
								
							
							
								301425e377 
								
							 
						 
						
							
							
								
								harness tests on pvc multiple xpus ( #9908 )  
							
							 
							
							... 
							
							
							
							* add run_multi_llb.py
* update readme
* add job hint 
							
						 
						
							2024-01-23 13:20:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								27b19106f3 
								
							 
						 
						
							
							
								
								LLM: add readme for speculative decoding gpu examples ( #9961 )  
							
							 
							
							... 
							
							
							
							* add readme
* add readme
* meet code review 
							
						 
						
							2024-01-23 12:54:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chen, Zhentao 
								
							 
						 
						
							
							
							
							
								
							
							
								39219b7e9a 
								
							 
						 
						
							
							
								
								add default device meta  when lcmu enabled ( #9941 )  
							
							 
							
							
							
						 
						
							2024-01-23 11:00:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								dacf680294 
								
							 
						 
						
							
							
								
								add fused rotary pos emb for qwen ( #9956 )  
							
							 
							
							... 
							
							
							
							* add fused rotary pos emb for qwen
* update 
							
						 
						
							2024-01-23 10:37:56 +08:00