Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								11550d3f25 
								
							 
						 
						
							
							
								
								LLM: Add length check for IPEX-CPU speculative decoding  ( #10529 )  
							
							 
							
							... 
							
							
							
							Add length check for IPEX-CPU speculative decoding. 
							
						 
						
							2024-03-26 17:47:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								a3b007f3b1 
								
							 
						 
						
							
							
								
								[Serving] Fix fastchat breaks ( #10548 )  
							
							 
							
							... 
							
							
							
							* fix fastchat
* fix doc 
							
						 
						
							2024-03-26 17:03:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								69a28d6b4c 
								
							 
						 
						
							
							
								
								fix chatglm ( #10540 )  
							
							 
							
							
							
						 
						
							2024-03-26 16:01:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c563b41491 
								
							 
						 
						
							
							
								
								add nightly_build workflow ( #10533 )  
							
							 
							
							... 
							
							
							
							* add nightly_build workflow
* add create-job-status-badge action
* update
* update
* update
* update setup.py
* release
* revert 
							
						 
						
							2024-03-26 12:47:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0a3e4e788f 
								
							 
						 
						
							
							
								
								LLM: fix mistral hidden_size setting for deepspeed autotp ( #10527 )  
							
							 
							
							
							
						 
						
							2024-03-26 10:55:44 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1dd40b429c 
								
							 
						 
						
							
							
								
								enable fp4 fused mlp and qkv ( #10531 )  
							
							 
							
							... 
							
							
							
							* enable fp4 fused mlp and qkv
* update qwen
* update qwen2 
							
						 
						
							2024-03-26 08:34:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								16b2ef49c6 
								
							 
						 
						
							
							
								
								Update_document by heyang ( #30 )  
							
							 
							
							
							
						 
						
							2024-03-25 10:06:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								a1048ca7f6 
								
							 
						 
						
							
							
								
								Update setup.py and add new actions and add compatible mode ( #25 )  
							
							 
							
							... 
							
							
							
							* update setup.py
* add new action
* add compatible mode 
							
						 
						
							2024-03-22 15:44:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9df70d95eb 
								
							 
						 
						
							
							
								
								Refactor bigdl.llm to ipex_llm ( #24 )  
							
							 
							
							... 
							
							
							
							* Rename bigdl/llm to ipex_llm
* rm python/llm/src/bigdl
* from bigdl.llm to from ipex_llm 
							
						 
						
							2024-03-22 15:41:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								cc5806f4bc 
								
							 
						 
						
							
							
								
								LLM: add save/load example for hf-transformers ( #10432 )  
							
							 
							
							
							
						 
						
							2024-03-22 13:57:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								34d0a9328c 
								
							 
						 
						
							
							
								
								LLM: Speed-up mixtral in pipeline parallel inference ( #10472 )  
							
							 
							
							... 
							
							
							
							* speed-up mixtral
* fix style 
							
						 
						
							2024-03-22 11:06:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								b9d4280892 
								
							 
						 
						
							
							
								
								LLM: fix baichuan7b quantize kv abnormal output. ( #10504 )  
							
							 
							
							... 
							
							
							
							* fix abnormal output.
* fix style.
* fix style. 
							
						 
						
							2024-03-22 10:00:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								f0f317b6cf 
								
							 
						 
						
							
							
								
								fix a typo in yuan ( #10503 )  
							
							 
							
							
							
						 
						
							2024-03-22 09:40:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
							
							
								
							
							
								3a3756b51d 
								
							 
						 
						
							
							
								
								Add FastChat bigdl_worker ( #10493 )  
							
							 
							
							... 
							
							
							
							* done
* fix format
* add licence
* done
* fix doc
* refactor folder
* add license 
							
						 
						
							2024-03-21 18:35:05 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								dba7ddaab3 
								
							 
						 
						
							
							
								
								add sdp fp8 for qwen llama436 baichuan mistral baichuan2 ( #10485 )  
							
							 
							
							... 
							
							
							
							* add sdp fp8
* fix style
* fix qwen
* fix baichuan 13
* revert baichuan 13b and baichuan2-13b
* fix style
* update 
							
						 
						
							2024-03-21 17:23:05 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								30f111cd32 
								
							 
						 
						
							
							
								
								lm_head empty_cache for more models ( #10490 )  
							
							 
							
							... 
							
							
							
							* modify constraint
* fix style 
							
						 
						
							2024-03-21 17:11:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								1579ee4421 
								
							 
						 
						
							
							
								
								[LLM] Add nightly igpu perf test for INT4+FP16 1024-128 ( #10496 )  
							
							 
							
							
							
						 
						
							2024-03-21 16:07:06 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								2958ca49c0 
								
							 
						 
						
							
							
								
								LLM: add patching function for llm finetuning ( #10247 )  
							
							 
							
							
							
						 
						
							2024-03-21 16:01:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhicun 
								
							 
						 
						
							
							
							
							
								
							
							
								5b97fdb87b 
								
							 
						 
						
							
							
								
								update deepseek example readme ( #10420 )  
							
							 
							
							... 
							
							
							
							* update readme
* update
* update readme 
							
						 
						
							2024-03-21 15:21:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									hxsz1997 
								
							 
						 
						
							
							
							
							
								
							
							
								a5f35757a4 
								
							 
						 
						
							
							
								
								Migrate langchain rag cpu example to gpu ( #10450 )  
							
							 
							
							... 
							
							
							
							* add langchain rag on gpu
* add rag example in readme
* add trust_remote_code in TransformersEmbeddings.from_model_id
* add trust_remote_code in TransformersEmbeddings.from_model_id in cpu 
							
						 
						
							2024-03-21 15:20:46 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								85ef3f1d99 
								
							 
						 
						
							
							
								
								LLM: add empty cache in deepspeed autotp benchmark script ( #10488 )  
							
							 
							
							
							
						 
						
							2024-03-21 10:51:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
							
							
								
							
							
								5a5fd5af5b 
								
							 
						 
						
							
							
								
								LLM: Add speculative benchmark on CPU/XPU ( #10464 )  
							
							 
							
							... 
							
							
							
							Add speculative benchmark on CPU/XPU. 
							
						 
						
							2024-03-21 09:51:06 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								28c315a5b9 
								
							 
						 
						
							
							
								
								LLM: fix deepspeed error of finetuning on xpu ( #10484 )  
							
							 
							
							
							
						 
						
							2024-03-21 09:46:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								021d77fd22 
								
							 
						 
						
							
							
								
								Remove softmax upcast fp32 in llama ( #10481 )  
							
							 
							
							... 
							
							
							
							* update
* fix style 
							
						 
						
							2024-03-20 18:17:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								cfdf8ad496 
								
							 
						 
						
							
							
								
								Fix modules_not_to_convert argument ( #10483 )  
							
							 
							
							
							
						 
						
							2024-03-20 17:47:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
							
							
								
							
							
								cbe24cc7e6 
								
							 
						 
						
							
							
								
								LLM: Enable BigDL IPEX Int8 ( #10480 )  
							
							 
							
							... 
							
							
							
							Enable BigDL IPEX Int8 
							
						 
						
							2024-03-20 15:59:54 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									ZehuaCao 
								
							 
						 
						
							
							
							
							
								
							
							
								1d062e24db 
								
							 
						 
						
							
							
								
								Update serving doc ( #10475 )  
							
							 
							
							... 
							
							
							
							* update serving doc
* add tob
* update
* update
* update
* update vllm worker 
							
						 
						
							2024-03-20 14:44:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								4581e4f17f 
								
							 
						 
						
							
							
								
								LLM: fix whisper model missing config. ( #10473 )  
							
							 
							
							... 
							
							
							
							* fix whisper model missing config.
* fix style.
* fix style.
* style. 
							
						 
						
							2024-03-20 14:22:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								e41d556436 
								
							 
						 
						
							
							
								
								LLM: change fp16 benchmark to model.half ( #10477 )  
							
							 
							
							... 
							
							
							
							* LLM: change fp16 benchmark to model.half
* fix 
							
						 
						
							2024-03-20 13:38:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								749bedaf1e 
								
							 
						 
						
							
							
								
								fix rwkv v5 fp16 ( #10474 )  
							
							 
							
							
							
						 
						
							2024-03-20 13:15:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								72bcc27da9 
								
							 
						 
						
							
							
								
								[LLM] Add TransformersBgeEmbeddings class in bigdl.llm.langchain.embeddings ( #10459 )  
							
							 
							
							... 
							
							
							
							* Add TransformersBgeEmbeddings class in bigdl.llm.langchain.embeddings
* Small fixes 
							
						 
						
							2024-03-19 18:04:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								463a86cd5d 
								
							 
						 
						
							
							
								
								LLM: fix qwen-vl interpolation gpu abnormal results. ( #10457 )  
							
							 
							
							... 
							
							
							
							* fix qwen-vl interpolation gpu abnormal results.
* fix style.
* update qwen-vl gpu example.
* fix comment and update example.
* fix style. 
							
						 
						
							2024-03-19 16:59:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								e9055c32f9 
								
							 
						 
						
							
							
								
								LLM: fix fp16 mem record in benchmark ( #10461 )  
							
							 
							
							... 
							
							
							
							* LLM: fix fp16 mem record in benchmark
* change style 
							
						 
						
							2024-03-19 16:17:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jiao Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								f3fefdc9ce 
								
							 
						 
						
							
							
								
								fix pad_token_id issue ( #10425 )  
							
							 
							
							
							
						 
						
							2024-03-18 23:30:28 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuxuan Xia 
								
							 
						 
						
							
							
							
							
								
							
							
								74e7490fda 
								
							 
						 
						
							
							
								
								Fix Baichuan2 prompt format ( #10334 )  
							
							 
							
							... 
							
							
							
							* Fix Baichuan2 prompt format
* Fix Baichuan2 README
* Change baichuan2 prompt info
* Change baichuan2 prompt info 
							
						 
						
							2024-03-19 12:48:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								0451103a43 
								
							 
						 
						
							
							
								
								LLM: add int4+fp16 benchmark script for windows benchmarking ( #10449 )  
							
							 
							
							... 
							
							
							
							* LLM: add fp16 for benchmark script
* remove transformer_int4_fp16_loadlowbit_gpu_win 
							
						 
						
							2024-03-19 11:11:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								bbd749dceb 
								
							 
						 
						
							
							
								
								qwen2 fp8 cache ( #10446 )  
							
							 
							
							... 
							
							
							
							* qwen2 fp8 cache
* fix style check 
							
						 
						
							2024-03-19 08:32:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								9e763b049c 
								
							 
						 
						
							
							
								
								Support running pipeline parallel inference by vertically partitioning model to different devices ( #10392 )  
							
							 
							
							... 
							
							
							
							* support pipeline parallel inference
* fix logging
* remove benchmark file
* fix
* need to warmup twice
* support qwen and qwen2
* fix lint
* remove genxir
* refine 
							
						 
						
							2024-03-18 13:04:45 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								66b4bb5c5d 
								
							 
						 
						
							
							
								
								LLM: update setup to provide cpp for windows ( #10448 )  
							
							 
							
							
							
						 
						
							2024-03-18 18:20:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
							
							
								
							
							
								dbdeaddd6a 
								
							 
						 
						
							
							
								
								LLM: Fix log condition for BIGDL_OPT_IPEX ( #10441 )  
							
							 
							
							... 
							
							
							
							remove log for BIGDL_OPT_IPEX 
							
						 
						
							2024-03-18 16:03:51 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								1de13ea578 
								
							 
						 
						
							
							
								
								LLM: remove CPU english_quotes dataset and update docker example ( #10399 )  
							
							 
							
							... 
							
							
							
							* update dataset
* update readme
* update docker cpu
* update xpu docker 
							
						 
						
							2024-03-18 10:45:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								399843faf0 
								
							 
						 
						
							
							
								
								Baichuan 7b fp16 sdp and qwen2 pvc sdp ( #10435 )  
							
							 
							
							... 
							
							
							
							* add baichuan sdp
* update
* baichuan2
* fix
* fix style
* revert 13b
* revert 
							
						 
						
							2024-03-18 10:15:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jiao Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								5ab52ef5b5 
								
							 
						 
						
							
							
								
								update ( #10424 )  
							
							 
							
							
							
						 
						
							2024-03-15 09:24:26 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								bd64488b2a 
								
							 
						 
						
							
							
								
								add mask support for llama/chatglm fp8 sdp ( #10433 )  
							
							 
							
							... 
							
							
							
							* add mask support for fp8 sdp
* fix chatglm2 dtype
* update 
							
						 
						
							2024-03-15 17:36:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Keyan (Kyrie) Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								444b11af22 
								
							 
						 
						
							
							
								
								Add LangChain upstream ut test for ipynb ( #10387 )  
							
							 
							
							... 
							
							
							
							* Add LangChain upstream ut test for ipynb
* Integrate unit test for LangChain upstream ut and ipynb into one file
* Modify file name
* Remove LangChain version update in unit test
* Move Langchain upstream ut job to arc
* Modify path in .yml file
* Modify path in llm_unit_tests.yml
* Avoid creating the directory repeatedly 
							
						 
						
							2024-03-15 16:31:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								ca372f6dab 
								
							 
						 
						
							
							
								
								LLM: add save/load example for ModelScope ( #10397 )  
							
							 
							
							... 
							
							
							
							* LLM: add sl example for modelscope
* fix according to comments
* move file 
							
						 
						
							2024-03-15 15:17:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								24473e331a 
								
							 
						 
						
							
							
								
								Qwen2 fp16 sdp ( #10427 )  
							
							 
							
							... 
							
							
							
							* qwen2 sdp and refine
* update
* update
* fix style
* remove use_flash_attention 
							
						 
						
							2024-03-15 13:12:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								1315150e64 
								
							 
						 
						
							
							
								
								Add baichuan2-13b 1k to arc nightly perf ( #10406 )  
							
							 
							
							
							
						 
						
							2024-03-15 10:29:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								b036205be2 
								
							 
						 
						
							
							
								
								LLM: add fp8 sdp for chatglm2/3 ( #10411 )  
							
							 
							
							... 
							
							
							
							* add fp8 sdp for chatglm2
* fix style 
							
						 
						
							2024-03-15 09:38:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								fe8976a00f 
								
							 
						 
						
							
							
								
								LLM: Support gguf models using low_bit and fix no json ( #10408 )  
							
							 
							
							... 
							
							
							
							* support others model use low_bit
* update readme
* update to add *.json 
							
						 
						
							2024-03-15 09:34:18 +08:00