Cheen Hau, 俊豪 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1c5eb14128 
								
							 
						 
						
							
							
								
								Update pip install to use --extra-index-url for ipex package ( #10557 )  
							
							 
							
							... 
							
							
							
							* Change to 'pip install .. --extra-index-url' for readthedocs
* Change to 'pip install .. --extra-index-url' for examples
* Change to 'pip install .. --extra-index-url' for remaining files
* Fix URL for ipex
* Add links for ipex US and CN servers
* Update ipex cpu url
* remove readme
* Update for github actions
* Update for dockerfiles 
							
						 
						
							2024-03-28 09:56:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								92dfed77be 
								
							 
						 
						
							
							
								
								LLM: fix abnormal output of fp16 deepspeed autotp ( #10558 )  
							
							 
							
							
							
						 
						
							2024-03-28 09:35:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jason Dai 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c450c85489 
								
							 
						 
						
							
							
								
								Delete llm/readme.md ( #10569 )  
							
							 
							
							
							
						 
						
							2024-03-27 20:06:40 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								51d34ca68e 
								
							 
						 
						
							
							
								
								Fix wrong import in speculative ( #10562 )  
							
							 
							
							
							
						 
						
							2024-03-27 18:21:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cheen Hau, 俊豪 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f239bc329b 
								
							 
						 
						
							
							
								
								Specify oneAPI minor version in documentation ( #10561 )  
							
							 
							
							
							
						 
						
							2024-03-27 17:58:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								fbeb10c796 
								
							 
						 
						
							
							
								
								LLM: Set different env based on different Linux kernels ( #10566 )  
							
							 
							
							
							
						 
						
							2024-03-27 17:56:33 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									hxsz1997 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d86477f14d 
								
							 
						 
						
							
							
								
								Remove native_int4 in LangChain examples ( #10510 )  
							
							 
							
							... 
							
							
							
							* rebase the modify to ipex-llm
* modify the typo 
							
						 
						
							2024-03-27 17:48:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								04baac5a2e 
								
							 
						 
						
							
							
								
								Fix fastchat top_k ( #10560 )  
							
							 
							
							... 
							
							
							
							* fix -1 top_k
* fix
* done 
							
						 
						
							2024-03-27 16:01:58 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								fc8c7904f0 
								
							 
						 
						
							
							
								
								LLM: fix torch_dtype setting of apply fp16 optimization through optimize_model ( #10556 )  
							
							 
							
							
							
						 
						
							2024-03-27 14:18:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ea4bc450c4 
								
							 
						 
						
							
							
								
								LLM: add esimd sdp for pvc ( #10543 )  
							
							 
							
							... 
							
							
							
							* add esimd sdp for pvc
* update
* fix
* fix batch 
							
						 
						
							2024-03-26 19:04:40 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b78289a595 
								
							 
						 
						
							
							
								
								Remove ipex-llm dependency in readme ( #10544 )  
							
							 
							
							
							
						 
						
							2024-03-26 18:25:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								11550d3f25 
								
							 
						 
						
							
							
								
								LLM: Add length check for IPEX-CPU speculative decoding  ( #10529 )  
							
							 
							
							... 
							
							
							
							Add length check for IPEX-CPU speculative decoding. 
							
						 
						
							2024-03-26 17:47:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								a3b007f3b1 
								
							 
						 
						
							
							
								
								[Serving] Fix fastchat breaks ( #10548 )  
							
							 
							
							... 
							
							
							
							* fix fastchat
* fix doc 
							
						 
						
							2024-03-26 17:03:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								69a28d6b4c 
								
							 
						 
						
							
							
								
								fix chatglm ( #10540 )  
							
							 
							
							
							
						 
						
							2024-03-26 16:01:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c563b41491 
								
							 
						 
						
							
							
								
								add nightly_build workflow ( #10533 )  
							
							 
							
							... 
							
							
							
							* add nightly_build workflow
* add create-job-status-badge action
* update
* update
* update
* update setup.py
* release
* revert 
							
						 
						
							2024-03-26 12:47:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0a3e4e788f 
								
							 
						 
						
							
							
								
								LLM: fix mistral hidden_size setting for deepspeed autotp ( #10527 )  
							
							 
							
							
							
						 
						
							2024-03-26 10:55:44 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1dd40b429c 
								
							 
						 
						
							
							
								
								enable fp4 fused mlp and qkv ( #10531 )  
							
							 
							
							... 
							
							
							
							* enable fp4 fused mlp and qkv
* update qwen
* update qwen2 
							
						 
						
							2024-03-26 08:34:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								16b2ef49c6 
								
							 
						 
						
							
							
								
								Update_document by heyang ( #30 )  
							
							 
							
							
							
						 
						
							2024-03-25 10:06:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								a1048ca7f6 
								
							 
						 
						
							
							
								
								Update setup.py and add new actions and add compatible mode ( #25 )  
							
							 
							
							... 
							
							
							
							* update setup.py
* add new action
* add compatible mode 
							
						 
						
							2024-03-22 15:44:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9df70d95eb 
								
							 
						 
						
							
							
								
								Refactor bigdl.llm to  ipex_llm ( #24 )  
							
							 
							
							... 
							
							
							
							* Rename bigdl/llm to ipex_llm
* rm python/llm/src/bigdl
* from bigdl.llm to from ipex_llm 
							
						 
						
							2024-03-22 15:41:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								cc5806f4bc 
								
							 
						 
						
							
							
								
								LLM: add save/load example for hf-transformers ( #10432 )  
							
							 
							
							
							
						 
						
							2024-03-22 13:57:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								34d0a9328c 
								
							 
						 
						
							
							
								
								LLM: Speed-up mixtral in pipeline parallel inference ( #10472 )  
							
							 
							
							... 
							
							
							
							* speed-up mixtral
* fix style 
							
						 
						
							2024-03-22 11:06:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								b9d4280892 
								
							 
						 
						
							
							
								
								LLM: fix baichuan7b quantize kv abnormal output. ( #10504 )  
							
							 
							
							... 
							
							
							
							* fix abnormal output.
* fix style.
* fix style. 
							
						 
						
							2024-03-22 10:00:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								f0f317b6cf 
								
							 
						 
						
							
							
								
								fix a typo in yuan ( #10503 )  
							
							 
							
							
							
						 
						
							2024-03-22 09:40:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
							
							
								
							
							
								3a3756b51d 
								
							 
						 
						
							
							
								
								Add FastChat bigdl_worker ( #10493 )  
							
							 
							
							... 
							
							
							
							* done
* fix format
* add licence
* done
* fix doc
* refactor folder
* add license 
							
						 
						
							2024-03-21 18:35:05 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								dba7ddaab3 
								
							 
						 
						
							
							
								
								add sdp fp8 for qwen llama436 baichuan mistral baichuan2 ( #10485 )  
							
							 
							
							... 
							
							
							
							* add sdp fp8
* fix style
* fix qwen
* fix baichuan 13
* revert baichuan 13b and baichuan2-13b
* fix style
* update 
							
						 
						
							2024-03-21 17:23:05 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								30f111cd32 
								
							 
						 
						
							
							
								
								lm_head empty_cache for more models ( #10490 )  
							
							 
							
							... 
							
							
							
							* modify constraint
* fix style 
							
						 
						
							2024-03-21 17:11:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								1579ee4421 
								
							 
						 
						
							
							
								
								[LLM] Add nightly igpu perf test for INT4+FP16 1024-128 ( #10496 )  
							
							 
							
							
							
						 
						
							2024-03-21 16:07:06 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								2958ca49c0 
								
							 
						 
						
							
							
								
								LLM: add patching function for llm finetuning ( #10247 )  
							
							 
							
							
							
						 
						
							2024-03-21 16:01:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhicun 
								
							 
						 
						
							
							
							
							
								
							
							
								5b97fdb87b 
								
							 
						 
						
							
							
								
								update deepseek example readme ( #10420 )  
							
							 
							
							... 
							
							
							
							* update readme
* update
* update readme 
							
						 
						
							2024-03-21 15:21:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									hxsz1997 
								
							 
						 
						
							
							
							
							
								
							
							
								a5f35757a4 
								
							 
						 
						
							
							
								
								Migrate langchain rag cpu example to gpu ( #10450 )  
							
							 
							
							... 
							
							
							
							* add langchain rag on gpu
* add rag example in readme
* add trust_remote_code in TransformersEmbeddings.from_model_id
* add trust_remote_code in TransformersEmbeddings.from_model_id in cpu 
							
						 
						
							2024-03-21 15:20:46 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								85ef3f1d99 
								
							 
						 
						
							
							
								
								LLM: add empty cache in deepspeed autotp benchmark script ( #10488 )  
							
							 
							
							
							
						 
						
							2024-03-21 10:51:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
							
							
								
							
							
								5a5fd5af5b 
								
							 
						 
						
							
							
								
								LLM: Add speculative benchmark on CPU/XPU ( #10464 )  
							
							 
							
							... 
							
							
							
							Add speculative benchmark on CPU/XPU. 
							
						 
						
							2024-03-21 09:51:06 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								28c315a5b9 
								
							 
						 
						
							
							
								
								LLM: fix deepspeed error of finetuning on xpu ( #10484 )  
							
							 
							
							
							
						 
						
							2024-03-21 09:46:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								021d77fd22 
								
							 
						 
						
							
							
								
								Remove softmax upcast fp32 in llama ( #10481 )  
							
							 
							
							... 
							
							
							
							* update
* fix style 
							
						 
						
							2024-03-20 18:17:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								cfdf8ad496 
								
							 
						 
						
							
							
								
								Fix modules_not_to_convert argument ( #10483 )  
							
							 
							
							
							
						 
						
							2024-03-20 17:47:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
							
							
								
							
							
								cbe24cc7e6 
								
							 
						 
						
							
							
								
								LLM: Enable BigDL IPEX Int8 ( #10480 )  
							
							 
							
							... 
							
							
							
							Enable BigDL IPEX Int8 
							
						 
						
							2024-03-20 15:59:54 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									ZehuaCao 
								
							 
						 
						
							
							
							
							
								
							
							
								1d062e24db 
								
							 
						 
						
							
							
								
								Update serving doc ( #10475 )  
							
							 
							
							... 
							
							
							
							* update serving doc
* add tob
* update
* update
* update
* update vllm worker 
							
						 
						
							2024-03-20 14:44:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								4581e4f17f 
								
							 
						 
						
							
							
								
								LLM: fix whiper model missing config. ( #10473 )  
							
							 
							
							... 
							
							
							
							* fix whiper model missing config.
* fix style.
* fix style.
* style. 
							
						 
						
							2024-03-20 14:22:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								e41d556436 
								
							 
						 
						
							
							
								
								LLM: change fp16 benchmark to model.half ( #10477 )  
							
							 
							
							... 
							
							
							
							* LLM: change fp16 benchmark to model.half
* fix 
							
						 
						
							2024-03-20 13:38:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								749bedaf1e 
								
							 
						 
						
							
							
								
								fix rwkv v5 fp16 ( #10474 )  
							
							 
							
							
							
						 
						
							2024-03-20 13:15:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								72bcc27da9 
								
							 
						 
						
							
							
								
								[LLM] Add TransformersBgeEmbeddings class in bigdl.llm.langchain.embeddings ( #10459 )  
							
							 
							
							... 
							
							
							
							* Add TransformersBgeEmbeddings class in bigdl.llm.langchain.embeddings
* Small fixes 
							
						 
						
							2024-03-19 18:04:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								463a86cd5d 
								
							 
						 
						
							
							
								
								LLM: fix qwen-vl interpolation gpu abnormal results. ( #10457 )  
							
							 
							
							... 
							
							
							
							* fix qwen-vl interpolation gpu abnormal results.
* fix style.
* update qwen-vl gpu example.
* fix comment and update example.
* fix style. 
							
						 
						
							2024-03-19 16:59:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								e9055c32f9 
								
							 
						 
						
							
							
								
								LLM: fix fp16 mem record in benchmark ( #10461 )  
							
							 
							
							... 
							
							
							
							* LLM: fix fp16 mem record in benchmark
* change style 
							
						 
						
							2024-03-19 16:17:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jiao Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								f3fefdc9ce 
								
							 
						 
						
							
							
								
								fix pad_token_id issue ( #10425 )  
							
							 
							
							
							
						 
						
							2024-03-18 23:30:28 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuxuan Xia 
								
							 
						 
						
							
							
							
							
								
							
							
								74e7490fda 
								
							 
						 
						
							
							
								
								Fix Baichuan2 prompt format ( #10334 )  
							
							 
							
							... 
							
							
							
							* Fix Baichuan2 prompt format
* Fix Baichuan2 README
* Change baichuan2 prompt info
* Change baichuan2 prompt info 
							
						 
						
							2024-03-19 12:48:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								0451103a43 
								
							 
						 
						
							
							
								
								LLM: add int4+fp16 benchmark script for windows benchmarking ( #10449 )  
							
							 
							
							... 
							
							
							
							* LLM: add fp16 for benchmark script
* remove transformer_int4_fp16_loadlowbit_gpu_win 
							
						 
						
							2024-03-19 11:11:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								bbd749dceb 
								
							 
						 
						
							
							
								
								qwen2 fp8 cache ( #10446 )  
							
							 
							
							... 
							
							
							
							* qwen2 fp8 cache
* fix style check 
							
						 
						
							2024-03-19 08:32:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								9e763b049c 
								
							 
						 
						
							
							
								
								Support running pipeline parallel inference by vertically partitioning model to different devices ( #10392 )  
							
							 
							
							... 
							
							
							
							* support pipeline parallel inference
* fix logging
* remove benchmark file
* fic
* need to warmup twice
* support qwen and qwen2
* fix lint
* remove genxir
* refine 
							
						 
						
							2024-03-18 13:04:45 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								66b4bb5c5d 
								
							 
						 
						
							
							
								
								LLM: update setup to provide cpp for windows ( #10448 )  
							
							 
							
							
							
						 
						
							2024-03-18 18:20:55 +08:00