Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								ee98cdd85c 
								
							 
						 
						
							
							
								
								Support latest transformer version ( #8923 )  
							
							 
							
							... 
							
							
							
							* Support latest transformer version
* fix style 
							
						 
						
							2023-09-07 19:01:32 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								25428b22b4 
								
							 
						 
						
							
							
								
								Fix chatglm2 attention and kv cache ( #8924 )  
							
							 
							
							... 
							
							
							
							* fix chatglm2 attention
* fix bf16 bug
* make model stateless
* add utils
* cleanup
* fix style 
							
						 
						
							2023-09-07 18:54:29 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								b209b8f7b6 
								
							 
						 
						
							
							
								
								[LLM] Fix arc qtype != q4_0 generate issue ( #8920 )  
							
							 
							
							... 
							
							
							
							* Fix arc precision!=q4_0 generate issue
* meet comments 
							
						 
						
							2023-09-07 08:56:36 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								3d2efe9608 
								
							 
						 
						
							
							
								
								LLM: update llm latency benchmark. ( #8922 )  
							
							 
							
							
							
						 
						
							2023-09-07 19:00:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								7897eb4b51 
								
							 
						 
						
							
							
								
								LLM: add benchmark scripts on GPU ( #8916 )  
							
							 
							
							
							
						 
						
							2023-09-07 18:08:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								d8a01d7c4f 
								
							 
						 
						
							
							
								
								fix chatglm in run.py ( #8919 )  
							
							 
							
							
							
						 
						
							2023-09-07 16:44:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								e9de9d9950 
								
							 
						 
						
							
							
								
								benchmark for native int4  ( #8918 )  
							
							 
							
							... 
							
							
							
							* native4
* update
* update
* update 
							
						 
						
							2023-09-07 15:56:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								c0797ea232 
								
							 
						 
						
							
							
								
								LLM: update setup to specify bigdl-core-xe version ( #8913 )  
							
							 
							
							
							
						 
						
							2023-09-07 15:11:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								c152c719ea 
								
							 
						 
						
							
							
								
								Update bigdl logo url to the one hosted on readthedocs ( #8911 )  
							
							 
							
							
							
						 
						
							2023-09-07 14:40:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								057e77e229 
								
							 
						 
						
							
							
								
								LLM: update benchmark_utils.py to handle do_sample=True ( #8903 )  
							
							 
							
							
							
						 
						
							2023-09-07 14:20:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								3d1c7e7082 
								
							 
						 
						
							
							
								
								Small link fix ( #8910 )  
							
							 
							
							
							
						 
						
							2023-09-07 13:35:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								c34400e6b0 
								
							 
						 
						
							
							
								
								Use new layout for xpu qlinear ( #8896 )  
							
							 
							
							... 
							
							
							
							* use new layout for xpu qlinear
* fix style 
							
						 
						
							2023-09-06 21:55:33 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								8bc1d8a17c 
								
							 
						 
						
							
							
								
								LLM: Fix discards in optimize_model with non-hf models  and add openai whisper example ( #8877 )  
							
							 
							
							... 
							
							
							
							* openai-whisper 
							
						 
						
							2023-09-07 10:35:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								5d9942a3ca 
								
							 
						 
						
							
							
								
								transformer int4 and native int4's benchmark script for 32 256 1k 2k input ( #8871 )  
							
							 
							
							... 
							
							
							
							* transformer
* move
* update
* add header
* update all-in-one
* clean up 
							
						 
						
							2023-09-07 09:49:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								bfc71fbc15 
								
							 
						 
						
							
							
								
								Add known issue in arc voice assistant example ( #8902 )  
							
							 
							
							... 
							
							
							
							* add known issue in voice assistant example
* update cpu 
							
						 
						
							2023-09-07 09:28:26 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								db26c7b84d 
								
							 
						 
						
							
							
								
								[LLM] Update readme gif & image url to the ones hosted on readthedocs ( #8900 )  
							
							 
							
							
							
						 
						
							2023-09-06 20:04:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								7a71ced78f 
								
							 
						 
						
							
							
								
								[LLM Docs] Remain API Docs Issues Solution ( #8780 )  
							
							 
							
							... 
							
							
							
							* langchain readthedocs update
* solve langchain.llms.transformersllm issues
* langchain.embeddings.transformersembeddings/transformersllms issues
* update docs for get_num_tokens
* add low_bit api doc
* add optimizer model api doc
* update rst index
* fix comments style
* update docs following the comments
* update api doc 
							
						 
						
							2023-09-06 16:29:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								cf6a620bae 
								
							 
						 
						
							
							
								
								[LLM] BigDL-LLM Documentation Initial Version ( #8833 )  
							
							 
							
							... 
							
							
							
							* Change order of LLM in header
* Some updates to footer
* Add BigDL-LLM index page and basic file structure
* Update index page for key features
* Add initial content for BigDL-LLM in 5 mins
* Improvement to footnote
* Add initial contents based on current contents we have
* Add initial quick links
* Small fix
* Rename file
* Hide cli section for now and change model supports to examples
* Hugging Face format -> Hugging Face transformers format
* Add placeholder for GPU supports
* Add GPU related content structure
* Add cpu/gpu installation initial contents
* Add initial contents for GPU supports
* Add image link to LLM index page
* Hide tips and known issues for now
* Small fix
* Update based on comments
* Small fix
* Add notes for Python 3.9
* Add placeholder optimize model & reveal CLI; small revision
* examples add gpu part
* Hide CLI part again for first version of merging
* add keyfeatures-optimize_model part (#1 )
* change gif link to the ones hosted on github
* Small fix
---------
Co-authored-by: plusbang <binbin1.deng@intel.com>
Co-authored-by: binbin Deng <108676127+plusbang@users.noreply.github.com> 
							
						 
						
							2023-09-06 15:38:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								49a39452c6 
								
							 
						 
						
							
							
								
								update benchmark ( #8899 )  
							
							 
							
							
							
						 
						
							2023-09-06 15:11:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								2d97827ec5 
								
							 
						 
						
							
							
								
								fix typo in lora entrypoint ( #8862 )  
							
							 
							
							
							
						 
						
							2023-09-06 13:52:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Kai Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								4a9ff050a1 
								
							 
						 
						
							
							
								
								Add qlora nf4 ( #8782 )  
							
							 
							
							... 
							
							
							
							* add nf4
* dequant nf4
* style 
							
						 
						
							2023-09-06 09:39:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jiao Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								14b95ebfb4 
								
							 
						 
						
							
							
								
								Update Databricks documentation for Pytorch Estimator issue ( #8869 )  
							
							 
							
							... 
							
							
							
							* update doc
* add image
* update 
							
						 
						
							2023-09-05 12:59:47 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									xingyuan li 
								
							 
						 
						
							
							
							
							
								
							
							
								704a896e90 
								
							 
						 
						
							
							
								
								[LLM] Add perf test on xpu for bigdl-llm ( #8866 )  
							
							 
							
							... 
							
							
							
							* add xpu latency job
* update install way
* remove duplicated workflow
* add perf upload 
							
						 
						
							2023-09-05 17:36:24 +09:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								95271f10e0 
								
							 
						 
						
							
							
								
								LLM: Rename low bit layer ( #8875 )  
							
							 
							
							... 
							
							
							
							* rename lowbit
---------
Co-authored-by: leonardozcm <leonardozcm@gmail.com> 
							
						 
						
							2023-09-05 13:21:12 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								74a2c2ddf5 
								
							 
						 
						
							
							
								
								Update optimize_model=True in llama2 chatglm2 arc examples ( #8878 )  
							
							 
							
							... 
							
							
							
							* add optimize_model=True in llama2 chatglm2 examples
* add ipex optimize in gpt-j example 
							
						 
						
							2023-09-05 10:35:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jason Dai 
								
							 
						 
						
							
							
							
							
								
							
							
								5e58f698cd 
								
							 
						 
						
							
							
								
								Update readthedocs ( #8882 )  
							
							 
							
							
							
						 
						
							2023-09-04 15:42:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jason Dai 
								
							 
						 
						
							
							
							
							
								
							
							
								9760eec6de 
								
							 
						 
						
							
							
								
								Update readme ( #8881 )  
							
							 
							
							
							
						 
						
							2023-09-03 12:30:26 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Song Jiaming 
								
							 
						 
						
							
							
							
							
								
							
							
								7b3ac66e17 
								
							 
						 
						
							
							
								
								[LLM] auto performance test fix specific settings to template ( #8876 )  
							
							 
							
							
							
						 
						
							2023-09-01 15:49:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								242c9d6036 
								
							 
						 
						
							
							
								
								Fix chatglm2 multi-turn streamchat ( #8867 )  
							
							 
							
							
							
						 
						
							2023-08-31 22:13:49 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Song Jiaming 
								
							 
						 
						
							
							
							
							
								
							
							
								c06f1ca93e 
								
							 
						 
						
							
							
								
								[LLM] auto perf test to output to csv ( #8846 )  
							
							 
							
							
							
						 
						
							2023-09-01 10:48:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								9c652fbe95 
								
							 
						 
						
							
							
								
								LLM: Whisper long segment recognize example ( #8826 )  
							
							 
							
							... 
							
							
							
							* LLM: Long segment recognize example 
							
						 
						
							2023-08-31 16:41:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								a232c5aa21 
								
							 
						 
						
							
							
								
								[LLM] add protobuf in bigdl-llm dependency ( #8861 )  
							
							 
							
							
							
						 
						
							2023-08-31 15:23:31 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								b1ac8dc1bc 
								
							 
						 
						
							
							
								
								BF16 Lora Finetuning on K8S with OneCCL and Intel MPI ( #8775 )  
							
							 
							
							... 
							
							
							
							* BF16 Lora Finetuning on K8S with OneCCL and Intel MPI
* Update README.md
* format
* refine
* Update README.md
* refine
* Update README.md
* increase nfs volume size to improve IO performance
* fix bugs
* Update README.md
* Update README.md
* fix permission
* move output destination
* Update README.md
* fix wrong base model name in doc
* fix output path in entrypoint
* add a permission-precreated output dir
* format
* move output logs to a persistent storage 
							
						 
						
							2023-08-31 14:56:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									xingyuan li 
								
							 
						 
						
							
							
							
							
								
							
							
								de6c6bb17f 
								
							 
						 
						
							
							
								
								[LLM] Downgrade amx build gcc version and remove avx flag display ( #8856 )  
							
							 
							
							... 
							
							
							
							* downgrade to gcc 11
* remove avx display 
							
						 
						
							2023-08-31 14:08:13 +09:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								3b4f4e1c3d 
								
							 
						 
						
							
							
								
								Fix llama attention optimization for XPU ( #8855 )  
							
							 
							
							... 
							
							
							
							* Fix llama attention optimization for XPU
* fix chatglm2
* fix typo 
							
						 
						
							2023-08-30 21:30:49 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shengsheng Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								7b566bf686 
								
							 
						 
						
							
							
								
								[LLM] add new API for optimize any pytorch models ( #8827 )  
							
							 
							
							... 
							
							
							
							* add new API for optimize any pytorch models
* change test util name
* revise API and update UT
* fix python style
* update ut config, change default value
* change defaults, disable ut transcribe 
							
						 
						
							2023-08-30 19:41:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								8eca982301 
								
							 
						 
						
							
							
								
								windows add env ( #8852 )  
							
							 
							
							
							
						 
						
							2023-08-30 15:54:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								731916c639 
								
							 
						 
						
							
							
								
								LLM: Enable attempting loading method automatically ( #8841 )  
							
							 
							
							... 
							
							
							
							* enable auto load method
* warning error
* logger info
---------
Co-authored-by: leonardozcm <leonardozcm@gmail.com> 
							
						 
						
							2023-08-30 15:41:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								bba73ec9d2 
								
							 
						 
						
							
							
								
								[LLM] change chatglm native int4 checkpoint name ( #8851 )  
							
							 
							
							
							
						 
						
							2023-08-30 15:05:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang Jian 
								
							 
						 
						
							
							
							
							
								
							
							
								954ef954b6 
								
							 
						 
						
							
							
								
								[PPML] Add occlum llm image manually build ( #8849 )  
							
							 
							
							
							
						 
						
							2023-08-30 11:31:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								55e705a84c 
								
							 
						 
						
							
							
								
								[LLM] Support the rest of AutoXXX classes in Transformers API ( #8815 )  
							
							 
							
							... 
							
							
							
							* add transformers auto models
* fix 
							
						 
						
							2023-08-30 11:16:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								887018b0f2 
								
							 
						 
						
							
							
								
								Update ut save&load ( #8847 )  
							
							 
							
							... 
							
							
							
							Co-authored-by: leonardozcm <leonardozcm@gmail.com> 
							
						 
						
							2023-08-30 10:32:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								3462fd5c96 
								
							 
						 
						
							
							
								
								Add arc gpt-j example ( #8840 )  
							
							 
							
							
							
						 
						
							2023-08-30 10:31:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								f42c0bad1b 
								
							 
						 
						
							
							
								
								LLM: update GPU doc ( #8845 )  
							
							 
							
							
							
						 
						
							2023-08-30 09:24:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jason Dai 
								
							 
						 
						
							
							
							
							
								
							
							
								aab7deab1f 
								
							 
						 
						
							
							
								
								Reorganize GPU examples ( #8844 )  
							
							 
							
							
							
						 
						
							2023-08-30 08:32:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								a386ad984e 
								
							 
						 
						
							
							
								
								Add Data Center GPU Flex Series to Readme ( #8835 )  
							
							 
							
							... 
							
							
							
							* Add Data Center GPU Flex Series to Readme
* remove
* update starcoder 
							
						 
						
							2023-08-29 11:19:09 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								7429ea0606 
								
							 
						 
						
							
							
								
								[LLM] support transformer int4 + amx int4 ( #8838 )  
							
							 
							
							
							
						 
						
							2023-08-29 17:27:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								ddff7a6f05 
								
							 
						 
						
							
							
								
								Update readme of GPU to specify oneapi version ( #8820 )  
							
							 
							
							
							
						 
						
							2023-08-29 13:14:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									xingyuan li 
								
							 
						 
						
							
							
							
							
								
							
							
								67052198eb 
								
							 
						 
						
							
							
								
								[LLM] Build with multiprocess ( #8797 )  
							
							 
							
							... 
							
							
							
							* build with multiprocess 
							
						 
						
							2023-08-29 10:49:52 +09:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								bb31d4fe80 
								
							 
						 
						
							
							
								
								LLM: Implement hf low_cpu_mem_usage with 1xbinary file peak memory on transformer int4 ( #8731 )  
							
							 
							
							... 
							
							
							
							* 1x peak memory 
							
						 
						
							2023-08-29 09:33:17 +08:00