Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								1ac193ba02 
								
							 
						 
						
							
							
								
								add rope theta argument ( #10343 )  
							
							 
							
							
							
						 
						
							2024-03-07 17:27:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuxuan Xia 
								
							 
						 
						
							
							
							
							
								
							
							
								0c8d3c9830 
								
							 
						 
						
							
							
								
								Add C-Eval HTML report ( #10294 )  
							
							 
							
							... 
							
							
							
							* Add C-Eval HTML report
* Fix C-Eval workflow pr trigger path
* Fix C-Eval workflow typos
* Add permissions to C-Eval workflow
* Fix C-Eval workflow typo
* Add pandas dependency
* Fix C-Eval workflow typo 
							
						 
						
							2024-03-07 16:44:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								496d18ab6d 
								
							 
						 
						
							
							
								
								LLM: add quantize kv cache support for baichuan 7b and 13b. ( #10330 )  
							
							 
							
							... 
							
							
							
							* add quantize kv cache for baichuan 7b and 13b.
* fix typo.
* fix.
* fix style.
* fix style. 
							
						 
						
							2024-03-07 16:17:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									hxsz1997 
								
							 
						 
						
							
							
							
							
								
							
							
								b7db21414e 
								
							 
						 
						
							
							
								
								Update llamaindex ut ( #10338 )  
							
							 
							
							... 
							
							
							
							* add test_llamaindex of gpu
* add llamaindex gpu tests bash
* add llamaindex cpu tests bash
* update name of Run LLM langchain GPU test
* import llama_index in llamaindex gpu ut
* update the dependency of test_llamaindex
* add Run LLM llamaindex GPU test
* modify import dependency of llamaindex cpu test
* add Run LLM llamaindex test
* update llama_model_path
* delete unused model path
* add LLAMA2_7B_ORIGIN_PATH in llamaindex cpu test 
							
						 
						
							2024-03-07 10:06:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								fa69fed58f 
								
							 
						 
						
							
							
								
								Small fixes to oneAPI link ( #10339 )  
							
							 
							
							
							
						 
						
							2024-03-07 09:56:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									ZehuaCao 
								
							 
						 
						
							
							
							
							
								
							
							
								267de7abc3 
								
							 
						 
						
							
							
								
								fix fschat DEP version error ( #10325 )  
							
							 
							
							
							
						 
						
							2024-03-06 16:15:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								9ea499ca68 
								
							 
						 
						
							
							
								
								Optimize speculative decoding PVC memory usage ( #10329 )  
							
							 
							
							... 
							
							
							
							* optimize memory
* update
* update
* update
* support other models
* update
* fix style 
							
						 
						
							2024-03-06 09:54:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									dingbaorong 
								
							 
						 
						
							
							
							
							
								
							
							
								cc796848ea 
								
							 
						 
						
							
							
								
								fix typos ( #10274 )  
							
							 
							
							... 
							
							
							
							Co-authored-by: Ariadne <wyn2000330@126.com> 
							
						 
						
							2024-03-05 18:38:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									hxsz1997 
								
							 
						 
						
							
							
							
							
								
							
							
								af11c53473 
								
							 
						 
						
							
							
								
								Add the installation step of postgresql and pgvector on windows in LlamaIndex GPU  support ( #10328 )  
							
							 
							
							... 
							
							
							
							* add the installation of postgresql and pgvector of windows
* fix some format 
							
						 
						
							2024-03-05 18:31:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								0011ff9f64 
								
							 
						 
						
							
							
								
								optimize bge large performance ( #10324 )  
							
							 
							
							
							
						 
						
							2024-03-05 17:06:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
							
							
								
							
							
								178eea5009 
								
							 
						 
						
							
							
								
								upload bigdl-llm wheel to sourceforge for backup ( #10321 )  
							
							 
							
							... 
							
							
							
							* test: upload to sourceforge
* update scripts
* revert 
							
						 
						
							2024-03-05 16:36:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								30d009bca7 
								
							 
						 
						
							
							
								
								LLM: support quantized kv cache for Mistral in transformers >=4.36.0 ( #10326 )  
							
							 
							
							... 
							
							
							
							* support quantize kv for mistral in transformers 4.36
* update mistral support.
* fix style. 
							
						 
						
							2024-03-05 16:23:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								566e9bbb36 
								
							 
						 
						
							
							
								
								[LLM Doc] Restructure ( #10322 )  
							
							 
							
							... 
							
							
							
							* Add quick link guide to sidebar
* Add QuickStart to TOC
* Update quick links in main page
* Hide some section in More for top nav bar
* Resturct FAQ sections
* Small fix 
							
						 
						
							2024-03-05 14:35:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									dingbaorong 
								
							 
						 
						
							
							
							
							
								
							
							
								1e6f0c6f1a 
								
							 
						 
						
							
							
								
								Add llamaindex gpu example ( #10314 )  
							
							 
							
							... 
							
							
							
							* add llamaindex example
* fix core dump
* refine readme
* add trouble shooting
* refine readme
---------
Co-authored-by: Ariadne <wyn2000330@126.com> 
							
						 
						
							2024-03-05 13:36:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									dingbaorong 
								
							 
						 
						
							
							
							
							
								
							
							
								fc7f10cd12 
								
							 
						 
						
							
							
								
								add langchain gpu example ( #10277 )  
							
							 
							
							... 
							
							
							
							* first draft
* fix
* add readme for transformer_int4_gpu
* fix doc
* check device_map
* add arc ut test
* fix ut test
* fix langchain ut
* Refine README
* fix gpu mem too high
* fix ut test
---------
Co-authored-by: Ariadne <wyn2000330@126.com> 
							
						 
						
							2024-03-05 13:33:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								5dbbe1a826 
								
							 
						 
						
							
							
								
								[LLM] Support for new arc ut runner ( #10311 )  
							
							 
							
							... 
							
							
							
							* Support for new arc ut runner
* Comment unnecessary OMP_NUM_THREADS related settings for arc uts 
							
						 
						
							2024-03-04 18:42:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								d45e577d8c 
								
							 
						 
						
							
							
								
								[LLM] Test load_low_bit in iGPU perf test on Windows ( #10313 )  
							
							 
							
							
							
						 
						
							2024-03-04 18:03:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								fd81d66047 
								
							 
						 
						
							
							
								
								LLM: Compress some models to save space ( #10315 )  
							
							 
							
							... 
							
							
							
							* LLM: compress some models to save space
* add deleted comments 
							
						 
						
							2024-03-04 17:53:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
							
							
								
							
							
								bab2ee5f9e 
								
							 
						 
						
							
							
								
								update nightly spr perf test ( #10178 )  
							
							 
							
							... 
							
							
							
							* update nightly spr perf test
* update
* update runner lable
* update
* update
* update folder
* revert 
							
						 
						
							2024-03-04 13:46:33 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								ab9fc2485f 
								
							 
						 
						
							
							
								
								LLM: add quantize kv support for llama transformer 4.36 ( #10298 )  
							
							 
							
							... 
							
							
							
							* add quantize kv support for llama transformer 4.36
* fix style.
* fix style. 
							
						 
						
							2024-03-04 10:33:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
							
							
								
							
							
								57e211dab4 
								
							 
						 
						
							
							
								
								topLevel 'contents' permission set to 'read' ( #10295 )  
							
							 
							
							
							
						 
						
							2024-03-04 10:33:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								58208a5883 
								
							 
						 
						
							
							
								
								Update FAQ document. ( #10300 )  
							
							 
							
							... 
							
							
							
							* Update install_gpu.md
* Update resolve_error.md
* Update README.md
* Update resolve_error.md
* Update README.md
* Update resolve_error.md 
							
						 
						
							2024-03-04 08:35:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jason Dai 
								
							 
						 
						
							
							
							
							
								
							
							
								4cb4db618d 
								
							 
						 
						
							
							
								
								Update WebUI quickstart ( #10305 )  
							
							 
							
							
							
						 
						
							2024-03-03 22:18:26 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								27d9a14989 
								
							 
						 
						
							
							
								
								[LLM] all-on-one update: memory optimize and streaming output ( #10302 )  
							
							 
							
							... 
							
							
							
							* Memory saving for continous in-out pair run and add support for streaming output on MTL iGPU
* Small fix
* Small fix
* Add things back 
							
						 
						
							2024-03-01 18:02:30 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jason Dai 
								
							 
						 
						
							
							
							
							
								
							
							
								367b1db4f7 
								
							 
						 
						
							
							
								
								Update readme ( #10303 )  
							
							 
							
							
							
						 
						
							2024-03-01 17:37:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shengsheng Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								1db20dd1d0 
								
							 
						 
						
							
							
								
								add warmup advice in quickstart ( #10293 )  
							
							 
							
							
							
						 
						
							2024-03-01 17:15:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								0ab40917fb 
								
							 
						 
						
							
							
								
								[LLM] Split merged_qk to separated q/k linear ( #10299 )  
							
							 
							
							... 
							
							
							
							* modify merge_qk_linear to separated q/k linear
* update 
							
						 
						
							2024-03-01 16:48:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								f4d7dbcde2 
								
							 
						 
						
							
							
								
								use fused qkv forward in qwen2 ( #10185 )  
							
							 
							
							... 
							
							
							
							* use fused qkv forward in qwen2
* support both
* fix style
* fix rope
* remove pring
* fix style
* clean up 
							
						 
						
							2024-03-01 16:46:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								509e206de0 
								
							 
						 
						
							
							
								
								update doc about gemma random and unreadable output. ( #10297 )  
							
							 
							
							... 
							
							
							
							* Update install_gpu.md
* Update README.md
* Update README.md 
							
						 
						
							2024-03-01 15:41:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								beb9433cec 
								
							 
						 
						
							
							
								
								LLM: Reduce speculative _ipex_optimize_model memory use ( #10281 )  
							
							 
							
							... 
							
							
							
							* use tpp
* update ipex 
							
						 
						
							2024-03-01 13:48:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								f0ff0eebe1 
								
							 
						 
						
							
							
								
								[LLM] Support quantize kv cache for Baichuan2 7B ( #10280 )  
							
							 
							
							... 
							
							
							
							* Add quatized kv cache framework for Baichuan2 7B
* Support quantize kv cache for baichuan2
* Small fix
* Fix python style 
							
						 
						
							2024-03-01 13:35:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								273de341d7 
								
							 
						 
						
							
							
								
								hot-fix silu error import ( #10292 )  
							
							 
							
							
							
						 
						
							2024-03-01 10:11:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shengsheng Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								90f2f82638 
								
							 
						 
						
							
							
								
								revise webui quickstart ( #10287 )  
							
							 
							
							
							
						 
						
							2024-03-01 10:04:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jason Dai 
								
							 
						 
						
							
							
							
							
								
							
							
								14814abab8 
								
							 
						 
						
							
							
								
								Update README.md ( #10286 )  
							
							 
							
							
							
						 
						
							2024-02-29 20:00:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shengsheng Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								bcfad555df 
								
							 
						 
						
							
							
								
								revise llamaindex readme ( #10283 )  
							
							 
							
							
							
						 
						
							2024-02-29 17:19:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								232273a1b5 
								
							 
						 
						
							
							
								
								Enable Gemma fused mlp + Gelu ( #10276 )  
							
							 
							
							... 
							
							
							
							* update llama mlp forward
* add all
* fix style check
* split
* update
* update
* update
* fix style 
							
						 
						
							2024-02-29 16:53:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
							
							
								
							
							
								2d930bdca8 
								
							 
						 
						
							
							
								
								Add vLLM bf16 support ( #10278 )  
							
							 
							
							... 
							
							
							
							* add argument load_in_low_bit
* add docs
* modify gpu doc
* done
---------
Co-authored-by: ivy-lv11 <lvzc@lamda.nju.edu.cn> 
							
						 
						
							2024-02-29 16:33:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								13b0bc9075 
								
							 
						 
						
							
							
								
								[LLM] Add quantize_kv optimization for yuan2 model ( #10243 )  
							
							 
							
							... 
							
							
							
							* add initial quantize_kv support for yuan2 model
* fix yuan2 quantize_kv generation
* apply fp16 conv layer optimizations
* disable mlp for quantize_kv 
							
						 
						
							2024-02-29 16:33:26 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Lilac09 
								
							 
						 
						
							
							
							
							
								
							
							
								a2ed4d714e 
								
							 
						 
						
							
							
								
								Fix vllm service error ( #10279 )  
							
							 
							
							
							
						 
						
							2024-02-29 15:45:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhicun 
								
							 
						 
						
							
							
							
							
								
							
							
								4e6cc424f1 
								
							 
						 
						
							
							
								
								Add LlamaIndex RAG ( #10263 )  
							
							 
							
							... 
							
							
							
							* run demo
* format code
* add llamaindex
* add custom LLM with bigdl
* update
* add readme
* begin ut
* add unit test
* add license
* add license
* revised
* update
* modify docs
* remove data folder
* update
* modify prompt
* fixed
* fixed
* fixed 
							
						 
						
							2024-02-29 15:21:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								5d7243067c 
								
							 
						 
						
							
							
								
								LLM: add Baichuan2-13B-Chat 2048-256 to MTL perf ( #10273 )  
							
							 
							
							
							
						 
						
							2024-02-29 13:48:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								a9fd20b6ba 
								
							 
						 
						
							
							
								
								LLM: Update qkv fusion for GGUF-IQ2 ( #10271 )  
							
							 
							
							... 
							
							
							
							* first commit
* update mistral
* fix transformers==4.36.0
* fix
* disable qk for mixtral now
* fix style 
							
						 
						
							2024-02-29 12:49:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jiao Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								6fb65bb9d2 
								
							 
						 
						
							
							
								
								fix in transformers 4.36 ( #10150 )  
							
							 
							
							
							
						 
						
							2024-02-28 18:43:01 -08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shengsheng Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								43dac97e03 
								
							 
						 
						
							
							
								
								Update README.md ( #10260 )  
							
							 
							
							
							
						 
						
							2024-02-29 10:41:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cheen Hau, 俊豪 
								
							 
						 
						
							
							
							
							
								
							
							
								653cb500ed 
								
							 
						 
						
							
							
								
								Add webUI quickstart ( #10266 )  
							
							 
							
							... 
							
							
							
							* Add webUI quickstart
* Add GPU driver install
* Move images to readthedocs assets 
							
						 
						
							2024-02-29 10:08:06 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								4b08bc1417 
								
							 
						 
						
							
							
								
								LLM: relax batch check of flash atttention by double check attention mask ( #10270 )  
							
							 
							
							... 
							
							
							
							* relax batch check
* fix
* fix style 
							
						 
						
							2024-02-29 09:39:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								07f36fbfcc 
								
							 
						 
						
							
							
								
								Fix gptj failed to extend ( #10269 )  
							
							 
							
							
							
						 
						
							2024-02-29 09:39:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jason Dai 
								
							 
						 
						
							
							
							
							
								
							
							
								1572b6f7c3 
								
							 
						 
						
							
							
								
								Add quickstart ( #10272 )  
							
							 
							
							
							
						 
						
							2024-02-29 08:46:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									hxsz1997 
								
							 
						 
						
							
							
							
							
								
							
							
								925aff730e 
								
							 
						 
						
							
							
								
								Integrate the result of ppl and harness ( #10265 )  
							
							 
							
							... 
							
							
							
							* modify NIGHTLY_MATRIX_PRECISION
* change ACC_FOLDER of harness
* change ACC_FOLDER of ppl 
							
						 
						
							2024-02-28 17:53:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								cccb02dad1 
								
							 
						 
						
							
							
								
								fix baichuan2 13b 2k input ( #10267 )  
							
							 
							
							
							
						 
						
							2024-02-28 17:20:20 +08:00