Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								d72c0fad0d 
								
							 
						 
						
							
							
								
								Qwen2 SDPA forward on CPU ( #10395 )  
							
							 
							
							... 
							
							
							
							* Fix Qwen1.5 CPU forward
* Update convert.py
* Update qwen2.py 
							
						 
						
							2024-03-13 13:10:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								ca58a69b97 
								
							 
						 
						
							
							
								
								fix arc rms norm UT ( #10394 )  
							
							 
							
							
							
						 
						
							2024-03-13 13:09:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								0193f29411 
								
							 
						 
						
							
							
								
								LLM : Enable  gguf float16 and Yuan2 model ( #10372 )  
							
							 
							
							... 
							
							
							
							* enable float16
* add yuan files
* enable yuan
* enable set low_bit on yuan2
* update
* update license
* update generate
* update readme
* update python style
* update 
							
						 
						
							2024-03-13 10:19:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								f5d65203c0 
								
							 
						 
						
							
							
								
								First token lm_head optimization ( #10318 )  
							
							 
							
							... 
							
							
							
							* add lm head linear
* update
* address comments and fix style
* address comment 
							
						 
						
							2024-03-13 10:11:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Keyan (Kyrie) Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								7cf01e6ec8 
								
							 
						 
						
							
							
								
								Add LangChain upstream ut test ( #10349 )  
							
							 
							
							... 
							
							
							
							* Add LangChain upstream ut test
* Add LangChain upstream ut test
* Specify version numbers in yml script
* Correct langchain-community version 
							
						 
						
							2024-03-13 09:52:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								28c4a8cf5c 
								
							 
						 
						
							
							
								
								Qwen fused qkv ( #10368 )  
							
							 
							
							... 
							
							
							
							* fused qkv + rope for qwen
* quantized kv cache
* fix
* update qwen
* fixed quantized qkv
* fix
* meet code review
* update split
* convert.py
* extend when not enough kv
* fix 
							
						 
						
							2024-03-12 17:39:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								741c2bf1df 
								
							 
						 
						
							
							
								
								use new rms norm ( #10384 )  
							
							 
							
							
							
						 
						
							2024-03-12 17:29:51 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
							
							
								
							
							
								0ded0b4b13 
								
							 
						 
						
							
							
								
								LLM: Enable BigDL IPEX optimization for int4 ( #10319 )  
							
							 
							
							... 
							
							
							
							Enable BigDL IPEX optimization for int4 
							
						 
						
							2024-03-12 17:08:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								5d7e044dbc 
								
							 
						 
						
							
							
								
								LLM: add low bit option in deepspeed autotp example ( #10382 )  
							
							 
							
							
							
						 
						
							2024-03-12 17:07:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								df3bcc0e65 
								
							 
						 
						
							
							
								
								LLM: remove english_quotes dataset ( #10370 )  
							
							 
							
							
							
						 
						
							2024-03-12 16:57:40 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								df2b84f7de 
								
							 
						 
						
							
							
								
								Enable kv cache on arc batch ( #10308 )  
							
							 
							
							
							
						 
						
							2024-03-12 16:46:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Lilac09 
								
							 
						 
						
							
							
							
							
								
							
							
								5809a3f5fe 
								
							 
						 
						
							
							
								
								Add run-hbm.sh & add user guide for spr and hbm ( #10357 )  
							
							 
							
							... 
							
							
							
							* add run-hbm.sh
* add spr and hbm guide
* only support quad mode
* only support quad mode
* update special cases
* update special cases 
							
						 
						
							2024-03-12 16:15:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								5d996a5caf 
								
							 
						 
						
							
							
								
								LLM: add benchmark script for deepspeed autotp on gpu ( #10380 )  
							
							 
							
							
							
						 
						
							2024-03-12 15:19:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Keyan (Kyrie) Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								f9c144dc4c 
								
							 
						 
						
							
							
								
								Fix final logits ut failure ( #10377 )  
							
							 
							
							... 
							
							
							
							* Fix final logits ut failure
* Fix final logits ut failure
* Remove Falcon from completion test for now
* Remove Falcon from unit test for now 
							
						 
						
							2024-03-12 14:34:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
							
							
								
							
							
								cc4148636d 
								
							 
						 
						
							
							
								
								[FastChat-integration] Add initial implementation for loader ( #10323 )  
							
							 
							
							... 
							
							
							
							* add initial implementation for loader
* add test method for model_loader
* data
* Refine 
							
						 
						
							2024-03-12 10:54:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								17bdb1a60b 
								
							 
						 
						
							
							
								
								LLM: add whisper models into nightly test ( #10193 )  
							
							 
							
							... 
							
							
							
							* LLM: add whisper models into nightly test
* small fix
* small fix
* add more whisper models
* test all cases
* test specific cases
* collect the csv
* store the result
* to html
* small fix
* small test
* test all cases
* modify whisper_csv_to_html 
							
						 
						
							2024-03-11 20:00:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								dbcfc5c2fa 
								
							 
						 
						
							
							
								
								LLM: fix error of 'AI-ModelScope/phi-2' hosted by ModelScope hub ( #10364 )  
							
							 
							
							
							
						 
						
							2024-03-11 16:19:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								fe27a6971c 
								
							 
						 
						
							
							
								
								LLM: update modelscope version ( #10367 )  
							
							 
							
							
							
						 
						
							2024-03-11 16:18:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chen, Zhentao 
								
							 
						 
						
							
							
							
							
								
							
							
								a425eaabfc 
								
							 
						 
						
							
							
								
								fix from_pretrained when device_map=None ( #10361 )  
							
							 
							
							... 
							
							
							
							* pr trigger
* fix error when device_map=None
* fix device_map=None 
							
						 
						
							2024-03-11 16:06:12 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								d7b765fd3f 
								
							 
						 
						
							
							
								
								serving xpu memory opt ( #10358 )  
							
							 
							
							
							
						 
						
							2024-03-11 15:21:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								be29833b2b 
								
							 
						 
						
							
							
								
								LLM: fix qwen2 ( #10356 )  
							
							 
							
							
							
						 
						
							2024-03-11 09:29:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhicun 
								
							 
						 
						
							
							
							
							
								
							
							
								9026c08633 
								
							 
						 
						
							
							
								
								Fix llamaindex AutoTokenizer bug ( #10345 )  
							
							 
							
							... 
							
							
							
							* fix tokenizer
* fix AutoTokenizer bug
* modify code style 
							
						 
						
							2024-03-08 16:24:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhicun 
								
							 
						 
						
							
							
							
							
								
							
							
								2a10b53d73 
								
							 
						 
						
							
							
								
								rename docqa.py->rag.py ( #10353 )  
							
							 
							
							
							
						 
						
							2024-03-08 16:07:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Keyan (Kyrie) Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								f1825d7408 
								
							 
						 
						
							
							
								
								Add RMSNorm unit test ( #10190 )  
							
							 
							
							
							
						 
						
							2024-03-08 15:51:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shengsheng Huang 
								
							 
						 
						
							
							
							
							
								
							
							
								370c52090c 
								
							 
						 
						
							
							
								
								Langchain readme ( #10348 )  
							
							 
							
							... 
							
							
							
							* update langchain readme
* update readme
* create new README
* Update README_nativeint4.md 
							
						 
						
							2024-03-08 14:57:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Keyan (Kyrie) Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								7a621a4db0 
								
							 
						 
						
							
							
								
								Fix device_map bug by raise an error when using device_map=xpu ( #10340 )  
							
							 
							
							... 
							
							
							
							* Fix device_map bug by raise an error when using device_map=xpu
* Fix sync error
* Fix python style
* Use invalidInputError instead of invalidOperationError 
							
						 
						
							2024-03-08 13:38:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								1ac193ba02 
								
							 
						 
						
							
							
								
								add rope theta argument ( #10343 )  
							
							 
							
							
							
						 
						
							2024-03-07 17:27:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuxuan Xia 
								
							 
						 
						
							
							
							
							
								
							
							
								0c8d3c9830 
								
							 
						 
						
							
							
								
								Add C-Eval HTML report ( #10294 )  
							
							 
							
							... 
							
							
							
							* Add C-Eval HTML report
* Fix C-Eval workflow pr trigger path
* Fix C-Eval workflow typos
* Add permissions to C-Eval workflow
* Fix C-Eval workflow typo
* Add pandas dependency
* Fix C-Eval workflow typo 
							
						 
						
							2024-03-07 16:44:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								496d18ab6d 
								
							 
						 
						
							
							
								
								LLM: add quantize kv cache support for baichuan 7b and 13b. ( #10330 )  
							
							 
							
							... 
							
							
							
							* add quantize kv cache for baichuan 7b and 13b.
* fix typo.
* fix.
* fix style.
* fix style. 
							
						 
						
							2024-03-07 16:17:38 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									hxsz1997 
								
							 
						 
						
							
							
							
							
								
							
							
								b7db21414e 
								
							 
						 
						
							
							
								
								Update llamaindex ut ( #10338 )  
							
							 
							
							... 
							
							
							
							* add test_llamaindex of gpu
* add llamaindex gpu tests bash
* add llamaindex cpu tests bash
* update name of Run LLM langchain GPU test
* import llama_index in llamaindex gpu ut
* update the dependency of test_llamaindex
* add Run LLM llamaindex GPU test
* modify import dependency of llamaindex cpu test
* add Run LLM llamaindex test
* update llama_model_path
* delete unused model path
* add LLAMA2_7B_ORIGIN_PATH in llamaindex cpu test 
							
						 
						
							2024-03-07 10:06:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									ZehuaCao 
								
							 
						 
						
							
							
							
							
								
							
							
								267de7abc3 
								
							 
						 
						
							
							
								
								fix fschat DEP version error ( #10325 )  
							
							 
							
							
							
						 
						
							2024-03-06 16:15:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								9ea499ca68 
								
							 
						 
						
							
							
								
								Optimize speculative decoding PVC memory usage ( #10329 )  
							
							 
							
							... 
							
							
							
							* optimize memory
* update
* update
* update
* support other models
* update
* fix style 
							
						 
						
							2024-03-06 09:54:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									dingbaorong 
								
							 
						 
						
							
							
							
							
								
							
							
								cc796848ea 
								
							 
						 
						
							
							
								
								fix typos ( #10274 )  
							
							 
							
							... 
							
							
							
							Co-authored-by: Ariadne <wyn2000330@126.com> 
							
						 
						
							2024-03-05 18:38:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									hxsz1997 
								
							 
						 
						
							
							
							
							
								
							
							
								af11c53473 
								
							 
						 
						
							
							
								
								Add the installation step of postgresql and pgvector on windows in LlamaIndex GPU  support ( #10328 )  
							
							 
							
							... 
							
							
							
							* add the installation of postgresql and pgvector of windows
* fix some format 
							
						 
						
							2024-03-05 18:31:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								0011ff9f64 
								
							 
						 
						
							
							
								
								optimize bge large performance ( #10324 )  
							
							 
							
							
							
						 
						
							2024-03-05 17:06:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
							
							
								
							
							
								178eea5009 
								
							 
						 
						
							
							
								
								upload bigdl-llm wheel to sourceforge for backup ( #10321 )  
							
							 
							
							... 
							
							
							
							* test: upload to sourceforge
* update scripts
* revert 
							
						 
						
							2024-03-05 16:36:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								30d009bca7 
								
							 
						 
						
							
							
								
								LLM: support quantized kv cache for Mistral in transformers >=4.36.0 ( #10326 )  
							
							 
							
							... 
							
							
							
							* support quantize kv for mistral in transformers 4.36
* update mistral support.
* fix style. 
							
						 
						
							2024-03-05 16:23:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									dingbaorong 
								
							 
						 
						
							
							
							
							
								
							
							
								1e6f0c6f1a 
								
							 
						 
						
							
							
								
								Add llamaindex gpu example ( #10314 )  
							
							 
							
							... 
							
							
							
							* add llamaindex example
* fix core dump
* refine readme
* add trouble shooting
* refine readme
---------
Co-authored-by: Ariadne <wyn2000330@126.com> 
							
						 
						
							2024-03-05 13:36:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									dingbaorong 
								
							 
						 
						
							
							
							
							
								
							
							
								fc7f10cd12 
								
							 
						 
						
							
							
								
								add langchain gpu example ( #10277 )  
							
							 
							
							... 
							
							
							
							* first draft
* fix
* add readme for transformer_int4_gpu
* fix doc
* check device_map
* add arc ut test
* fix ut test
* fix langchain ut
* Refine README
* fix gpu mem too high
* fix ut test
---------
Co-authored-by: Ariadne <wyn2000330@126.com> 
							
						 
						
							2024-03-05 13:33:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								5dbbe1a826 
								
							 
						 
						
							
							
								
								[LLM] Support for new arc ut runner ( #10311 )  
							
							 
							
							... 
							
							
							
							* Support for new arc ut runner
* Comment unnecessary OMP_NUM_THREADS related settings for arc uts 
							
						 
						
							2024-03-04 18:42:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								d45e577d8c 
								
							 
						 
						
							
							
								
								[LLM] Test load_low_bit in iGPU perf test on Windows ( #10313 )  
							
							 
							
							
							
						 
						
							2024-03-04 18:03:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								fd81d66047 
								
							 
						 
						
							
							
								
								LLM: Compress some models to save space ( #10315 )  
							
							 
							
							... 
							
							
							
							* LLM: compress some models to save space
* add deleted comments 
							
						 
						
							2024-03-04 17:53:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
							
							
								
							
							
								bab2ee5f9e 
								
							 
						 
						
							
							
								
								update nightly spr perf test ( #10178 )  
							
							 
							
							... 
							
							
							
							* update nightly spr perf test
* update
* update runner label
* update
* update
* update folder
* revert 
							
						 
						
							2024-03-04 13:46:33 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								ab9fc2485f 
								
							 
						 
						
							
							
								
								LLM: add quantize kv support for llama transformer 4.36 ( #10298 )  
							
							 
							
							... 
							
							
							
							* add quantize kv support for llama transformer 4.36
* fix style.
* fix style. 
							
						 
						
							2024-03-04 10:33:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								58208a5883 
								
							 
						 
						
							
							
								
								Update FAQ document. ( #10300 )  
							
							 
							
							... 
							
							
							
							* Update install_gpu.md
* Update resolve_error.md
* Update README.md
* Update resolve_error.md
* Update README.md
* Update resolve_error.md 
							
						 
						
							2024-03-04 08:35:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								27d9a14989 
								
							 
						 
						
							
							
								
								[LLM] all-on-one update: memory optimize and streaming output ( #10302 )  
							
							 
							
							... 
							
							
							
							* Memory saving for continuous in-out pair run and add support for streaming output on MTL iGPU
* Small fix
* Small fix
* Add things back 
							
						 
						
							2024-03-01 18:02:30 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								0ab40917fb 
								
							 
						 
						
							
							
								
								[LLM] Split merged_qk to separated q/k linear ( #10299 )  
							
							 
							
							... 
							
							
							
							* modify merge_qk_linear to separated q/k linear
* update 
							
						 
						
							2024-03-01 16:48:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								f4d7dbcde2 
								
							 
						 
						
							
							
								
								use fused qkv forward in qwen2 ( #10185 )  
							
							 
							
							... 
							
							
							
							* use fused qkv forward in qwen2
* support both
* fix style
* fix rope
* remove print
* fix style
* clean up 
							
						 
						
							2024-03-01 16:46:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								509e206de0 
								
							 
						 
						
							
							
								
								update doc about gemma random and unreadable output. ( #10297 )  
							
							 
							
							... 
							
							
							
							* Update install_gpu.md
* Update README.md
* Update README.md 
							
						 
						
							2024-03-01 15:41:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								beb9433cec 
								
							 
						 
						
							
							
								
								LLM: Reduce speculative _ipex_optimize_model memory use ( #10281 )  
							
							 
							
							... 
							
							
							
							* use tpp
* update ipex 
							
						 
						
							2024-03-01 13:48:23 +08:00