Guancheng Fu 
								
							 
						 
						
							
							
							
							
								
							
							
								963a5c8d79 
								
							 
						 
						
							
							
								
								Add vLLM-XPU version's README/examples ( #9536 )  
							
							 
							
							... 
							
							
							
							* test
* test
* fix last kv cache
* add xpu readme
* remove numactl for xpu example
* fix link error
* update max_num_batched_tokens logic
* add explanation
* add xpu environment version requirement
* refine gpu memory
* fix
* fix style 
							
						 
						
							2023-11-28 09:44:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
							
							
								
							
							
								b6c3520748 
								
							 
						 
						
							
							
								
								Remove xformers from vLLM-CPU ( #9535 )  
							
							 
							
							
							
						 
						
							2023-11-27 11:21:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								2b9c7d2a59 
								
							 
						 
						
							
							
								
								LLM: quick fix alpaca qlora finetuning script ( #9534 )  
							
							 
							
							
							
						 
						
							2023-11-27 11:04:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								11fa3de290 
								
							 
						 
						
							
							
								
								Add setup support of win gpu for bigdl-llm ( #9512 )  
							
							 
							
							
							
						 
						
							2023-11-24 17:49:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chen, Zhentao 
								
							 
						 
						
							
							
							
							
								
							
							
								45820cf3b9 
								
							 
						 
						
							
							
								
								add optimize model option ( #9530 )  
							
							 
							
							
							
						 
						
							2023-11-24 17:10:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								6bec0faea5 
								
							 
						 
						
							
							
								
								LLM: support Mistral AWQ models ( #9520 )  
							
							 
							
							
							
						 
						
							2023-11-24 16:20:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								914a5a5a27 
								
							 
						 
						
							
							
								
								LLM: fix abnormal Mistral GPU accuracy by updating rms_norm ( #9529 )  
							
							 
							
							
							
						 
						
							2023-11-24 15:37:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								3d24823cda 
								
							 
						 
						
							
							
								
								hot-fix mistral kv_cache ( #9528 )  
							
							 
							
							
							
						 
						
							2023-11-24 14:33:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								42b7a16bc5 
								
							 
						 
						
							
							
								
								Replace torch.bmm with safe_bmm ( #9519 )  
							
							 
							
							... 
							
							
							
							* replace bmm with safe one
* rename args and deprecated warning 
							
						 
						
							2023-11-24 12:16:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jason Dai 
								
							 
						 
						
							
							
							
							
								
							
							
								b3178d449f 
								
							 
						 
						
							
							
								
								Update README.md ( #9525 )  
							
							 
							
							
							
						 
						
							2023-11-23 21:45:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jason Dai 
								
							 
						 
						
							
							
							
							
								
							
							
								82898a4203 
								
							 
						 
						
							
							
								
								Update GPU example README ( #9524 )  
							
							 
							
							
							
						 
						
							2023-11-23 21:20:26 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jason Dai 
								
							 
						 
						
							
							
							
							
								
							
							
								064848028f 
								
							 
						 
						
							
							
								
								Update README.md ( #9523 )  
							
							 
							
							
							
						 
						
							2023-11-23 21:16:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								b63aae8a8e 
								
							 
						 
						
							
							
								
								LLM: add flash attention support for llama ( #9518 )  
							
							 
							
							... 
							
							
							
							* add initial flash attention for llama
* accelerate fp32 first token by changing to fp16 in advance
* support fp32 
							
						 
						
							2023-11-23 18:40:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guancheng Fu 
								
							 
						 
						
							
							
							
							
								
							
							
								bf579507c2 
								
							 
						 
						
							
							
								
								Integrate vllm ( #9310 )  
							
							 
							
							... 
							
							
							
							* done
* Rename structure
* add models
* Add structure/sampling_params,sequence
* add input_metadata
* add outputs
* Add policy,logger
* add and update
* add parallelconfig back
* core/scheduler.py
* Add llm_engine.py
* Add async_llm_engine.py
* Add tested entrypoint
* fix minor error
* Fix everything
* fix kv cache view
* fix
* fix
* fix
* format&refine
* remove logger from repo
* try to add token latency
* remove logger
* Refine config.py
* finish worker.py
* delete utils.py
* add license
* refine
* refine sequence.py
* remove sampling_params.py
* finish
* add license
* format
* add license
* refine
* refine
* Refine line too long
* remove exception
* so dumb style-check
* refine
* refine
* refine
* refine
* refine
* refine
* add README
* refine README
* add warning instead error
* fix padding
* add license
* format
* format
* format fix
* Refine vllm dependency (#1 )
vllm dependency clear
* fix licence
* fix format
* fix format
* fix
* adapt LLM engine
* fix
* add license
* fix format
* fix
* Moving README.md to the correct position
* Fix readme.md
* done
* guide for adding models
* fix
* Fix README.md
* Add new model readme
* remove ray-logic
* refactor arg_utils.py
* remove distributed_init_method logic
* refactor entrypoints
* refactor input_metadata
* refactor model_loader
* refactor utils.py
* refactor models
* fix api server
* remove vllm.stucture
* revert by txy 1120
* remove utils
* format
* fix license
* add bigdl model
* Refer to a specific commit
* Change code base
* add comments
* add async_llm_engine comment
* refine
* formatted
* add worker comments
* add comments
* add comments
* fix style
* add changes
---------
Co-authored-by: xiangyuT <xiangyu.tian@intel.com>
Co-authored-by: Xiangyu Tian <109123695+xiangyuT@users.noreply.github.com>
Co-authored-by: leonardozcm <leonardo1997zcm@gmail.com> 
							
						 
						
							2023-11-23 16:46:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								48fbb1eb94 
								
							 
						 
						
							
							
								
								support ccl (MPI) distributed mode in alpaca_qlora_finetuning_cpu ( #9507 )  
							
							 
							
							
							
						 
						
							2023-11-23 10:58:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Qiyuan Gong 
								
							 
						 
						
							
							
							
							
								
							
							
								0f0c6bb631 
								
							 
						 
						
							
							
								
								[LLM] Fix Qwen registered_causal_mask is None ( #9513 )  
							
							 
							
							... 
							
							
							
							* Add registered_causal_mask init based on 2abd8e5777 . 
							
						 
						
							2023-11-23 09:28:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								11fa5a8a0e 
								
							 
						 
						
							
							
								
								Fix QLoRA CPU dispatch_model issue about accelerate ( #9506 )  
							
							 
							
							
							
						 
						
							2023-11-23 08:41:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								1453046938 
								
							 
						 
						
							
							
								
								install bigdl-llm in deepspeed cpu inference example ( #9508 )  
							
							 
							
							
							
						 
						
							2023-11-23 08:39:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								86743fb57b 
								
							 
						 
						
							
							
								
								LLM: fix transformers version in CPU finetuning example ( #9511 )  
							
							 
							
							
							
						 
						
							2023-11-22 15:53:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								1a2129221d 
								
							 
						 
						
							
							
								
								LLM: support resume from checkpoint in Alpaca QLoRA ( #9502 )  
							
							 
							
							
							
						 
						
							2023-11-22 13:49:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								139e98aa18 
								
							 
						 
						
							
							
								
								LLM: quick fix benchmark ( #9509 )  
							
							 
							
							
							
						 
						
							2023-11-22 10:19:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								c2aeb4d1e8 
								
							 
						 
						
							
							
								
								del model after test ( #9504 )  
							
							 
							
							
							
						 
						
							2023-11-21 18:41:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								076d106ef5 
								
							 
						 
						
							
							
								
								LLM: GPU QLoRA update to bf16 to accelerate gradient checkpointing ( #9499 )  
							
							 
							
							... 
							
							
							
							* update to bf16 to accelerate gradient checkpoint
* add utils and fix ut 
							
						 
						
							2023-11-21 17:08:36 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cheen Hau, 俊豪 
								
							 
						 
						
							
							
							
							
								
							
							
								3e39828420 
								
							 
						 
						
							
							
								
								Update all in one benchmark readme ( #9496 )  
							
							 
							
							... 
							
							
							
							* Add gperftools install to all in one benchmark readme
* Update readme 
							
						 
						
							2023-11-21 14:57:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								b7ae572ac3 
								
							 
						 
						
							
							
								
								LLM: update Alpaca QLoRA finetuning example on GPU ( #9492 )  
							
							 
							
							
							
						 
						
							2023-11-21 14:22:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								c5cb3ab82e 
								
							 
						 
						
							
							
								
								LLM : Add CPU alpaca qlora example ( #9469 )  
							
							 
							
							... 
							
							
							
							* init
* update xpu to cpu
* update
* update readme
* update example
* update
* add refer
* add guide to train different datasets
* update readme
* update 
							
						 
						
							2023-11-21 09:19:58 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								96fd26759c 
								
							 
						 
						
							
							
								
								LLM: fix QLoRA finetuning example on CPU ( #9489 )  
							
							 
							
							
							
						 
						
							2023-11-20 14:31:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								50b01058f1 
								
							 
						 
						
							
							
								
								enable new q4_1 ( #9479 )  
							
							 
							
							
							
						 
						
							2023-11-17 14:58:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								3dac21ac7b 
								
							 
						 
						
							
							
								
								LLM: add more example usages about alpaca qlora on different hardware ( #9458 )  
							
							 
							
							
							
						 
						
							2023-11-17 09:56:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								921b263d6a 
								
							 
						 
						
							
							
								
								update deepspeed install and run guide in README ( #9441 )  
							
							 
							
							
							
						 
						
							2023-11-17 09:11:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
							
							
								
							
							
								30abd304a7 
								
							 
						 
						
							
							
								
								LLM: Fix baichuan pre-normalize model tensor assigning issue when loading ( #9481 )  
							
							 
							
							... 
							
							
							
							* No need to normalized when loading 
							
						 
						
							2023-11-16 21:57:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								bc06bec90e 
								
							 
						 
						
							
							
								
								LLM: modify the script to generate html results more accurately ( #9445 )  
							
							 
							
							... 
							
							
							
							* modify the script to generate html results more accurately
* resolve some comments
* revert some codes 
							
						 
						
							2023-11-16 19:50:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								c0ef70df02 
								
							 
						 
						
							
							
								
								llm: quick fix of fast_rms_norm ( #9480 )  
							
							 
							
							
							
						 
						
							2023-11-16 14:42:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								d5263e6681 
								
							 
						 
						
							
							
								
								Add awq load support ( #9453 )  
							
							 
							
							... 
							
							
							
							* Support directly loading GPTQ models from huggingface
* fix style
* fix tests
* change example structure
* address comments
* fix style
* init
* address comments
* add examples
* fix style
* fix style
* fix style
* fix style
* update
* remove
* meet comments
* fix style
---------
Co-authored-by: Yang Wang <yang3.wang@intel.com> 
							
						 
						
							2023-11-16 14:06:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								d2c064124a 
								
							 
						 
						
							
							
								
								LLM: update rms related usage to support ipex 2.1 new api ( #9466 )  
							
							 
							
							... 
							
							
							
							* update rms related usage
* fix style 
							
						 
						
							2023-11-16 11:21:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								731b0aaade 
								
							 
						 
						
							
							
								
								Empty cache after embedding to cpu ( #9477 )  
							
							 
							
							
							
						 
						
							2023-11-16 10:52:30 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								c487b53f21 
								
							 
						 
						
							
							
								
								LLM: only run arc perf test nightly ( #9448 )  
							
							 
							
							... 
							
							
							
							* LLM: only run arc perf test nightly
* deleted unused python scripts
* rebase main 
							
						 
						
							2023-11-15 19:38:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								0d55bbd9f1 
								
							 
						 
						
							
							
								
								LLM: adjust the order of some models ( #9470 )  
							
							 
							
							
							
						 
						
							2023-11-15 17:04:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								170e0072af 
								
							 
						 
						
							
							
								
								chatglm2 correctness test ( #9450 )  
							
							 
							
							... 
							
							
							
							* chatglm2 ut
* some update
* chatglm2 path
* fix
* add print 
							
						 
						
							2023-11-15 15:44:56 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								0f82b8c3a0 
								
							 
						 
						
							
							
								
								LLM: update qlora example ( #9454 )  
							
							 
							
							... 
							
							
							
							* update qlora example
* fix loss=0 
							
						 
						
							2023-11-15 09:24:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chen, Zhentao 
								
							 
						 
						
							
							
							
							
								
							
							
								dbbdb53a18 
								
							 
						 
						
							
							
								
								fix multiple gpu usage ( #9459 )  
							
							 
							
							
							
						 
						
							2023-11-14 17:06:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chen, Zhentao 
								
							 
						 
						
							
							
							
							
								
							
							
								d19ca21957 
								
							 
						 
						
							
							
								
								patch bigdl-llm model to harness by binding instead of patch file ( #9420 )  
							
							 
							
							... 
							
							
							
							* add run_llb.py
* fix args interpret
* modify outputs
* update workflow
* add license
* test mixed 4 bit
* update readme
* use autotokenizer
* add timeout
* refactor workflow file
* fix working directory
* fix env
* throw exception if some jobs failed
* improve terminal outputs
* Disable var which cause the run stuck
* fix unknown precision
* fix key error
* directly output config instead
* rm harness submodule 
							
						 
						
							2023-11-14 12:51:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								51d07a9fd8 
								
							 
						 
						
							
							
								
								Support directly loading gptq models from huggingface ( #9391 )  
							
							 
							
							... 
							
							
							
							* Support directly loading GPTQ models from huggingface
* fix style
* fix tests
* change example structure
* address comments
* fix style
* address comments 
							
						 
						
							2023-11-13 20:48:12 -08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								d109275333 
								
							 
						 
						
							
							
								
								temporarily disable the test of some models ( #9434 )  
							
							 
							
							
							
						 
						
							2023-11-13 18:50:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chen, Zhentao 
								
							 
						 
						
							
							
							
							
								
							
							
								0ecb9efb05 
								
							 
						 
						
							
							
								
								use AutoTokenizer to enable more models ( #9446 )  
							
							 
							
							
							
						 
						
							2023-11-13 17:47:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								ece5805572 
								
							 
						 
						
							
							
								
								LLM: add chatglm3-6b to latency benchmark test. ( #9442 )  
							
							 
							
							
							
						 
						
							2023-11-13 17:24:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chen, Zhentao 
								
							 
						 
						
							
							
							
							
								
							
							
								5747e2fe69 
								
							 
						 
						
							
							
								
								fix multiple gpu usage of harness ( #9444 )  
							
							 
							
							
							
						 
						
							2023-11-13 16:53:23 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								da6bbc8c11 
								
							 
						 
						
							
							
								
								fix deepspeed dependencies to install ( #9400 )  
							
							 
							
							... 
							
							
							
							* remove redundant parameter from deepspeed install
* Update install.sh
* Update install.sh 
							
						 
						
							2023-11-13 16:42:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								4faf5af8f1 
								
							 
						 
						
							
							
								
								[LLM] Add perf test for core on Windows ( #9397 )  
							
							 
							
							... 
							
							
							
							* temporary stop other perf test
* Add framework for core performance test with one test model
* Small fix and add platform control
* Comment out lp for now
* Add missing yaml file
* Small fix
* Fix sed contents
* Small fix
* Small path fixes
* Small fix
* Add update to ftp
* Small upload fix
* add chatglm3-6b
* LLM: add model names
* Keep repo id same as ftp and temporary make baichuan2 first priority
* change order
* Remove temp if false and separate pr and nightly results
* Small fix
---------
Co-authored-by: jinbridge <2635480475@qq.com> 
							
						 
						
							2023-11-13 13:58:40 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zheng, Yi 
								
							 
						 
						
							
							
							
							
								
							
							
								9b5d0e9c75 
								
							 
						 
						
							
							
								
								Add examples for Yi-6B ( #9421 )  
							
							 
							
							
							
						 
						
							2023-11-13 10:53:15 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								2888818b3a 
								
							 
						 
						
							
							
								
								[LLM] Support mixed_fp8 on Arc ( #9415 )  
							
							 
							
							... 
							
							
							
							* ut gpu allocation memory fix
* support mix_8bit on arc
* rename mixed_4bit to mixed_fp4 and mixed_8bit to mixed_fp8
* revert unexpected changes
* revert unexpected changes
* unify common logits
* rename in llm xmx_checker
* fix typo error and re-unify 
							
						 
						
							2023-11-13 09:26:30 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								ac7fbe77e2 
								
							 
						 
						
							
							
								
								Update qlora readme ( #9416 )  
							
							 
							
							
							
						 
						
							2023-11-12 19:29:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yining Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								d7334513e1 
								
							 
						 
						
							
							
								
								codeshell: fix wrong links ( #9417 )  
							
							 
							
							
							
						 
						
							2023-11-12 19:22:33 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zheng, Yi 
								
							 
						 
						
							
							
							
							
								
							
							
								0674146cfb 
								
							 
						 
						
							
							
								
								Add cpu and gpu examples of distil-whisper ( #9374 )  
							
							 
							
							... 
							
							
							
							* Add distil-whisper examples
* Fixes based on comments
* Minor fixes
---------
Co-authored-by: Ariadne330 <wyn2000330@126.com> 
							
						 
						
							2023-11-10 16:09:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ziteng Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								ad81b5d838 
								
							 
						 
						
							
							
								
								Update qlora README.md ( #9422 )  
							
							 
							
							
							
						 
						
							2023-11-10 15:19:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								b23b91407c 
								
							 
						 
						
							
							
								
								fix llm-init on deepspeed missing lib ( #9419 )  
							
							 
							
							
							
						 
						
							2023-11-10 13:51:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
							
							
								
							
							
								dfb00e37e9 
								
							 
						 
						
							
							
								
								[LLM] Add model correctness test on ARC for llama and falcon ( #9347 )  
							
							 
							
							... 
							
							
							
							* add correctness test on arc for llama model
* modify layer name
* add falcon ut
* refactor and add ut for falcon model
* modify lambda positions and update docs
* replace loading pre input with last decodelayer output
* switch lower bound to single model instead of using the common one
* make the code implementation simple
* fix gpu action allocation memory issue 
							
						 
						
							2023-11-10 13:48:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									dingbaorong 
								
							 
						 
						
							
							
							
							
								
							
							
								36fbe2144d 
								
							 
						 
						
							
							
								
								Add CPU examples of fuyu ( #9393 )  
							
							 
							
							... 
							
							
							
							* add fuyu cpu examples
* add gpu example
* add comments
* add license
* remove gpu example
* fix inference time 
							
						 
						
							2023-11-09 15:29:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								df8e4d7889 
								
							 
						 
						
							
							
								
								[LLM] apply allreduce and bias to training in LowBitLinear ( #9395 )  
							
							 
							
							
							
						 
						
							2023-11-09 14:35:54 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								40cead6b5b 
								
							 
						 
						
							
							
								
								LLM: Fix CPU qlora dtype convert issue ( #9394 )  
							
							 
							
							
							
						 
						
							2023-11-09 14:34:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								34449cb4bb 
								
							 
						 
						
							
							
								
								LLM: add remaining models to the arc perf test ( #9384 )  
							
							 
							
							... 
							
							
							
							* add remaining models
* modify the filepath which stores the test result on ftp server
* resolve some comments 
							
						 
						
							2023-11-09 14:28:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								bfca76dfa7 
								
							 
						 
						
							
							
								
								LLM: optimize QLoRA by updating lora convert logic ( #9372 )  
							
							 
							
							... 
							
							
							
							* update convert logic of qlora
* update
* refactor and further improve performance
* fix style
* meet code review 
							
						 
						
							2023-11-08 17:46:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								54d95e4907 
								
							 
						 
						
							
							
								
								LLM: add alpaca qlora finetuning example ( #9276 )  
							
							 
							
							
							
						 
						
							2023-11-08 16:25:17 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								97316bbb66 
								
							 
						 
						
							
							
								
								LLM: highlight transformers version requirement in mistral examples ( #9380 )  
							
							 
							
							
							
						 
						
							2023-11-08 16:05:03 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								7e8fb29b7c 
								
							 
						 
						
							
							
								
								LLM: optimize QLoRA by reducing convert time ( #9370 )  
							
							 
							
							
							
						 
						
							2023-11-08 13:14:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chen, Zhentao 
								
							 
						 
						
							
							
							
							
								
							
							
								298b64217e 
								
							 
						 
						
							
							
								
								add auto triggered acc test ( #9364 )  
							
							 
							
							... 
							
							
							
							* add auto triggered acc test
* use llama 7b instead
* fix env
* debug download
* fix download prefix
* add cut dirs
* fix env of model path
* fix dataset download
* full job
* source xpu env vars
* use matrix to trigger model run
* reset batch=1
* remove redirect
* remove some trigger
* add task matrix
* add precision list
* test llama-7b-chat
* use /mnt/disk1 to store model and datasets
* remove installation test
* correct downloading path
* fix HF vars
* add bigdl-llm env vars
* rename file
* fix hf_home
* fix script path
* rename as harness evalution
* rerun 
							
						 
						
							2023-11-08 10:22:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								bfd9f88f0d 
								
							 
						 
						
							
							
								
								[LLM] Use fp32 as dtype when batch_size <=8 and qtype is q4_0/q8_0/fp8 ( #9365 )  
							
							 
							
							
							
						 
						
							2023-11-08 09:54:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								84ab614aab 
								
							 
						 
						
							
							
								
								LLM: add more models and skip runtime error ( #9349 )  
							
							 
							
							... 
							
							
							
							* add more models and skip runtime error
* upgrade transformers
* temporarily removed Mistral-7B-v0.1
* temporarily disable the upload of arc perf result 
							
						 
						
							2023-11-08 09:45:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								fae6db3ddc 
								
							 
						 
						
							
							
								
								[LLM] refactor cpu low-bit forward logic ( #9366 )  
							
							 
							
							... 
							
							
							
							* [LLM] refactor cpu low-bit forward logic
* fix style
* Update low_bit_linear.py
* Update low_bit_linear.py
* refine 
							
						 
						
							2023-11-07 15:09:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
							
							
								
							
							
								af94058203 
								
							 
						 
						
							
							
								
								[LLM] Support CPU deepspeed distributed inference ( #9259 )  
							
							 
							
							... 
							
							
							
							* [LLM] Support CPU Deepspeed distributed inference
* Update run_deepspeed.py
* Rename
* fix style
* add new codes
* refine
* remove annotated codes
* refine
* Update README.md
* refine doc and example code 
							
						 
						
							2023-11-06 17:56:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								f9bf5382ff 
								
							 
						 
						
							
							
								
								Fix: add aquila2 in README ( #9362 )  
							
							 
							
							
							
						 
						
							2023-11-06 16:37:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								e6b6afa316 
								
							 
						 
						
							
							
								
								LLM: add aquila2 model example ( #9356 )  
							
							 
							
							
							
						 
						
							2023-11-06 15:47:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xin Qiu 
								
							 
						 
						
							
							
							
							
								
							
							
								1420e45cc0 
								
							 
						 
						
							
							
								
								Chatglm2 rope optimization on xpu ( #9350 )  
							
							 
							
							
							
						 
						
							2023-11-06 13:56:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yining Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								9377b9c5d7 
								
							 
						 
						
							
							
								
								add CodeShell CPU example ( #9345 )  
							
							 
							
							... 
							
							
							
							* add CodeShell CPU example
* fix some problems 
							
						 
						
							2023-11-03 13:15:54 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									ZehuaCao 
								
							 
						 
						
							
							
							
							
								
							
							
								ef83c3302e 
								
							 
						 
						
							
							
								
								Use to test llm-performance on spr-perf ( #9316 )  
							
							 
							
							... 
							
							
							
							* Update llm_performance_tests.yml
* Update llm_performance_tests.yml
* Update action.yml
* Create cpu-perf-test.yaml
* Update action.yml
* Update action.yml
* Update llm_performance_tests.yml
* Update llm_performance_tests.yml
* Update llm_performance_tests.yml
* Update llm_performance_tests.yml
* Update llm_performance_tests.yml
* Update llm_performance_tests.yml
* Update llm_performance_tests.yml
* Update llm_performance_tests.yml
* Update llm_performance_tests.yml
* Update llm_performance_tests.yml
* Update llm_performance_tests.yml 
							
						 
						
							2023-11-03 11:17:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
							
							
								
							
							
								a0150bb205 
								
							 
						 
						
							
							
								
								[LLM] Move embedding layer to CPU for iGPU inference ( #9343 )  
							
							 
							
							... 
							
							
							
							* Move embedding layer to CPU for iGPU llm inference
* Empty cache after to cpu
* Remove empty cache as it seems to have some negative effect to first token 
							
						 
						
							2023-11-03 11:13:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cheen Hau, 俊豪 
								
							 
						 
						
							
							
							
							
								
							
							
								8f23fb04dc 
								
							 
						 
						
							
							
								
								Add inference test for Whisper model on Arc ( #9330 )  
							
							 
							
							... 
							
							
							
							* Add inference test for Whisper model
* Remove unnecessary inference time measurement 
							
						 
						
							2023-11-03 10:15:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zheng, Yi 
								
							 
						 
						
							
							
							
							
								
							
							
								63411dff75 
								
							 
						 
						
							
							
								
								Add cpu examples of WizardCoder ( #9344 )  
							
							 
							
							... 
							
							
							
							* Add wizardcoder example
* Minor fixes 
							
						 
						
							2023-11-02 20:22:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									dingbaorong 
								
							 
						 
						
							
							
							
							
								
							
							
								2e3bfbfe1f 
								
							 
						 
						
							
							
								
								Add internlm_xcomposer cpu examples ( #9337 )  
							
							 
							
							... 
							
							
							
							* add internlm-xcomposer cpu examples
* use chat
* some fixes
* add license
* address shengsheng's comments
* use demo.jpg 
							
						 
						
							2023-11-02 15:50:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								97a38958bd 
								
							 
						 
						
							
							
								
								LLM: add CodeLlama CPU and GPU examples ( #9338 )  
							
							 
							
							... 
							
							
							
							* LLM: add codellama CPU pytorch examples
* LLM: add codellama CPU transformers examples
* LLM: add codellama GPU transformers examples
* LLM: add codellama GPU pytorch examples
* LLM: add codellama in readme
* LLM: add LLaVA link 
							
						 
						
							2023-11-02 15:34:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Chen, Zhentao 
								
							 
						 
						
							
							
							
							
								
							
							
								d4dffbdb62 
								
							 
						 
						
							
							
								
								Merge harness ( #9319 )  
							
							 
							
							... 
							
							
							
							* add harness patch and llb script
* add readme
* add license
* use patch instead
* update readme
* rename tests to evaluation
* fix typo
* remove nano dependency
* add original harness link
* rename title of usage
* rename BigDLGPULM as BigDLLM
* empty commit to rerun job 
							
						 
						
							2023-11-02 15:14:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zheng, Yi 
								
							 
						 
						
							
							
							
							
								
							
							
								63b2556ce2 
								
							 
						 
						
							
							
								
								Add cpu examples of skywork ( #9340 )  
							
							 
							
							
							
						 
						
							2023-11-02 15:10:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									dingbaorong 
								
							 
						 
						
							
							
							
							
								
							
							
								f855a864ef 
								
							 
						 
						
							
							
								
								add llava gpu example ( #9324 )  
							
							 
							
							... 
							
							
							
							* add llava gpu example
* use 7b model
* fix typo
* add in README 
							
						 
						
							2023-11-02 14:48:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ziteng Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								dd3cf2f153 
								
							 
						 
						
							
							
								
								LLM: Add python 3.10 & 3.11 UT  
							
							 
							
							... 
							
							
							
							LLM: Add python 3.10 & 3.11 UT 
							
						 
						
							2023-11-02 14:09:29 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
							
							
								
							
							
								149146004f 
								
							 
						 
						
							
							
								
								LLM: Add qlora finetunning CPU example ( #9275 )  
							
							 
							
							... 
							
							
							
							* add qlora finetunning example
* update readme
* update example
* remove merge.py and update readme 
							
						 
						
							2023-11-02 09:45:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								9722e811be 
								
							 
						 
						
							
							
								
								LLM: add more models to the arc perf test ( #9297 )  
							
							 
							
							... 
							
							
							
							* LLM: add more models to the arc perf test
* remove some old models
* install some dependencies 
							
						 
						
							2023-11-01 16:56:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								6a128aee32 
								
							 
						 
						
							
							
								
								LLM: add ui for portable-zip ( #9262 )  
							
							 
							
							
							
						 
						
							2023-11-01 15:36:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jasonzzt 
								
							 
						 
						
							
							
							
							
								
							
							
								cb7ef38e86 
								
							 
						 
						
							
							
								
								rerun  
							
							 
							
							
							
						 
						
							2023-11-01 15:30:34 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jasonzzt 
								
							 
						 
						
							
							
							
							
								
							
							
								ba148ff3ff 
								
							 
						 
						
							
							
								
								test py311  
							
							 
							
							
							
						 
						
							2023-11-01 14:08:49 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								726203d778 
								
							 
						 
						
							
							
								
								[LLM] Replace Embedding layer to fix it on CPU ( #9254 )  
							
							 
							
							
							
						 
						
							2023-11-01 13:58:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jasonzzt 
								
							 
						 
						
							
							
							
							
								
							
							
								7c7a7f2ec1 
								
							 
						 
						
							
							
								
								spr & arc ut with python3,9&3.10&3.11  
							
							 
							
							
							
						 
						
							2023-11-01 13:17:13 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yang Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								e1bc18f8eb 
								
							 
						 
						
							
							
								
								fix import ipex problem ( #9323 )  
							
							 
							
							... 
							
							
							
							* fix import ipex problem
* fix style 
							
						 
						
							2023-10-31 20:31:34 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								9f3d4676c6 
								
							 
						 
						
							
							
								
								LLM: Add qwen-vl gpu example ( #9290 )  
							
							 
							
							... 
							
							
							
							* create qwen-vl gpu example.
* add readme.
* fix.
* change input figure and update outputs.
* add qwen-vl pytorch model gpu example.
* fix.
* add readme. 
							
						 
						
							2023-11-01 11:01:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
							
							
								
							
							
								7e73c354a6 
								
							 
						 
						
							
							
								
								LLM: decoupling bigdl-llm and bigdl-nano ( #9306 )  
							
							 
							
							
							
						 
						
							2023-11-01 11:00:54 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
							
							
								
							
							
								2262ae4d13 
								
							 
						 
						
							
							
								
								Support MoFQ4 on arc ( #9301 )  
							
							 
							
							... 
							
							
							
							* init
* update
* fix style
* fix style
* fix style
* meet comments 
							
						 
						
							2023-11-01 10:59:46 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
							
							
								
							
							
								8ef8e25178 
								
							 
						 
						
							
							
								
								LLM: improve response speed in multi-turn chat ( #9299 )  
							
							 
							
							... 
							
							
							
							* update
* fix stop word and add chatglm2 support
* remove system prompt 
							
						 
						
							2023-11-01 10:30:44 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Cengguang Zhang 
								
							 
						 
						
							
							
							
							
								
							
							
								d4ab5904ef 
								
							 
						 
						
							
							
								
								LLM: Add python 3.10 llm UT ( #9302 )  
							
							 
							
							... 
							
							
							
							* add py310 test for llm-unit-test.
* add py310 llm-unit-tests
* add llm-cpp-build-py310
* test
* test
* test.
* test
* test
* fix deactivate.
* fix
* fix.
* fix
* test
* test
* test
* add build chatglm for win.
* test.
* fix 
							
						 
						
							2023-11-01 10:15:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									WeiguangHan 
								
							 
						 
						
							
							
							
							
								
							
							
								03aa368776 
								
							 
						 
						
							
							
								
								LLM: add the comparison between latest arc perf test and last one ( #9296 )  
							
							 
							
							... 
							
							
							
							* add the comparison between latest test and last one to html
* resolve some comments
* modify some code logics 
							
						 
						
							2023-11-01 09:53:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								96f8158fe2 
								
							 
						 
						
							
							
								
								LLM: adjust dolly v2 GPU example README ( #9318 )  
							
							 
							
							
							
						 
						
							2023-11-01 09:50:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
							
							
								
							
							
								c44c6dc43a 
								
							 
						 
						
							
							
								
								LLM: add chatglm3 examples ( #9305 )  
							
							 
							
							
							
						 
						
							2023-11-01 09:50:05 +08:00