Guoqiong Song 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d64711900a 
								
							 
						 
						
							
							
								
								Fix cohere model on transformers>=4.41 ( #11575 )  
							
							 
							
							... 
							
							
							
							* fix cohere model for 4-41 
							
						 
						
							2024-07-17 17:18:59 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Guoqiong Song 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								5b6eb85b85 
								
							 
						 
						
							
							
								
								phi model readme ( #11595 )  
							
							 
							
							... 
							
							
							
							Co-authored-by: rnwang04 <ruonan1.wang@intel.com> 
							
						 
						
							2024-07-17 17:18:34 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9c15abf825 
								
							 
						 
						
							
							
								
								Refactor fastapi-serving and add one card serving ( #11581 )  
							
							 
							
							... 
							
							
							
							* init fastapi-serving one card
* mv api code to source
* update worker
* update for style-check
* add worker
* update bash
* update
* update worker name and add readme
* rename update
* rename to fastapi 
							
						 
						
							2024-07-17 11:12:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								365adad59f 
								
							 
						 
						
							
							
								
								Support LoRA ChatGLM with Alpaca Dataset ( #11580 )  
							
							 
							
							... 
							
							
							
							* Support LoRA ChatGLM with Alpaca Dataset
* refine
* fix
* add 2-card alpaca 
							
						 
						
							2024-07-16 15:40:02 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ch1y0q 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								50cf563a71 
								
							 
						 
						
							
							
								
								Add example: MiniCPM-V ( #11570 )  
							
							 
							
							
							
						 
						
							2024-07-15 10:55:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								06745e5742 
								
							 
						 
						
							
							
								
								Add npu benchmark all-in-one script ( #11571 )  
							
							 
							
							... 
							
							
							
							* npu benchmark 
							
						 
						
							2024-07-15 10:42:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0981b72275 
								
							 
						 
						
							
							
								
								Fix /generate_stream api in Pipeline Parallel FastAPI ( #11569 )  
							
							 
							
							
							
						 
						
							2024-07-12 13:19:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b9c66994a5 
								
							 
						 
						
							
							
								
								add npu sdp ( #11562 )  
							
							 
							
							
							
						 
						
							2024-07-11 16:57:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								2b8ad8731e 
								
							 
						 
						
							
							
								
								Support pipeline parallel for glm-4v ( #11545 )  
							
							 
							
							
							
						 
						
							2024-07-11 16:06:06 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7f5111a998 
								
							 
						 
						
							
							
								
								LLM: Refine start script for Pipeline Parallel Serving ( #11557 )  
							
							 
							
							... 
							
							
							
							Refine start script and readme for Pipeline Parallel Serving 
							
						 
						
							2024-07-11 15:45:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								105e124752 
								
							 
						 
						
							
							
								
								optimize phi3-v encoder npu performance and add multimodal example ( #11553 )  
							
							 
							
							... 
							
							
							
							* phi3-v
* readme 
							
						 
						
							2024-07-11 13:59:14 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								3c16c9f725 
								
							 
						 
						
							
							
								
								Optimize baichuan on NPU ( #11548 )  
							
							 
							
							... 
							
							
							
							* baichuan_npu 
							
						 
						
							2024-07-10 13:18:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zhao Changmin 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								76a5802acf 
								
							 
						 
						
							
							
								
								update NPU examples ( #11540 )  
							
							 
							
							... 
							
							
							
							* update NPU examples 
							
						 
						
							2024-07-09 17:19:42 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jason Dai 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								099486afb7 
								
							 
						 
						
							
							
								
								Update README.md ( #11530 )  
							
							 
							
							
							
						 
						
							2024-07-08 20:18:41 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								66f6ffe4b2 
								
							 
						 
						
							
							
								
								Update GPU HF-Transformers example structure ( #11526 )  
							
							 
							
							
							
						 
						
							2024-07-08 17:58:06 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7d8bc83415 
								
							 
						 
						
							
							
								
								LLM: Partial Prefilling for Pipeline Parallel Serving ( #11457 )  
							
							 
							
							... 
							
							
							
							LLM: Partial Prefilling for Pipeline Parallel Serving 
							
						 
						
							2024-07-05 13:10:35 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								60de428b37 
								
							 
						 
						
							
							
								
								Support pipeline parallel for qwen-vl ( #11503 )  
							
							 
							
							
							
						 
						
							2024-07-04 18:03:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								61c36ba085 
								
							 
						 
						
							
							
								
								Add pp_serving verified models ( #11498 )  
							
							 
							
							... 
							
							
							
							* add verified models
* update
* verify large model
* update commend 
							
						 
						
							2024-07-03 14:57:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9274282ef7 
								
							 
						 
						
							
							
								
								Support pipeline parallel for glm-4-9b-chat ( #11463 )  
							
							 
							
							
							
						 
						
							2024-07-03 14:25:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								4390e7dc49 
								
							 
						 
						
							
							
								
								Fix codegeex2 transformers version ( #11487 )  
							
							 
							
							
							
						 
						
							2024-07-02 15:09:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								913e750b01 
								
							 
						 
						
							
							
								
								fix non-string deepspeed config path bug ( #11476 )  
							
							 
							
							... 
							
							
							
							* fix non-string deepspeed config path bug
* Update lora_finetune_chatglm.py 
							
						 
						
							2024-07-01 15:53:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								319a3b36b2 
								
							 
						 
						
							
							
								
								fix npu llama2 ( #11471 )  
							
							 
							
							
							
						 
						
							2024-07-01 10:14:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								07362ffffc 
								
							 
						 
						
							
							
								
								ChatGLM3-6B LoRA Fine-tuning Demo ( #11450 )  
							
							 
							
							... 
							
							
							
							* ChatGLM3-6B LoRA Fine-tuning Demo
* refine
* refine
* add 2-card deepspeed
* refine format
* add mpi4py and deepspeed install 
							
						 
						
							2024-07-01 09:18:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								fd933c92d8 
								
							 
						 
						
							
							
								
								Fix: Correct num_requests in benchmark for Pipeline Parallel Serving ( #11462 )  
							
							 
							
							
							
						 
						
							2024-06-28 16:10:51 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								987017ef47 
								
							 
						 
						
							
							
								
								Update pipeline parallel serving for more model support ( #11428 )  
							
							 
							
							
							
						 
						
							2024-06-27 18:21:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								cf0f5c4322 
								
							 
						 
						
							
							
								
								change npu document ( #11446 )  
							
							 
							
							
							
						 
						
							2024-06-27 13:59:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								508c364a79 
								
							 
						 
						
							
							
								
								Add precision option in PP inference examples ( #11440 )  
							
							 
							
							
							
						 
						
							2024-06-27 09:24:27 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ab9f7f3ac5 
								
							 
						 
						
							
							
								
								FIX: Qwen1.5-GPTQ-Int4 inference error ( #11432 )  
							
							 
							
							... 
							
							
							
							* merge_qkv if quant_method is 'gptq'
* fix python style checks
* refactor
* update GPU example 
							
						 
						
							2024-06-26 15:36:22 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jiao Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								40fa23560e 
								
							 
						 
						
							
							
								
								Fix LLAVA example on CPU ( #11271 )  
							
							 
							
							... 
							
							
							
							* update
* update
* update
* update 
							
						 
						
							2024-06-25 20:04:59 -07:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e473b8d946 
								
							 
						 
						
							
							
								
								Add more qwen1.5 and qwen2 support for pipeline parallel inference ( #11423 )  
							
							 
							
							
							
						 
						
							2024-06-25 15:49:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								3b23de684a 
								
							 
						 
						
							
							
								
								update npu examples ( #11422 )  
							
							 
							
							
							
						 
						
							2024-06-25 13:32:53 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								8ddae22cfb 
								
							 
						 
						
							
							
								
								LLM: Refactor Pipeline-Parallel-FastAPI example ( #11319 )  
							
							 
							
							... 
							
							
							
							Initially Refactor for Pipeline-Parallel-FastAPI example 
							
						 
						
							2024-06-25 13:30:36 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								34c15d3a10 
								
							 
						 
						
							
							
								
								update pp document ( #11421 )  
							
							 
							
							
							
						 
						
							2024-06-25 10:17:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c985912ee3 
								
							 
						 
						
							
							
								
								Add Deepspeed LoRA dependencies in document ( #11410 )  
							
							 
							
							
							
						 
						
							2024-06-24 15:29:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0c67639539 
								
							 
						 
						
							
							
								
								Add more examples for pipeline parallel inference ( #11372 )  
							
							 
							
							... 
							
							
							
							* add more model examples for pipeline parallel inference
* add mixtral and vicuna models
* add yi model and past_kv support for chatglm family
* add docs
* doc update
* add license
* update 
							
						 
						
							2024-06-21 17:55:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									ivy-lv11 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								21fc781fce 
								
							 
						 
						
							
							
								
								Add GLM-4V example ( #11343 )  
							
							 
							
							... 
							
							
							
							* add example
* modify
* modify
* add line
* add
* add link and replace with phi-3-vision template
* fix generate options
* fix
* fix
---------
Co-authored-by: jinbridge <2635480475@qq.com> 
							
						 
						
							2024-06-21 12:54:31 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								4ba82191f2 
								
							 
						 
						
							
							
								
								Support PP inference for chatglm3 ( #11375 )  
							
							 
							
							
							
						 
						
							2024-06-21 09:59:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zijie Li 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ae452688c2 
								
							 
						 
						
							
							
								
								Add NPU HF example ( #11358 )  
							
							 
							
							
							
						 
						
							2024-06-19 18:07:28 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								67a1e05876 
								
							 
						 
						
							
							
								
								Remove zero3 context manager from LoRA ( #11346 )  
							
							 
							
							
							
						 
						
							2024-06-18 17:24:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								694912698e 
								
							 
						 
						
							
							
								
								Upgrade scikit-learn to 1.5.0 to fix dependabot issue ( #11349 )  
							
							 
							
							
							
						 
						
							2024-06-18 15:47:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Heyang Sun 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								00f322d8ee 
								
							 
						 
						
							
							
								
								Finetune ChatGLM with Deepspeed Zero3 LoRA ( #11314 )  
							
							 
							
							... 
							
							
							
							* Finetune ChatGLM with Deepspeed Zero3 LoRA
* add deepspeed zero3 config
* rename config
* remove offload_param
* add save_checkpoint parameter
* Update lora_deepspeed_zero3_finetune_chatglm3_6b_arc_2_card.sh
* refine 
							
						 
						
							2024-06-18 12:31:26 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e50c890e1f 
								
							 
						 
						
							
							
								
								Support finishing PP inference once eos_token_id is found ( #11336 )  
							
							 
							
							
							
						 
						
							2024-06-18 09:55:40 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Qiyuan Gong 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								de4bb97b4f 
								
							 
						 
						
							
							
								
								Remove accelerate 0.23.0 install command in readme and docker ( #11333 )  
							
							 
							
							... 
							
							
							
							* ipex-llm's accelerate has been upgraded to 0.23.0. Remove accelerate 0.23.0 install command in README and docker. 
							
						 
						
							2024-06-17 17:52:12 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ef4b6519fb 
								
							 
						 
						
							
							
								
								Add phi-3 model support for pipeline parallel inference ( #11334 )  
							
							 
							
							... 
							
							
							
							* add phi-3 model support
* add phi3 example 
							
						 
						
							2024-06-17 17:44:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SONG Ge 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								be00380f1a 
								
							 
						 
						
							
							
								
								Fix pipeline parallel inference past_key_value error in Baichuan ( #11318 )  
							
							 
							
							... 
							
							
							
							* fix past_key_value error
* add baichuan2 example
* fix style
* update doc
* add script link in doc
* fix import error
* update 
							
						 
						
							2024-06-17 09:29:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								4359ab3172 
								
							 
						 
						
							
							
								
								LLM: Add /generate_stream endpoint for Pipeline-Parallel-FastAPI example ( #11187 )  
							
							 
							
							... 
							
							
							
							Add /generate_stream and OpenAI-formatted endpoint for Pipeline-Parallel-FastAPI example 
							
						 
						
							2024-06-14 15:15:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0e7a31a09c 
								
							 
						 
						
							
							
								
								ChatGLM Examples Restructure regarding Installation Steps  ( #11285 )  
							
							 
							
							... 
							
							
							
							* merge install step in glm examples
* fix section
* fix section
* fix tiktoken 
							
						 
						
							2024-06-14 12:37:05 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								60cb1dac7c 
								
							 
						 
						
							
							
								
								Support PP for qwen1.5  ( #11300 )  
							
							 
							
							
							
						 
						
							2024-06-13 17:35:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f97cce2642 
								
							 
						 
						
							
							
								
								Fix import error of ds autotp ( #11307 )  
							
							 
							
							
							
						 
						
							2024-06-13 16:22:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								220151e2a1 
								
							 
						 
						
							
							
								
								Refactor pipeline parallel multi-stage implementation ( #11286 )  
							
							 
							
							
							
						 
						
							2024-06-13 10:00:23 +08:00