Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								aa861df066 
								
							 
						 
						
							
							
								
								use new fp32 softmax kernel ( #11776 )  
							
							 
							
							
							
						 
						
							2024-08-13 14:48:11 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									binbin Deng 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								23d3acdc77 
								
							 
						 
						
							
							
								
								Add experimental support of fused decoder layer for llama2 ( #11768 )  
							
							 
							
							
							
						 
						
							2024-08-13 14:41:36 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin, Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c28b3389e6 
								
							 
						 
						
							
							
								
								Update npu multimodal example ( #11773 )  
							
							 
							
							
							
						 
						
							2024-08-13 14:14:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								81824ff8c9 
								
							 
						 
						
							
							
								
								Fix stdout in all-in-one benchmark to utf-8 ( #11772 )  
							
							 
							
							
							
						 
						
							2024-08-13 10:51:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								a1eb793f70 
								
							 
						 
						
							
							
								
								optimize minicpm v 2_6 first token perf ( #11770 )  
							
							 
							
							
							
						 
						
							2024-08-13 09:51:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								841dbcdf3a 
								
							 
						 
						
							
							
								
								Fix compresskv with lookahead issue ( #11767 )  
							
							 
							
							... 
							
							
							
							* fix compresskv + lookahead attn_mask qwen2
* support llama chatglm
* support mistral & chatglm
* address comments
* revert run.py 
							
						 
						
							2024-08-12 18:53:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f97a77ea4e 
								
							 
						 
						
							
							
								
								Update all-in-one benchmark for continuation task input preparation ( #11760 )  
							
							 
							
							... 
							
							
							
							* All use 8192.txt for prompt preparation for now
* Small fix
* Fix text encoding mode to utf-8
* Small update 
							
						 
						
							2024-08-12 17:49:45 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xu, Shuo 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1b05caba2b 
								
							 
						 
						
							
							
								
								Set mistral fuse rope to false except fp6 & fp16 ( #11765 )  
							
							 
							
							... 
							
							
							
							* set mistral fuse rope to false except fp6 & fp16
* lint
* lint
---------
Co-authored-by: ATMxsp01 <shou.xu@intel.com> 
							
						 
						
							2024-08-12 17:25:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								8db34057b4 
								
							 
						 
						
							
							
								
								optimize lookahead init time ( #11769 )  
							
							 
							
							
							
						 
						
							2024-08-12 17:19:12 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin, Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								05989ad0f9 
								
							 
						 
						
							
							
								
								Update npu example and all in one benchmark ( #11766 )  
							
							 
							
							
							
						 
						
							2024-08-12 16:46:46 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								57d177738d 
								
							 
						 
						
							
							
								
								optimize minicpm-v-2_6 repetition penalty ( #11763 )  
							
							 
							
							
							
						 
						
							2024-08-12 14:10:10 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								fac4c01a6e 
								
							 
						 
						
							
							
								
								Revert to use out-of-tree GPU driver  ( #11761 )  
							
							 
							
							... 
							
							
							
							* Revert to use out-of-tree GPU driver since the performance with out-of-tree driver is better than upstream's
* add spaces
* add troubleshooting case
* update Troubleshooting 
							
						 
						
							2024-08-12 13:41:47 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								245dba0abc 
								
							 
						 
						
							
							
								
								Fix lightweight-serving codegeex error ( #11759 )  
							
							 
							
							
							
						 
						
							2024-08-12 10:35:37 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								66fe2ee464 
								
							 
						 
						
							
							
								
								initial support of IPEX_LLM_PERFORMANCE_MODE  ( #11754 )  
							
							 
							
							... 
							
							
							
							* add perf mode
* update
* fix style 
							
						 
						
							2024-08-09 19:04:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								4b9c57cc60 
								
							 
						 
						
							
							
								
								Support compress kv with lookahead ( #11752 )  
							
							 
							
							... 
							
							
							
							* support compress kv with lookahead
* enough kv miss param 
							
						 
						
							2024-08-09 17:39:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								93455aac09 
								
							 
						 
						
							
							
								
								fix minicpm V 2.6 repeat output ( #11753 )  
							
							 
							
							
							
						 
						
							2024-08-09 17:39:24 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7e917d6cfb 
								
							 
						 
						
							
							
								
								fix gptq of llama ( #11749 )  
							
							 
							
							... 
							
							
							
							* fix gptq of llama
* small fix 
							
						 
						
							2024-08-09 16:39:25 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								dd46c141bd 
								
							 
						 
						
							
							
								
								Phi3 support compresskv ( #11733 )  
							
							 
							
							... 
							
							
							
							* phi3 support compresskv
* fix phi3 mtl error
* fix conflict with quant kv
* fix abnormal on mtl
* fix style
* use slide windows size to compress kv
* support sliding window
* fix style
* fix style
* temp: partial support quant kv
* support quant kv with compress kv, todo: model check
* temp
* fix style
* fix style
* remove prepare
* address comment
* default -> 1.8k 
							
						 
						
							2024-08-09 15:43:43 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Qiyuan Gong 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d8808cc2e3 
								
							 
						 
						
							
							
								
								Mistral apply_rotary_pos_emb_no_cache_xpu use rope_theta from config ( #11747 )  
							
							 
							
							... 
							
							
							
							mistral-7B-instruct-v0.2 and mistral-7B-instruct-v0.1 use different rope_theta (0.2 is 1e, 0.1 is 1e5). Pass self.config.rope_theta to apply_rotary_pos_emb_no_cache_xpu to avoid output difference. 
							
						 
						
							2024-08-09 10:35:51 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								044e486480 
								
							 
						 
						
							
							
								
								Fix vLLM CPU /chat endpoint ( #11748 )  
							
							 
							
							
							
						 
						
							2024-08-09 10:33:52 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jinhe 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								27b4b104ed 
								
							 
						 
						
							
							
								
								Add qwen2-1.5b-instruct into igpu performance ( #11735 )  
							
							 
							
							... 
							
							
							
							* updated qwen1.5B to all transformer==4.37 yaml
* updated qwen1.5B to all transformer==4.37 yaml 
							
						 
						
							2024-08-08 16:42:18 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Shaojun Liu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								107f7aafd0 
								
							 
						 
						
							
							
								
								enable inference mode for deepspeed tp serving ( #11742 )  
							
							 
							
							
							
						 
						
							2024-08-08 14:38:30 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zijie Li 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9e65cf00b3 
								
							 
						 
						
							
							
								
								Add openai-whisper pytorch gpu ( #11736 )  
							
							 
							
							... 
							
							
							
							* Add openai-whisper pytorch gpu
* Update README.md
* Update README.md
* fix typo
* fix names update readme
* Update README.md 
							
						 
						
							2024-08-08 12:32:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yuwen Hu 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7e61fa1af7 
								
							 
						 
						
							
							
								
								Revise GPU driver related guide in for Windows users ( #11740 )  
							
							 
							
							
							
						 
						
							2024-08-08 11:26:26 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jinhe 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d0c89fb715 
								
							 
						 
						
							
							
								
								updated llama.cpp and ollama quickstart ( #11732 )  
							
							 
							
							... 
							
							
							
							* updated llama.cpp and ollama quickstart.md
* added qwen2-1.5B sample output
* revision on quickstart updates
* revision on quickstart updates
* revision on qwen2 readme
* added 2 troubleshoots
* troubleshoot revision 
							
						 
						
							2024-08-08 11:04:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								54cc9353db 
								
							 
						 
						
							
							
								
								support and optimize minicpm-v-2_6 ( #11738 )  
							
							 
							
							
							
						 
						
							2024-08-07 18:21:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e956e71fc1 
								
							 
						 
						
							
							
								
								fix conflict with quant kv ( #11737 )  
							
							 
							
							
							
						 
						
							2024-08-07 18:10:30 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								00a5574c8a 
								
							 
						 
						
							
							
								
								Use merge_qkv to replace fused_qkv for llama2 ( #11727 )  
							
							 
							
							... 
							
							
							
							* update 4.38
* support new versions
* update
* fix style
* fix style
* update rope
* temp test sdpa
* fix style
* fix cpu ut 
							
						 
						
							2024-08-07 18:04:01 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d2abc9711b 
								
							 
						 
						
							
							
								
								Fix MTL 4k input qwen2 compresskv error ( #11734 )  
							
							 
							
							... 
							
							
							
							* fix
* fix style 
							
						 
						
							2024-08-07 16:21:57 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								a71ae7c22b 
								
							 
						 
						
							
							
								
								Support minicpm compresskv & modify default compresskv config & default enable compresskv on mtl 2.5k~4.5k ( #11726 )  
							
							 
							
							... 
							
							
							
							* support minicpm & modify default & default enable on mtl 2.5k~4.5k
* fix style 
							
						 
						
							2024-08-07 11:35:39 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								c093f7d980 
								
							 
						 
						
							
							
								
								fix phi3 ( #11729 )  
							
							 
							
							
							
						 
						
							2024-08-07 09:39:46 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Qiyuan Gong 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e32d13d78c 
								
							 
						 
						
							
							
								
								Remove Out of tree Driver from GPU driver installation document ( #11728 )  
							
							 
							
							... 
							
							
							
							GPU drivers are already upstreamed to Kernel 6.2+. Remove the out-of-tree driver (intel-i915-dkms) for 6.2-6.5. https://dgpu-docs.intel.com/driver/kernel-driver-types.html#gpu-driver-support 
* Remove intel-i915-dkms intel-fw-gpu (only for kernel 5.19) 
							
						 
						
							2024-08-07 09:38:19 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zijie Li 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e7f7141781 
								
							 
						 
						
							
							
								
								Add benchmark util for transformers 4.42 ( #11725 )  
							
							 
							
							... 
							
							
							
							* add new benchmark_util.py
Add new benchmark_util.py for transformers>=4.43.1. The old one renamed to benchmark_util_prev.py.
* Small fix to import code
* Update __init__.py
* fix file names
* Update lint-python
Update lint-python to exclude benchmark_util_4_29.py
benchmark_util_4_43.py
* Update benchmark_util_4_43.py
* add benchmark_util for transformers 4.42 
							
						 
						
							2024-08-07 08:48:07 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ch1y0q 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								4676af2054 
								
							 
						 
						
							
							
								
								add gemma2 example ( #11724 )  
							
							 
							
							... 
							
							
							
							* add `gemma2`
* update `transformers` version
* update `README.md` 
							
						 
						
							2024-08-06 21:17:50 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									SichengStevenLi 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								985213614b 
								
							 
						 
						
							
							
								
								Removed no longer needed models for Arc nightly perf  ( #11722 )  
							
							 
							
							... 
							
							
							
							* removed LLMs that are no longer needed
Removed: 
mistralai/Mistral-7B-v0.1
deepseek-ai/deepseek-coder-6.7b-instruct
* Update arc-perf-test-batch4.yaml
Removed: 
deepseek-ai/deepseek-coder-6.7b-instruct
mistralai/Mistral-7B-v0.1
* Update arc-perf-test.yaml
Removed: 
deepseek-ai/deepseek-coder-6.7b-instruct
mistralai/Mistral-7B-v0.1
* Create arc-perf-transformers-438.yaml
* Moved arc-perf-transformers-438.yaml location
* Create arc-perf-transformers-438-batch2.yaml
* Create arc-perf-transformers-438-batch4.yaml
* Delete python/llm/test/benchmark/arc-perf-transformers-438-batch2.yaml
* Delete python/llm/test/benchmark/arc-perf-transformers-438-batch4.yaml
* Delete python/llm/test/benchmark/arc-perf-transformers-438.yaml 
							
						 
						
							2024-08-06 16:12:00 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								929675aa6b 
								
							 
						 
						
							
							
								
								support latest phi3 ( #11721 )  
							
							 
							
							
							
						 
						
							2024-08-06 15:52:55 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin, Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								11650b6f81 
								
							 
						 
						
							
							
								
								upgrade glm-4v example transformers version ( #11719 )  
							
							 
							
							
							
						 
						
							2024-08-06 14:55:09 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								bbdff6edeb 
								
							 
						 
						
							
							
								
								optimize internvl2 4b performance ( #11720 )  
							
							 
							
							
							
						 
						
							2024-08-06 14:25:08 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yishuo Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f44b732aa8 
								
							 
						 
						
							
							
								
								support internvl2-4b ( #11718 )  
							
							 
							
							
							
						 
						
							2024-08-06 13:36:32 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin, Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7f241133da 
								
							 
						 
						
							
							
								
								Add MiniCPM-Llama3-V-2_5 GPU example ( #11693 )  
							
							 
							
							... 
							
							
							
							* Add MiniCPM-Llama3-V-2_5 GPU example
* fix 
							
						 
						
							2024-08-06 10:22:41 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Jin, Qiao 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								808d9a7bae 
								
							 
						 
						
							
							
								
								Add MiniCPM-V-2 GPU example ( #11699 )  
							
							 
							
							... 
							
							
							
							* Add MiniCPM-V-2 GPU example
* add example in README.md
* add example in README.md 
							
						 
						
							2024-08-06 10:22:33 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Zijie Li 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								8fb36b9f4a 
								
							 
						 
						
							
							
								
								add new benchmark_util.py ( #11713 )  
							
							 
							
							... 
							
							
							
							* add new benchmark_util.py 
							
						 
						
							2024-08-05 16:18:48 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Wang, Jian4 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								493cbd9a36 
								
							 
						 
						
							
							
								
								Support  lightweight-serving with internlm-xcomposer2-vl-7b multimodal  input ( #11703 )  
							
							 
							
							... 
							
							
							
							* init image_list
* enable internlm-xcomposer2 image input
* update style
* add readme
* update model
* update readme 
							
						 
						
							2024-08-05 09:36:04 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								aa98ef96fe 
								
							 
						 
						
							
							
								
								change mixed_precision to q6_k ( #11706 )  
							
							 
							
							
							
						 
						
							2024-08-02 15:55:16 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Xiangyu Tian 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								1baa3efe0e 
								
							 
						 
						
							
							
								
								Optimizations for Pipeline Parallel Serving ( #11702 )  
							
							 
							
							... 
							
							
							
							Optimizations for Pipeline Parallel Serving 
							
						 
						
							2024-08-02 12:06:59 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								8d1e0bd2f4 
								
							 
						 
						
							
							
								
								add sdp causal support in llama ( #11705 )  
							
							 
							
							
							
						 
						
							2024-08-02 10:27:40 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Ruonan Wang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								736a7ef72e 
								
							 
						 
						
							
							
								
								add sdp_causal for mistral 4.36 ( #11686 )  
							
							 
							
							... 
							
							
							
							* add sdp_causal for mistral
* fix
* update 
							
						 
						
							2024-08-01 18:57:31 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Yina Chen 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								45c730ff39 
								
							 
						 
						
							
							
								
								Chatglm support compresskv ( #11690 )  
							
							 
							
							... 
							
							
							
							* chatglm4 support compresskv
* fix
* fix style
* support chatglm2
* fix quantkv conflict
* fix style 
							
						 
						
							2024-08-01 18:20:20 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									Qiyuan Gong 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								762ad49362 
								
							 
						 
						
							
							
								
								Add RANK_WAIT_TIME into DeepSpeed-AutoTP to avoid CPU memory OOM ( #11704 )  
							
							 
							
							... 
							
							
							
							* DeepSpeed-AutoTP will start multiple processors to load models and convert them in CPU memory. If model/rank_num is large, this will lead to OOM. Add RANK_WAIT_TIME to reduce memory usage by controlling model reading parallelism. 
							
						 
						
							2024-08-01 18:16:21 +08:00  
						
						
							 
							
							
								 
							 
							
						 
					 
				
					
						
							
								
								
									 
									hxsz1997 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								8ef4caaf5d 
								
							 
						 
						
							
							
								
								add 3k and 4k input of nightly perf test on iGPU ( #11701 )  
							
							 
							
							... 
							
							
							
							* Add 3k&4k input in workflow for iGPU (#11685 )
* add 3k&4k input in workflow
* comment for test
* comment models for accelerate test
* remove OOM models
* modify typo
* change test model (#11696 )
* reverse test models (#11700 ) 
							
						 
						
							2024-08-01 14:17:46 +08:00