Update flashmoe_quickstart (#13154)
This commit is contained in:
		
							parent
							
								
									886c7632b2
								
							
						
					
					
						commit
						086a8b3ab9
					
				
					 3 changed files with 26 additions and 20 deletions
				
			
		
							
								
								
									
										17
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										17
									
								
								README.md
									
									
									
									
									
								
							| 
						 | 
					@ -66,8 +66,8 @@ See demos of running local LLMs *on Intel Core Ultra iGPU, Intel Core Ultra NPU,
 | 
				
			||||||
  <tr>
 | 
					  <tr>
 | 
				
			||||||
    <td align="center" colspan="1"><strong>Intel Core Ultra iGPU</strong></td>
 | 
					    <td align="center" colspan="1"><strong>Intel Core Ultra iGPU</strong></td>
 | 
				
			||||||
    <td align="center" colspan="1"><strong>Intel Core Ultra NPU</strong></td>
 | 
					    <td align="center" colspan="1"><strong>Intel Core Ultra NPU</strong></td>
 | 
				
			||||||
    <td align="center" colspan="1"><strong>Intel Arc dGPU</strong></td>
 | 
					 | 
				
			||||||
    <td align="center" colspan="1"><strong>2-Card Intel Arc dGPUs</strong></td>
 | 
					    <td align="center" colspan="1"><strong>2-Card Intel Arc dGPUs</strong></td>
 | 
				
			||||||
 | 
					    <td align="center" colspan="1"><strong>Intel Xeon + Arc dGPU</strong></td>
 | 
				
			||||||
  </tr>
 | 
					  </tr>
 | 
				
			||||||
  <tr>
 | 
					  <tr>
 | 
				
			||||||
    <td>
 | 
					    <td>
 | 
				
			||||||
| 
						 | 
					@ -81,13 +81,13 @@ See demos of running local LLMs *on Intel Core Ultra iGPU, Intel Core Ultra NPU,
 | 
				
			||||||
      </a>
 | 
					      </a>
 | 
				
			||||||
    </td>
 | 
					    </td>
 | 
				
			||||||
    <td>
 | 
					    <td>
 | 
				
			||||||
      <a href="https://llm-assets.readthedocs.io/en/latest/_images/arc_llama3-8B_fp8_textwebui.gif" target="_blank">
 | 
					      <a href="https://llm-assets.readthedocs.io/en/latest/_images/2arc_DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gif" target="_blank">
 | 
				
			||||||
        <img src="https://llm-assets.readthedocs.io/en/latest/_images/arc_llama3-8B_fp8_textwebui.gif" width=100%; />
 | 
					        <img src="https://llm-assets.readthedocs.io/en/latest/_images/2arc_DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gif" width=100%; />
 | 
				
			||||||
      </a>
 | 
					      </a>
 | 
				
			||||||
    </td>
 | 
					    </td>
 | 
				
			||||||
    <td>
 | 
					    <td>
 | 
				
			||||||
      <a href="https://llm-assets.readthedocs.io/en/latest/_images/2arc_DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gif" target="_blank">
 | 
					      <a href="https://llm-assets.readthedocs.io/en/latest/_images/FlashMoE-Qwen3-235B.gif" target="_blank">
 | 
				
			||||||
        <img src="https://llm-assets.readthedocs.io/en/latest/_images/2arc_DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gif" width=100%; />
 | 
					        <img src="https://llm-assets.readthedocs.io/en/latest/_images/FlashMoE-Qwen3-235B.gif" width=100%; />
 | 
				
			||||||
      </a>
 | 
					      </a>
 | 
				
			||||||
    </td>    
 | 
					    </td>    
 | 
				
			||||||
  </tr>
 | 
					  </tr>
 | 
				
			||||||
| 
						 | 
					@ -99,11 +99,12 @@ See demos of running local LLMs *on Intel Core Ultra iGPU, Intel Core Ultra NPU,
 | 
				
			||||||
      <a href="docs/mddocs/Quickstart/npu_quickstart.md">HuggingFace <br> (Llama3.2-3B, SYM_INT4)</a>
 | 
					      <a href="docs/mddocs/Quickstart/npu_quickstart.md">HuggingFace <br> (Llama3.2-3B, SYM_INT4)</a>
 | 
				
			||||||
    </td>
 | 
					    </td>
 | 
				
			||||||
    <td align="center" width="25%">
 | 
					    <td align="center" width="25%">
 | 
				
			||||||
      <a href="docs/mddocs/Quickstart/webui_quickstart.md">TextGeneration-WebUI <br> (Llama3-8B, FP8) </a>
 | 
					      <a href="docs/mddocs/Quickstart/llamacpp_portable_zip_gpu_quickstart.md">llama.cpp <br> (DeepSeek-R1-Distill-Qwen-32B, Q4_K)</a>
 | 
				
			||||||
    </td>
 | 
					    </td>
 | 
				
			||||||
    <td align="center" width="25%">
 | 
					    <td align="center" width="25%">
 | 
				
			||||||
      <a href="docs/mddocs/Quickstart/llamacpp_portable_zip_gpu_quickstart.md">llama.cpp <br> (DeepSeek-R1-Distill-Qwen-32B, Q4_K)</a>
 | 
					      <a href="docs/mddocs/Quickstart/flashmoe_quickstart.md">FlashMoE <br> (Qwen3MoE-235B, Q4_K) </a>
 | 
				
			||||||
    </td>  </tr>
 | 
					    </td>
 | 
				
			||||||
 | 
					  </tr>
 | 
				
			||||||
</table>
 | 
					</table>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
<!--
 | 
					<!--
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -66,8 +66,8 @@
 | 
				
			||||||
  <tr>
 | 
					  <tr>
 | 
				
			||||||
    <td align="center" colspan="1"><strong>Intel Core Ultra iGPU</strong></td>
 | 
					    <td align="center" colspan="1"><strong>Intel Core Ultra iGPU</strong></td>
 | 
				
			||||||
    <td align="center" colspan="1"><strong>Intel Core Ultra NPU</strong></td>
 | 
					    <td align="center" colspan="1"><strong>Intel Core Ultra NPU</strong></td>
 | 
				
			||||||
    <td align="center" colspan="1"><strong>Intel Arc dGPU</strong></td>
 | 
					 | 
				
			||||||
    <td align="center" colspan="1"><strong>2-Card Intel Arc dGPUs</strong></td>
 | 
					    <td align="center" colspan="1"><strong>2-Card Intel Arc dGPUs</strong></td>
 | 
				
			||||||
 | 
					    <td align="center" colspan="1"><strong>Intel Xeon + Arc dGPU</strong></td>
 | 
				
			||||||
  </tr>
 | 
					  </tr>
 | 
				
			||||||
  <tr>
 | 
					  <tr>
 | 
				
			||||||
    <td>
 | 
					    <td>
 | 
				
			||||||
| 
						 | 
					@ -81,29 +81,30 @@
 | 
				
			||||||
      </a>
 | 
					      </a>
 | 
				
			||||||
    </td>
 | 
					    </td>
 | 
				
			||||||
    <td>
 | 
					    <td>
 | 
				
			||||||
      <a href="https://llm-assets.readthedocs.io/en/latest/_images/arc_llama3-8B_fp8_textwebui.gif" target="_blank">
 | 
					      <a href="https://llm-assets.readthedocs.io/en/latest/_images/2arc_DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gif" target="_blank">
 | 
				
			||||||
        <img src="https://llm-assets.readthedocs.io/en/latest/_images/arc_llama3-8B_fp8_textwebui.gif" width=100%; />
 | 
					        <img src="https://llm-assets.readthedocs.io/en/latest/_images/2arc_DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gif" width=100%; />
 | 
				
			||||||
      </a>
 | 
					      </a>
 | 
				
			||||||
    </td>
 | 
					    </td>
 | 
				
			||||||
    <td>
 | 
					    <td>
 | 
				
			||||||
      <a href="https://llm-assets.readthedocs.io/en/latest/_images/2arc_DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gif" target="_blank">
 | 
					      <a href="https://llm-assets.readthedocs.io/en/latest/_images/FlashMoE-Qwen3-235B.gif" target="_blank">
 | 
				
			||||||
        <img src="https://llm-assets.readthedocs.io/en/latest/_images/2arc_DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gif" width=100%; />
 | 
					        <img src="https://llm-assets.readthedocs.io/en/latest/_images/FlashMoE-Qwen3-235B.gif" width=100%; />
 | 
				
			||||||
      </a>
 | 
					      </a>
 | 
				
			||||||
    </td>    
 | 
					    </td>    
 | 
				
			||||||
  </tr>
 | 
					  </tr>
 | 
				
			||||||
  <tr>
 | 
					  <tr>
 | 
				
			||||||
    <td align="center" width="25%">
 | 
					    <td align="center" width="25%">
 | 
				
			||||||
      <a href="docs/mddocs/Quickstart/ollama_portable_zip_quickstart.zh-CN.md">Ollama <br> (Mistral-7B, Q4_K) </a>
 | 
					      <a href="docs/mddocs/Quickstart/ollama_portable_zip_quickstart.md">Ollama <br> (Mistral-7B, Q4_K) </a>
 | 
				
			||||||
    </td>
 | 
					    </td>
 | 
				
			||||||
    <td align="center" width="25%">
 | 
					    <td align="center" width="25%">
 | 
				
			||||||
      <a href="docs/mddocs/Quickstart/npu_quickstart.md">HuggingFace <br> (Llama3.2-3B, SYM_INT4)</a>
 | 
					      <a href="docs/mddocs/Quickstart/npu_quickstart.md">HuggingFace <br> (Llama3.2-3B, SYM_INT4)</a>
 | 
				
			||||||
    </td>
 | 
					    </td>
 | 
				
			||||||
    <td align="center" width="25%">
 | 
					    <td align="center" width="25%">
 | 
				
			||||||
      <a href="docs/mddocs/Quickstart/webui_quickstart.md">TextGeneration-WebUI <br> (Llama3-8B, FP8) </a>
 | 
					      <a href="docs/mddocs/Quickstart/llamacpp_portable_zip_gpu_quickstart.md">llama.cpp <br> (DeepSeek-R1-Distill-Qwen-32B, Q4_K)</a>
 | 
				
			||||||
    </td>
 | 
					    </td>
 | 
				
			||||||
    <td align="center" width="25%">
 | 
					    <td align="center" width="25%">
 | 
				
			||||||
      <a href="docs/mddocs/Quickstart/llamacpp_portable_zip_gpu_quickstart.zh-CN.md">llama.cpp <br> (DeepSeek-R1-Distill-Qwen-32B, Q4_K)</a>
 | 
					      <a href="docs/mddocs/Quickstart/flashmoe_quickstart.md">FlashMoE <br> (Qwen3MoE-235B, Q4_K) </a>
 | 
				
			||||||
    </td>  </tr>
 | 
					    </td>
 | 
				
			||||||
 | 
					  </tr>
 | 
				
			||||||
</table>
 | 
					</table>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
<!--
 | 
					<!--
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,5 +1,9 @@
 | 
				
			||||||
# FlashMoE
 | 
					# FlashMoE
 | 
				
			||||||
The `FlashMoe` support in `ipex-llm` allows you to run ***DeepSeek V3/R1 671B*** and ***Qwen3MoE 235B*** models with just 1 or 2 Intel Arc GPUs.
 | 
					The `FlashMoe` support in `ipex-llm` allows you to run ***DeepSeek V3/R1 671B*** and ***Qwen3MoE 235B*** models with just 1 or 2 Intel Arc GPUs (such as A770 and B580).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					See the demo of running the *Qwen3MoE 235B* model on a single Arc A770 below.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					<a href="https://llm-assets.readthedocs.io/en/latest/_images/FlashMoE-Qwen3-235B.gif"><img src="https://llm-assets.readthedocs.io/en/latest/_images/FlashMoE-Qwen3-235B.gif"/></a>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
## Install
 | 
					## Install
 | 
				
			||||||
### Prerequisites
 | 
					### Prerequisites
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue