Add moe_softmax_topk (#13157)
* add moe_softmax_topk
* address comments
* update
parent aa12f69bbf
commit f6441b4e3d

1 changed file with 8 additions and 0 deletions
@@ -365,3 +365,11 @@ def rotary_half_with_cache_inplaced(query_states: torch.Tensor, key_states: torc
     from ipex_llm.transformers.models.utils import make_cache_contiguous_inplaced
     make_cache_contiguous_inplaced(cos, sin)
     xe_addons.rotary_half_with_cache_inplaced(query_states, key_states, cos, sin)
+
+
+def moe_softmax_topk(router_logits: torch.Tensor, top_k: int, norm_topk_prob: bool):
+    import xe_addons
+    selected_experts, routing_weights = xe_addons.moe_softmax_topk(
+        router_logits, top_k, norm_topk_prob
+    )
+    return selected_experts, routing_weights
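For reference, below is a pure-PyTorch sketch of what a fused softmax-top-k routing kernel like xe_addons.moe_softmax_topk presumably computes, following the common Hugging Face MoE routing pattern (as in Mixtral/Qwen2-MoE). The function name moe_softmax_topk_reference, the float32 softmax, and the exact tie-breaking behavior are assumptions for illustration, not part of this commit:

import torch

def moe_softmax_topk_reference(router_logits: torch.Tensor, top_k: int, norm_topk_prob: bool):
    # Softmax over the expert dimension; float32 for numerical stability (assumed).
    routing_weights = torch.softmax(router_logits, dim=-1, dtype=torch.float32)
    # Keep the k largest routing probabilities and the indices of their experts.
    routing_weights, selected_experts = torch.topk(routing_weights, top_k, dim=-1)
    if norm_topk_prob:
        # Renormalize the selected weights so they sum to 1 per token.
        routing_weights = routing_weights / routing_weights.sum(dim=-1, keepdim=True)
    # Match the wrapper's return order: expert indices first, then weights.
    return selected_experts, routing_weights

A caller would typically pass in the gate projection's output, e.g. selected_experts, routing_weights = moe_softmax_topk(gate(hidden_states), top_k=4, norm_topk_prob=True), then dispatch each token to its selected experts weighted by routing_weights; the gate name and top_k value here are illustrative, not taken from this commit.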