vLLM: automatically skip converting the last MLP layers for glm-4 models (#12003)

2024-09-04 13:50:32 +08:00 · 2024-09-04 13:50:32 +08:00 · 2b993ad479
commit 2b993ad479
parent 9eaff5e47d
1 changed file with 2 additions and 1 deletion
--- a/python/llm/src/ipex_llm/vllm/xpu/model_convert.py
+++ b/python/llm/src/ipex_llm/vllm/xpu/model_convert.py
@@ -250,7 +250,8 @@ def get_load_function(low_bit):
            from ipex_llm import optimize_model
            import os
            not_convert_last_mlp = os.getenv("IPEX_LLM_NOT_CONVERT_LAST_MLP", None)
-            if not_convert_last_mlp is not None:
+            is_glm4_model = "glm-4" in self.model_config.model.lower()
+            if not_convert_last_mlp is not None or is_glm4_model:
                # only use to avoid nan value in last mlp forward running glm4-9b-chat
                modules = ["35.mlp", "36.mlp", "37.mlp", "38.mlp", "39.mlp"]
            else: