add vLLM glm4 fix (#12474)

This commit is contained in:
Guancheng Fu 2024-12-02 14:05:16 +08:00 committed by GitHub
parent 4b6c3160be
commit 59bd4a214f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -94,9 +94,7 @@ def get_load_function(low_bit):
from ipex_llm import optimize_model
import os
not_convert_last_mlp = os.getenv("IPEX_LLM_NOT_CONVERT_LAST_MLP", None)
is_glm4_model = "glm-4" in self.model_config.model.lower()
is_codegeex4_model = "codegeex4-all" in self.model_config.model.lower()
if not_convert_last_mlp is not None or is_glm4_model or is_codegeex4_model:
if not_convert_last_mlp is not None:
# only use to avoid nan value in last mlp forward running glm4-9b-chat
modules = ["35.mlp", "36.mlp", "37.mlp", "38.mlp", "39.mlp"]
else: