vllm update for glm-4 model automatic not_convert (#12003)
This commit is contained in:
parent
9eaff5e47d
commit
2b993ad479
1 changed file with 2 additions and 1 deletion
|
|
@@ -250,7 +250,8 @@ def get_load_function(low_bit):
|
||||||
from ipex_llm import optimize_model
|
from ipex_llm import optimize_model
|
||||||
import os
|
import os
|
||||||
not_convert_last_mlp = os.getenv("IPEX_LLM_NOT_CONVERT_LAST_MLP", None)
|
not_convert_last_mlp = os.getenv("IPEX_LLM_NOT_CONVERT_LAST_MLP", None)
|
||||||
if not_convert_last_mlp is not None:
|
is_glm4_model = "glm-4" in self.model_config.model.lower()
|
||||||
|
if not_convert_last_mlp is not None or is_glm4_model:
|
||||||
# only use to avoid nan value in last mlp forward running glm4-9b-chat
|
# only use to avoid nan value in last mlp forward running glm4-9b-chat
|
||||||
modules = ["35.mlp", "36.mlp", "37.mlp", "38.mlp", "39.mlp"]
|
modules = ["35.mlp", "36.mlp", "37.mlp", "38.mlp", "39.mlp"]
|
||||||
else:
|
else:
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue