Fix glm4-9b-chat nan error on vllm 0.3.3 (#11970)
* fix nan value * update
This commit is contained in:
parent
77b04efcc5
commit
7d103417b8
1 changed file with 9 additions and 1 deletion
|
|
@ -248,7 +248,15 @@ def get_load_function(low_bit):
|
|||
parallel_config=self.parallel_config,
|
||||
scheduler_config=self.scheduler_config)
|
||||
from ipex_llm import optimize_model
|
||||
optimize_model(self.model, low_bit=low_bit, torch_dtype=self.model_config.dtype)
|
||||
import os
|
||||
not_convert_last_mlp = os.getenv("IPEX_LLM_NOT_CONVERT_LAST_MLP", None)
|
||||
if not_convert_last_mlp is not None:
|
||||
# only use to avoid nan value in last mlp forward running glm4-9b-chat
|
||||
modules = ["35.mlp", "36.mlp", "37.mlp", "38.mlp", "39.mlp"]
|
||||
else:
|
||||
modules = None
|
||||
optimize_model(self.model, low_bit=low_bit, torch_dtype=self.model_config.dtype,
|
||||
modules_to_not_convert=modules)
|
||||
self.model = self.model.to(device=self.device_config.device,
|
||||
dtype=self.model_config.dtype)
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue