Fix glm4-9b-chat nan error on vllm 0.3.3 (#11970)

* fix nan value

* update
This commit is contained in:
Wang, Jian4 2024-08-30 09:50:18 +08:00 committed by GitHub
parent 77b04efcc5
commit 7d103417b8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -248,7 +248,15 @@ def get_load_function(low_bit):
                 parallel_config=self.parallel_config,
                 scheduler_config=self.scheduler_config)
             from ipex_llm import optimize_model
-            optimize_model(self.model, low_bit=low_bit, torch_dtype=self.model_config.dtype)
+            import os
+            not_convert_last_mlp = os.getenv("IPEX_LLM_NOT_CONVERT_LAST_MLP", None)
+            if not_convert_last_mlp is not None:
+                # only use to avoid nan value in last mlp forward running glm4-9b-chat
+                modules = ["35.mlp", "36.mlp", "37.mlp", "38.mlp", "39.mlp"]
+            else:
+                modules = None
+            optimize_model(self.model, low_bit=low_bit, torch_dtype=self.model_config.dtype,
+                           modules_to_not_convert=modules)
             self.model = self.model.to(device=self.device_config.device,
                                        dtype=self.model_config.dtype)