LLM: Fix yi-6b fp16 error on pvc (#10781)

* update for yi fp16

* update

* update
Wang, Jian4 2024-04-17 16:49:59 +08:00 committed by GitHub
parent 0646e2c062
commit a20271ffe4


@@ -547,7 +547,7 @@ def llama_attention_forward_4_31_original(
             value_states = torch.cat(value_states, dim=-1)
         else:
             if fp16_fusion_check(self.q_proj, hidden_states, self.training) and \
-                    hidden_size == 4096:
+                    hidden_size == 4096 and self.q_proj.out_features == self.k_proj.out_features:
                 # only use mm_qkv_out on pvc for llama-7b
                 if not hasattr(self, "qkv_proj_weight"):
                     self.qkv_proj_weight = torch.stack([self.q_proj.weight,
@@ -1200,7 +1200,7 @@ def llama_attention_forward_4_36_original(
             value_states = torch.cat(value_states, dim=-1)
         else:
             if fp16_fusion_check(self.q_proj, hidden_states, self.training) and \
-                    hidden_size == 4096:
+                    hidden_size == 4096 and self.q_proj.out_features == self.k_proj.out_features:
                 # only use mm_qkv_out on pvc for llama-7b
                 if not hasattr(self, "qkv_proj_weight"):
                     self.qkv_proj_weight = torch.stack([self.q_proj.weight,
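For context on why the extra out_features check fixes Yi-6B: the fused mm_qkv_out path stacks the q/k/v weight matrices with torch.stack, which requires identically shaped tensors. Yi-6B uses grouped-query attention, so its k_proj/v_proj projections are smaller than q_proj even though hidden_size is 4096, and the stack fails. A minimal sketch of the failure, with head counts assumed from Yi-6B's published config (32 query heads, 4 KV heads):

import torch

hidden_size = 4096                 # assumed Yi-6B hidden size
num_heads, num_kv_heads = 32, 4    # assumed GQA config: fewer KV heads than query heads
head_dim = hidden_size // num_heads

q_weight = torch.empty(num_heads * head_dim, hidden_size)     # (4096, 4096)
k_weight = torch.empty(num_kv_heads * head_dim, hidden_size)  # (512, 4096)
v_weight = torch.empty(num_kv_heads * head_dim, hidden_size)  # (512, 4096)

# The fused path stacks the three weights into one (3, out, in) tensor.
# With GQA the shapes differ, so torch.stack raises a RuntimeError:
try:
    torch.stack([q_weight, k_weight, v_weight], dim=0)
except RuntimeError as e:
    print("stack failed:", e)

# The added guard skips fusion whenever the projections differ in size,
# falling back to separate q/k/v matmuls for GQA models like Yi-6B:
if q_weight.shape[0] == k_weight.shape[0]:
    fused = torch.stack([q_weight, k_weight, v_weight], dim=0)
else:
    print("GQA model: use separate q/k/v projections")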