diff --git a/python/llm/src/bigdl/llm/transformers/models/baichuan2.py b/python/llm/src/bigdl/llm/transformers/models/baichuan2.py
index 802b8871..8cd48ec4 100644
--- a/python/llm/src/bigdl/llm/transformers/models/baichuan2.py
+++ b/python/llm/src/bigdl/llm/transformers/models/baichuan2.py
@@ -54,6 +54,8 @@ def baichuan_13b_rms_norm_forward(self, hidden_states):
                                             self.epsilon)
         # if nelement == 0, means fused norm failed, go back to python implement.
         if result.nelement != 0:
+            # Workaround: clone the result to avoid wrong output on Arc GPUs (root cause unknown).
+            result = result.clone()
             return result
     input_dtype = hidden_states.dtype
     hidden_states = hidden_states.to(torch.float32)
diff --git a/python/llm/src/bigdl/llm/transformers/models/chatglm2.py b/python/llm/src/bigdl/llm/transformers/models/chatglm2.py
index 60213df8..92dfca03 100644
--- a/python/llm/src/bigdl/llm/transformers/models/chatglm2.py
+++ b/python/llm/src/bigdl/llm/transformers/models/chatglm2.py
@@ -88,6 +88,8 @@ def chatglm_rms_norm_forward(self, hidden_states):
                                             self.eps)
         # if nelement == 0, means fused norm failed, go back to python implement.
         if result.nelement != 0:
+            # Workaround: clone the result to avoid wrong output on Arc GPUs (root cause unknown).
+            result = result.clone()
             return result
     input_dtype = hidden_states.dtype
     hidden_states = hidden_states.to(torch.float32)
diff --git a/python/llm/src/bigdl/llm/transformers/models/llama.py b/python/llm/src/bigdl/llm/transformers/models/llama.py
index 836bc2bb..2d51af8c 100644
--- a/python/llm/src/bigdl/llm/transformers/models/llama.py
+++ b/python/llm/src/bigdl/llm/transformers/models/llama.py
@@ -91,6 +91,8 @@ def llama_rms_norm_forward(self, hidden_states):
                                             self.variance_epsilon)
         # if nelement == 0, means fused norm failed, go back to python implement.
         if result.nelement != 0:
+            # Workaround: clone the result to avoid wrong output on Arc GPUs (root cause unknown).
+            result = result.clone()
             return result
     input_dtype = hidden_states.dtype
     hidden_states = hidden_states.to(torch.float32)
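
All three patches touch the same pattern: try a fused RMSNorm kernel, clone its output before returning it (the Arc GPU workaround added here), and otherwise fall back to a plain PyTorch implementation. Below is a minimal, self-contained sketch of that pattern, under the assumption that the fused kernel is injected as a `fused_rms_norm` callable that signals failure by returning an empty tensor; the name and signature are illustrative, not the library's actual API.

```python
# Minimal sketch of the fused-RMSNorm-with-fallback pattern (assumption:
# the fused kernel is passed in as `fused_rms_norm` and signals failure by
# returning an empty tensor; this name is illustrative, not the real API).
import torch


def rms_norm_forward(weight, hidden_states, eps=1e-6, fused_rms_norm=None):
    if fused_rms_norm is not None:
        result = fused_rms_norm(hidden_states, weight, eps)
        # nelement() == 0 means the fused kernel failed; use the fallback below.
        if result.nelement() != 0:
            # Clone the kernel output before returning it, mirroring the
            # workaround for wrong results observed on Arc GPUs.
            return result.clone()
    # Plain PyTorch fallback, computed in float32 for numerical stability.
    input_dtype = hidden_states.dtype
    hidden_states = hidden_states.to(torch.float32)
    variance = hidden_states.pow(2).mean(-1, keepdim=True)
    hidden_states = hidden_states * torch.rsqrt(variance + eps)
    return weight * hidden_states.to(input_dtype)
```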