From 6320bf201ec761c3452bd0fb3a7129b4081580b4 Mon Sep 17 00:00:00 2001
From: Yishuo Wang
Date: Thu, 13 Jul 2023 17:08:08 +0800
Subject: [PATCH] LLM: fix memory access violation (#8519)

---
 python/llm/src/bigdl/llm/ggml/model/llama/llama_cpp.py | 2 +-
 python/llm/src/bigdl/llm/transformers/linear_quant.py  | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/python/llm/src/bigdl/llm/ggml/model/llama/llama_cpp.py b/python/llm/src/bigdl/llm/ggml/model/llama/llama_cpp.py
index c7eef289..245f613a 100644
--- a/python/llm/src/bigdl/llm/ggml/model/llama/llama_cpp.py
+++ b/python/llm/src/bigdl/llm/ggml/model/llama/llama_cpp.py
@@ -984,7 +984,7 @@ def ggml_type_size(qtype: ctypes.c_int) -> int:
 _lib.ggml_type_size.argtypes = [
     ctypes.c_int,
 ]
-_lib.ggml_type_size.restype = ctypes.c_int
+_lib.ggml_type_size.restype = ctypes.c_size_t
 
 
 def ggml_qk_size(qtype: ctypes.c_int) -> int:
diff --git a/python/llm/src/bigdl/llm/transformers/linear_quant.py b/python/llm/src/bigdl/llm/transformers/linear_quant.py
index 7b19c224..622e7ea6 100644
--- a/python/llm/src/bigdl/llm/transformers/linear_quant.py
+++ b/python/llm/src/bigdl/llm/transformers/linear_quant.py
@@ -153,13 +153,13 @@ def ggml_matmul_src1_x_src0_t(src0: torch.Tensor,
     if src1.dtype != torch.float32:
         src1 = src1.float()
 
-    src0_ptr = src0.data_ptr() + (src0.storage_offset() * src0.element_size())
-    src1_ptr = src1.data_ptr() + (src1.storage_offset() * src1.element_size())
+    src0_ptr = src0.data_ptr()
+    src1_ptr = src1.data_ptr()
 
     result_shape = (src1.shape[0], src0_shape[0])
 
     result_t = torch.empty(result_shape, dtype=torch.float32)
-    result_ptr = result_t.data_ptr() + (result_t.storage_offset() * result_t.element_size())
+    result_ptr = result_t.data_ptr()
 
     src0_shape = tuple(reversed(src0_shape))
     src1_shape = tuple(reversed(src1.shape))
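
Notes on the two fixes, with minimal sketches in plain Python (illustrative only, not part of the patch):

1. In ggml, ggml_type_size returns size_t, not int. With restype declared as
ctypes.c_int, ctypes reads only 32 bits of the return value on 64-bit
platforms; harmless for small type sizes, but a mismatched signature that can
corrupt any buffer-size arithmetic built on it, and c_size_t matches the real
C prototype. A sketch of the same pattern against libc's strlen, which also
returns size_t (assumes a Unix-like platform where CDLL(None) loads the C
runtime):

    import ctypes

    libc = ctypes.CDLL(None)  # assumption: Unix-like; loads the C library
    libc.strlen.argtypes = [ctypes.c_char_p]
    # strlen really returns size_t, so mirror it, as the patch does for
    # ggml_type_size; the ctypes default restype would be c_int
    libc.strlen.restype = ctypes.c_size_t
    assert libc.strlen(b"ggml") == 4

2. torch.Tensor.data_ptr() already returns the address of the tensor's first
element, i.e. storage_offset() * element_size() is already folded in. Adding
the offset a second time, as the removed lines did, shifts the pointer past
the intended data and, for views near the end of an allocation, past the
buffer itself, which is presumably the memory access violation this patch
fixes. A sketch that checks the invariant:

    import torch

    base = torch.arange(16, dtype=torch.float32)
    view = base[4:]  # a view whose storage_offset() is 4

    offset_bytes = view.storage_offset() * view.element_size()
    # data_ptr() already includes the storage offset ...
    assert view.data_ptr() == base.data_ptr() + offset_bytes

    # ... so what the removed lines computed double-counts it,
    # pointing at element 8 of base instead of element 4
    wrong_ptr = view.data_ptr() + offset_bytes
    assert wrong_ptr == base.data_ptr() + 2 * offset_bytes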