diff --git a/python/llm/src/bigdl/llm/ggml/model/bloom/bloom.py b/python/llm/src/bigdl/llm/ggml/model/bloom/bloom.py
index e312c73a..0eea6528 100644
--- a/python/llm/src/bigdl/llm/ggml/model/bloom/bloom.py
+++ b/python/llm/src/bigdl/llm/ggml/model/bloom/bloom.py
@@ -68,7 +68,7 @@ class Bloom(GenerationMixin):
         f16_kv: bool = True,
         logits_all: bool = False,
         vocab_only: bool = False,
-        use_mmap: bool = True,
+        use_mmap: bool = False,
         use_mlock: bool = False,
         embedding: bool = False,
         n_threads: Optional[int] = 2,
@@ -127,7 +127,7 @@ class Bloom(GenerationMixin):
         self.verbose = verbose
         # TODO: Some parameters are temporarily not supported
         unsupported_arg = {'n_parts': -1, 'n_gpu_layers': 0, 'f16_kv': True, 'logits_all': False,
-                           'vocab_only': False, 'use_mmap': True, 'use_mlock': False,
+                           'vocab_only': False, 'use_mmap': False, 'use_mlock': False,
                            'last_n_tokens_size': 64, 'lora_base': None,
                            'lora_path': None, 'verbose': True}
         for arg in unsupported_arg.keys():
diff --git a/python/llm/src/bigdl/llm/ggml/model/gptneox/gptneox.py b/python/llm/src/bigdl/llm/ggml/model/gptneox/gptneox.py
index a4461b27..019e55ed 100644
--- a/python/llm/src/bigdl/llm/ggml/model/gptneox/gptneox.py
+++ b/python/llm/src/bigdl/llm/ggml/model/gptneox/gptneox.py
@@ -136,7 +136,7 @@ class Gptneox(GenerationMixin):
         f16_kv: bool = True,
         logits_all: bool = False,
         vocab_only: bool = False,
-        use_mmap: bool = True,
+        use_mmap: bool = False,
         use_mlock: bool = False,
         embedding: bool = False,
         n_threads: Optional[int] = 2,
diff --git a/python/llm/src/bigdl/llm/ggml/model/llama/llama.py b/python/llm/src/bigdl/llm/ggml/model/llama/llama.py
index aafd87fc..aaf60fa9 100644
--- a/python/llm/src/bigdl/llm/ggml/model/llama/llama.py
+++ b/python/llm/src/bigdl/llm/ggml/model/llama/llama.py
@@ -134,7 +134,7 @@ class Llama(GenerationMixin):
         f16_kv: bool = True,
         logits_all: bool = False,
         vocab_only: bool = False,
-        use_mmap: bool = True,
+        use_mmap: bool = False,
         use_mlock: bool = False,
         embedding: bool = False,
         n_threads: Optional[int] = 2,
diff --git a/python/llm/src/bigdl/llm/ggml/model/starcoder/starcoder.py b/python/llm/src/bigdl/llm/ggml/model/starcoder/starcoder.py
index ed00177f..e40fc991 100644
--- a/python/llm/src/bigdl/llm/ggml/model/starcoder/starcoder.py
+++ b/python/llm/src/bigdl/llm/ggml/model/starcoder/starcoder.py
@@ -69,7 +69,7 @@ class Starcoder(GenerationMixin):
         f16_kv: bool = True,
         logits_all: bool = False,
         vocab_only: bool = False,
-        use_mmap: bool = True,
+        use_mmap: bool = False,
         use_mlock: bool = False,
         embedding: bool = False,
         n_threads: Optional[int] = 2,
@@ -128,7 +128,7 @@ class Starcoder(GenerationMixin):
         self.verbose = verbose
         # TODO: Some parameters are temporarily not supported
         unsupported_arg = {'n_parts': -1, 'n_gpu_layers': 0, 'f16_kv': True, 'logits_all': False,
-                           'vocab_only': False, 'use_mmap': True, 'use_mlock': False,
+                           'vocab_only': False, 'use_mmap': False, 'use_mlock': False,
                            'last_n_tokens_size': 64, 'lora_base': None,
                            'lora_path': None, 'verbose': True}
         for arg in unsupported_arg.keys():