LLM: disable mmap by default for better performance (#8467)

2023-07-11 09:26:26 +08:00 · 2023-07-11 09:26:26 +08:00 · db39d0a6b3
commit db39d0a6b3
parent 52c6b057d6
4 changed files with 6 additions and 6 deletions
--- a/python/llm/src/bigdl/llm/ggml/model/bloom/bloom.py
+++ b/python/llm/src/bigdl/llm/ggml/model/bloom/bloom.py
@@ -68,7 +68,7 @@ class Bloom(GenerationMixin):
        f16_kv: bool = True,
        logits_all: bool = False,
        vocab_only: bool = False,
-        use_mmap: bool = True,
+        use_mmap: bool = False,
        use_mlock: bool = False,
        embedding: bool = False,
        n_threads: Optional[int] = 2,
@@ -127,7 +127,7 @@ class Bloom(GenerationMixin):
        self.verbose = verbose
        # TODO: Some parameters are temporarily not supported
        unsupported_arg = {'n_parts': -1, 'n_gpu_layers': 0, 'f16_kv': True, 'logits_all': False,
-                           'vocab_only': False, 'use_mmap': True, 'use_mlock': False,
+                           'vocab_only': False, 'use_mmap': False, 'use_mlock': False,
                           'last_n_tokens_size': 64, 'lora_base': None,
                           'lora_path': None, 'verbose': True}
        for arg in unsupported_arg.keys():
--- a/python/llm/src/bigdl/llm/ggml/model/gptneox/gptneox.py
+++ b/python/llm/src/bigdl/llm/ggml/model/gptneox/gptneox.py
@@ -136,7 +136,7 @@ class Gptneox(GenerationMixin):
        f16_kv: bool = True,
        logits_all: bool = False,
        vocab_only: bool = False,
-        use_mmap: bool = True,
+        use_mmap: bool = False,
        use_mlock: bool = False,
        embedding: bool = False,
        n_threads: Optional[int] = 2,
--- a/python/llm/src/bigdl/llm/ggml/model/llama/llama.py
+++ b/python/llm/src/bigdl/llm/ggml/model/llama/llama.py
@@ -134,7 +134,7 @@ class Llama(GenerationMixin):
        f16_kv: bool = True,
        logits_all: bool = False,
        vocab_only: bool = False,
-        use_mmap: bool = True,
+        use_mmap: bool = False,
        use_mlock: bool = False,
        embedding: bool = False,
        n_threads: Optional[int] = 2,
--- a/python/llm/src/bigdl/llm/ggml/model/starcoder/starcoder.py
+++ b/python/llm/src/bigdl/llm/ggml/model/starcoder/starcoder.py
@@ -69,7 +69,7 @@ class Starcoder(GenerationMixin):
        f16_kv: bool = True,
        logits_all: bool = False,
        vocab_only: bool = False,
-        use_mmap: bool = True,
+        use_mmap: bool = False,
        use_mlock: bool = False,
        embedding: bool = False,
        n_threads: Optional[int] = 2,
@@ -128,7 +128,7 @@ class Starcoder(GenerationMixin):
        self.verbose = verbose
        # TODO: Some parameters are temporarily not supported
        unsupported_arg = {'n_parts': -1, 'n_gpu_layers': 0, 'f16_kv': True, 'logits_all': False,
-                           'vocab_only': False, 'use_mmap': True, 'use_mlock': False,
+                           'vocab_only': False, 'use_mmap': False, 'use_mlock': False,
                           'last_n_tokens_size': 64, 'lora_base': None,
                           'lora_path': None, 'verbose': True}
        for arg in unsupported_arg.keys():