LLM: disable mmap by default for better performance (#8467)

commit db39d0a6b3 (parent 52c6b057d6)
Author: Yishuo Wang
Date:   2023-07-11 09:26:26 +08:00 (committed by GitHub)

4 changed files with 6 additions and 6 deletions
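
The diff below flips a single default per model class: use_mmap now defaults to False, so GGML weights are read into memory up front instead of being memory-mapped. A minimal usage sketch of the new behavior follows; the import path and model file name are assumptions for illustration and are not part of this diff:

# Illustrative sketch; the import path and model file are assumptions.
from bigdl.llm.models import Llama

# After this commit, use_mmap defaults to False: weights are loaded
# into memory up front rather than memory-mapped.
llm = Llama(model_path="./ggml-model-q4_0.bin")

# Llama's signature still accepts use_mmap, so mmap loading can be
# requested explicitly. Note that for Bloom and Starcoder this diff
# lists use_mmap among the temporarily unsupported parameters, so only
# the default value is accepted there.
llm_mmap = Llama(model_path="./ggml-model-q4_0.bin", use_mmap=True)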

@@ -68,7 +68,7 @@ class Bloom(GenerationMixin):
         f16_kv: bool = True,
         logits_all: bool = False,
         vocab_only: bool = False,
-        use_mmap: bool = True,
+        use_mmap: bool = False,
         use_mlock: bool = False,
         embedding: bool = False,
         n_threads: Optional[int] = 2,
@@ -127,7 +127,7 @@ class Bloom(GenerationMixin):
         self.verbose = verbose
         # TODO: Some parameters are temporarily not supported
         unsupported_arg = {'n_parts': -1, 'n_gpu_layers': 0, 'f16_kv': True, 'logits_all': False,
-                           'vocab_only': False, 'use_mmap': True, 'use_mlock': False,
+                           'vocab_only': False, 'use_mmap': False, 'use_mlock': False,
                            'last_n_tokens_size': 64, 'lora_base': None,
                            'lora_path': None, 'verbose': True}
         for arg in unsupported_arg.keys():

@@ -136,7 +136,7 @@ class Gptneox(GenerationMixin):
         f16_kv: bool = True,
         logits_all: bool = False,
         vocab_only: bool = False,
-        use_mmap: bool = True,
+        use_mmap: bool = False,
         use_mlock: bool = False,
         embedding: bool = False,
         n_threads: Optional[int] = 2,

@@ -134,7 +134,7 @@ class Llama(GenerationMixin):
         f16_kv: bool = True,
         logits_all: bool = False,
         vocab_only: bool = False,
-        use_mmap: bool = True,
+        use_mmap: bool = False,
         use_mlock: bool = False,
         embedding: bool = False,
         n_threads: Optional[int] = 2,

@@ -69,7 +69,7 @@ class Starcoder(GenerationMixin):
         f16_kv: bool = True,
         logits_all: bool = False,
         vocab_only: bool = False,
-        use_mmap: bool = True,
+        use_mmap: bool = False,
         use_mlock: bool = False,
         embedding: bool = False,
         n_threads: Optional[int] = 2,
@@ -128,7 +128,7 @@ class Starcoder(GenerationMixin):
         self.verbose = verbose
         # TODO: Some parameters are temporarily not supported
         unsupported_arg = {'n_parts': -1, 'n_gpu_layers': 0, 'f16_kv': True, 'logits_all': False,
-                           'vocab_only': False, 'use_mmap': True, 'use_mlock': False,
+                           'vocab_only': False, 'use_mmap': False, 'use_mlock': False,
                            'last_n_tokens_size': 64, 'lora_base': None,
                            'lora_path': None, 'verbose': True}
         for arg in unsupported_arg.keys():
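
The validation loop at the end of the Bloom and Starcoder hunks is truncated in this view. As a purely hypothetical reconstruction of the usual pattern for such a check (the real loop body is not shown in this diff): compare each caller-supplied keyword against its pinned default and warn when they differ.

import warnings

# Hypothetical sketch; the actual loop body is cut off in this diff.
def _check_unsupported_args(kwargs: dict) -> None:
    unsupported_arg = {'n_parts': -1, 'n_gpu_layers': 0, 'f16_kv': True,
                       'logits_all': False, 'vocab_only': False,
                       'use_mmap': False, 'use_mlock': False,
                       'last_n_tokens_size': 64, 'lora_base': None,
                       'lora_path': None, 'verbose': True}
    for arg, default in unsupported_arg.items():
        # Warn when a temporarily unsupported parameter is set to
        # anything other than its pinned default value.
        if arg in kwargs and kwargs[arg] != default:
            warnings.warn(f"Parameter `{arg}` is temporarily unsupported; "
                          f"falling back to the default {default!r}.")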