LLM: disable mmap by default for better performance (#8467)
This commit is contained in:
parent
52c6b057d6
commit
db39d0a6b3
4 changed files with 6 additions and 6 deletions
|
|
@@ -68,7 +68,7 @@ class Bloom(GenerationMixin):
|
||||||
f16_kv: bool = True,
|
f16_kv: bool = True,
|
||||||
logits_all: bool = False,
|
logits_all: bool = False,
|
||||||
vocab_only: bool = False,
|
vocab_only: bool = False,
|
||||||
use_mmap: bool = True,
|
use_mmap: bool = False,
|
||||||
use_mlock: bool = False,
|
use_mlock: bool = False,
|
||||||
embedding: bool = False,
|
embedding: bool = False,
|
||||||
n_threads: Optional[int] = 2,
|
n_threads: Optional[int] = 2,
|
||||||
|
|
@@ -127,7 +127,7 @@ class Bloom(GenerationMixin):
|
||||||
self.verbose = verbose
|
self.verbose = verbose
|
||||||
# TODO: Some parameters are temporarily not supported
|
# TODO: Some parameters are temporarily not supported
|
||||||
unsupported_arg = {'n_parts': -1, 'n_gpu_layers': 0, 'f16_kv': True, 'logits_all': False,
|
unsupported_arg = {'n_parts': -1, 'n_gpu_layers': 0, 'f16_kv': True, 'logits_all': False,
|
||||||
'vocab_only': False, 'use_mmap': True, 'use_mlock': False,
|
'vocab_only': False, 'use_mmap': False, 'use_mlock': False,
|
||||||
'last_n_tokens_size': 64, 'lora_base': None,
|
'last_n_tokens_size': 64, 'lora_base': None,
|
||||||
'lora_path': None, 'verbose': True}
|
'lora_path': None, 'verbose': True}
|
||||||
for arg in unsupported_arg.keys():
|
for arg in unsupported_arg.keys():
|
||||||
|
|
|
||||||
|
|
@@ -136,7 +136,7 @@ class Gptneox(GenerationMixin):
|
||||||
f16_kv: bool = True,
|
f16_kv: bool = True,
|
||||||
logits_all: bool = False,
|
logits_all: bool = False,
|
||||||
vocab_only: bool = False,
|
vocab_only: bool = False,
|
||||||
use_mmap: bool = True,
|
use_mmap: bool = False,
|
||||||
use_mlock: bool = False,
|
use_mlock: bool = False,
|
||||||
embedding: bool = False,
|
embedding: bool = False,
|
||||||
n_threads: Optional[int] = 2,
|
n_threads: Optional[int] = 2,
|
||||||
|
|
|
||||||
|
|
@@ -134,7 +134,7 @@ class Llama(GenerationMixin):
|
||||||
f16_kv: bool = True,
|
f16_kv: bool = True,
|
||||||
logits_all: bool = False,
|
logits_all: bool = False,
|
||||||
vocab_only: bool = False,
|
vocab_only: bool = False,
|
||||||
use_mmap: bool = True,
|
use_mmap: bool = False,
|
||||||
use_mlock: bool = False,
|
use_mlock: bool = False,
|
||||||
embedding: bool = False,
|
embedding: bool = False,
|
||||||
n_threads: Optional[int] = 2,
|
n_threads: Optional[int] = 2,
|
||||||
|
|
|
||||||
|
|
@@ -69,7 +69,7 @@ class Starcoder(GenerationMixin):
|
||||||
f16_kv: bool = True,
|
f16_kv: bool = True,
|
||||||
logits_all: bool = False,
|
logits_all: bool = False,
|
||||||
vocab_only: bool = False,
|
vocab_only: bool = False,
|
||||||
use_mmap: bool = True,
|
use_mmap: bool = False,
|
||||||
use_mlock: bool = False,
|
use_mlock: bool = False,
|
||||||
embedding: bool = False,
|
embedding: bool = False,
|
||||||
n_threads: Optional[int] = 2,
|
n_threads: Optional[int] = 2,
|
||||||
|
|
@@ -128,7 +128,7 @@ class Starcoder(GenerationMixin):
|
||||||
self.verbose = verbose
|
self.verbose = verbose
|
||||||
# TODO: Some parameters are temporarily not supported
|
# TODO: Some parameters are temporarily not supported
|
||||||
unsupported_arg = {'n_parts': -1, 'n_gpu_layers': 0, 'f16_kv': True, 'logits_all': False,
|
unsupported_arg = {'n_parts': -1, 'n_gpu_layers': 0, 'f16_kv': True, 'logits_all': False,
|
||||||
'vocab_only': False, 'use_mmap': True, 'use_mlock': False,
|
'vocab_only': False, 'use_mmap': False, 'use_mlock': False,
|
||||||
'last_n_tokens_size': 64, 'lora_base': None,
|
'last_n_tokens_size': 64, 'lora_base': None,
|
||||||
'lora_path': None, 'verbose': True}
|
'lora_path': None, 'verbose': True}
|
||||||
for arg in unsupported_arg.keys():
|
for arg in unsupported_arg.keys():
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue