LLM: disable mmap by default for better performance (#8467)
This commit is contained in:
		
							parent
							
								
									52c6b057d6
								
							
						
					
					
						commit
						db39d0a6b3
					
				
					 4 changed files with 6 additions and 6 deletions
				
			
		| 
						 | 
				
			
			@ -68,7 +68,7 @@ class Bloom(GenerationMixin):
 | 
			
		|||
        f16_kv: bool = True,
 | 
			
		||||
        logits_all: bool = False,
 | 
			
		||||
        vocab_only: bool = False,
 | 
			
		||||
        use_mmap: bool = True,
 | 
			
		||||
        use_mmap: bool = False,
 | 
			
		||||
        use_mlock: bool = False,
 | 
			
		||||
        embedding: bool = False,
 | 
			
		||||
        n_threads: Optional[int] = 2,
 | 
			
		||||
| 
						 | 
				
			
			@ -127,7 +127,7 @@ class Bloom(GenerationMixin):
 | 
			
		|||
        self.verbose = verbose
 | 
			
		||||
        # TODO: Some parameters are temporarily not supported
 | 
			
		||||
        unsupported_arg = {'n_parts': -1, 'n_gpu_layers': 0, 'f16_kv': True, 'logits_all': False,
 | 
			
		||||
                           'vocab_only': False, 'use_mmap': True, 'use_mlock': False,
 | 
			
		||||
                           'vocab_only': False, 'use_mmap': False, 'use_mlock': False,
 | 
			
		||||
                           'last_n_tokens_size': 64, 'lora_base': None,
 | 
			
		||||
                           'lora_path': None, 'verbose': True}
 | 
			
		||||
        for arg in unsupported_arg.keys():
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -136,7 +136,7 @@ class Gptneox(GenerationMixin):
 | 
			
		|||
        f16_kv: bool = True,
 | 
			
		||||
        logits_all: bool = False,
 | 
			
		||||
        vocab_only: bool = False,
 | 
			
		||||
        use_mmap: bool = True,
 | 
			
		||||
        use_mmap: bool = False,
 | 
			
		||||
        use_mlock: bool = False,
 | 
			
		||||
        embedding: bool = False,
 | 
			
		||||
        n_threads: Optional[int] = 2,
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -134,7 +134,7 @@ class Llama(GenerationMixin):
 | 
			
		|||
        f16_kv: bool = True,
 | 
			
		||||
        logits_all: bool = False,
 | 
			
		||||
        vocab_only: bool = False,
 | 
			
		||||
        use_mmap: bool = True,
 | 
			
		||||
        use_mmap: bool = False,
 | 
			
		||||
        use_mlock: bool = False,
 | 
			
		||||
        embedding: bool = False,
 | 
			
		||||
        n_threads: Optional[int] = 2,
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -69,7 +69,7 @@ class Starcoder(GenerationMixin):
 | 
			
		|||
        f16_kv: bool = True,
 | 
			
		||||
        logits_all: bool = False,
 | 
			
		||||
        vocab_only: bool = False,
 | 
			
		||||
        use_mmap: bool = True,
 | 
			
		||||
        use_mmap: bool = False,
 | 
			
		||||
        use_mlock: bool = False,
 | 
			
		||||
        embedding: bool = False,
 | 
			
		||||
        n_threads: Optional[int] = 2,
 | 
			
		||||
| 
						 | 
				
			
			@ -128,7 +128,7 @@ class Starcoder(GenerationMixin):
 | 
			
		|||
        self.verbose = verbose
 | 
			
		||||
        # TODO: Some parameters are temporarily not supported
 | 
			
		||||
        unsupported_arg = {'n_parts': -1, 'n_gpu_layers': 0, 'f16_kv': True, 'logits_all': False,
 | 
			
		||||
                           'vocab_only': False, 'use_mmap': True, 'use_mlock': False,
 | 
			
		||||
                           'vocab_only': False, 'use_mmap': False, 'use_mlock': False,
 | 
			
		||||
                           'last_n_tokens_size': 64, 'lora_base': None,
 | 
			
		||||
                           'lora_path': None, 'verbose': True}
 | 
			
		||||
        for arg in unsupported_arg.keys():
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue