LLM: improve PyTorch API doc (#9128)
parent 4a0a3c376a
commit eb3fb18eb4

2 changed files with 54 additions and 4 deletions
@@ -1,10 +1,24 @@
 BigDL-LLM PyTorch API
 =====================
 
-optimize model
+Optimize Model
 ----------------------------------------
 
-.. automodule:: bigdl.llm.optimize
+You can run any PyTorch model with ``optimize_model`` through only a one-line code change, and benefit from BigDL-LLM optimization regardless of the library or API you are using.
+
+.. automodule:: bigdl.llm
     :members: optimize_model
     :undoc-members:
     :show-inheritance:
+
+
+
+Load Optimized Model
+----------------------------------------
+
+To avoid the high resource consumption of loading the original model, we provide a save/load API that supports saving a model after low-bit optimization and loading the saved low-bit model back. Saving and loading are platform-independent, regardless of operating system.
+
+.. automodule:: bigdl.llm.optimize
+    :members: load_low_bit
+    :undoc-members:
+    :show-inheritance:
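For context on what the new "Optimize Model" section documents: a minimal sketch of the one-line change, assuming a Hugging Face checkpoint name chosen purely for illustration (the diff's own docstrings use ChatGLM2-6B and Whisper as their examples):

```python
from transformers import AutoModel
from bigdl.llm import optimize_model

# Load any PyTorch model as usual; the checkpoint name is a placeholder.
model = AutoModel.from_pretrained("THUDM/chatglm2-6b",
                                  torch_dtype="auto",
                                  trust_remote_code=True)

# The one-line change: apply BigDL-LLM low-bit optimization
# (defaults to 'sym_int4', per the optimize_model signature below).
model = optimize_model(model)

# From here the model is used exactly as before; no other API changes.
```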
@@ -133,6 +133,33 @@ def low_memory_init():
 
 
 def load_low_bit(model, model_path):
+    """
+    Load the optimized pytorch model.
+
+    :param model: The PyTorch model instance.
+    :param model_path: The path to the saved optimized model.
+
+    :return: The optimized model.
+
+    >>> # Example 1:
+    >>> # Take the ChatGLM2-6B model as an example.
+    >>> # Make sure you have saved the optimized model by calling 'save_low_bit'.
+    >>> from bigdl.llm.optimize import low_memory_init, load_low_bit
+    >>> with low_memory_init():  # Fast and low-cost: the model is loaded on the meta device
+    ...     model = AutoModel.from_pretrained(saved_dir,
+    ...                                       torch_dtype="auto",
+    ...                                       trust_remote_code=True)
+    >>> model = load_low_bit(model, saved_dir)  # Load the optimized model
+
+    >>> # Example 2:
+    >>> # If the model doesn't fit the 'low_memory_init' method, you can
+    >>> # alternatively obtain the model instance through the traditional loading method.
+    >>> # Take the OpenAI Whisper model as an example.
+    >>> # Make sure you have saved the optimized model by calling 'save_low_bit'.
+    >>> from bigdl.llm.optimize import load_low_bit
+    >>> model = whisper.load_model('tiny')  # A model instance from the traditional loading method
+    >>> model = load_low_bit(model, saved_dir)  # Load the optimized model
+    """
     low_bit = low_bit_sanity_check(model_path)
     invalidInputError(isinstance(model, torch.nn.Module),
                       "model should be a instance of "
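Taken together with ``save_low_bit`` (shown in the ``optimize_model`` docstring below), the save/load roundtrip this docstring describes looks roughly like this; ``saved_dir`` and the checkpoint name are placeholders:

```python
from transformers import AutoModel
from bigdl.llm import optimize_model
from bigdl.llm.optimize import low_memory_init, load_low_bit

saved_dir = "./chatglm2-6b-low-bit"  # placeholder path

# First run: optimize the model and persist the low-bit weights.
model = AutoModel.from_pretrained("THUDM/chatglm2-6b",
                                  torch_dtype="auto",
                                  trust_remote_code=True)
model = optimize_model(model)
model.save_low_bit(saved_dir)

# Later runs: avoid loading the full-precision weights at all.
with low_memory_init():  # parameters are created on the meta device
    model = AutoModel.from_pretrained(saved_dir,
                                      torch_dtype="auto",
                                      trust_remote_code=True)
model = load_low_bit(model, saved_dir)  # materialize the low-bit weights
```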
@@ -167,14 +194,23 @@ def load_low_bit(model, model_path):
 
 def optimize_model(model, low_bit='sym_int4', optimize_llm=True):
     """
-    A method to optimize any pytorch models.
+    A method to optimize any pytorch model.
 
     :param model: The original PyTorch model (nn.module)
     :param low_bit: Supported low-bit options are "sym_int4", "asym_int4", "sym_int5",
         "asym_int5" or "sym_int8".
     :param optimize_llm: Whether to further optimize llm model.
 
-    return: The optimized model.
+    :return: The optimized model.
+
+    >>> # Take the OpenAI Whisper model as an example.
+    >>> from bigdl.llm import optimize_model
+    >>> model = whisper.load_model('tiny')  # Load the whisper model under the pytorch framework
+    >>> model = optimize_model(model)  # With only one line of code change
+    >>> # Use the optimized model without any other API change
+    >>> result = model.transcribe(audio, verbose=True, language="English")
+    >>> # (Optional) you can also save the optimized model by calling 'save_low_bit'
+    >>> model.save_low_bit(saved_dir)
     """
     invalidInputError(low_bit in ggml_tensor_qtype,
                       f"Unknown load_in_low_bit value: {low_bit}, expected:"
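The ``low_bit`` argument selects one of the five quantization schemes listed above; a brief sketch of the non-default case, reusing the docstring's Whisper setup (the audio path is a placeholder):

```python
import whisper  # openai-whisper package
from bigdl.llm import optimize_model

model = whisper.load_model('tiny')

# Any of "sym_int4", "asym_int4", "sym_int5", "asym_int5" or "sym_int8"
# is accepted; other values fail the ggml_tensor_qtype check at the top
# of optimize_model.
model = optimize_model(model, low_bit="sym_int8")

result = model.transcribe("audio.wav", verbose=True, language="English")
```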