[LLM] convert_model bug fix (#8274)
* Renamed all bloomz to bloom in ggml/model & utils/convert_util.py
* Added an optional parameter for specifying the model conversion path, to avoid running out of disk space
parent 8bd2992a8d
commit 38be471140
3 changed files with 16 additions and 13 deletions
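
The practical effect of the new parameter is that the conversion scratch directory can be pointed at a volume with enough free space instead of the default `/tmp`. A minimal usage sketch, assuming `convert_model` is imported from the changed module (the import path and all directories below are illustrative, not taken from this diff):

    from bigdl.llm.ggml.convert_model import convert_model  # import path is an assumption

    # Route the multi-gigabyte intermediate ggml file to a hypothetical
    # large scratch volume instead of the default '/tmp'.
    convert_model(input_path='./llama-7b-hf',   # placeholder HF checkpoint dir
                  output_path='./models',       # placeholder output dir
                  model_family='llama',
                  dtype='int4',
                  tmp_path='/mnt/scratch')      # hypothetical scratch path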
@@ -68,8 +68,8 @@ def _convert_gptneox(model_path, outfile_dir, outtype):
     _convert_gptneox_hf_to_ggml(model_path, outfile_dir, outtype)
 
 
-def _convert_bloomz(model_path, outfile_dir, outtype):
-    _convert_bloomz_hf_to_ggml(model_path, outfile_dir, outtype)
+def _convert_bloom(model_path, outfile_dir, outtype):
+    _convert_bloom_hf_to_ggml(model_path, outfile_dir, outtype)
 
 
 def _convert_to_ggml(model_path: str, outfile_dir: str,
@@ -80,12 +80,12 @@ def _convert_to_ggml(model_path: str, outfile_dir: str,
     :param model_path: str, path of model, for example `./llama-7b-hf`.
     :param outfile_dir: str, the directory to save ggml compatible file, for example `./models`.
     :param model_family: Which model family your input model belongs to. Default to `llama`.
-            Now only `llama`/`bloomz`/`gptneox` are supported.
+            Now only `llama`/`bloom`/`gptneox` are supported.
     :param outtype: specify the output format. Defalut to `fp16`. Now `fp32`/`fp16` are supported.
     """
-    invalidInputError(model_family in ['llama', 'bloomz', 'gptneox'],
+    invalidInputError(model_family in ['llama', 'bloom', 'gptneox'],
                       "Now we only support quantization of model \
-                       family('llama', 'bloomz', 'gptneox')",
+                       family('llama', 'bloom', 'gptneox')",
                       "{} is not in the list.".format(model_family))
     invalidInputError(os.path.exists(model_path),
                       "The file {} was not found".format(model_path))
@@ -101,5 +101,5 @@ def _convert_to_ggml(model_path: str, outfile_dir: str,
         _convert_llama(model_path, outfile_dir, outtype)
     if model_family == 'gptneox':
         _convert_gptneox(model_path, outfile_dir, outtype)
-    if model_family == 'bloomz':
-        _convert_bloomz(model_path, outfile_dir, outtype)
+    if model_family == 'bloom':
+        _convert_bloom(model_path, outfile_dir, outtype)
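
A side effect of the rename visible in the hunks above: `'bloomz'` no longer passes the `invalidInputError` family check, so existing callers must switch to `'bloom'`. A hedged sketch of the before/after call (checkpoint directory is a placeholder):

    # Rejected after this commit: 'bloomz' is no longer in
    # ['llama', 'bloom', 'gptneox'], so invalidInputError fires.
    _convert_to_ggml(model_path='./bloom-7b1',
                     outfile_dir='./models',
                     model_family='bloomz')

    # Correct spelling after this commit:
    _convert_to_ggml(model_path='./bloom-7b1',
                     outfile_dir='./models',
                     model_family='bloom')

The next three hunks belong to the module that defines `convert_model`, the second of the three changed files.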
@@ -13,17 +13,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+import os
+import time
+from pathlib import Path
 
 from bigdl.llm.ggml.convert import _convert_to_ggml
 from bigdl.llm.ggml.quantize import quantize
-from pathlib import Path
-import time
 
 
 def convert_model(input_path: str,
                   output_path: str,
                   model_family: str,
-                  dtype: str = 'int4'):
+                  dtype: str = 'int4',
+                  tmp_path: str = '/tmp'):
     """
     Convert Hugging Face llama-like / gpt-neox-like / bloom-like model to lower precision
 
@@ -36,6 +38,7 @@ def convert_model(input_path: str,
             Now only `llama`/`bloom`/`gptneox` are supported.
     :param dtype: Which quantized precision will be converted.
             Now only int4 supported.
+    :param tmp_path: Which path to store the intermediate model during the conversion process.
     """
 
     dtype = dtype.lower()
@@ -43,7 +46,7 @@ def convert_model(input_path: str,
         dtype = 'q4_0'
 
     model_name = Path(input_path).stem
-    tmp_ggml_file_path = f'/tmp/{model_name}_{int(time.time())}'
+    tmp_ggml_file_path = os.path.join(tmp_path, f'{model_name}_{int(time.time())}')
     _convert_to_ggml(model_path=input_path,
                      outfile_dir=tmp_ggml_file_path,
                      model_family=model_family,
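
The hard-coded `/tmp` prefix was the disk-space culprit: the intermediate ggml file can run to many gigabytes, while `/tmp` often lives on a small root partition. A quick sketch of the new path construction (directory name and timestamp are illustrative):

    import os
    import time
    from pathlib import Path

    model_name = Path('./llama-7b-hf').stem    # -> 'llama-7b-hf'
    tmp_path = '/mnt/scratch'                  # hypothetical large volume
    tmp_ggml_file_path = os.path.join(tmp_path, f'{model_name}_{int(time.time())}')
    # e.g. '/mnt/scratch/llama-7b-hf_1686000000'

The final two hunks are in convert_util.py, which the commit message names for the rename.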
@@ -87,7 +87,7 @@ __all__ = ['Params',
            'load_vocab',
            'default_outfile',
            '_convert_gptneox_hf_to_ggml',
-           '_convert_bloomz_hf_to_ggml']
+           '_convert_bloom_hf_to_ggml']
 
 
 @dataclass(frozen=True)
@@ -1316,7 +1316,7 @@ def _convert_gptneox_hf_to_ggml(model_path, outfile_dir, outtype):
     fout.close()
 
 
-def _convert_bloomz_hf_to_ggml(model_path, outfile_dir, outtype):
+def _convert_bloom_hf_to_ggml(model_path, outfile_dir, outtype):
     conv_map = {'word_embeddings': 'tok_embeddings',
                 'word_embeddings_layernorm': 'norm',
                 'input_layernorm': 'attention_norm',
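
For orientation, `conv_map` in the renamed converter maps Hugging Face BLOOM tensor-name components to their ggml equivalents. A sketch of how such a map is typically applied, component-by-component over dotted tensor names (the helper below is an assumption for illustration; only the three entries above appear in this diff):

    conv_map = {'word_embeddings': 'tok_embeddings',
                'word_embeddings_layernorm': 'norm',
                'input_layernorm': 'attention_norm'}

    def rename_tensor(hf_name: str) -> str:
        # Swap any mapped component of a dotted HF tensor name; leave
        # unmapped components (layer indices, 'weight'/'bias') unchanged.
        return '.'.join(conv_map.get(part, part) for part in hf_name.split('.'))

    print(rename_tensor('h.0.input_layernorm.weight'))
    # -> 'h.0.attention_norm.weight'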