[LLM] convert_model bug fix (#8274)

* Renamed all bloomz to bloom in ggml/model & utils/convert_util.py
* Added an optional parameter for specifying the model conversion path, to avoid running out of disk space
xingyuan li 2023-06-06 15:16:42 +08:00 committed by GitHub
parent 8bd2992a8d
commit 38be471140
3 changed files with 16 additions and 13 deletions

bigdl/llm/ggml/convert.py

@@ -68,8 +68,8 @@ def _convert_gptneox(model_path, outfile_dir, outtype):
     _convert_gptneox_hf_to_ggml(model_path, outfile_dir, outtype)


-def _convert_bloomz(model_path, outfile_dir, outtype):
-    _convert_bloomz_hf_to_ggml(model_path, outfile_dir, outtype)
+def _convert_bloom(model_path, outfile_dir, outtype):
+    _convert_bloom_hf_to_ggml(model_path, outfile_dir, outtype)


 def _convert_to_ggml(model_path: str, outfile_dir: str,
@@ -80,12 +80,12 @@ def _convert_to_ggml(model_path: str, outfile_dir: str,
     :param model_path: str, path of model, for example `./llama-7b-hf`.
     :param outfile_dir: str, the directory to save ggml compatible file, for example `./models`.
     :param model_family: Which model family your input model belongs to. Default to `llama`.
-           Now only `llama`/`bloomz`/`gptneox` are supported.
+           Now only `llama`/`bloom`/`gptneox` are supported.
     :param outtype: specify the output format. Defalut to `fp16`. Now `fp32`/`fp16` are supported.
     """
-    invalidInputError(model_family in ['llama', 'bloomz', 'gptneox'],
+    invalidInputError(model_family in ['llama', 'bloom', 'gptneox'],
                       "Now we only support quantization of model \
-                      family('llama', 'bloomz', 'gptneox')",
+                      family('llama', 'bloom', 'gptneox')",
                       "{} is not in the list.".format(model_family))
     invalidInputError(os.path.exists(model_path),
                       "The file {} was not found".format(model_path))
@@ -101,5 +101,5 @@ def _convert_to_ggml(model_path: str, outfile_dir: str,
         _convert_llama(model_path, outfile_dir, outtype)
     if model_family == 'gptneox':
         _convert_gptneox(model_path, outfile_dir, outtype)
-    if model_family == 'bloomz':
-        _convert_bloomz(model_path, outfile_dir, outtype)
+    if model_family == 'bloom':
+        _convert_bloom(model_path, outfile_dir, outtype)
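After the rename, the dispatch accepts `bloom` where it previously expected `bloomz`. A minimal sketch of a call that exercises the corrected path, with hypothetical local directories standing in for a real checkpoint:

    from bigdl.llm.ggml.convert import _convert_to_ggml

    # Hypothetical paths; any local Hugging Face BLOOM checkpoint works the same way.
    _convert_to_ggml(model_path='./bloom-7b1',
                     outfile_dir='./models',
                     model_family='bloom',  # 'bloomz' now fails the invalidInputError check
                     outtype='fp16')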

bigdl/llm/ggml/convert_model.py

@@ -13,17 +13,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+import os
+import time
+from pathlib import Path
 from bigdl.llm.ggml.convert import _convert_to_ggml
 from bigdl.llm.ggml.quantize import quantize
-from pathlib import Path
-import time


 def convert_model(input_path: str,
                   output_path: str,
                   model_family: str,
-                  dtype: str = 'int4'):
+                  dtype: str = 'int4',
+                  tmp_path: str = '/tmp'):
     """
     Convert Hugging Face llama-like / gpt-neox-like / bloom-like model to lower precision
@@ -36,6 +38,7 @@ def convert_model(input_path: str,
            Now only `llama`/`bloom`/`gptneox` are supported.
     :param dtype: Which quantized precision will be converted.
            Now only int4 supported.
+    :param tmp_path: Which path to store the intermediate model during the conversion process.
     """

     dtype = dtype.lower()
@@ -43,7 +46,7 @@ def convert_model(input_path: str,
         dtype = 'q4_0'

     model_name = Path(input_path).stem
-    tmp_ggml_file_path = f'/tmp/{model_name}_{int(time.time())}'
+    tmp_ggml_file_path = os.path.join(tmp_path, f'{model_name}_{int(time.time())}')
     _convert_to_ggml(model_path=input_path,
                      outfile_dir=tmp_ggml_file_path,
                      model_family=model_family,
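A minimal usage sketch of the new parameter, assuming the module path below and a hypothetical scratch volume at /mnt/scratch with enough free space for the intermediate GGML file:

    from bigdl.llm.ggml.convert_model import convert_model  # module path assumed from this diff

    # Redirect the intermediate model away from the default '/tmp', which is often
    # a small partition, so a large fp16 dump no longer exhausts the disk.
    convert_model(input_path='./llama-7b-hf',
                  output_path='./models',
                  model_family='llama',
                  dtype='int4',
                  tmp_path='/mnt/scratch')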

bigdl/llm/utils/convert_util.py

@@ -87,7 +87,7 @@ __all__ = ['Params',
            'load_vocab',
            'default_outfile',
            '_convert_gptneox_hf_to_ggml',
-           '_convert_bloomz_hf_to_ggml']
+           '_convert_bloom_hf_to_ggml']


 @dataclass(frozen=True)
@@ -1316,7 +1316,7 @@ def _convert_gptneox_hf_to_ggml(model_path, outfile_dir, outtype):
     fout.close()


-def _convert_bloomz_hf_to_ggml(model_path, outfile_dir, outtype):
+def _convert_bloom_hf_to_ggml(model_path, outfile_dir, outtype):
     conv_map = {'word_embeddings': 'tok_embeddings',
                 'word_embeddings_layernorm': 'norm',
                 'input_layernorm': 'attention_norm',
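The `conv_map` dict (truncated in this view) translates Hugging Face BLOOM tensor names into their ggml counterparts. A simplified sketch of how such a mapping is typically applied, not the exact loop from convert_util.py:

    import re

    # First three entries from the hunk above; the real map carries more.
    conv_map = {'word_embeddings': 'tok_embeddings',
                'word_embeddings_layernorm': 'norm',
                'input_layernorm': 'attention_norm'}

    def rename_tensor(hf_name: str) -> str:
        # Drop the 'transformer.' prefix, then substitute the longest matching key
        # so 'word_embeddings_layernorm' is not shadowed by 'word_embeddings'.
        name = re.sub(r'^transformer\.', '', hf_name)
        for hf_key in sorted(conv_map, key=len, reverse=True):
            if hf_key in name:
                return name.replace(hf_key, conv_map[hf_key])
        return name

    assert rename_tensor('transformer.word_embeddings.weight') == 'tok_embeddings.weight'
    assert rename_tensor('transformer.word_embeddings_layernorm.bias') == 'norm.bias'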