[LLM] convert_model bug fix (#8274)
* Renamed all bloomz to bloom in ggml/model & utils/convert_util.py
* Added an optional parameter for specifying the model conversion path, to avoid running out of disk space
This commit is contained in:
parent 8bd2992a8d
commit 38be471140

3 changed files with 16 additions and 13 deletions
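A minimal usage sketch of the updated API (the checkpoint and output directories are hypothetical, and the import path is an assumption based on the bigdl.llm.ggml module layout visible in the diff below):

    from bigdl.llm.ggml import convert_model  # import path assumed

    # Stage the intermediate ggml file on a disk with enough free space
    # via the new tmp_path parameter, instead of the hardcoded /tmp.
    convert_model(input_path='./bloom-7b1',    # hypothetical HF checkpoint dir
                  output_path='./models',
                  model_family='bloom',
                  dtype='int4',
                  tmp_path='/mnt/scratch')     # new optional parameter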
File 1 of 3 — bigdl/llm/ggml/convert.py (module path inferred from the import in file 2):

@@ -68,8 +68,8 @@ def _convert_gptneox(model_path, outfile_dir, outtype):
     _convert_gptneox_hf_to_ggml(model_path, outfile_dir, outtype)
 
 
-def _convert_bloomz(model_path, outfile_dir, outtype):
-    _convert_bloomz_hf_to_ggml(model_path, outfile_dir, outtype)
+def _convert_bloom(model_path, outfile_dir, outtype):
+    _convert_bloom_hf_to_ggml(model_path, outfile_dir, outtype)
 
 
 def _convert_to_ggml(model_path: str, outfile_dir: str,
@@ -80,12 +80,12 @@ def _convert_to_ggml(model_path: str, outfile_dir: str,
     :param model_path: str, path of model, for example `./llama-7b-hf`.
     :param outfile_dir: str, the directory to save ggml compatible file, for example `./models`.
     :param model_family: Which model family your input model belongs to. Default to `llama`.
-            Now only `llama`/`bloomz`/`gptneox` are supported.
+            Now only `llama`/`bloom`/`gptneox` are supported.
     :param outtype: specify the output format. Defalut to `fp16`. Now `fp32`/`fp16` are supported.
     """
-    invalidInputError(model_family in ['llama', 'bloomz', 'gptneox'],
+    invalidInputError(model_family in ['llama', 'bloom', 'gptneox'],
                       "Now we only support quantization of model \
-                      family('llama', 'bloomz', 'gptneox')",
+                      family('llama', 'bloom', 'gptneox')",
                       "{} is not in the list.".format(model_family))
     invalidInputError(os.path.exists(model_path),
                       "The file {} was not found".format(model_path))
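The guard above uses BigDL's invalidInputError helper. As a rough mental model, it behaves like an assert with an error message plus a detail message; the stand-in below is an assumption, with the signature inferred only from the call sites in this hunk, not BigDL's actual implementation:

    def invalidInputError(condition, err_msg, fix_msg=None):
        # Raise when the condition does not hold; the second message,
        # when given, adds detail (e.g. which family was rejected).
        if not condition:
            raise RuntimeError(err_msg if fix_msg is None else f'{err_msg} {fix_msg}')

    # After this commit, model_family='bloom' passes the membership check
    # while the old name 'bloomz' is rejected.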
@@ -101,5 +101,5 @@ def _convert_to_ggml(model_path: str, outfile_dir: str,
         _convert_llama(model_path, outfile_dir, outtype)
     if model_family == 'gptneox':
         _convert_gptneox(model_path, outfile_dir, outtype)
-    if model_family == 'bloomz':
-        _convert_bloomz(model_path, outfile_dir, outtype)
+    if model_family == 'bloom':
+        _convert_bloom(model_path, outfile_dir, outtype)
File 2 of 3 — the module defining convert_model:

@@ -13,17 +13,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+import os
+import time
+from pathlib import Path
 
 from bigdl.llm.ggml.convert import _convert_to_ggml
 from bigdl.llm.ggml.quantize import quantize
-from pathlib import Path
-import time
 
 
 def convert_model(input_path: str,
                   output_path: str,
                   model_family: str,
-                  dtype: str = 'int4'):
+                  dtype: str = 'int4',
+                  tmp_path: str = '/tmp'):
     """
     Convert Hugging Face llama-like / gpt-neox-like / bloom-like model to lower precision
@@ -36,6 +38,7 @@ def convert_model(input_path: str,
             Now only `llama`/`bloom`/`gptneox` are supported.
     :param dtype: Which quantized precision will be converted.
             Now only int4 supported.
+    :param tmp_path: Which path to store the intermediate model during the conversion process.
     """
 
     dtype = dtype.lower()
@@ -43,7 +46,7 @@ def convert_model(input_path: str,
         dtype = 'q4_0'
 
     model_name = Path(input_path).stem
-    tmp_ggml_file_path = f'/tmp/{model_name}_{int(time.time())}'
+    tmp_ggml_file_path = os.path.join(tmp_path, f'{model_name}_{int(time.time())}')
     _convert_to_ggml(model_path=input_path,
                      outfile_dir=tmp_ggml_file_path,
                      model_family=model_family,
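This one-line change is what makes tmp_path take effect. A small sketch of the before/after path construction (directory and timestamp values are illustrative):

    import os
    import time
    from pathlib import Path

    model_name = Path('./llama-7b-hf').stem   # -> 'llama-7b-hf'
    stamp = int(time.time())                  # e.g. 1686000000

    old_path = f'/tmp/{model_name}_{stamp}'                          # always under /tmp
    new_path = os.path.join('/mnt/scratch', f'{model_name}_{stamp}')
    # old_path == '/tmp/llama-7b-hf_1686000000'
    # new_path == '/mnt/scratch/llama-7b-hf_1686000000'

Since the intermediate ggml file for a multi-billion-parameter model is large, redirecting it away from a small /tmp partition avoids the out-of-disk failures mentioned in the commit message.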
File 3 of 3 — convert_util.py (named in the commit message):

@@ -87,7 +87,7 @@ __all__ = ['Params',
            'load_vocab',
            'default_outfile',
            '_convert_gptneox_hf_to_ggml',
-           '_convert_bloomz_hf_to_ggml']
+           '_convert_bloom_hf_to_ggml']
 
 
 @dataclass(frozen=True)
@@ -1316,7 +1316,7 @@ def _convert_gptneox_hf_to_ggml(model_path, outfile_dir, outtype):
     fout.close()
 
 
-def _convert_bloomz_hf_to_ggml(model_path, outfile_dir, outtype):
+def _convert_bloom_hf_to_ggml(model_path, outfile_dir, outtype):
     conv_map = {'word_embeddings': 'tok_embeddings',
                 'word_embeddings_layernorm': 'norm',
                 'input_layernorm': 'attention_norm',
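The conv_map at the start of _convert_bloom_hf_to_ggml translates Hugging Face BLOOM tensor names into their ggml-side equivalents. A sketch of how such a mapping is typically applied; the helper below is illustrative, not the function's actual code:

    # Hypothetical renaming helper built on the mapping shown above.
    conv_map = {'word_embeddings': 'tok_embeddings',
                'word_embeddings_layernorm': 'norm',
                'input_layernorm': 'attention_norm'}

    def rename_tensor(hf_name: str) -> str:
        # Try longer source names first so 'word_embeddings_layernorm'
        # is not shadowed by its prefix 'word_embeddings'.
        for src in sorted(conv_map, key=len, reverse=True):
            if src in hf_name:
                return hf_name.replace(src, conv_map[src])
        return hf_name

    print(rename_tensor('h.0.input_layernorm.weight'))  # h.0.attention_norm.weight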