[LLM] convert_model bug fix (#8274)
* Renamed all bloomz to bloom in ggml/model & utils/convert_util.py
* Added an optional parameter for specifying the model conversion path, to avoid running out of disk space
parent 8bd2992a8d
commit 38be471140
3 changed files with 16 additions and 13 deletions
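
The practical effect of the new parameter is that the conversion scratch directory can be pointed at a volume with enough free space instead of the default `/tmp`. A minimal usage sketch, assuming `convert_model` is imported from the changed module (the import path and all directories below are illustrative, not taken from this diff):

    from bigdl.llm.ggml.convert_model import convert_model  # import path is an assumption

    # Route the multi-gigabyte intermediate ggml file to a hypothetical
    # large scratch volume instead of the default '/tmp'.
    convert_model(input_path='./llama-7b-hf',   # placeholder HF checkpoint dir
                  output_path='./models',       # placeholder output dir
                  model_family='llama',
                  dtype='int4',
                  tmp_path='/mnt/scratch')      # hypothetical scratch path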
@@ -68,8 +68,8 @@ def _convert_gptneox(model_path, outfile_dir, outtype):
     _convert_gptneox_hf_to_ggml(model_path, outfile_dir, outtype)
 
 
-def _convert_bloomz(model_path, outfile_dir, outtype):
-    _convert_bloomz_hf_to_ggml(model_path, outfile_dir, outtype)
+def _convert_bloom(model_path, outfile_dir, outtype):
+    _convert_bloom_hf_to_ggml(model_path, outfile_dir, outtype)
 
 
 def _convert_to_ggml(model_path: str, outfile_dir: str,
@@ -80,12 +80,12 @@ def _convert_to_ggml(model_path: str, outfile_dir: str,
     :param model_path: str, path of model, for example `./llama-7b-hf`.
     :param outfile_dir: str, the directory to save ggml compatible file, for example `./models`.
     :param model_family: Which model family your input model belongs to. Default to `llama`.
-            Now only `llama`/`bloomz`/`gptneox` are supported.
+            Now only `llama`/`bloom`/`gptneox` are supported.
     :param outtype: specify the output format. Defalut to `fp16`. Now `fp32`/`fp16` are supported.
     """
-    invalidInputError(model_family in ['llama', 'bloomz', 'gptneox'],
+    invalidInputError(model_family in ['llama', 'bloom', 'gptneox'],
                       "Now we only support quantization of model \
-                       family('llama', 'bloomz', 'gptneox')",
+                       family('llama', 'bloom', 'gptneox')",
                       "{} is not in the list.".format(model_family))
     invalidInputError(os.path.exists(model_path),
                       "The file {} was not found".format(model_path))
@@ -101,5 +101,5 @@ def _convert_to_ggml(model_path: str, outfile_dir: str,
         _convert_llama(model_path, outfile_dir, outtype)
     if model_family == 'gptneox':
         _convert_gptneox(model_path, outfile_dir, outtype)
-    if model_family == 'bloomz':
-        _convert_bloomz(model_path, outfile_dir, outtype)
+    if model_family == 'bloom':
+        _convert_bloom(model_path, outfile_dir, outtype)
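
A side effect of the rename visible in the hunks above: `'bloomz'` no longer passes the `invalidInputError` family check, so existing callers must switch to `'bloom'`. A hedged sketch of the before/after call (checkpoint directory is a placeholder):

    # Rejected after this commit: 'bloomz' is no longer in
    # ['llama', 'bloom', 'gptneox'], so invalidInputError fires.
    _convert_to_ggml(model_path='./bloom-7b1',
                     outfile_dir='./models',
                     model_family='bloomz')

    # Correct spelling after this commit:
    _convert_to_ggml(model_path='./bloom-7b1',
                     outfile_dir='./models',
                     model_family='bloom')

The next three hunks belong to the module that defines `convert_model`, the second of the three changed files.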
@@ -13,17 +13,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+import os
+import time
+from pathlib import Path
 
 from bigdl.llm.ggml.convert import _convert_to_ggml
 from bigdl.llm.ggml.quantize import quantize
-from pathlib import Path
-import time
 
 
 def convert_model(input_path: str,
                   output_path: str,
                   model_family: str,
-                  dtype: str = 'int4'):
+                  dtype: str = 'int4',
+                  tmp_path: str = '/tmp'):
     """
     Convert Hugging Face llama-like / gpt-neox-like / bloom-like model to lower precision
 
@@ -36,6 +38,7 @@ def convert_model(input_path: str,
             Now only `llama`/`bloom`/`gptneox` are supported.
     :param dtype: Which quantized precision will be converted.
             Now only int4 supported.
+    :param tmp_path: Which path to store the intermediate model during the conversion process.
     """
 
     dtype = dtype.lower()
@@ -43,7 +46,7 @@ def convert_model(input_path: str,
         dtype = 'q4_0'
 
     model_name = Path(input_path).stem
-    tmp_ggml_file_path = f'/tmp/{model_name}_{int(time.time())}'
+    tmp_ggml_file_path = os.path.join(tmp_path, f'{model_name}_{int(time.time())}')
     _convert_to_ggml(model_path=input_path,
                      outfile_dir=tmp_ggml_file_path,
                      model_family=model_family,
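
The hard-coded `/tmp` prefix was the disk-space culprit: the intermediate ggml file can run to many gigabytes, while `/tmp` often lives on a small root partition. A quick sketch of the new path construction (directory name and timestamp are illustrative):

    import os
    import time
    from pathlib import Path

    model_name = Path('./llama-7b-hf').stem    # -> 'llama-7b-hf'
    tmp_path = '/mnt/scratch'                  # hypothetical large volume
    tmp_ggml_file_path = os.path.join(tmp_path, f'{model_name}_{int(time.time())}')
    # e.g. '/mnt/scratch/llama-7b-hf_1686000000'

The final two hunks are in convert_util.py, which the commit message names for the rename.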
@@ -87,7 +87,7 @@ __all__ = ['Params',
            'load_vocab',
            'default_outfile',
            '_convert_gptneox_hf_to_ggml',
-           '_convert_bloomz_hf_to_ggml']
+           '_convert_bloom_hf_to_ggml']
 
 
 @dataclass(frozen=True)
@@ -1316,7 +1316,7 @@ def _convert_gptneox_hf_to_ggml(model_path, outfile_dir, outtype):
     fout.close()
 
 
-def _convert_bloomz_hf_to_ggml(model_path, outfile_dir, outtype):
+def _convert_bloom_hf_to_ggml(model_path, outfile_dir, outtype):
     conv_map = {'word_embeddings': 'tok_embeddings',
                 'word_embeddings_layernorm': 'norm',
                 'input_layernorm': 'attention_norm',
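
For orientation, `conv_map` in the renamed converter maps Hugging Face BLOOM tensor-name components to their ggml equivalents. A sketch of how such a map is typically applied, component-by-component over dotted tensor names (the helper below is an assumption for illustration; only the three entries above appear in this diff):

    conv_map = {'word_embeddings': 'tok_embeddings',
                'word_embeddings_layernorm': 'norm',
                'input_layernorm': 'attention_norm'}

    def rename_tensor(hf_name: str) -> str:
        # Swap any mapped component of a dotted HF tensor name; leave
        # unmapped components (layer indices, 'weight'/'bias') unchanged.
        return '.'.join(conv_map.get(part, part) for part in hf_name.split('.'))

    print(rename_tensor('h.0.input_layernorm.weight'))
    # -> 'h.0.attention_norm.weight'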