From 38be471140006f9b932a2d673e41674db2e6d5f7 Mon Sep 17 00:00:00 2001
From: xingyuan li <108672484+hoshibara@users.noreply.github.com>
Date: Tue, 6 Jun 2023 15:16:42 +0800
Subject: [PATCH] [LLM] convert_model bug fix (#8274)

* Renamed all bloomz to bloom in ggml/convert.py & utils/convert_util.py

* Add an optional parameter to specify the model conversion path, to avoid
  running out of disk space
---
 python/llm/src/bigdl/llm/ggml/convert.py       | 14 +++++++-------
 python/llm/src/bigdl/llm/ggml/convert_model.py | 11 +++++++----
 python/llm/src/bigdl/llm/utils/convert_util.py |  4 ++--
 3 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/python/llm/src/bigdl/llm/ggml/convert.py b/python/llm/src/bigdl/llm/ggml/convert.py
index 35f02332..1dc472c4 100644
--- a/python/llm/src/bigdl/llm/ggml/convert.py
+++ b/python/llm/src/bigdl/llm/ggml/convert.py
@@ -68,8 +68,8 @@ def _convert_gptneox(model_path, outfile_dir, outtype):
     _convert_gptneox_hf_to_ggml(model_path, outfile_dir, outtype)
 
 
-def _convert_bloomz(model_path, outfile_dir, outtype):
-    _convert_bloomz_hf_to_ggml(model_path, outfile_dir, outtype)
+def _convert_bloom(model_path, outfile_dir, outtype):
+    _convert_bloom_hf_to_ggml(model_path, outfile_dir, outtype)
 
 
 def _convert_to_ggml(model_path: str, outfile_dir: str,
@@ -80,12 +80,12 @@ def _convert_to_ggml(model_path: str, outfile_dir: str,
     :param model_path: str, path of model, for example `./llama-7b-hf`.
     :param outfile_dir: str, the directory to save ggml compatible file, for example `./models`.
     :param model_family: Which model family your input model belongs to. Default to `llama`.
-           Now only `llama`/`bloomz`/`gptneox` are supported.
+           Now only `llama`/`bloom`/`gptneox` are supported.
     :param outtype: specify the output format. Defalut to `fp16`. Now `fp32`/`fp16` are supported.
     """
-    invalidInputError(model_family in ['llama', 'bloomz', 'gptneox'],
+    invalidInputError(model_family in ['llama', 'bloom', 'gptneox'],
                       "Now we only support quantization of model \
-                      family('llama', 'bloomz', 'gptneox')",
+                      family('llama', 'bloom', 'gptneox')",
                       "{} is not in the list.".format(model_family))
     invalidInputError(os.path.exists(model_path),
                       "The file {} was not found".format(model_path))
@@ -101,5 +101,5 @@ def _convert_to_ggml(model_path: str, outfile_dir: str,
         _convert_llama(model_path, outfile_dir, outtype)
     if model_family == 'gptneox':
         _convert_gptneox(model_path, outfile_dir, outtype)
-    if model_family == 'bloomz':
-        _convert_bloomz(model_path, outfile_dir, outtype)
+    if model_family == 'bloom':
+        _convert_bloom(model_path, outfile_dir, outtype)
diff --git a/python/llm/src/bigdl/llm/ggml/convert_model.py b/python/llm/src/bigdl/llm/ggml/convert_model.py
index 637fbfea..c2c7e2f0 100644
--- a/python/llm/src/bigdl/llm/ggml/convert_model.py
+++ b/python/llm/src/bigdl/llm/ggml/convert_model.py
@@ -13,17 +13,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
 
+import os
+import time
+from pathlib import Path
 from bigdl.llm.ggml.convert import _convert_to_ggml
 from bigdl.llm.ggml.quantize import quantize
-from pathlib import Path
-import time
 
 
 def convert_model(input_path: str,
                   output_path: str,
                   model_family: str,
-                  dtype: str = 'int4'):
+                  dtype: str = 'int4',
+                  tmp_path: str = '/tmp'):
     """
     Convert Hugging Face llama-like / gpt-neox-like / bloom-like model to lower precision
 
@@ -36,6 +38,7 @@ def convert_model(input_path: str,
            Now only `llama`/`bloom`/`gptneox` are supported.
     :param dtype: Which quantized precision will be converted.
            Now only int4 supported.
+    :param tmp_path: Which path to store the intermediate model during the conversion process.
     """
     dtype = dtype.lower()
 
@@ -43,7 +46,7 @@ def convert_model(input_path: str,
         dtype = 'q4_0'
 
     model_name = Path(input_path).stem
-    tmp_ggml_file_path = f'/tmp/{model_name}_{int(time.time())}'
+    tmp_ggml_file_path = os.path.join(tmp_path, f'{model_name}_{int(time.time())}')
     _convert_to_ggml(model_path=input_path,
                      outfile_dir=tmp_ggml_file_path,
                      model_family=model_family,
diff --git a/python/llm/src/bigdl/llm/utils/convert_util.py b/python/llm/src/bigdl/llm/utils/convert_util.py
index d78af47b..c3706335 100644
--- a/python/llm/src/bigdl/llm/utils/convert_util.py
+++ b/python/llm/src/bigdl/llm/utils/convert_util.py
@@ -87,7 +87,7 @@ __all__ = ['Params',
            'load_vocab',
            'default_outfile',
            '_convert_gptneox_hf_to_ggml',
-           '_convert_bloomz_hf_to_ggml']
+           '_convert_bloom_hf_to_ggml']
 
 
 @dataclass(frozen=True)
@@ -1316,7 +1316,7 @@ def _convert_gptneox_hf_to_ggml(model_path, outfile_dir, outtype):
     fout.close()
 
 
-def _convert_bloomz_hf_to_ggml(model_path, outfile_dir, outtype):
+def _convert_bloom_hf_to_ggml(model_path, outfile_dir, outtype):
     conv_map = {'word_embeddings': 'tok_embeddings',
                 'word_embeddings_layernorm': 'norm',
                 'input_layernorm': 'attention_norm',
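
Usage sketch (illustrative, not part of the diff above): how the patched entry point might be called, assuming a local llama checkpoint at ./llama-7b-hf and a scratch directory /mnt/scratch, both of which are hypothetical paths. Pointing `tmp_path` at a larger disk keeps the intermediate ggml file out of `/tmp`:

    # Minimal sketch; the paths below are illustrative assumptions.
    from bigdl.llm.ggml.convert_model import convert_model

    convert_model(input_path='./llama-7b-hf',   # Hugging Face format model directory
                  output_path='./models',       # where the quantized output is written
                  model_family='llama',         # one of 'llama' / 'bloom' / 'gptneox'
                  dtype='int4',                 # only int4 is supported
                  tmp_path='/mnt/scratch')      # new in this patch; defaults to '/tmp'

Per the `os.path.join(tmp_path, f'{model_name}_{int(time.time())}')` line above, the intermediate model is written as `<model_stem>_<unix_time>` under `tmp_path`, so that directory only needs enough free space to hold the intermediate ggml copy of the model.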