diff --git a/python/llm/src/bigdl/llm/ggml/__init__.py b/python/llm/src/bigdl/llm/ggml/__init__.py
index cba29fd7..adeb474d 100644
--- a/python/llm/src/bigdl/llm/ggml/__init__.py
+++ b/python/llm/src/bigdl/llm/ggml/__init__.py
@@ -21,3 +21,4 @@
 
 from .quantize import quantize
 from .convert import _convert_to_ggml
+from .convert_model import convert_model
diff --git a/python/llm/src/bigdl/llm/ggml/convert_model.py b/python/llm/src/bigdl/llm/ggml/convert_model.py
new file mode 100644
index 00000000..8547e1b5
--- /dev/null
+++ b/python/llm/src/bigdl/llm/ggml/convert_model.py
@@ -0,0 +1,41 @@
+from bigdl.llm.ggml.convert import _convert_to_ggml
+from bigdl.llm.ggml.quantize import quantize
+from pathlib import Path
+import time
+
+
+def convert_model(input_path: str,
+                  output_path: str,
+                  model_family: str,
+                  dtype: str = 'int4'):
+    """
+    Convert a Hugging Face llama-like / gpt-neox-like / bloom-like model to lower precision.
+
+    :param input_path: str, path of the original model, for example `./llama-7b-hf`.
+    :param output_path: Save path of output quantized model. Default to `None`.
+            If you don't specify this parameter, quantized model will be saved in
+            the same directory as the input and just replace precision with quantize_type
+            like `./ggml-model-q4_0.bin`.
+    :param model_family: Which model family your input model belongs to.
+            Now only `llama`/`bloom`/`gptneox` are supported.
+    :param dtype: Which precision the model will be quantized to.
+            Now only `int4` is supported.
+    """
+
+    dtype = dtype.lower()
+    if dtype == 'int4':
+        dtype = 'q4_0'
+
+    model_name = Path(input_path).stem
+    tmp_ggml_file_path = f'/tmp/{model_name}_{int(time.time())}'
+    _convert_to_ggml(model_path=input_path,
+                     outfile_dir=tmp_ggml_file_path,
+                     model_family=model_family,
+                     outtype="fp16")
+
+    tmp_ggml_file_path = next(Path(tmp_ggml_file_path).iterdir())
+
+    quantize(input_path=tmp_ggml_file_path,
+             output_path=output_path,
+             model_family=model_family,
+             dtype=dtype)
\ No newline at end of file
diff --git a/python/llm/src/bigdl/llm/ggml/quantize.py b/python/llm/src/bigdl/llm/ggml/quantize.py
index a5527ee8..baee0ce4 100644
--- a/python/llm/src/bigdl/llm/ggml/quantize.py
+++ b/python/llm/src/bigdl/llm/ggml/quantize.py
@@ -18,6 +18,7 @@ import os
 import subprocess
 from bigdl.llm.utils.common import invalidInputError
 import platform
+from pathlib import Path
 
 dirname, _ = os.path.split(os.path.abspath(__file__))
 
@@ -28,7 +29,7 @@ _llama_quantize_type = {"q4_0": 2,
                         "q5_0": 8,
                         "q5_1": 9,
                         "q8_0": 7}
-_bloomz_quantize_type = {"q4_0": 2,
+_bloom_quantize_type = {"q4_0": 2,
                          "q4_1": 3}
 _gptneox_quantize_type = {"q4_0": 2,
                           "q4_1": 3,
@@ -38,7 +39,7 @@ _gptneox_quantize_type = {"q4_0": 2,
                           "q8_0": 7}
 
 _quantize_type = {"llama": _llama_quantize_type,
-                  "bloomz": _bloomz_quantize_type,
+                  "bloom": _bloom_quantize_type,
                   "gptneox": _gptneox_quantize_type}
 
-_valid_types = set(list(_llama_quantize_type.keys()) + list(_bloomz_quantize_type.keys()) +
+_valid_types = set(list(_llama_quantize_type.keys()) + list(_bloom_quantize_type.keys()) +
@@ -56,23 +57,23 @@ def quantize(input_path: str, output_path: str=None,
             the same directory as the input and just replace precision with quantize_type
             like `./ggml-model-q4_0.bin`.
     :param model_family: Which model family your input model belongs to. Default to `llama`.
-           Now only `llama`/`bloomz`/`gptneox` are supported.
+           Now only `llama`/`bloom`/`gptneox` are supported.
     :param dtype: Quantization method which differs in the resulting model disk size and
            inference speed. Defalut to `q4_0`.
            Difference model family may support different types, now the supported list is:
            llama : "q4_0", "q4_1", "q4_2"
-           bloomz : "q4_0", "q4_1"
+           bloom : "q4_0", "q4_1"
            gptneox : "q4_0", "q4_1", "q4_2", "q5_0", "q5_1", "q8_0"
     """
-    invalidInputError(model_family in ['llama', 'bloomz', 'gptneox'],
+    invalidInputError(model_family in ['llama', 'bloom', 'gptneox'],
                       "Now we only support quantization of model \
-                       family('llama', 'bloomz', 'gptneox')",
+                       family('llama', 'bloom', 'gptneox')",
                       "{} is not in the list.".format(model_family))
     invalidInputError(os.path.isfile(input_path),
                       "The file {} was not found".format(input_path))
    # TODO : multi input model path
     if output_path is None:
-        output_path = input_path.replace("f16", dtype)
+        output_path = Path(str(input_path).replace('f16', dtype))
     # convert quantize type str into corresponding int value
     quantize_type_map = _quantize_type[model_family]
     invalidInputError(dtype in quantize_type_map, "{0} model just accept {1} now, \
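Usage sketch (not part of the diff): the new convert_model entry point chains _convert_to_ggml and quantize, first writing an fp16 ggml file under /tmp/<model_name>_<timestamp> and then quantizing it to q4_0 at output_path. The checkpoint and output paths below are illustrative placeholders, not files shipped with this change.

    from bigdl.llm.ggml import convert_model

    # Convert a local llama-family Hugging Face checkpoint to an int4 (q4_0) ggml file.
    # './llama-7b-hf' and './ggml-llama-7b-q4_0.bin' are hypothetical example paths.
    convert_model(input_path='./llama-7b-hf',
                  output_path='./ggml-llama-7b-q4_0.bin',
                  model_family='llama',   # one of 'llama', 'bloom', 'gptneox'
                  dtype='int4')           # currently the only supported precision, mapped to 'q4_0'

Note that the intermediate fp16 file is created under /tmp and is not cleaned up by convert_model itself; the per-family quantization types accepted by the underlying quantize step are listed in its docstring above.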