diff --git a/python/llm/CMakeLists.txt b/python/llm/CMakeLists.txt
new file mode 100644
index 00000000..166fa673
--- /dev/null
+++ b/python/llm/CMakeLists.txt
@@ -0,0 +1,83 @@
+cmake_minimum_required(VERSION 3.4...3.22)
+
+project(bigdl_llm)
+
+# Builds the vendored ggml-based projects with their own Makefiles and installs
+# the resulting shared libraries and quantize tools into the Python package tree.
+
+option(FORCE_CMAKE "Force CMake build of Related CPP project" OFF)
+
+# Let the FORCE_CMAKE environment variable override the option only when it is
+# actually set; an unconditional set() would shadow -DFORCE_CMAKE=... with an
+# empty value whenever the environment variable is absent.
+if(NOT "$ENV{FORCE_CMAKE}" STREQUAL "")
+    set(FORCE_CMAKE "$ENV{FORCE_CMAKE}")
+endif()
+
+set(llama_dir "${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp")
+set(gptneox_dir "${CMAKE_CURRENT_SOURCE_DIR}/vendor/redpajama.cpp")
+set(bloomz_dir "${CMAKE_CURRENT_SOURCE_DIR}/vendor/bloomz.cpp")
+
+# Each rule runs make inside the vendored checkout. No DEPENDS are listed, so a
+# rule only fires while one of its OUTPUT files is missing.
+add_custom_command(
+    OUTPUT "${llama_dir}/libllama.so" "${llama_dir}/quantize"
+    COMMAND make libllama.so quantize
+    WORKING_DIRECTORY "${llama_dir}"
+    COMMENT "Building libllama.so and quantize in vendor/llama.cpp"
+    VERBATIM
+)
+add_custom_command(
+    OUTPUT "${gptneox_dir}/libgptneox.so" "${gptneox_dir}/quantize-gptneox"
+    COMMAND make libgptneox.so quantize-gptneox
+    WORKING_DIRECTORY "${gptneox_dir}"
+    COMMENT "Building libgptneox.so and quantize-gptneox in vendor/redpajama.cpp"
+    VERBATIM
+)
+add_custom_command(
+    OUTPUT "${bloomz_dir}/libbloom.so" "${bloomz_dir}/quantize"
+    COMMAND make libbloom.so quantize
+    WORKING_DIRECTORY "${bloomz_dir}"
+    COMMENT "Building libbloom.so and quantize in vendor/bloomz.cpp"
+    VERBATIM
+)
+
+# Part of the default build (ALL) so the artifacts exist before install runs.
+add_custom_target(
+    run ALL
+    DEPENDS "${llama_dir}/libllama.so"
+            "${llama_dir}/quantize"
+            "${gptneox_dir}/libgptneox.so"
+            "${gptneox_dir}/quantize-gptneox"
+            "${bloomz_dir}/libbloom.so"
+            "${bloomz_dir}/quantize"
+)
+
+# Shared libraries go to lib/; quantize tools go to bin/ under a per-family name
+# (llama.cpp and bloomz.cpp both call their tool plain "quantize").
+install(
+    FILES "${llama_dir}/libllama.so"
+    DESTINATION src/bigdl/llm/lib
+)
+install(
+    PROGRAMS "${llama_dir}/quantize"
+    RENAME quantize-llama
+    DESTINATION src/bigdl/llm/bin
+)
+install(
+    FILES "${gptneox_dir}/libgptneox.so"
+    DESTINATION src/bigdl/llm/lib
+)
+install(
+    PROGRAMS "${gptneox_dir}/quantize-gptneox"
+    DESTINATION src/bigdl/llm/bin
+)
+install(
+    FILES "${bloomz_dir}/libbloom.so"
+    DESTINATION src/bigdl/llm/lib
+)
+install(
+    PROGRAMS "${bloomz_dir}/quantize"
+    RENAME quantize-bloomz
+    DESTINATION src/bigdl/llm/bin
+)
diff --git a/python/llm/readme.md b/python/llm/README.md
similarity index 100%
rename from python/llm/readme.md
rename to python/llm/README.md
diff --git a/python/llm/setup.py b/python/llm/setup.py
index 6ea10db4..2b162dda 100644
--- a/python/llm/setup.py
+++ b/python/llm/setup.py
@@ -16,27 +16,9 @@
 # limitations under the License.
 #
 
-#!/usr/bin/env python
-
-#
-# Copyright 2016 The BigDL Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
 import os
 import fnmatch
-from setuptools import setup
+from skbuild import setup
 
 long_description = '''
     BigDL LLM
@@ -75,7 +57,7 @@ def setup_package():
         url='https://github.com/intel-analytics/BigDL',
         packages=get_llm_packages(),
         package_dir={"": "src"},
-        install_requires=[],
+        install_requires=[""],
         include_package_data=True,
         classifiers=[
             'License :: OSI Approved :: Apache Software License',
diff --git a/python/llm/src/bigdl/llm/ggml/__init__.py b/python/llm/src/bigdl/llm/ggml/__init__.py
index dbdafd2a..54a5122e 100644
--- a/python/llm/src/bigdl/llm/ggml/__init__.py
+++ b/python/llm/src/bigdl/llm/ggml/__init__.py
@@ -18,3 +18,5 @@
 # physically located elsewhere.
 # Otherwise there would be module not found error in non-pip's setting as Python would
 # only search the first bigdl package and end up finding only one sub-package.
+
+from .quantize import quantize
diff --git a/python/llm/src/bigdl/llm/ggml/quantize.py b/python/llm/src/bigdl/llm/ggml/quantize.py
new file mode 100644
index 00000000..1d739125
--- /dev/null
+++ b/python/llm/src/bigdl/llm/ggml/quantize.py
@@ -0,0 +1,99 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import subprocess
+from bigdl.llm.utils.common import invalidInputError
+
+
+# Directory of this module and its parent directory; the quantize executables
+# are looked up in the `bin` sub-directory of the parent.
+dirname, _ = os.path.split(os.path.abspath(__file__))
+bin_dirname = os.path.dirname(dirname)
+
+# Per-family mapping from dtype name to the integer passed to the quantize executable.
+_llama_quantize_type = {"q4_0": 2,
+                        "q4_1": 3,
+                        "q4_2": 5}
+_bloomz_quantize_type = {"q4_0": 2,
+                         "q4_1": 3}
+_gptneox_quantize_type = {"q4_0": 2,
+                          "q4_1": 3,
+                          "q4_2": 5,
+                          "q5_0": 8,
+                          "q5_1": 9,
+                          "q8_0": 7}
+
+_quantize_type = {"llama": _llama_quantize_type,
+                  "bloomz": _bloomz_quantize_type,
+                  "gptneox": _gptneox_quantize_type}
+
+_valid_types = (set(_llama_quantize_type)
+                | set(_bloomz_quantize_type)
+                | set(_gptneox_quantize_type))
+
+
+def quantize(input_path: str, output_path: str = None,
+             model_family: str = 'llama', dtype: str = 'q4_0'):
+    """
+    Quantize ggml file to lower precision.
+
+    :param input_path: Path of input ggml file, for example `./ggml-model-f16.bin`.
+    :param output_path: Save path of output quantized model. Default to `None`.
+            If you don't specify this parameter, quantized model will be saved in
+            the same directory as the input and just replace precision with quantize_type
+            like `./ggml-model-q4_0.bin`.
+    :param model_family: Which model family your input model belongs to. Default to `llama`.
+            Now only `llama`/`bloomz`/`gptneox` are supported.
+    :param dtype: Quantization method which differs in the resulting model disk size and
+            inference speed. Default to `q4_0`. Different model families may support
+            different types, now the supported list is:
+            llama : "q4_0", "q4_1", "q4_2"
+            bloomz : "q4_0", "q4_1"
+            gptneox : "q4_0", "q4_1", "q4_2", "q5_0", "q5_1", "q8_0"
+    :raises RuntimeError: If `model_family` or `dtype` is unsupported, the input file is
+            missing, no output name can be derived, or the quantize executable fails.
+    """
+    invalidInputError(model_family in _quantize_type,
+                      "Now we only support quantization of model family('llama', 'bloomz', 'gptneox')",
+                      "{} is not in the list.".format(model_family))
+    invalidInputError(os.path.isfile(input_path),
+                      "The file {} was not found".format(input_path))
+    # convert quantize type str into corresponding int value
+    quantize_type_map = _quantize_type[model_family]
+    invalidInputError(dtype in quantize_type_map,
+                      "{0} model just accept {1} now, but you pass in {2}.".format(
+                        model_family,
+                        list(quantize_type_map.keys()),
+                        dtype))
+    quantize_type = quantize_type_map[dtype]
+    # TODO : multi input model path
+    if output_path is None:
+        # Only rewrite the file name so the result stays next to the input even
+        # when a parent directory happens to contain "f16".
+        input_dir, input_name = os.path.split(input_path)
+        output_name = input_name.replace("f16", dtype)
+        invalidInputError(output_name != input_name,
+                          "Can not derive an output path from {}.".format(input_path),
+                          "Please specify `output_path` explicitly.")
+        output_path = os.path.join(input_dir, output_name)
+    quantize_exe = os.path.join(bin_dirname, "bin", "quantize-{}".format(model_family))
+    # Pass an argument list rather than splitting a formatted string, so paths
+    # containing whitespace reach the executable intact.
+    p = subprocess.Popen([quantize_exe, input_path, output_path, str(quantize_type)])
+    p.communicate()
+    invalidInputError(not p.returncode,
+                      "Fail to quantize {}.".format(str(input_path)))
diff --git a/python/llm/src/bigdl/llm/utils/__init__.py b/python/llm/src/bigdl/llm/utils/__init__.py
new file mode 100644
index 00000000..dbdafd2a
--- /dev/null
+++ b/python/llm/src/bigdl/llm/utils/__init__.py
@@ -0,0 +1,20 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This makes sure Python is aware there is more than one sub-package within bigdl,
+# physically located elsewhere.
+# Otherwise there would be module not found error in non-pip's setting as Python would
+# only search the first bigdl package and end up finding only one sub-package.
diff --git a/python/llm/src/bigdl/llm/utils/common/__init__.py b/python/llm/src/bigdl/llm/utils/common/__init__.py
new file mode 100644
index 00000000..7d318395
--- /dev/null
+++ b/python/llm/src/bigdl/llm/utils/common/__init__.py
@@ -0,0 +1,22 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This makes sure Python is aware there is more than one sub-package within bigdl,
+# physically located elsewhere.
+# Otherwise there would be module not found error in non-pip's setting as Python would
+# only search the first bigdl package and end up finding only one sub-package.
+
+from .log4Error import invalidInputError
diff --git a/python/llm/src/bigdl/llm/utils/common/log4Error.py b/python/llm/src/bigdl/llm/utils/common/log4Error.py
new file mode 100644
index 00000000..9db756f0
--- /dev/null
+++ b/python/llm/src/bigdl/llm/utils/common/log4Error.py
@@ -0,0 +1,65 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+def outputUserMessage(errMsg, fixMsg=None):
+    """
+    Log a usage error, an optional fix hint and a call-stack banner at ERROR level.
+
+    :param errMsg: Description of the error; appended to the usage-error banner.
+    :param fixMsg: Optional hint on how to fix it; skipped when falsy.
+    """
+    usage_banner = "\n\n****************************Usage Error************************\n"
+    logger.error(usage_banner + errMsg)
+    if fixMsg:
+        fix_banner = "\n\n**************************How to fix***********************\n"
+        logger.error(fix_banner + fixMsg)
+    logger.error("\n\n****************************Call Stack*************************")
+
+
+def invalidInputError(condition, errMsg, fixMsg=None):
+    """
+    Log the messages and raise RuntimeError(errMsg) when `condition` is falsy.
+
+    :param condition: Value expected to be truthy; nothing happens when it is.
+    :param errMsg: Error text, logged and used as the exception message.
+    :param fixMsg: Optional fix hint, only logged.
+    """
+    if condition:
+        return
+    outputUserMessage(errMsg, fixMsg)
+    raise RuntimeError(errMsg)
+
+
+def invalidOperationError(condition, errMsg, fixMsg=None, cause=None):
+    """
+    Log the messages and raise when `condition` is falsy, preferring `cause`.
+
+    :param condition: Value expected to be truthy; nothing happens when it is.
+    :param errMsg: Error text, logged and used for the fallback RuntimeError.
+    :param fixMsg: Optional fix hint, only logged.
+    :param cause: Optional exception raised instead of RuntimeError when truthy.
+    """
+    if condition:
+        return
+    outputUserMessage(errMsg, fixMsg)
+    if cause:
+        raise cause
+    raise RuntimeError(errMsg)
diff --git a/python/llm/vendor/bloomz.cpp b/python/llm/vendor/bloomz.cpp
new file mode 160000
index 00000000..6d2dee07
--- /dev/null
+++ b/python/llm/vendor/bloomz.cpp
@@ -0,0 +1 @@
+Subproject commit 6d2dee07626b7e8b6af1ada2bfb1bb17de4deaea
diff --git a/python/llm/vendor/llama.cpp b/python/llm/vendor/llama.cpp
new file mode 160000
index 00000000..f6ab8e2a
--- /dev/null
+++ b/python/llm/vendor/llama.cpp
@@ -0,0 +1 @@
+Subproject commit f6ab8e2a03f537e853fef2deb36be89c4f698b05
diff --git a/python/llm/vendor/redpajama.cpp b/python/llm/vendor/redpajama.cpp
new file mode 160000
index 00000000..bec989c2
--- /dev/null
+++ b/python/llm/vendor/redpajama.cpp
@@ -0,0 +1 @@
+Subproject commit bec989c272fd733d1db1069fb47ad5008ae1cd7e