diff --git a/python/llm/src/bigdl/llm/ggml/model/bloom/__init__.py b/python/llm/src/bigdl/llm/ggml/model/bloom/__init__.py new file mode 100644 index 00000000..b578caae --- /dev/null +++ b/python/llm/src/bigdl/llm/ggml/model/bloom/__init__.py @@ -0,0 +1,22 @@ +# +# Copyright 2016 The BigDL Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This would makes sure Python is aware there is more than one sub-package within bigdl, +# physically located elsewhere. +# Otherwise there would be module not found error in non-pip's setting as Python would +# only search the first bigdl package and end up finding only one sub-package. + +from .bloom import Bloom diff --git a/python/llm/src/bigdl/llm/ggml/model/bloom/bloom.py b/python/llm/src/bigdl/llm/ggml/model/bloom/bloom.py new file mode 100644 index 00000000..52c883ee --- /dev/null +++ b/python/llm/src/bigdl/llm/ggml/model/bloom/bloom.py @@ -0,0 +1,129 @@ +# +# Copyright 2016 The BigDL Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +# =========================================================================== +# +# This file is adapted from +# https://github.com/abetlen/llama-cpp-python/blob/main/llama_cpp/llama.py +# +# MIT License +# +# Copyright (c) 2023 Andrei Betlen +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# This would makes sure Python is aware there is more than one sub-package within bigdl, +# physically located elsewhere. +# Otherwise there would be module not found error in non-pip's setting as Python would +# only search the first bigdl package and end up finding only one sub-package. 
from .bloom_cpp import bloom_load, bloom_free, bloom_run


class Bloom:
    """High-level Python wrapper for a bloom.cpp model."""

    def __init__(self,
                 model_path: str,
                 n_ctx: int = 512,
                 seed: int = 1337,
                 logits_all: bool = False,
                 n_threads: int = -1,
                 n_batch: int = 8,
                 last_n_tokens_size: int = 64,
                 verbose: bool = True,
                 ):
        """Load a bloom.cpp model from `model_path`.

        Args:
            model_path: Path to the model file.
            n_ctx: Maximum context size.
            seed: Random seed. 0 for random.
            logits_all: Return logits for all tokens, not just the last token.
            n_threads: Number of threads to use. -1 lets the native library
                choose the thread count.
            n_batch: Maximum number of prompt tokens to batch together when
                calling the native evaluation function.
            last_n_tokens_size: Maximum number of tokens to keep in the
                last_n_tokens deque.
            verbose: Print verbose output to stderr.

        Raises:
            RuntimeError: If the model cannot be loaded from `model_path`.

        Returns:
            A Bloom instance.
        """
        self.ctx = bloom_load(bytes(model_path, encoding='utf-8'), n_ctx, n_threads)
        if not self.ctx:
            raise RuntimeError(f"Failed to load model from {model_path}")
        self.n_ctx = n_ctx
        self.seed = seed
        self.logits_all = logits_all
        self.n_threads = n_threads
        self.n_batch = n_batch
        self.last_n_tokens_size = last_n_tokens_size
        self.verbose = verbose

    def __call__(self, prompt: str, max_tokens: int, stream: bool = False):
        """Generate up to `max_tokens` tokens continuing `prompt`.

        When `stream` is True, return a generator that yields the growing
        text one step at a time; otherwise return the full completion.
        """
        if stream:
            return self.stream(prompt, max_tokens)
        else:
            return self._eval(prompt, max_tokens, False)

    def _eval(self, prompt: str, max_tokens: int, match_str: bool):
        """Run the native model once and return prompt plus generated text.

        BUG FIX: the end-of-sequence check previously read
        `prompt.endswith("")`, which is True for every string (all strings
        end with the empty string), so this method always returned the
        prompt untouched and no generation ever ran. The intended sentinel
        is the "</s>" EOS marker used by BLOOM models.
        """
        if prompt.endswith("</s>") or max_tokens < 1:
            return prompt
        # use `buf` to store prompt and generated string,
        # assume the average length of words is less than 20 bytes
        # NOTE(review): `bytes` is immutable; the native side writing into it
        # works under CPython but a ctypes `create_string_buffer` would be
        # the strictly correct choice -- confirm against the C signature.
        buf = bytes((len(prompt) + max_tokens) * 20)
        ret = bloom_run(ctx=self.ctx,
                        seed=self.seed,
                        n_threads=self.n_threads,
                        n_batch=self.n_batch,
                        n_predict=max_tokens,
                        match_str=match_str,
                        prompt=bytes(prompt, encoding='utf-8'),
                        buf=buf)
        # The native side NUL-terminates; strip trailing NUL padding.
        s = str(buf, encoding='utf-8').rstrip("\x00")
        return s

    def stream(self, prompt: str, max_tokens: int):
        """Yield the completion incrementally, one token per iteration."""
        if prompt.endswith("</s>") or max_tokens < 1:
            yield prompt
        else:
            for i in range(max_tokens):
                if prompt.endswith("</s>"):
                    break
                # match_str is False only on the first step so the full
                # prompt is evaluated once; later steps continue from it.
                prompt = self._eval(prompt, 1, i != 0)
                yield prompt

    def free(self):
        """Release the native model context."""
        bloom_free(self.ctx)
+# +# =========================================================================== +# +# This file is adapted from +# https://github.com/abetlen/llama-cpp-python/blob/main/llama_cpp/llama_cpp.py +# +# MIT License +# +# Copyright (c) 2023 Andrei Betlen +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# This would makes sure Python is aware there is more than one sub-package within bigdl, +# physically located elsewhere. +# Otherwise there would be module not found error in non-pip's setting as Python would +# only search the first bigdl package and end up finding only one sub-package. 
import sys
import os
import ctypes
from ctypes import (
    c_int,
    c_float,
    c_char_p,
    c_void_p,
    c_bool,
    POINTER,
    Structure,
    Array,
    c_uint8,
    c_size_t,
)
import pathlib


# Load the library
def _load_shared_library(lib_base_name: str):
    """Locate and load the native bloom shared library via ctypes.

    Search order:
      1. The explicit path in the ``BLOOM_CPP_LIB`` environment variable,
         if set (overrides the default candidates).
      2. ``lib{base}{ext}`` and ``{base}{ext}`` under the package's
         ``libs`` directory (python/llm/src/bigdl/llm/libs).

    Args:
        lib_base_name: Base name of the library, without the ``lib``
            prefix or platform extension.

    Returns:
        The loaded ``ctypes.CDLL`` handle.

    Raises:
        RuntimeError: If the platform is unsupported or loading fails.
        FileNotFoundError: If no candidate library file exists.
    """
    # Determine the file extension based on the platform
    if sys.platform.startswith("linux"):
        lib_ext = ".so"
    elif sys.platform == "darwin":
        # NOTE(review): macOS normally uses .dylib; this project appears to
        # ship .so binaries on darwin too -- confirm against the build output.
        lib_ext = ".so"
    elif sys.platform == "win32":
        lib_ext = ".dll"
    else:
        raise RuntimeError("Unsupported platform")

    # Construct the paths to the possible shared library names
    # (python/llm/src/bigdl/llm/libs)
    _base_path = pathlib.Path(__file__).parent.parent.parent.parent.resolve()
    _base_path = _base_path / 'libs'
    # Searching for the library in the current directory under the name "libbloom"
    # (default name for bloomcpp) and "bloom" (default name for this repo)
    _lib_paths = [
        _base_path / f"lib{lib_base_name}{lib_ext}",
        _base_path / f"{lib_base_name}{lib_ext}",
    ]

    if "BLOOM_CPP_LIB" in os.environ:
        lib_base_name = os.environ["BLOOM_CPP_LIB"]
        _lib = pathlib.Path(lib_base_name)
        _base_path = _lib.parent.resolve()
        _lib_paths = [_lib.resolve()]

    # Add the library directory to the DLL search path on Windows (if needed)
    if sys.platform == "win32" and sys.version_info >= (3, 8):
        os.add_dll_directory(str(_base_path))

    # Try to load the shared library, handling potential errors
    for _lib_path in _lib_paths:
        if _lib_path.exists():
            try:
                return ctypes.CDLL(str(_lib_path))
            except Exception as e:
                # Chain the original loader error explicitly so the real
                # dlopen/LoadLibrary failure is preserved in the traceback.
                raise RuntimeError(
                    f"Failed to load shared library '{_lib_path}': {e}") from e

    raise FileNotFoundError(f"Shared library with base name '{lib_base_name}' not found")


# Specify the base name of the shared library to load
_lib_base_name = "bloom"

# Load the library
_lib = _load_shared_library(_lib_base_name)


def bloom_load(fname: bytes, n_ctx: c_int, n_threads: c_int) -> c_void_p:
    """Load a bloom model file; returns an opaque native context pointer.

    `fname` must already be an encoded bytes path (the native side takes
    a C string).
    """
    return _lib.bloom_load(fname, n_ctx, n_threads)


_lib.bloom_load.argtypes = [c_char_p, c_int, c_int]
_lib.bloom_load.restype = c_void_p


def bloom_free(ctx: c_void_p):
    """Free a context previously returned by `bloom_load`."""
    return _lib.bloom_free(ctx)


_lib.bloom_free.argtypes = [c_void_p]
_lib.bloom_free.restype = None


def bloom_run(ctx: c_void_p,
              seed: c_int,
              n_threads: c_int,
              n_batch: c_int,
              n_predict: c_int,
              match_str: c_bool,
              prompt: bytes,
              buf: bytes) -> c_int:
    """Run generation on `ctx`; the native side writes output text into `buf`.

    Returns the native status code.
    """
    return _lib.bloom_run(ctx, seed, n_threads, n_batch, n_predict, match_str, prompt, buf)


_lib.bloom_run.argtypes = [c_void_p, c_int, c_int, c_int, c_int, c_bool, c_char_p, c_char_p]
_lib.bloom_run.restype = c_int

# ------------------------------------------------------------------- #