diff --git a/python/llm/README.md b/python/llm/README.md
index 9e314278..192952c0 100644
--- a/python/llm/README.md
+++ b/python/llm/README.md
@@ -55,7 +55,7 @@ Currently `bigdl-llm` CLI supports *LLaMA* (e.g., *vicuna*), *GPT-NeoX* (e.g., *
   ```bash
   #convert PyTorch (fp16 or fp32) model;
   #llama/bloom/gptneox/starcoder model family is currently supported
-  lm-convert "/path/to/model/" --model-format pth --model-family "bloom" --outfile "/path/to/output/"
+  llm-convert "/path/to/model/" --model-format pth --model-family "bloom" --outfile "/path/to/output/"

   #convert GPTQ-4bit model
   #only llama model family is currently supported
@@ -102,7 +102,7 @@ You may run the models using `transformers`-style API in `bigdl-llm`.

   See the complete example [here](example/transformers/transformers_int4_pipeline.py).

- - ##### Using native INT4 format
+- ##### Using native INT4 format

   You may also convert Hugging Face *Transformers* models into native INT4 format for maximum performance as follows.

@@ -115,8 +115,8 @@ You may run the models using `transformers`-style API in `bigdl-llm`.
           outfile='/path/to/output/', outtype='int4', model_family="llama")

   #load the converted model
-  from bigdl.llm.transformers import BigdlForCausalLM
-  llm = BigdlForCausalLM.from_pretrained("/path/to/output/model.bin",...)
+  from bigdl.llm.transformers import BigdlNativeForCausalLM
+  llm = BigdlNativeForCausalLM.from_pretrained("/path/to/output/model.bin",...)

   #run the converted model
   input_ids = llm.tokenize(prompt)
@@ -130,13 +130,13 @@ You may run the models using `transformers`-style API in `bigdl-llm`.
 You may convert Hugging Face *Transformers* models into *native INT4* format (currently only *llama*/*bloom*/*gptneox*/*starcoder* model family is supported), and then run the converted models using the LangChain API in `bigdl-llm` as follows.

 ```python
-from bigdl.llm.langchain.llms import BigdlLLM
-from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings
+from bigdl.llm.langchain.llms import BigdlNativeLLM
+from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings
 from langchain.chains.question_answering import load_qa_chain

-embeddings = BigdlLLMEmbeddings(model_path='/path/to/converted/model.bin',
+embeddings = BigdlNativeEmbeddings(model_path='/path/to/converted/model.bin',
                                 model_family="llama",...)
-bigdl_llm = BigdlLLM(model_path='/path/to/converted/model.bin',
+bigdl_llm = BigdlNativeLLM(model_path='/path/to/converted/model.bin',
                      model_family="llama",...)

 doc_chain = load_qa_chain(bigdl_llm, ...)
diff --git a/python/llm/example/langchain/docqa.py b/python/llm/example/langchain/docqa.py
index 7f508fb8..42d808bf 100644
--- a/python/llm/example/langchain/docqa.py
+++ b/python/llm/example/langchain/docqa.py
@@ -29,8 +29,8 @@ from langchain.chains.question_answering import load_qa_chain
 from langchain.callbacks.manager import CallbackManager
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler


-from bigdl.llm.langchain.llms import BigdlLLM
-from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings
+from bigdl.llm.langchain.llms import BigdlNativeLLM
+from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings


@@ -53,13 +53,13 @@ def main(args):
     texts = text_splitter.split_text(input_doc)

     # create embeddings and store into vectordb
-    embeddings = BigdlLLMEmbeddings(model_path=model_path, model_family=model_family, n_threads=n_threads, n_ctx=n_ctx)
+    embeddings = BigdlNativeEmbeddings(model_path=model_path, model_family=model_family, n_threads=n_threads, n_ctx=n_ctx)
     docsearch = Chroma.from_texts(texts, embeddings, metadatas=[{"source": str(i)} for i in range(len(texts))]).as_retriever()

     #get relavant texts
     docs = docsearch.get_relevant_documents(query)

-    bigdl_llm = BigdlLLM(
+    bigdl_llm = BigdlNativeLLM(
         model_path=model_path, model_family=model_family, n_ctx=n_ctx, n_threads=n_threads, callback_manager=callback_manager
     )

diff --git a/python/llm/example/langchain/streamchat.py b/python/llm/example/langchain/streamchat.py
index b070a642..f3b32e91 100644
--- a/python/llm/example/langchain/streamchat.py
+++ b/python/llm/example/langchain/streamchat.py
@@ -21,7 +21,7 @@

 import argparse

-from bigdl.llm.langchain.llms import BigdlLLM
+from bigdl.llm.langchain.llms import BigdlNativeLLM
 from langchain import PromptTemplate, LLMChain
 from langchain.callbacks.manager import CallbackManager
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
@@ -42,7 +42,7 @@ def main(args):
     callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

     # Verbose is required to pass to the callback manager
-    llm = BigdlLLM(
+    llm = BigdlNativeLLM(
         model_path=model_path,
         model_family=model_family,
         n_threads=n_threads,
diff --git a/python/llm/example/transformers/native_int4_pipeline.py b/python/llm/example/transformers/native_int4_pipeline.py
index 1d0b5856..b39ccb13 100644
--- a/python/llm/example/transformers/native_int4_pipeline.py
+++ b/python/llm/example/transformers/native_int4_pipeline.py
@@ -31,8 +31,8 @@ def convert(repo_id_or_model_path, model_family, tmp_path):
     return bigdl_llm_path

 def load(model_path, model_family, n_threads):
-    from bigdl.llm.transformers import BigdlForCausalLM
-    llm = BigdlForCausalLM.from_pretrained(
+    from bigdl.llm.transformers import BigdlNativeForCausalLM
+    llm = BigdlNativeForCausalLM.from_pretrained(
         pretrained_model_name_or_path=model_path,
         model_family=model_family,
         n_threads=n_threads)
diff --git a/python/llm/src/bigdl/llm/langchain/embeddings/__init__.py b/python/llm/src/bigdl/llm/langchain/embeddings/__init__.py
index 1b19ac26..f5c9fac3 100644
--- a/python/llm/src/bigdl/llm/langchain/embeddings/__init__.py
+++ b/python/llm/src/bigdl/llm/langchain/embeddings/__init__.py
@@ -19,8 +19,8 @@
 # Otherwise there would be module not found error in non-pip's setting as Python would
 # only search the first bigdl package and end up finding only one sub-package.

-from .bigdlllm import BigdlLLMEmbeddings
+from .bigdlllm import BigdlNativeEmbeddings

 __all__ = [
-    "BigdlLLMEmbeddings",
+    "BigdlNativeEmbeddings",
 ]
diff --git a/python/llm/src/bigdl/llm/langchain/embeddings/bigdlllm.py b/python/llm/src/bigdl/llm/langchain/embeddings/bigdlllm.py
index 5049f9c8..94ed9d61 100644
--- a/python/llm/src/bigdl/llm/langchain/embeddings/bigdlllm.py
+++ b/python/llm/src/bigdl/llm/langchain/embeddings/bigdlllm.py
@@ -44,7 +44,7 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 # THE SOFTWARE.

-"""Wrapper around BigdlLLM embedding models."""
+"""Wrapper around BigdlNative embedding models."""

 import importlib
 from typing import Any, Dict, List, Optional
@@ -53,14 +53,14 @@
 from pydantic import BaseModel, Extra, Field, root_validator
 from langchain.embeddings.base import Embeddings


-class BigdlLLMEmbeddings(BaseModel, Embeddings):
+class BigdlNativeEmbeddings(BaseModel, Embeddings):
     """Wrapper around bigdl-llm embedding models.

     Example:
         .. code-block:: python

-            from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings
-            llama = BigdlLLMEmbeddings(model_path="/path/to/model.bin")
+            from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings
+            llama = BigdlNativeEmbeddings(model_path="/path/to/model.bin")
     """
     model_family: str = "llama"
diff --git a/python/llm/src/bigdl/llm/langchain/llms/__init__.py b/python/llm/src/bigdl/llm/langchain/llms/__init__.py
index dbc75f61..5ec5b38d 100644
--- a/python/llm/src/bigdl/llm/langchain/llms/__init__.py
+++ b/python/llm/src/bigdl/llm/langchain/llms/__init__.py
@@ -23,12 +23,12 @@
 from typing import Dict, Type
 from langchain.llms.base import BaseLLM

-from .bigdlllm import BigdlLLM
+from .bigdlllm import BigdlNativeLLM

 __all__ = [
-    "BigdlLLM",
+    "BigdlNativeLLM",
 ]

 type_to_cls_dict: Dict[str, Type[BaseLLM]] = {
-    "BigdlLLM": BigdlLLM,
+    "BigdlNativeLLM": BigdlNativeLLM,
 }
\ No newline at end of file
diff --git a/python/llm/src/bigdl/llm/langchain/llms/bigdlllm.py b/python/llm/src/bigdl/llm/langchain/llms/bigdlllm.py
index ecefde0d..ef1f6590 100644
--- a/python/llm/src/bigdl/llm/langchain/llms/bigdlllm.py
+++ b/python/llm/src/bigdl/llm/langchain/llms/bigdlllm.py
@@ -54,14 +54,14 @@
 from langchain.llms.base import LLM


-class BigdlLLM(LLM):
+class BigdlNativeLLM(LLM):
     """Wrapper around the BigDL-LLM

     Example:
         .. code-block:: python

-            from langchain.llms import BigdlLLM
-            llm = BigdlLLM(model_path="/path/to/llama/model")
+            from langchain.llms import BigdlNativeLLM
+            llm = BigdlNativeLLM(model_path="/path/to/llama/model")
     """


@@ -281,8 +281,8 @@ class BigdlLLM(LLM):
         Example:
             .. code-block:: python

-                from langchain.llms import BigdlLLM
-                llm = BigdlLLM(model_path="/path/to/local/llama/model.bin")
+                from langchain.llms import BigdlNativeLLM
+                llm = BigdlNativeLLM(model_path="/path/to/local/llama/model.bin")
                 llm("This is a prompt.")
         """
         if self.streaming:
@@ -326,8 +326,8 @@ class BigdlLLM(LLM):
         Example:
             .. code-block:: python

-                from langchain.llms import BigdlLLM
-                llm = BigdlLLM(
+                from langchain.llms import BigdlNativeLLM
+                llm = BigdlNativeLLM(
                     model_path="/path/to/local/model.bin",
                     temperature = 0.5
                 )
diff --git a/python/llm/src/bigdl/llm/transformers/__init__.py b/python/llm/src/bigdl/llm/transformers/__init__.py
index eaef6320..67bd2474 100644
--- a/python/llm/src/bigdl/llm/transformers/__init__.py
+++ b/python/llm/src/bigdl/llm/transformers/__init__.py
@@ -16,4 +16,4 @@

 from .convert import ggml_convert_int4
 from .model import AutoModelForCausalLM, AutoModel
-from .modelling_bigdl import BigdlForCausalLM
+from .modelling_bigdl import BigdlNativeForCausalLM
diff --git a/python/llm/src/bigdl/llm/transformers/modelling_bigdl.py b/python/llm/src/bigdl/llm/transformers/modelling_bigdl.py
index 2b067a3b..dc89b573 100644
--- a/python/llm/src/bigdl/llm/transformers/modelling_bigdl.py
+++ b/python/llm/src/bigdl/llm/transformers/modelling_bigdl.py
@@ -22,7 +22,7 @@
 from bigdl.llm.utils.common import invalidInputError


-class BigdlForCausalLM:
+class BigdlNativeForCausalLM:
     """
     A generic model class that mimics the behavior of
     ``transformers.LlamaForCausalLM.from_pretrained`` API
diff --git a/python/llm/test/langchain/test_langchain.py b/python/llm/test/langchain/test_langchain.py
index 1d221225..b333fe64 100644
--- a/python/llm/test/langchain/test_langchain.py
+++ b/python/llm/test/langchain/test_langchain.py
@@ -15,8 +15,8 @@
 #

 from bigdl.llm.utils import get_avx_flags
-from bigdl.llm.langchain.embeddings import BigdlLLMEmbeddings
-from bigdl.llm.langchain.llms import BigdlLLM
+from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings
+from bigdl.llm.langchain.llms import BigdlNativeLLM
 import pytest
 from unittest import TestCase
 import os
@@ -35,7 +35,7 @@ class Test_Models_Basics(TestCase):


     def test_langchain_llm_embedding_llama(self):
-        bigdl_embeddings = BigdlLLMEmbeddings(
+        bigdl_embeddings = BigdlNativeEmbeddings(
             model_path=self.llama_model_path,
             model_family="llama")
         text = "This is a test document."
@@ -43,7 +43,7 @@ class Test_Models_Basics(TestCase):
         doc_result = bigdl_embeddings.embed_documents([text])

     def test_langchain_llm_embedding_gptneox(self):
-        bigdl_embeddings = BigdlLLMEmbeddings(
+        bigdl_embeddings = BigdlNativeEmbeddings(
             model_path=self.gptneox_model_path,
             model_family="gptneox")
         text = "This is a test document."
@@ -51,7 +51,7 @@ class Test_Models_Basics(TestCase):
         doc_result = bigdl_embeddings.embed_documents([text])

     def test_langchain_llm_llama(self):
-        llm = BigdlLLM(
+        llm = BigdlNativeLLM(
             model_path=self.llama_model_path,
             max_tokens=32,
             n_threads=self.n_threads)
         question = "What is AI?"
         result = llm(question)

     def test_langchain_llm_gptneox(self):
-        llm = BigdlLLM(
+        llm = BigdlNativeLLM(
             model_path=self.gptneox_model_path,
             model_family="gptneox",
             max_tokens=32,
@@ -68,7 +68,7 @@ class Test_Models_Basics(TestCase):
         question = "What is AI?"
         result = llm(question)

     def test_langchain_llm_bloom(self):
-        llm = BigdlLLM(
+        llm = BigdlNativeLLM(
             model_path=self.bloom_model_path,
             model_family="bloom",
             max_tokens=32,
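A minimal usage sketch of the renamed classes follows, mirroring the README examples updated in this patch; the model paths are placeholders, and only keyword arguments already shown in the patch are used.

```python
# Sketch of the renamed bigdl-llm APIs after this patch; paths are placeholders.
from bigdl.llm.transformers import BigdlNativeForCausalLM
from bigdl.llm.langchain.llms import BigdlNativeLLM
from bigdl.llm.langchain.embeddings import BigdlNativeEmbeddings

# transformers-style API: load a previously converted native INT4 model
llm = BigdlNativeForCausalLM.from_pretrained(
    pretrained_model_name_or_path="/path/to/output/model.bin",
    model_family="llama")

# LangChain-style API: the same converted model behind the LLM and embeddings wrappers
embeddings = BigdlNativeEmbeddings(model_path="/path/to/converted/model.bin",
                                   model_family="llama")
bigdl_llm = BigdlNativeLLM(model_path="/path/to/converted/model.bin",
                           model_family="llama")
```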