diff --git a/python/llm/setup.py b/python/llm/setup.py
index fa5af247..fba5d17e 100644
--- a/python/llm/setup.py
+++ b/python/llm/setup.py
@@ -299,8 +299,7 @@ def setup_package():
                        "intel_extension_for_pytorch==2.1.10+xpu",
                        "bigdl-core-xe-21==" + CORE_XE_VERSION,
                        "bigdl-core-xe-batch-21==" + CORE_XE_VERSION,
-                       "bigdl-core-xe-addons-21==" + CORE_XE_VERSION,
-                       "bigdl-core-xe-esimd-21==" + CORE_XE_VERSION]
+                       "bigdl-core-xe-addons-21==" + CORE_XE_VERSION]
     xpu_21_requires += oneapi_2024_0_requires
     # default to ipex 2.1 for linux and windows
     xpu_requires = copy.deepcopy(xpu_21_requires)
diff --git a/python/llm/src/ipex_llm/transformers/low_bit_linear.py b/python/llm/src/ipex_llm/transformers/low_bit_linear.py
index 14a25fe0..69ae91f1 100644
--- a/python/llm/src/ipex_llm/transformers/low_bit_linear.py
+++ b/python/llm/src/ipex_llm/transformers/low_bit_linear.py
@@ -720,8 +720,7 @@ class LowBitLinear(nn.Linear):
                 if use_batch_forward(x_2d, self.weight.qtype, self.out_len):
                     import xe_batch
                     result = xe_batch.batch_forward(x_2d, self.weight.data,
-                                                    self.weight.qtype,
-                                                    input_seq_size)
+                                                    self.weight.qtype)
                 else:
                     result = xe_linear.forward_new(x_2d, self.weight.data,
                                                    self.weight.qtype, input_seq_size)
@@ -730,8 +729,7 @@ class LowBitLinear(nn.Linear):
             if use_batch_forward(x_2d, self.weight.qtype, self.out_len):
                 import xe_batch
                 result = xe_batch.batch_forward(x_2d, self.weight.data,
-                                                self.weight.qtype,
-                                                input_seq_size)
+                                                self.weight.qtype)
             else:
                 result = xe_linear.forward_new(x_2d, self.weight.data,
                                                self.weight.qtype, input_seq_size)
@@ -843,13 +841,6 @@ class FP16Linear(nn.Linear):
         if x_2d.is_contiguous() is False:
             x_2d = x_2d.contiguous()
 
-        try:
-            import intel_extension_for_pytorch
-            import linear_fp16_esimd
-        except ModuleNotFoundError:
-            invalidInputError(False,
-                              "Please `pip install bigdl_core_xe_esimd` first.")
-
         if x_2d.shape[0] > 8:
             # first token or batch size > 8, re-convert weight
             if self.weight_type == 3:
@@ -861,7 +852,9 @@ class FP16Linear(nn.Linear):
             result = F.linear(x_2d, self.weight)
         else:
             # batch size <= 8, use esimd optimization
-            result = linear_fp16_esimd.forward(x_2d, self.weight.data)
+            import xe_batch
+            result = xe_batch.batch_forward(x_2d, self.weight.data,
+                                            self.qtype)
 
         new_shape = x_shape[:-1] + (self.out_len,)
         result = result.view(new_shape)
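
Taken together, the diff retires the standalone `linear_fp16_esimd` kernel: the `bigdl-core-xe-esimd-21` wheel is dropped from setup.py, the FP16Linear small-batch path now reuses `xe_batch.batch_forward`, and `batch_forward` itself loses its `input_seq_size` argument. A minimal sketch of the resulting FP16Linear dispatch is below; the helper name `fp16_forward` is illustrative (not part of the codebase), and the three-argument `xe_batch.batch_forward(input, weight, qtype)` signature is taken from the diff itself, with `xe_batch` assumed to ship in the `bigdl-core-xe-batch-21` wheel.

```python
import torch
import torch.nn.functional as F


def fp16_forward(x_2d: torch.Tensor, weight: torch.Tensor, qtype: int) -> torch.Tensor:
    """Sketch of FP16Linear's new forward dispatch (helper name is hypothetical)."""
    if not x_2d.is_contiguous():
        x_2d = x_2d.contiguous()
    if x_2d.shape[0] > 8:
        # First token or batch size > 8: plain GEMM path, as in the diff.
        return F.linear(x_2d, weight)
    # Batch size <= 8: route through the batched XE kernel. The new
    # three-argument call (input, weight, qtype) replaces the old
    # linear_fp16_esimd.forward(x_2d, weight); input_seq_size is gone.
    import xe_batch  # assumed to be provided by bigdl-core-xe-batch-21
    return xe_batch.batch_forward(x_2d, weight, qtype)
```

Keeping `import xe_batch` inside the small-batch branch mirrors the diff: the XPU extension module is only loaded when the batched kernel is actually taken, which is also why the old top-level try/except import guard could be deleted outright.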