diff --git a/python/llm/example/CPU/vLLM-Serving/README.md b/python/llm/example/CPU/vLLM-Serving/README.md
index af619562..44162d30 100644
--- a/python/llm/example/CPU/vLLM-Serving/README.md
+++ b/python/llm/example/CPU/vLLM-Serving/README.md
@@ -17,13 +17,12 @@ To run vLLM continuous batching on Intel CPUs, install the dependencies as follo
 conda create -n bigdl-vllm python==3.9
 conda activate bigdl-vllm
 # Install dependencies
-pip install --pre --upgrade bigdl-llm[all]
+pip3 install numpy
+pip3 install --pre --upgrade bigdl-llm[all]
 pip3 install psutil
 pip3 install sentencepiece # Required for LLaMA tokenizer.
-pip3 install numpy
 pip3 install "torch==2.0.1"
 pip3 install "transformers>=4.33.1" # Required for Code Llama.
-pip3 install "xformers == 0.0.22"
 pip3 install fastapi
 pip3 install "uvicorn[standard]"
 pip3 install "pydantic<2" # Required for OpenAI server.
diff --git a/python/llm/src/bigdl/llm/vllm/model_executor/input_metadata.py b/python/llm/src/bigdl/llm/vllm/model_executor/input_metadata.py
index 9d88fcd4..0a7a24e5 100644
--- a/python/llm/src/bigdl/llm/vllm/model_executor/input_metadata.py
+++ b/python/llm/src/bigdl/llm/vllm/model_executor/input_metadata.py
@@ -33,7 +33,7 @@
 from typing import Dict, List, Optional, Tuple
 
 import torch
-from xformers.ops import AttentionBias
+# from xformers.ops import AttentionBias
 
 from bigdl.llm.vllm.sequence import SequenceData
 from bigdl.llm.vllm.sampling_params import SamplingParams
@@ -74,7 +74,7 @@ class InputMetadata:
 
         # Set during the execution of the first attention op.
         # TODO(gc): we might want to delete this
-        self.attn_bias: List[AttentionBias] = []
+        # self.attn_bias: List[AttentionBias] = []
 
     def __repr__(self) -> str:
         # Print only useful metadata.
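
Side note on the input_metadata.py change: commenting out the xformers import removes the CPU build's only hard dependency on xformers, which is why the "xformers == 0.0.22" pin was also dropped from the README. A common alternative pattern, sketched below, is to guard the import so a single source file serves both CPU and GPU installs. This is only an illustration of that pattern, not code from this PR; InputMetadataSketch and _HAS_XFORMERS are hypothetical names.

from typing import List

try:
    # Present only on installs that ship xformers (GPU setups).
    from xformers.ops import AttentionBias
    _HAS_XFORMERS = True
except ImportError:
    # CPU install: xformers was dropped from the dependency list.
    AttentionBias = None  # placeholder so the module still imports
    _HAS_XFORMERS = False


class InputMetadataSketch:
    def __init__(self) -> None:
        # Mirrors the commented-out attn_bias field; quoting the annotation
        # keeps AttentionBias out of runtime evaluation when it is absent.
        self.attn_bias: List["AttentionBias"] = []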