Remove xformers from vLLM-CPU (#9535)
This commit is contained in:
parent
2b9c7d2a59
commit
b6c3520748
2 changed files with 4 additions and 5 deletions
|
|
@@ -17,13 +17,12 @@ To run vLLM continuous batching on Intel CPUs, install the dependencies as follo
|
||||||
conda create -n bigdl-vllm python==3.9
|
conda create -n bigdl-vllm python==3.9
|
||||||
conda activate bigdl-vllm
|
conda activate bigdl-vllm
|
||||||
# Install dependencies
|
# Install dependencies
|
||||||
pip install --pre --upgrade bigdl-llm[all]
|
pip3 install numpy
|
||||||
|
pip3 install --pre --upgrade bigdl-llm[all]
|
||||||
pip3 install psutil
|
pip3 install psutil
|
||||||
pip3 install sentencepiece # Required for LLaMA tokenizer.
|
pip3 install sentencepiece # Required for LLaMA tokenizer.
|
||||||
pip3 install numpy
|
|
||||||
pip3 install "torch==2.0.1"
|
pip3 install "torch==2.0.1"
|
||||||
pip3 install "transformers>=4.33.1" # Required for Code Llama.
|
pip3 install "transformers>=4.33.1" # Required for Code Llama.
|
||||||
pip3 install "xformers == 0.0.22"
|
|
||||||
pip3 install fastapi
|
pip3 install fastapi
|
||||||
pip3 install "uvicorn[standard]"
|
pip3 install "uvicorn[standard]"
|
||||||
pip3 install "pydantic<2" # Required for OpenAI server.
|
pip3 install "pydantic<2" # Required for OpenAI server.
|
||||||
|
|
|
||||||
|
|
@@ -33,7 +33,7 @@
|
||||||
|
|
||||||
from typing import Dict, List, Optional, Tuple
|
from typing import Dict, List, Optional, Tuple
|
||||||
import torch
|
import torch
|
||||||
from xformers.ops import AttentionBias
|
# from xformers.ops import AttentionBias
|
||||||
from bigdl.llm.vllm.sequence import SequenceData
|
from bigdl.llm.vllm.sequence import SequenceData
|
||||||
from bigdl.llm.vllm.sampling_params import SamplingParams
|
from bigdl.llm.vllm.sampling_params import SamplingParams
|
||||||
|
|
||||||
|
|
@@ -74,7 +74,7 @@ class InputMetadata:
|
||||||
|
|
||||||
# Set during the execution of the first attention op.
|
# Set during the execution of the first attention op.
|
||||||
# TODO(gc): we might want to delete this
|
# TODO(gc): we might want to delete this
|
||||||
self.attn_bias: List[AttentionBias] = []
|
# self.attn_bias: List[AttentionBias] = []
|
||||||
|
|
||||||
def __repr__(self) -> str:
|
def __repr__(self) -> str:
|
||||||
# Print only useful metadata.
|
# Print only useful metadata.
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue