Remove xformers from vLLM-CPU (#9535)

This commit is contained in:
Guancheng Fu 2023-11-27 11:21:25 +08:00 committed by GitHub
parent 2b9c7d2a59
commit b6c3520748
2 changed files with 4 additions and 5 deletions

View file

@@ -17,13 +17,12 @@ To run vLLM continuous batching on Intel CPUs, install the dependencies as follo
conda create -n bigdl-vllm python==3.9 conda create -n bigdl-vllm python==3.9
conda activate bigdl-vllm conda activate bigdl-vllm
# Install dependencies # Install dependencies
pip install --pre --upgrade bigdl-llm[all] pip3 install numpy
pip3 install --pre --upgrade bigdl-llm[all]
pip3 install psutil pip3 install psutil
pip3 install sentencepiece # Required for LLaMA tokenizer. pip3 install sentencepiece # Required for LLaMA tokenizer.
pip3 install numpy
pip3 install "torch==2.0.1" pip3 install "torch==2.0.1"
pip3 install "transformers>=4.33.1" # Required for Code Llama. pip3 install "transformers>=4.33.1" # Required for Code Llama.
pip3 install "xformers == 0.0.22"
pip3 install fastapi pip3 install fastapi
pip3 install "uvicorn[standard]" pip3 install "uvicorn[standard]"
pip3 install "pydantic<2" # Required for OpenAI server. pip3 install "pydantic<2" # Required for OpenAI server.

View file

@@ -33,7 +33,7 @@
from typing import Dict, List, Optional, Tuple from typing import Dict, List, Optional, Tuple
import torch import torch
from xformers.ops import AttentionBias # from xformers.ops import AttentionBias
from bigdl.llm.vllm.sequence import SequenceData from bigdl.llm.vllm.sequence import SequenceData
from bigdl.llm.vllm.sampling_params import SamplingParams from bigdl.llm.vllm.sampling_params import SamplingParams
@@ -74,7 +74,7 @@ class InputMetadata:
# Set during the execution of the first attention op. # Set during the execution of the first attention op.
# TODO(gc): we might want to delete this # TODO(gc): we might want to delete this
self.attn_bias: List[AttentionBias] = [] # self.attn_bias: List[AttentionBias] = []
def __repr__(self) -> str: def __repr__(self) -> str:
# Print only useful metadata. # Print only useful metadata.