Remove xformers from vLLM-CPU (#9535)
This commit is contained in:
		
							parent
							
								
									2b9c7d2a59
								
							
						
					
					
						commit
						b6c3520748
					
				
					 2 changed files with 4 additions and 5 deletions
				
			
		| 
						 | 
					@ -17,13 +17,12 @@ To run vLLM continuous batching on Intel CPUs, install the dependencies as follo
 | 
				
			||||||
conda create -n bigdl-vllm python==3.9
 | 
					conda create -n bigdl-vllm python==3.9
 | 
				
			||||||
conda activate bigdl-vllm
 | 
					conda activate bigdl-vllm
 | 
				
			||||||
# Install dependencies
 | 
					# Install dependencies
 | 
				
			||||||
pip install --pre --upgrade bigdl-llm[all]
 | 
					pip3 install numpy
 | 
				
			||||||
 | 
					pip3 install --pre --upgrade bigdl-llm[all]
 | 
				
			||||||
pip3 install psutil
 | 
					pip3 install psutil
 | 
				
			||||||
pip3 install sentencepiece  # Required for LLaMA tokenizer.
 | 
					pip3 install sentencepiece  # Required for LLaMA tokenizer.
 | 
				
			||||||
pip3 install numpy
 | 
					 | 
				
			||||||
pip3 install "torch==2.0.1"
 | 
					pip3 install "torch==2.0.1"
 | 
				
			||||||
pip3 install "transformers>=4.33.1"  # Required for Code Llama.
 | 
					pip3 install "transformers>=4.33.1"  # Required for Code Llama.
 | 
				
			||||||
pip3 install "xformers == 0.0.22"
 | 
					 | 
				
			||||||
pip3 install fastapi
 | 
					pip3 install fastapi
 | 
				
			||||||
pip3 install "uvicorn[standard]"
 | 
					pip3 install "uvicorn[standard]"
 | 
				
			||||||
pip3 install "pydantic<2"  # Required for OpenAI server.
 | 
					pip3 install "pydantic<2"  # Required for OpenAI server.
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -33,7 +33,7 @@
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from typing import Dict, List, Optional, Tuple
 | 
					from typing import Dict, List, Optional, Tuple
 | 
				
			||||||
import torch
 | 
					import torch
 | 
				
			||||||
from xformers.ops import AttentionBias
 | 
					# from xformers.ops import AttentionBias
 | 
				
			||||||
from bigdl.llm.vllm.sequence import SequenceData
 | 
					from bigdl.llm.vllm.sequence import SequenceData
 | 
				
			||||||
from bigdl.llm.vllm.sampling_params import SamplingParams
 | 
					from bigdl.llm.vllm.sampling_params import SamplingParams
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -74,7 +74,7 @@ class InputMetadata:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Set during the execution of the first attention op.
 | 
					        # Set during the execution of the first attention op.
 | 
				
			||||||
        # TODO(gc): we might want to delete this
 | 
					        # TODO(gc): we might want to delete this
 | 
				
			||||||
        self.attn_bias: List[AttentionBias] = []
 | 
					        # self.attn_bias: List[AttentionBias] = []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def __repr__(self) -> str:
 | 
					    def __repr__(self) -> str:
 | 
				
			||||||
        # Print only useful metadata.
 | 
					        # Print only useful metadata.
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue