Add IPEX_LLM_FORCE_BATCH_FORWARD for vLLM docker image (#13151)
This commit is contained in:
		
							parent
							
								
									5df03ced2c
								
							
						
					
					
						commit
						886c7632b2
					
				
					 1 changed files with 3 additions and 0 deletions
				
			
		| 
						 | 
					@ -186,5 +186,8 @@ RUN set -eux && \
 | 
				
			||||||
    # Re-enable sym_int4
 | 
					    # Re-enable sym_int4
 | 
				
			||||||
    sed -i 's/qtype = ggml_tensor_qtype\["woq_int4"\]/qtype = ggml_tensor_qtype["sym_int4"]/' /usr/local/lib/python3.11/dist-packages/ipex_llm/transformers/convert.py
 | 
					    sed -i 's/qtype = ggml_tensor_qtype\["woq_int4"\]/qtype = ggml_tensor_qtype["sym_int4"]/' /usr/local/lib/python3.11/dist-packages/ipex_llm/transformers/convert.py
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Set the environment variable that forces the use of batch_forward_new()
 | 
				
			||||||
 | 
					ENV IPEX_LLM_FORCE_BATCH_FORWARD=1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
WORKDIR /llm/
 | 
					WORKDIR /llm/
 | 
				
			||||||
ENTRYPOINT ["bash", "/llm/start-vllm-service.sh"]
 | 
					ENTRYPOINT ["bash", "/llm/start-vllm-service.sh"]
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue