From 8331875f348fbb4a1b99efdac739a1d86a74c513 Mon Sep 17 00:00:00 2001
From: Guancheng Fu <110874468+gc-fu@users.noreply.github.com>
Date: Wed, 27 Nov 2024 10:41:58 +0800
Subject: [PATCH] Fix (#12390)

---
 docker/llm/serving/xpu/docker/vllm_offline_inference.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docker/llm/serving/xpu/docker/vllm_offline_inference.py b/docker/llm/serving/xpu/docker/vllm_offline_inference.py
index 6ee7598d..6587ae12 100644
--- a/docker/llm/serving/xpu/docker/vllm_offline_inference.py
+++ b/docker/llm/serving/xpu/docker/vllm_offline_inference.py
@@ -54,6 +54,8 @@ llm = LLM(model="YOUR_MODEL",
           disable_async_output_proc=True,
           distributed_executor_backend="ray",
           max_model_len=2000,
+          trust_remote_code=True,
+          block_size=8,
           max_num_batched_tokens=2000)
 # Generate texts from the prompts. The output is a list of RequestOutput objects
 # that contain the prompt, generated text, and other information.
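
For context, a minimal sketch of the engine setup in vllm_offline_inference.py after this patch is applied. This is not part of the patch: it assumes the stock vLLM entry points (from vllm import LLM, SamplingParams), whereas the actual script may import an IPEX-LLM-specific LLM wrapper and pass extra XPU/low-bit arguments not visible in the diff context; the model path, prompt, and sampling values are placeholders.

# Sketch only, under the assumptions stated above.
from vllm import LLM, SamplingParams

llm = LLM(model="YOUR_MODEL",                   # placeholder model path from the script
          disable_async_output_proc=True,
          distributed_executor_backend="ray",   # run workers through Ray
          max_model_len=2000,
          trust_remote_code=True,               # added by this patch: allow custom modeling code from the checkpoint
          block_size=8,                         # added by this patch: KV-cache block size
          max_num_batched_tokens=2000)

# Generate texts from the prompts; each RequestOutput carries the prompt,
# the generated text, and other metadata.
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)  # placeholder sampling settings
outputs = llm.generate(["San Francisco is a"], sampling_params)
for output in outputs:
    print(output.outputs[0].text)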