diff --git a/docker/llm/serving/xpu/docker/start-lightweight_serving-service.sh b/docker/llm/serving/xpu/docker/start-lightweight_serving-service.sh index 86e9d56f..b51e4fc3 100644 --- a/docker/llm/serving/xpu/docker/start-lightweight_serving-service.sh +++ b/docker/llm/serving/xpu/docker/start-lightweight_serving-service.sh @@ -1,5 +1,4 @@ cd /llm/lightweight_serving model_path="/llm/models/Llama-2-7b-chat-hf" low_bit="sym_int4" -source /opt/intel/1ccl-wks/setvars.sh python lightweight_serving.py --repo-id-or-model-path $model_path --low-bit $low_bit \ No newline at end of file diff --git a/docker/llm/serving/xpu/docker/vllm_online_benchmark.py b/docker/llm/serving/xpu/docker/vllm_online_benchmark.py index a2a51bf8..2db0ff18 100644 --- a/docker/llm/serving/xpu/docker/vllm_online_benchmark.py +++ b/docker/llm/serving/xpu/docker/vllm_online_benchmark.py @@ -281,6 +281,8 @@ for MAX_CONCURRENT_REQUESTS in [max_batch]: NUM_WARMUP = 2 * MAX_CONCURRENT_REQUESTS NUM_REQUESTS = 5 * MAX_CONCURRENT_REQUESTS # 总请求次数 + # to avoid warm_up time out + benchmark(LLM_URLS, MODEL, PROMPT_1024, 2, 1, 32, is_warmup = True) benchmark(LLM_URLS, MODEL, PROMPT, NUM_WARMUP, MAX_CONCURRENT_REQUESTS, MAX_TOKENS, is_warmup = True) # 运行benchmark diff --git a/python/llm/src/ipex_llm/serving/fastapi/tgi_protocol.py b/python/llm/src/ipex_llm/serving/fastapi/tgi_protocol.py index 63f1ff91..21ca5edf 100644 --- a/python/llm/src/ipex_llm/serving/fastapi/tgi_protocol.py +++ b/python/llm/src/ipex_llm/serving/fastapi/tgi_protocol.py @@ -31,33 +31,3 @@ class Parameters(BaseModel): top_k: Optional[int] = None top_p: Optional[float] = None typical_p: Optional[float] = None - - @field_validator("repetition_penalty") - def valid_repetition_penalty(cls, v): - if v is not None and v <= 0: - invalidInputError(False, "`repetition_penalty` must be strictly positive") - return v - - @field_validator("temperature") - def valid_temp(cls, v): - if v is not None and v <= 0: - invalidInputError(False, "`temperature` must be strictly positive") - return v - - @field_validator("top_k") - def valid_top_k(cls, v): - if v is not None and v <= 0: - invalidInputError(False, "`top_k` must be strictly positive") - return v - - @field_validator("top_p") - def valid_top_p(cls, v): - if v is not None and (v <= 0 or v >= 1.0): - invalidInputError(False, "`top_p` must be > 0.0 and < 1.0") - return v - - @field_validator("typical_p") - def valid_typical_p(cls, v): - if v is not None and (v <= 0 or v >= 1.0): - invalidInputError(False, "`typical_p` must be > 0.0 and < 1.0") - return v