From 26adb82ee3a88dc634a58fdbc6fd6661f8777f27 Mon Sep 17 00:00:00 2001
From: Yuwen Hu <54161268+Oscilloscope98@users.noreply.github.com>
Date: Mon, 2 Dec 2024 18:26:07 +0800
Subject: [PATCH] [NPU] Remove hard code (#12479)

---
 python/llm/src/ipex_llm/transformers/npu_models/convert.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/python/llm/src/ipex_llm/transformers/npu_models/convert.py b/python/llm/src/ipex_llm/transformers/npu_models/convert.py
index 3b076e21..e76619c7 100644
--- a/python/llm/src/ipex_llm/transformers/npu_models/convert.py
+++ b/python/llm/src/ipex_llm/transformers/npu_models/convert.py
@@ -321,9 +321,6 @@ def generate(
     new_tokens = new_generate_kwargs['max_new_tokens']
     invalidInputError(input_length + new_tokens <= self.kv_len + 1,
                       "Input plus output tokens should not exceed max_context_len.")
-    # TODO: may optimize this part later
-    invalidInputError(new_tokens < 1024,
-                      f"Generated tokens ({new_tokens}) exceed named pipeline limitation.")
 
     if "eos_token_id" not in new_generate_kwargs:
         generation_config = GenerationConfig.from_model_config(self.config)