Replace gradio_web_server.patch to adjust webui (#12329)
* replace gradio_web_server.patch to adjust webui
* fix patch problem
---------
Co-authored-by: ATMxsp01 <shou.xu@intel.com>
This commit is contained in:
parent
7240c283a3
commit
899a30331a
1 changed file with 33 additions and 32 deletions
|
|
@@ -1,6 +1,6 @@
|
||||||
--- gradio_web_server.py 2024-06-20 14:21:48.013518726 +0800
|
--- a/gradio_web_server.py
|
||||||
+++ gradio_web_server_new.py 2024-06-20 14:23:09.822830709 +0800
|
+++ b/gradio_web_server_new.py
|
||||||
@@ -9,8 +9,10 @@
|
@@ -9,8 +9,10 @@ import hashlib
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import random
|
import random
|
||||||
|
|
@@ -11,7 +11,7 @@
|
||||||
|
|
||||||
import gradio as gr
|
import gradio as gr
|
||||||
import requests
|
import requests
|
||||||
@@ -241,7 +243,7 @@
|
@@ -241,7 +243,7 @@ def clear_history(request: gr.Request):
|
||||||
ip = get_ip(request)
|
ip = get_ip(request)
|
||||||
logger.info(f"clear_history. ip: {ip}")
|
logger.info(f"clear_history. ip: {ip}")
|
||||||
state = None
|
state = None
|
||||||
|
|
@@ -20,7 +20,7 @@
|
||||||
|
|
||||||
|
|
||||||
def get_ip(request: gr.Request):
|
def get_ip(request: gr.Request):
|
||||||
@@ -354,6 +356,18 @@
|
@@ -354,6 +356,18 @@ def is_limit_reached(model_name, ip):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
|
@@ -30,16 +30,16 @@
|
||||||
+ first_token_latency = "None"
|
+ first_token_latency = "None"
|
||||||
+ next_token_latency = "None"
|
+ next_token_latency = "None"
|
||||||
+ if first_token_time is not None:
|
+ if first_token_time is not None:
|
||||||
+ first_token_latency = str(first_token_time * 1000) + " ms"
|
+ first_token_latency = f"{first_token_time * 1000 :.2f} ms"
|
||||||
+ if next_token_time.size > 0:
|
+ if next_token_time.size > 0:
|
||||||
+ next_token_latency = str(np.mean(next_token_time) * 1000) + " ms"
|
+ next_token_latency = f"{np.mean(next_token_time) * 1000 :.2f} ms"
|
||||||
+ return first_token_latency, next_token_latency
|
+ return first_token_latency, next_token_latency
|
||||||
+
|
+
|
||||||
+
|
+
|
||||||
def bot_response(
|
def bot_response(
|
||||||
state,
|
state,
|
||||||
temperature,
|
temperature,
|
||||||
@@ -372,7 +386,7 @@
|
@@ -372,7 +386,7 @@ def bot_response(
|
||||||
if state.skip_next:
|
if state.skip_next:
|
||||||
# This generate call is skipped due to invalid inputs
|
# This generate call is skipped due to invalid inputs
|
||||||
state.skip_next = False
|
state.skip_next = False
|
||||||
|
|
@@ -48,7 +48,7 @@
|
||||||
return
|
return
|
||||||
|
|
||||||
if apply_rate_limit:
|
if apply_rate_limit:
|
||||||
@@ -381,7 +395,7 @@
|
@@ -381,7 +395,7 @@ def bot_response(
|
||||||
error_msg = RATE_LIMIT_MSG + "\n\n" + ret["reason"]
|
error_msg = RATE_LIMIT_MSG + "\n\n" + ret["reason"]
|
||||||
logger.info(f"rate limit reached. ip: {ip}. error_msg: {ret['reason']}")
|
logger.info(f"rate limit reached. ip: {ip}. error_msg: {ret['reason']}")
|
||||||
state.conv.update_last_message(error_msg)
|
state.conv.update_last_message(error_msg)
|
||||||
|
|
@@ -57,7 +57,7 @@
|
||||||
return
|
return
|
||||||
|
|
||||||
conv, model_name = state.conv, state.model_name
|
conv, model_name = state.conv, state.model_name
|
||||||
@@ -404,6 +418,10 @@
|
@@ -404,6 +418,10 @@ def bot_response(
|
||||||
yield (
|
yield (
|
||||||
state,
|
state,
|
||||||
state.to_gradio_chatbot(),
|
state.to_gradio_chatbot(),
|
||||||
|
|
@@ -68,7 +68,7 @@
|
||||||
disable_btn,
|
disable_btn,
|
||||||
disable_btn,
|
disable_btn,
|
||||||
disable_btn,
|
disable_btn,
|
||||||
@@ -444,18 +462,32 @@
|
@@ -444,18 +462,32 @@ def bot_response(
|
||||||
)
|
)
|
||||||
|
|
||||||
conv.update_last_message("▌")
|
conv.update_last_message("▌")
|
||||||
|
|
@@ -104,7 +104,7 @@
|
||||||
disable_btn,
|
disable_btn,
|
||||||
disable_btn,
|
disable_btn,
|
||||||
disable_btn,
|
disable_btn,
|
||||||
@@ -465,13 +497,14 @@
|
@@ -465,13 +497,14 @@ def bot_response(
|
||||||
return
|
return
|
||||||
output = data["text"].strip()
|
output = data["text"].strip()
|
||||||
conv.update_last_message(output)
|
conv.update_last_message(output)
|
||||||
|
|
@@ -121,7 +121,7 @@
|
||||||
disable_btn,
|
disable_btn,
|
||||||
disable_btn,
|
disable_btn,
|
||||||
disable_btn,
|
disable_btn,
|
||||||
@@ -484,7 +517,7 @@
|
@@ -484,7 +517,7 @@ def bot_response(
|
||||||
f"{SERVER_ERROR_MSG}\n\n"
|
f"{SERVER_ERROR_MSG}\n\n"
|
||||||
f"(error_code: {ErrorCode.GRADIO_STREAM_UNKNOWN_ERROR}, {e})"
|
f"(error_code: {ErrorCode.GRADIO_STREAM_UNKNOWN_ERROR}, {e})"
|
||||||
)
|
)
|
||||||
|
|
@@ -130,7 +130,7 @@
|
||||||
disable_btn,
|
disable_btn,
|
||||||
disable_btn,
|
disable_btn,
|
||||||
disable_btn,
|
disable_btn,
|
||||||
@@ -646,7 +679,8 @@
|
@@ -646,7 +679,8 @@ def build_single_model_ui(models, add_promotion_links=False):
|
||||||
)
|
)
|
||||||
|
|
||||||
notice_markdown = f"""
|
notice_markdown = f"""
|
||||||
|
|
@@ -140,34 +140,30 @@
|
||||||
{promotion}
|
{promotion}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -691,6 +725,26 @@
|
@@ -717,6 +751,22 @@ def build_single_model_ui(models, add_promotion_links=False):
|
||||||
regenerate_btn = gr.Button(value="🔄 Regenerate", interactive=False)
|
label="Max output tokens",
|
||||||
clear_btn = gr.Button(value="🗑️ Clear history", interactive=False)
|
)
|
||||||
|
|
||||||
+ with gr.Row():
|
+ with gr.Row():
|
||||||
+ with gr.Column():
|
+ with gr.Column():
|
||||||
+ gr.Markdown("### Performance Metrics")
|
+ gr.Markdown("### Performance Metrics")
|
||||||
+ prompt_token = gr.Textbox(
|
+ prompt_token = gr.Label(
|
||||||
+ label="Prompt token length:",
|
+ label="Prompt token length:",
|
||||||
+ interactive=False,
|
|
||||||
+ )
|
+ )
|
||||||
+ next_token = gr.Textbox(
|
+ next_token = gr.Label(
|
||||||
+ label="Generated token length:",
|
+ label="Generated token length:",
|
||||||
+ interactive=False,
|
|
||||||
+ )
|
+ )
|
||||||
+ first_token_latency = gr.Textbox(
|
+ first_token_latency = gr.Label(
|
||||||
+ interactive=False,
|
|
||||||
+ label="First token Latency:",
|
+ label="First token Latency:",
|
||||||
+ )
|
+ )
|
||||||
+ next_token_latency = gr.Textbox(
|
+ next_token_latency = gr.Label(
|
||||||
+ interactive=False,
|
|
||||||
+ label="Next token Latency:",
|
+ label="Next token Latency:",
|
||||||
+ )
|
+ )
|
||||||
+
|
+
|
||||||
with gr.Accordion("Parameters", open=False) as parameter_row:
|
if add_promotion_links:
|
||||||
temperature = gr.Slider(
|
gr.Markdown(acknowledgment_md, elem_id="ack_markdown")
|
||||||
minimum=0.0,
|
|
||||||
@@ -743,9 +797,9 @@
|
@@ -743,9 +793,9 @@ def build_single_model_ui(models, add_promotion_links=False):
|
||||||
).then(
|
).then(
|
||||||
bot_response,
|
bot_response,
|
||||||
[state, temperature, top_p, max_output_tokens],
|
[state, temperature, top_p, max_output_tokens],
|
||||||
|
|
@@ -179,7 +175,7 @@
|
||||||
|
|
||||||
model_selector.change(
|
model_selector.change(
|
||||||
clear_history, None, [state, chatbot, textbox, imagebox] + btn_list
|
clear_history, None, [state, chatbot, textbox, imagebox] + btn_list
|
||||||
@@ -758,7 +812,7 @@
|
@@ -758,7 +808,7 @@ def build_single_model_ui(models, add_promotion_links=False):
|
||||||
).then(
|
).then(
|
||||||
bot_response,
|
bot_response,
|
||||||
[state, temperature, top_p, max_output_tokens],
|
[state, temperature, top_p, max_output_tokens],
|
||||||
|
|
@@ -188,7 +184,7 @@
|
||||||
)
|
)
|
||||||
send_btn.click(
|
send_btn.click(
|
||||||
add_text,
|
add_text,
|
||||||
@@ -767,7 +821,7 @@
|
@@ -767,7 +817,7 @@ def build_single_model_ui(models, add_promotion_links=False):
|
||||||
).then(
|
).then(
|
||||||
bot_response,
|
bot_response,
|
||||||
[state, temperature, top_p, max_output_tokens],
|
[state, temperature, top_p, max_output_tokens],
|
||||||
|
|
@@ -197,7 +193,7 @@
|
||||||
)
|
)
|
||||||
|
|
||||||
return [state, model_selector]
|
return [state, model_selector]
|
||||||
@@ -775,7 +829,7 @@
|
@@ -775,7 +825,7 @@ def build_single_model_ui(models, add_promotion_links=False):
|
||||||
|
|
||||||
def build_demo(models):
|
def build_demo(models):
|
||||||
with gr.Blocks(
|
with gr.Blocks(
|
||||||
|
|
@@ -206,3 +202,8 @@
|
||||||
theme=gr.themes.Default(),
|
theme=gr.themes.Default(),
|
||||||
css=block_css,
|
css=block_css,
|
||||||
) as demo:
|
) as demo:
|
||||||
|
@@ -885,3 +935,4 @@ if __name__ == "__main__":
|
||||||
|
auth=auth,
|
||||||
|
root_path=args.gradio_root_path,
|
||||||
|
)
|
||||||
|
+
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue