Replace gradio_web_server.patch to adjust webui (#12329)

* replace gradio_web_server.patch to adjust webui

* fix patch problem

---------

Co-authored-by: ATMxsp01 <shou.xu@intel.com>
This commit is contained in:
Xu, Shuo 2024-11-06 09:16:32 +08:00 committed by GitHub
parent 7240c283a3
commit 899a30331a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -1,6 +1,6 @@
--- gradio_web_server.py 2024-06-20 14:21:48.013518726 +0800 --- a/gradio_web_server.py
+++ gradio_web_server_new.py 2024-06-20 14:23:09.822830709 +0800 +++ b/gradio_web_server_new.py
@@ -9,8 +9,10 @@ @@ -9,8 +9,10 @@ import hashlib
import json import json
import os import os
import random import random
@@ -11,7 +11,7 @@
import gradio as gr import gradio as gr
import requests import requests
@@ -241,7 +243,7 @@ @@ -241,7 +243,7 @@ def clear_history(request: gr.Request):
ip = get_ip(request) ip = get_ip(request)
logger.info(f"clear_history. ip: {ip}") logger.info(f"clear_history. ip: {ip}")
state = None state = None
@@ -20,7 +20,7 @@
def get_ip(request: gr.Request): def get_ip(request: gr.Request):
@@ -354,6 +356,18 @@ @@ -354,6 +356,18 @@ def is_limit_reached(model_name, ip):
return None return None
@@ -30,16 +30,16 @@
+ first_token_latency = "None" + first_token_latency = "None"
+ next_token_latency = "None" + next_token_latency = "None"
+ if first_token_time is not None: + if first_token_time is not None:
+ first_token_latency = str(first_token_time * 1000) + " ms" + first_token_latency = f"{first_token_time * 1000 :.2f} ms"
+ if next_token_time.size > 0: + if next_token_time.size > 0:
+ next_token_latency = str(np.mean(next_token_time) * 1000) + " ms" + next_token_latency = f"{np.mean(next_token_time) * 1000 :.2f} ms"
+ return first_token_latency, next_token_latency + return first_token_latency, next_token_latency
+ +
+ +
def bot_response( def bot_response(
state, state,
temperature, temperature,
@@ -372,7 +386,7 @@ @@ -372,7 +386,7 @@ def bot_response(
if state.skip_next: if state.skip_next:
# This generate call is skipped due to invalid inputs # This generate call is skipped due to invalid inputs
state.skip_next = False state.skip_next = False
@@ -48,7 +48,7 @@
return return
if apply_rate_limit: if apply_rate_limit:
@@ -381,7 +395,7 @@ @@ -381,7 +395,7 @@ def bot_response(
error_msg = RATE_LIMIT_MSG + "\n\n" + ret["reason"] error_msg = RATE_LIMIT_MSG + "\n\n" + ret["reason"]
logger.info(f"rate limit reached. ip: {ip}. error_msg: {ret['reason']}") logger.info(f"rate limit reached. ip: {ip}. error_msg: {ret['reason']}")
state.conv.update_last_message(error_msg) state.conv.update_last_message(error_msg)
@@ -57,7 +57,7 @@
return return
conv, model_name = state.conv, state.model_name conv, model_name = state.conv, state.model_name
@@ -404,6 +418,10 @@ @@ -404,6 +418,10 @@ def bot_response(
yield ( yield (
state, state,
state.to_gradio_chatbot(), state.to_gradio_chatbot(),
@@ -68,7 +68,7 @@
disable_btn, disable_btn,
disable_btn, disable_btn,
disable_btn, disable_btn,
@@ -444,18 +462,32 @@ @@ -444,18 +462,32 @@ def bot_response(
) )
conv.update_last_message("▌") conv.update_last_message("▌")
@@ -104,7 +104,7 @@
disable_btn, disable_btn,
disable_btn, disable_btn,
disable_btn, disable_btn,
@@ -465,13 +497,14 @@ @@ -465,13 +497,14 @@ def bot_response(
return return
output = data["text"].strip() output = data["text"].strip()
conv.update_last_message(output) conv.update_last_message(output)
@@ -121,7 +121,7 @@
disable_btn, disable_btn,
disable_btn, disable_btn,
disable_btn, disable_btn,
@@ -484,7 +517,7 @@ @@ -484,7 +517,7 @@ def bot_response(
f"{SERVER_ERROR_MSG}\n\n" f"{SERVER_ERROR_MSG}\n\n"
f"(error_code: {ErrorCode.GRADIO_STREAM_UNKNOWN_ERROR}, {e})" f"(error_code: {ErrorCode.GRADIO_STREAM_UNKNOWN_ERROR}, {e})"
) )
@@ -130,7 +130,7 @@
disable_btn, disable_btn,
disable_btn, disable_btn,
disable_btn, disable_btn,
@@ -646,7 +679,8 @@ @@ -646,7 +679,8 @@ def build_single_model_ui(models, add_promotion_links=False):
) )
notice_markdown = f""" notice_markdown = f"""
@@ -140,34 +140,30 @@
{promotion} {promotion}
""" """
@@ -691,6 +725,26 @@ @@ -717,6 +751,22 @@ def build_single_model_ui(models, add_promotion_links=False):
regenerate_btn = gr.Button(value="🔄 Regenerate", interactive=False) label="Max output tokens",
clear_btn = gr.Button(value="🗑️ Clear history", interactive=False) )
+ with gr.Row(): + with gr.Row():
+ with gr.Column(): + with gr.Column():
+ gr.Markdown("### Performance Metrics") + gr.Markdown("### Performance Metrics")
+ prompt_token = gr.Textbox( + prompt_token = gr.Label(
+ label="Prompt token length:", + label="Prompt token length:",
+ interactive=False,
+ ) + )
+ next_token = gr.Textbox( + next_token = gr.Label(
+ label="Generated token length:", + label="Generated token length:",
+ interactive=False,
+ ) + )
+ first_token_latency = gr.Textbox( + first_token_latency = gr.Label(
+ interactive=False,
+ label="First token Latency:", + label="First token Latency:",
+ ) + )
+ next_token_latency = gr.Textbox( + next_token_latency = gr.Label(
+ interactive=False,
+ label="Next token Latency:", + label="Next token Latency:",
+ ) + )
+ +
with gr.Accordion("Parameters", open=False) as parameter_row: if add_promotion_links:
temperature = gr.Slider( gr.Markdown(acknowledgment_md, elem_id="ack_markdown")
minimum=0.0,
@@ -743,9 +797,9 @@ @@ -743,9 +793,9 @@ def build_single_model_ui(models, add_promotion_links=False):
).then( ).then(
bot_response, bot_response,
[state, temperature, top_p, max_output_tokens], [state, temperature, top_p, max_output_tokens],
@ -179,7 +175,7 @@
model_selector.change( model_selector.change(
clear_history, None, [state, chatbot, textbox, imagebox] + btn_list clear_history, None, [state, chatbot, textbox, imagebox] + btn_list
@@ -758,7 +812,7 @@ @@ -758,7 +808,7 @@ def build_single_model_ui(models, add_promotion_links=False):
).then( ).then(
bot_response, bot_response,
[state, temperature, top_p, max_output_tokens], [state, temperature, top_p, max_output_tokens],
@ -188,7 +184,7 @@
) )
send_btn.click( send_btn.click(
add_text, add_text,
@@ -767,7 +821,7 @@ @@ -767,7 +817,7 @@ def build_single_model_ui(models, add_promotion_links=False):
).then( ).then(
bot_response, bot_response,
[state, temperature, top_p, max_output_tokens], [state, temperature, top_p, max_output_tokens],
@ -197,7 +193,7 @@
) )
return [state, model_selector] return [state, model_selector]
@@ -775,7 +829,7 @@ @@ -775,7 +825,7 @@ def build_single_model_ui(models, add_promotion_links=False):
def build_demo(models): def build_demo(models):
with gr.Blocks( with gr.Blocks(
@@ -206,3 +202,8 @@
theme=gr.themes.Default(), theme=gr.themes.Default(),
css=block_css, css=block_css,
) as demo: ) as demo:
@@ -885,3 +935,4 @@ if __name__ == "__main__":
auth=auth,
root_path=args.gradio_root_path,
)
+