Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changeset/thin-mails-behave.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
"@gradio/core": minor
"gradio": minor
---

feat:Display performance metrics for API/MCP requests in View API page
6 changes: 5 additions & 1 deletion gradio/mcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -836,7 +836,11 @@ async def get_complete_schema(self, request) -> JSONResponse:
if hasattr(block_fn.fn, "_mcp_type"):
mcp_type = block_fn.fn._mcp_type

meta = {"file_data_present": file_data_present, "mcp_type": mcp_type}
meta = {
"file_data_present": file_data_present,
"mcp_type": mcp_type,
"endpoint_name": block_fn.api_name,
}
if required_headers:
meta["headers"] = required_headers

Expand Down
43 changes: 43 additions & 0 deletions gradio/queueing.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
from typing import TYPE_CHECKING, Literal, cast

import fastapi
import numpy as np
import pandas as pd
from anyio.to_thread import run_sync

from gradio import route_utils, routes, wasm_utils
from gradio.data_classes import (
Expand Down Expand Up @@ -137,6 +140,44 @@ def __init__(
default_concurrency_limit
)
self.event_analytics: dict[str, dict[str, float | str | None]] = {}
self.cached_event_analytics_summary = {"functions": {}}
self.event_count_at_last_cache = 0
self.ANAYLTICS_CACHE_FREQUENCY = int(
os.getenv("GRADIO_ANALYTICS_CACHE_FREQUENCY", "1")
)

def compute_analytics_summary(self, event_analytics):
    """Refresh (when due) and return the cached per-function analytics summary.

    The summary is only recomputed when at least
    ``self.ANAYLTICS_CACHE_FREQUENCY`` records have been added to
    ``event_analytics`` since the last recomputation; otherwise the
    previously cached summary is returned untouched.

    Parameters:
        event_analytics: Mapping of event id to a record dict. Each record is
            read for its ``"function"``, ``"status"`` and ``"process_time"``
            entries.

    Returns:
        A dict of the form ``{"functions": {name: stats}}`` where ``stats``
        holds:
            - ``"success_rate"``: successes / (successes + failures), or
              ``None`` when no record has status "success" or "failed".
            - ``"process_time_percentiles"``: the "50th", "90th" and "99th"
              percentiles of the recorded process times, each ``None`` when
              no process time has been recorded for the function.
            - ``"total_requests"``: number of records for the function,
              whatever their status.
    """
    event_count = len(event_analytics)
    # NOTE: the attribute name is spelled "ANAYLTICS" where it is defined in
    # __init__; it has to be read under that exact name here.
    if event_count - self.event_count_at_last_cache < self.ANAYLTICS_CACHE_FREQUENCY:
        return self.cached_event_analytics_summary

    df = pd.DataFrame(list(event_analytics.values()))
    self.event_count_at_last_cache = event_count
    metrics = {"functions": {}}

    # An empty mapping produces a frame without columns, on which
    # groupby("function") would raise KeyError.
    if "function" in df.columns:
        for fn_name, fn_df in df.groupby("function"):
            status = fn_df["status"]
            success = int((status == "success").sum())
            failure = int((status == "failed").sum())
            total = success + failure
            success_rate = success / total if total > 0 else None

            # Records without a process time hold None/NaN. A single NaN makes
            # np.percentile return NaN for every percentile, and NaN is not
            # valid JSON, so missing values are dropped before computing.
            process_times = fn_df["process_time"].dropna().to_numpy(dtype=float)
            if process_times.size:
                p50, p90, p99 = (
                    float(value)
                    for value in np.percentile(process_times, [50, 90, 99])
                )
            else:
                p50 = p90 = p99 = None

            metrics["functions"][fn_name] = {
                "success_rate": success_rate,
                "process_time_percentiles": {
                    "50th": p50,
                    "90th": p90,
                    "99th": p99,
                },
                "total_requests": fn_df.shape[0],
            }

    self.cached_event_analytics_summary = metrics
    return self.cached_event_analytics_summary

def start(self):
self.active_jobs = [None] * self.max_thread_count
Expand Down Expand Up @@ -654,6 +695,7 @@ async def process_events(
success=False,
),
)
await run_sync(self.compute_analytics_summary, self.event_analytics)
if response and response.get("is_generating", False):
old_response = response
old_err = err
Expand Down Expand Up @@ -799,6 +841,7 @@ async def process_events(
)
else:
self.event_analytics[event._id]["status"] = "cancelled"
await run_sync(self.compute_analytics_summary, self.event_analytics)

async def reset_iterators(self, event_id: str):
# Do the same thing as the /reset route
Expand Down
4 changes: 4 additions & 0 deletions gradio/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1835,6 +1835,10 @@ async def analytics_login(request: fastapi.Request):
print(f"* Monitoring URL: {monitoring_url} *")
return HTMLResponse("See console for monitoring URL.")

# Returns the queue's `cached_event_analytics_summary` attribute as the
# response body of GET /monitoring/summary.
# This route is declared before "/monitoring/{key}": FastAPI matches routes in
# declaration order, so declaring it later would let the parameterised route
# capture "summary" as `key`.
# NOTE(review): unlike `analytics_dashboard` below, this handler performs no
# `enable_monitoring` / key check - confirm that exposing the aggregate
# metrics without one is intended.
@app.get("/monitoring/summary")
async def _():
return app.get_blocks()._queue.cached_event_analytics_summary

@app.get("/monitoring/{key}")
async def analytics_dashboard(key: str):
if not blocks.enable_monitoring:
Expand Down
27 changes: 24 additions & 3 deletions js/core/src/api_docs/ApiDocs.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@
};

let js_info: Record<string, any>;
let analytics: Record<string, any>;

get_info().then((data) => {
info = data;
Expand All @@ -108,6 +109,18 @@
js_info = js_api_info;
});

/**
 * Fetch the per-endpoint performance summary from `/monitoring/summary`.
 *
 * Analytics only decorate the API docs, while the page template is gated on
 * `analytics` being set. This function therefore never rejects: on a network
 * error, a non-2xx response or an unexpected payload it resolves to an empty
 * summary so the docs still render.
 */
async function get_summary(): Promise<{
	functions: any;
}> {
	const empty_summary = { functions: {} };
	try {
		const response = await fetch(
			root.replace(/\/$/, "") + "/monitoring/summary"
		);
		if (!response.ok) {
			return empty_summary;
		}
		const data = await response.json();
		return data && data.functions ? data : empty_summary;
	} catch (e) {
		return empty_summary;
	}
}

get_summary().then((summary) => {
	analytics = summary.functions;
});

const dispatch = createEventDispatcher();

$: selected_tools_array = Array.from(selected_tools);
Expand Down Expand Up @@ -148,6 +161,7 @@
meta: {
mcp_type: "tool" | "resource" | "prompt";
file_data_present: boolean;
endpoint_name: string;
};
}

Expand Down Expand Up @@ -192,7 +206,8 @@
description: tool.description || "",
parameters: tool.inputSchema?.properties || {},
meta: tool.meta,
expanded: false
expanded: false,
endpoint_name: tool.endpoint_name
}));
selected_tools = new Set(tools.map((tool) => tool.name));
headers = schema.map((tool: any) => tool.meta?.headers || []).flat();
Expand Down Expand Up @@ -260,6 +275,9 @@
}

onMount(() => {
const controller = new AbortController();
const signal = controller.signal;

document.body.style.overflow = "hidden";
if ("parentIFrame" in window) {
window.parentIFrame?.scrollTo(0, 0);
Expand All @@ -271,7 +289,7 @@
}

// Check MCP server status and fetch tools if active
fetch(mcp_server_url)
fetch(mcp_server_url, { signal: signal })
.then((response) => {
mcp_server_active = response.ok;
if (mcp_server_active) {
Expand All @@ -284,6 +302,7 @@
current_language = "python";
}
}
controller.abort();
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fetching the mcp_server_url opens a persistent connection. That means opening the view api page 5 or more times from the same browser session causes the whole page to freeze because we reach the 5 concurrent connection limit. So I changed it to close the connection immediately.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

got it, interesting

})
.catch(() => {
mcp_server_active = false;
Expand All @@ -295,7 +314,7 @@
});
</script>

{#if info}
{#if info && analytics}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If there are no analytics, the entire api docs will not be visible?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think so. Analytics will always be an object once the request completes, so this check only keeps the page hidden while requests are still in flight. But I will verify just in case.

{#if api_count}
<div class="banner-wrap">
<ApiBanner
Expand Down Expand Up @@ -385,6 +404,7 @@
{mcp_json_stdio}
{file_data_present}
{mcp_docs}
{analytics}
/>
{:else}
1. Confirm that you have cURL installed on your system.
Expand Down Expand Up @@ -461,6 +481,7 @@
api_description={info.named_endpoints[
"/" + dependency.api_name
].description}
{analytics}
/>

<ParametersSnippet
Expand Down
74 changes: 38 additions & 36 deletions js/core/src/api_docs/CodeSnippet.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
export let username: string | null;
export let current_language: "python" | "javascript" | "bash";
export let api_description: string | null = null;
export let analytics: Record<string, any>;

let python_code: HTMLElement;
let js_code: HTMLElement;
Expand All @@ -44,6 +45,7 @@
<EndpointDetail
api_name={dependency.api_name}
description={api_description}
{analytics}
/>
{#if current_language === "python"}
<Block>
Expand All @@ -56,13 +58,13 @@
class="highlight">import</span
> Client{#if has_file_path}, handle_file{/if}

client = Client(<span class="token string">"{space_id || root}"</span
client = Client(<span class="token string">"{space_id || root}"</span
>{#if username !== null}, auth=("{username}", **password**){/if})
result = client.<span class="highlight">predict</span
result = client.<span class="highlight">predict</span
>(<!--
-->{#each endpoint_parameters as { python_type, example_input, parameter_name, parameter_has_default, parameter_default }, i}<!--
-->
{parameter_name
-->{#each endpoint_parameters as { python_type, example_input, parameter_name, parameter_has_default, parameter_default }, i}<!--
-->
{parameter_name
? parameter_name + "="
: ""}<span
>{represent_value(
Expand All @@ -72,11 +74,11 @@ result = client.<span class="highlight">predict</span
)}</span
>,{/each}<!--

-->
api_name=<span class="api-name">"/{dependency.api_name}"</span><!--
-->
)
<span class="highlight">print</span>(result)</pre>
-->
api_name=<span class="api-name">"/{dependency.api_name}"</span><!--
-->
)
<span class="highlight">print</span>(result)</pre>
</div>
</code>
</Block>
Expand All @@ -88,44 +90,44 @@ result = client.<span class="highlight">predict</span
</div>
<div bind:this={js_code}>
<pre>import &lbrace; Client &rbrace; from "@gradio/client";
{#each blob_examples as { component, example_input }, i}<!--
-->
const response_{i} = await fetch("{example_input.url}");
const example{component} = await response_{i}.blob();
{#each blob_examples as { component, example_input }, i}<!--
-->
const response_{i} = await fetch("{example_input.url}");
const example{component} = await response_{i}.blob();
{/each}<!--
-->
const client = await Client.connect(<span class="token string"
-->
const client = await Client.connect(<span class="token string"
>"{space_id || root}"</span
>{#if username !== null}, &lbrace;auth: ["{username}", **password**]&rbrace;{/if});
const result = await client.predict(<span class="api-name"
const result = await client.predict(<span class="api-name"
>"/{dependency.api_name}"</span
>, &lbrace; <!--
-->{#each endpoint_parameters as { label, parameter_name, type, python_type, component, example_input, serializer }, i}<!--
-->{#if blob_components.includes(component)}<!--
-->
<span
-->{#each endpoint_parameters as { label, parameter_name, type, python_type, component, example_input, serializer }, i}<!--
-->{#if blob_components.includes(component)}<!--
-->
<span
class="example-inputs"
>{parameter_name}: example{component}</span
>, <!--
--><span class="desc"><!--
--></span
--><span class="desc"><!--
--></span
><!--
-->{:else}<!--
-->
<span class="example-inputs"
-->{:else}<!--
-->
<span class="example-inputs"
>{parameter_name}: {represent_value(
example_input,
python_type.type,
"js"
)}</span
>, <!--
--><!--
-->{/if}
--><!--
-->{/if}
{/each}
&rbrace;);
&rbrace;);

console.log(result.data);
</pre>
console.log(result.data);
</pre>
</div>
</code>
</Block>
Expand All @@ -138,18 +140,18 @@ console.log(result.data);

<div bind:this={bash_post_code}>
<pre>curl -X POST {normalised_root}{normalised_api_prefix}/call/{dependency.api_name} -s -H "Content-Type: application/json" -d '{"{"}
"data": [{#each endpoint_parameters as { label, parameter_name, type, python_type, component, example_input, serializer }, i}
"data": [{#each endpoint_parameters as { label, parameter_name, type, python_type, component, example_input, serializer }, i}
<!--
-->{represent_value(
-->{represent_value(
example_input,
python_type.type,
"bash"
)}{#if i < endpoint_parameters.length - 1},
{/if}
{/each}
]{"}"}' \
| awk -F'"' '{"{"} print $4{"}"}' \
| read EVENT_ID; curl -N {normalised_root}{normalised_api_prefix}/call/{dependency.api_name}/$EVENT_ID</pre>
]{"}"}' \
| awk -F'"' '{"{"} print $4{"}"}' \
| read EVENT_ID; curl -N {normalised_root}{normalised_api_prefix}/call/{dependency.api_name}/$EVENT_ID</pre>
</div>
</code>
</Block>
Expand Down
25 changes: 24 additions & 1 deletion js/core/src/api_docs/EndpointDetail.svelte
Original file line number Diff line number Diff line change
@@ -1,12 +1,30 @@
<script lang="ts">
	import { format_latency, get_color_from_success_rate } from "./utils";

	// Name of the endpoint being documented (rendered with a leading "/").
	export let api_name: string | null = null;
	// Description rendered next to the endpoint name.
	export let description: string | null = null;
	// Per-endpoint metrics, looked up by `api_name`.
	export let analytics: Record<string, any>;

	// Reactive declarations (`$:`) so the derived values follow later changes
	// to the `analytics` / `api_name` props instead of being frozen at
	// component creation. Optional chaining keeps the script from throwing
	// when `analytics` has not been provided.
	$: endpoint_stats = api_name ? analytics?.[api_name] : undefined;
	$: success_rate = api_name ? endpoint_stats?.success_rate : 0;
	$: color = get_color_from_success_rate(success_rate);
</script>

<h3>
	API name:
	<span class="post">{"/" + api_name}</span>
	<span class="desc">{description}</span>
	<!-- Metrics are shown only when an entry exists for this endpoint. -->
	{#if endpoint_stats}
		<span class="analytics">
			Total requests: {endpoint_stats.total_requests} (<span style={color}
				>{Math.round(success_rate * 100)}%</span
			>
			successful) &nbsp;|&nbsp; p50/p90/p99:
			{format_latency(endpoint_stats.process_time_percentiles["50th"])}
			/
			{format_latency(endpoint_stats.process_time_percentiles["90th"])}
			/
			{format_latency(endpoint_stats.process_time_percentiles["99th"])}
		</span>
	{/if}
</h3>

<style>
Expand All @@ -28,8 +46,13 @@
font-weight: var(--weight-semibold);
}

.desc {
.analytics {
color: var(--body-text-color-subdued);
margin-top: var(--size-1);
}

.desc {
color: var(--body-text-color);
font-size: var(--text-lg);
margin-top: var(--size-1);
}
Expand Down
Loading
Loading