ui-svelte: fix histogram calculation (#695)

- Fix the histogram calculation to use the server-provided generation
tokens/second.
- Move the histogram to the Activity page, where it can live alongside the rest of
the token metrics.

Fixes #681
This commit is contained in:
Benson Wong
2026-04-22 23:42:39 -07:00
committed by GitHub
parent 5938dbee8f
commit 0b31ccacc1
11 changed files with 365 additions and 238 deletions
+2 -11
View File
@@ -19,9 +19,6 @@ jobs:
run-tests:
runs-on: ubuntu-latest
defaults:
run:
working-directory: ui-svelte
steps:
- uses: actions/checkout@v4
@@ -32,11 +29,5 @@ jobs:
cache: 'npm'
cache-dependency-path: ui-svelte/package-lock.json
- name: Install dependencies
run: npm ci
- name: Type check
run: npm run check
- name: Run tests
run: npm test
- name: Run UI tests
run: make test-ui
+1
View File
@@ -24,6 +24,7 @@ llama-swap is a light weight, transparent proxy server that provides automatic m
- Run `gofmt -l .` before committing to verify formatting. Fix any reported files with `gofmt -w <file>`.
- Use `make test-dev` after running new tests for a quick overall test run. This runs `go test` and `staticcheck`. Fix any static checking errors. Use this only when changes are made to any code under the `proxy/` directory.
- Use `make test-all` before completing work. This includes long running concurrency tests.
- Use `make test-ui` after making changes to the UI in ui-svelte/
### Commit message example format:
+4 -1
View File
@@ -97,6 +97,9 @@ wol-proxy: $(BUILD_DIR)
@echo "Building wol-proxy"
go build -o $(BUILD_DIR)/wol-proxy-$(GOOS)-$(GOARCH)-$(shell date +%Y-%m-%d) cmd/wol-proxy/wol-proxy.go
test-ui:
cd ui-svelte && npm ci && npm run check && npm test
# Phony targets
.PHONY: all clean ui mac windows simple-responder simple-responder-windows test test-all test-dev wol-proxy
.PHONY: all clean ui mac windows simple-responder simple-responder-windows test test-all test-dev test-ui wol-proxy
# Declare the linux build targets as phony (the special target is spelled .PHONY).
.PHONY: linux linux-arm64 linux-amd64
@@ -0,0 +1,72 @@
<script lang="ts">
import { inFlightRequests, metrics } from "../stores/api";
import { persistentStore } from "../stores/persistent";
import { calculateHistogramData } from "../lib/histogram";
import TokenHistogram from "./TokenHistogram.svelte";
const nf = new Intl.NumberFormat();
const histogramCollapsed = persistentStore<boolean>("activity-histogram-collapsed", false);
// Summary shown in the activity card: request/token totals, the in-flight
// request count, and the tokens/sec histogram (null until at least one
// request reports a positive tokens_per_second).
let stats = $derived.by(() => {
  let inputTokenSum = 0;
  let outputTokenSum = 0;
  const speeds: number[] = [];

  // Single pass over the metrics: accumulate token totals and collect the
  // server-reported generation speeds that are usable for the histogram.
  for (const entry of $metrics) {
    inputTokenSum += entry.input_tokens;
    outputTokenSum += entry.output_tokens;
    if (entry.tokens_per_second > 0) {
      speeds.push(entry.tokens_per_second);
    }
  }

  const histogramData =
    speeds.length === 0
      ? null
      : calculateHistogramData(speeds, { minBins: 20, maxBins: 80, binScaling: 3 });

  return {
    totalRequests: $metrics.length,
    totalInputTokens: inputTokenSum,
    totalOutputTokens: outputTokenSum,
    inFlightRequests: $inFlightRequests,
    histogramData,
  };
});
</script>
<div class="card">
<button
class="flex items-center gap-1 px-4 pt-3 text-xs font-medium text-gray-500 dark:text-gray-400 hover:text-gray-700 dark:hover:text-gray-200 transition-colors"
onclick={() => $histogramCollapsed = !$histogramCollapsed}
>
<svg
class="w-3 h-3 transition-transform"
style="transform: rotate({$histogramCollapsed ? -90 : 0}deg)"
viewBox="0 0 16 16"
fill="currentColor"
>
<path d="M4.5 6l3.5 4 3.5-4H4.5z" />
</svg>
Tokens/sec Distribution
</button>
{#if !$histogramCollapsed}
{#if stats.histogramData}
<TokenHistogram data={stats.histogramData} />
{:else}
<div class="px-4 py-6 text-center text-sm text-gray-500 dark:text-gray-400">
No token speed data yet
</div>
{/if}
{/if}
<div class="grid grid-cols-3 gap-x-6 gap-y-1 px-4 pb-3 text-sm">
<div class="text-xs uppercase tracking-wider text-gray-500 dark:text-gray-400">Requests</div>
<div class="text-xs uppercase tracking-wider text-gray-500 dark:text-gray-400">Processed</div>
<div class="text-xs uppercase tracking-wider text-gray-500 dark:text-gray-400">Generated</div>
<div class="text-sm text-gray-700 dark:text-gray-300">
<span class="font-semibold">{nf.format(stats.totalRequests)}</span> completed,
<span class="font-semibold">{nf.format(stats.inFlightRequests)}</span> waiting
</div>
<div class="text-sm text-gray-700 dark:text-gray-300">
<span class="font-semibold">{nf.format(stats.totalInputTokens)}</span> tokens
</div>
<div class="text-sm text-gray-700 dark:text-gray-300">
<span class="font-semibold">{nf.format(stats.totalOutputTokens)}</span> tokens
</div>
</div>
</div>
-167
View File
@@ -1,167 +0,0 @@
<script lang="ts">
import { inFlightRequests, metrics } from "../stores/api";
import TokenHistogram from "./TokenHistogram.svelte";
interface HistogramData {
bins: number[];
min: number;
max: number;
binSize: number;
p99: number;
p95: number;
p50: number;
}
// Aggregates the metrics store into the values rendered by the stats table:
// request/token totals, tokens/sec percentiles (formatted strings) and the
// histogram passed to TokenHistogram (null when no request is usable).
let stats = $derived.by(() => {
  const totalRequests = $metrics.length;
  const totalInputTokens = $metrics.reduce((sum, m) => sum + m.input_tokens, 0);
  const totalOutputTokens = $metrics.reduce((sum, m) => sum + m.output_tokens, 0);

  // Tokens/second per request, computed from output_tokens and duration_ms.
  // Requests with no output or no duration are skipped (avoids dividing by zero).
  const tokensPerSecond = $metrics
    .filter((m) => m.duration_ms > 0 && m.output_tokens > 0)
    .map((m) => m.output_tokens / (m.duration_ms / 1000));

  if (tokensPerSecond.length === 0) {
    return {
      totalRequests,
      totalInputTokens,
      totalOutputTokens,
      inFlightRequests: $inFlightRequests,
      tokenStats: { p99: "0", p95: "0", p50: "0" },
      histogramData: null,
    };
  }

  // Sort once; percentiles, min and max are all read from the sorted copy.
  // (Reading min/max from it also avoids spreading a large array into Math.min/max.)
  const sortedTokensPerSecond = [...tokensPerSecond].sort((a, b) => a - b);
  const valueAt = (fraction: number) =>
    sortedTokensPerSecond[Math.floor(sortedTokensPerSecond.length * fraction)];
  const p99 = valueAt(0.99);
  const p95 = valueAt(0.95);
  const p50 = valueAt(0.5);

  // Create histogram data
  const min = sortedTokensPerSecond[0];
  const max = sortedTokensPerSecond[sortedTokensPerSecond.length - 1];
  const binCount = Math.min(30, Math.max(10, Math.floor(tokensPerSecond.length / 5)));
  const binSize = (max - min) / binCount;

  const bins: number[] = Array(binCount).fill(0);
  for (const value of tokensPerSecond) {
    // When every sample is identical binSize is 0; dividing by it would give a
    // NaN index and leave all bins empty, so count those samples in the first bin.
    const binIndex = binSize > 0 ? Math.min(Math.floor((value - min) / binSize), binCount - 1) : 0;
    bins[binIndex]++;
  }

  const histogramData: HistogramData = {
    bins,
    min,
    max,
    binSize,
    p99,
    p95,
    p50,
  };

  return {
    totalRequests,
    totalInputTokens,
    totalOutputTokens,
    inFlightRequests: $inFlightRequests,
    tokenStats: {
      p99: p99.toFixed(2),
      p95: p95.toFixed(2),
      p50: p50.toFixed(2),
    },
    histogramData,
  };
});
const nf = new Intl.NumberFormat();
</script>
<div class="card">
<div class="rounded-lg overflow-hidden border border-card-border-inner">
<table class="min-w-full divide-y divide-card-border-inner">
<thead class="bg-secondary">
<tr>
<th class="px-4 py-3 text-left text-xs font-semibold uppercase tracking-wider text-txtmain">Requests</th>
<th class="px-4 py-3 text-left text-xs font-semibold uppercase tracking-wider text-txtmain border-l border-card-border-inner">
Processed
</th>
<th class="px-4 py-3 text-left text-xs font-semibold uppercase tracking-wider text-txtmain border-l border-card-border-inner">
Generated
</th>
<th class="px-4 py-3 text-left text-xs font-semibold uppercase tracking-wider text-txtmain border-l border-card-border-inner">
Token Stats (tokens/sec)
</th>
</tr>
</thead>
<tbody class="bg-surface divide-y divide-card-border-inner">
<tr class="hover:bg-secondary">
<td class="px-4 py-4 text-sm font-semibold text-gray-900 dark:text-white">
<div class="flex flex-col gap-1">
<span class="text-xs font-medium text-gray-500 dark:text-gray-400">Completed: {nf.format(stats.totalRequests)}</span>
<span class="text-xs font-medium text-gray-500 dark:text-gray-400">Waiting: {nf.format(stats.inFlightRequests)}</span>
</div>
</td>
<td class="px-4 py-4 text-sm text-gray-700 dark:text-gray-300 border-l border-gray-200 dark:border-white/10">
<div class="flex items-center gap-2">
<span class="text-sm font-medium">{nf.format(stats.totalInputTokens)}</span>
<span class="text-xs text-gray-500 dark:text-gray-400">tokens</span>
</div>
</td>
<td class="px-4 py-4 text-sm text-gray-700 dark:text-gray-300 border-l border-gray-200 dark:border-white/10">
<div class="flex items-center gap-2">
<span class="text-sm font-medium">{nf.format(stats.totalOutputTokens)}</span>
<span class="text-xs text-gray-500 dark:text-gray-400">tokens</span>
</div>
</td>
<td class="px-4 py-4 border-l border-gray-200 dark:border-white/10">
<div class="space-y-3">
<div class="grid grid-cols-3 gap-2 items-center">
<div class="text-center">
<div class="text-xs text-gray-500 dark:text-gray-400">P50</div>
<div class="mt-1 inline-block rounded-full bg-gray-100 dark:bg-white/5 px-3 py-1 text-sm font-semibold text-gray-800 dark:text-white">
{stats.tokenStats.p50}
</div>
</div>
<div class="text-center">
<div class="text-xs text-gray-500 dark:text-gray-400">P95</div>
<div class="mt-1 inline-block rounded-full bg-gray-100 dark:bg-white/5 px-3 py-1 text-sm font-semibold text-gray-800 dark:text-white">
{stats.tokenStats.p95}
</div>
</div>
<div class="text-center">
<div class="text-xs text-gray-500 dark:text-gray-400">P99</div>
<div class="mt-1 inline-block rounded-full bg-gray-100 dark:bg-white/5 px-3 py-1 text-sm font-semibold text-gray-800 dark:text-white">
{stats.tokenStats.p99}
</div>
</div>
</div>
{#if stats.histogramData}
<TokenHistogram data={stats.histogramData} />
{/if}
</div>
</td>
</tr>
</tbody>
</table>
</div>
</div>
+5 -21
View File
@@ -1,23 +1,11 @@
<script lang="ts">
interface HistogramData {
bins: number[];
min: number;
max: number;
binSize: number;
p99: number;
p95: number;
p50: number;
}
import type { HistogramData } from "../lib/types";
interface Props {
data: HistogramData;
}
let { data }: { data: HistogramData } = $props();
let { data }: Props = $props();
const height = 120;
const padding = { top: 10, right: 15, bottom: 25, left: 45 };
const viewBoxWidth = 600;
const height = 55;
const padding = { top: 5, right: 45, bottom: 15, left: 45 };
const viewBoxWidth = 1200;
const chartWidth = viewBoxWidth - padding.left - padding.right;
const chartHeight = height - padding.top - padding.bottom;
@@ -121,9 +109,5 @@
{data.max.toFixed(1)}
</text>
<!-- X-axis label -->
<text x={padding.left + chartWidth / 2} y={height - 2} font-size="10" fill="currentColor" opacity="0.6" text-anchor="middle">
Tokens/Second Distribution
</text>
</svg>
</div>
+163
View File
@@ -0,0 +1,163 @@
import { describe, it, expect } from "vitest";
import { calculateHistogramData } from "./histogram";
describe("calculateHistogramData", () => {
describe("edge cases", () => {
it("returns null for empty input", () => {
expect(calculateHistogramData([])).toBeNull();
});
it("handles single value", () => {
const result = calculateHistogramData([42]);
expect(result).not.toBeNull();
expect(result!.bins).toEqual([1]);
expect(result!.min).toBe(42);
expect(result!.max).toBe(42);
expect(result!.binSize).toBe(0);
expect(result!.p50).toBe(42);
expect(result!.p95).toBe(42);
expect(result!.p99).toBe(42);
});
it("handles all identical values", () => {
const result = calculateHistogramData([10, 10, 10, 10, 10]);
expect(result).not.toBeNull();
expect(result!.bins).toEqual([5]);
expect(result!.min).toBe(10);
expect(result!.max).toBe(10);
expect(result!.binSize).toBe(0);
});
it("handles two distinct values", () => {
const result = calculateHistogramData([10, 20]);
expect(result).not.toBeNull();
expect(result!.min).toBe(10);
expect(result!.max).toBe(20);
expect(result!.p50).toBe(15);
const binSum = result!.bins.reduce((s, b) => s + b, 0);
expect(binSum).toBe(2);
});
});
describe("bin distribution", () => {
it("bins sum to total number of values", () => {
const values = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
const result = calculateHistogramData(values);
expect(result).not.toBeNull();
const binSum = result!.bins.reduce((s, b) => s + b, 0);
expect(binSum).toBe(values.length);
});
it("distributes uniform values across bins", () => {
const values = Array.from({ length: 100 }, (_, i) => i);
const result = calculateHistogramData(values);
expect(result).not.toBeNull();
expect(result!.bins.length).toBe(20);
const binSum = result!.bins.reduce((s, b) => s + b, 0);
expect(binSum).toBe(100);
});
it("places values in correct bins", () => {
  // 3 bins over [1, 9] give a bin width of 8/3: the 1s fall in the first bin,
  // the 5s in the middle bin, and the 9s (the maximum) are clamped into the last bin.
  const values = [1, 1, 1, 5, 5, 9, 9, 9];
  const result = calculateHistogramData(values, { minBins: 3, maxBins: 3, binScaling: 1 });
  expect(result).not.toBeNull();
  expect(result!.binSize).toBeCloseTo(8 / 3);
  expect(result!.bins).toEqual([3, 2, 3]);
});
it("handles skewed distribution", () => {
const values = [1, 1, 1, 1, 1, 100];
const result = calculateHistogramData(values);
expect(result).not.toBeNull();
const binSum = result!.bins.reduce((s, b) => s + b, 0);
expect(binSum).toBe(6);
});
});
describe("percentiles", () => {
it("calculates correct p50 for even-length array", () => {
const values = [1, 2, 3, 4];
const result = calculateHistogramData(values);
expect(result).not.toBeNull();
expect(result!.p50).toBe(2.5);
});
it("calculates correct p50 for odd-length array", () => {
const values = [1, 2, 3, 4, 5];
const result = calculateHistogramData(values);
expect(result).not.toBeNull();
expect(result!.p50).toBe(3);
});
it("calculates p99 with interpolation", () => {
const values = Array.from({ length: 100 }, (_, i) => i + 1);
const result = calculateHistogramData(values);
expect(result).not.toBeNull();
expect(result!.p99).toBeCloseTo(99.01);
});
it("calculates p95 with interpolation", () => {
const values = Array.from({ length: 100 }, (_, i) => i + 1);
const result = calculateHistogramData(values);
expect(result).not.toBeNull();
expect(result!.p95).toBeCloseTo(95.05);
});
it("percentiles are monotonically increasing", () => {
const values = Array.from({ length: 200 }, () => Math.random() * 100);
const result = calculateHistogramData(values);
expect(result).not.toBeNull();
expect(result!.p50).toBeLessThanOrEqual(result!.p95);
expect(result!.p95).toBeLessThanOrEqual(result!.p99);
});
});
describe("bin count adaptation", () => {
it("uses minimum bins for small datasets", () => {
const values = Array.from({ length: 20 }, (_, i) => i);
const result = calculateHistogramData(values);
expect(result!.bins.length).toBe(10);
});
it("scales bins with dataset size", () => {
const values = Array.from({ length: 100 }, (_, i) => i);
const result = calculateHistogramData(values);
expect(result!.bins.length).toBe(20);
});
it("caps bins at maximum", () => {
const values = Array.from({ length: 200 }, (_, i) => i);
const result = calculateHistogramData(values);
expect(result!.bins.length).toBe(30);
});
it("respects custom options", () => {
const values = Array.from({ length: 100 }, (_, i) => i);
const result = calculateHistogramData(values, { minBins: 5, maxBins: 10, binScaling: 2 });
expect(result!.bins.length).toBe(10);
});
});
describe("min and max", () => {
it("correctly identifies min and max", () => {
const values = [5, 3, 8, 1, 9, 2];
const result = calculateHistogramData(values);
expect(result!.min).toBe(1);
expect(result!.max).toBe(9);
});
it("handles negative values", () => {
const values = [-10, -5, 0, 5, 10];
const result = calculateHistogramData(values);
expect(result!.min).toBe(-10);
expect(result!.max).toBe(10);
});
it("handles floating point values", () => {
const values = [1.5, 2.7, 3.14, 0.5, 4.99];
const result = calculateHistogramData(values);
expect(result!.min).toBe(0.5);
expect(result!.max).toBe(4.99);
});
});
});
+72
View File
@@ -0,0 +1,72 @@
import type { HistogramData } from "./types";
/** Options controlling how many bins `calculateHistogramData` produces. */
export interface HistogramOptions {
  /** Smallest number of bins to use (default 10). */
  minBins?: number;
  /** Largest number of bins to use (default 30); takes precedence over `minBins` if they conflict. */
  maxBins?: number;
  /** Samples per bin before clamping: the unclamped bin count is `floor(samples / binScaling)` (default 5). */
  binScaling?: number;
}

const DEFAULT_OPTIONS: Required<HistogramOptions> = {
  minBins: 10,
  maxBins: 30,
  binScaling: 5,
};

/**
 * Returns the p-th percentile of an ascending-sorted array, linearly
 * interpolating between the two closest ranks.
 *
 * @param sorted - Samples already sorted in ascending order.
 * @param p - Percentile to read, expected in the range 0–100.
 * @returns The interpolated value, or 0 when `sorted` is empty.
 */
function percentile(sorted: readonly number[], p: number): number {
  if (sorted.length === 0) return 0;
  if (sorted.length === 1) return sorted[0];

  const rank = (p / 100) * (sorted.length - 1);
  const lower = Math.floor(rank);
  const upper = Math.ceil(rank);
  const fraction = rank - lower;

  return sorted[lower] + fraction * (sorted[upper] - sorted[lower]);
}

/**
 * Resolves the number of bins for `sampleCount` samples: `floor(sampleCount / binScaling)`
 * clamped to `[minBins, maxBins]`, then coerced to a whole number of at least 1 so it is
 * always a valid array length.
 */
function resolveBinCount(sampleCount: number, options: HistogramOptions): number {
  const minBins = options.minBins ?? DEFAULT_OPTIONS.minBins;
  const maxBins = options.maxBins ?? DEFAULT_OPTIONS.maxBins;
  const binScaling = options.binScaling ?? DEFAULT_OPTIONS.binScaling;

  const clamped = Math.min(maxBins, Math.max(minBins, Math.floor(sampleCount / binScaling)));

  // NaN/Infinity can only come from unusable options; fall back to the default minimum.
  if (!Number.isFinite(clamped)) return DEFAULT_OPTIONS.minBins;
  return Math.max(1, Math.floor(clamped));
}

/**
 * Builds equal-width histogram bins and the p50/p95/p99 percentiles for a set of samples.
 *
 * Non-finite samples (NaN, ±Infinity) are ignored, since they cannot be ordered or binned.
 * The input array is not modified.
 *
 * @param values - Samples to summarize, in any order.
 * @param options - Bin-count tuning; omitted fields use the defaults (10 / 30 / 5).
 * @returns The histogram, or `null` when there are no finite samples. When every sample
 *   has the same value the result has a single bin and a `binSize` of 0.
 */
export function calculateHistogramData(
  values: readonly number[],
  options: HistogramOptions = DEFAULT_OPTIONS,
): HistogramData | null {
  // filter() returns a fresh array, so sorting in place does not touch the caller's data.
  const sorted = values.filter((v) => Number.isFinite(v)).sort((a, b) => a - b);
  if (sorted.length === 0) return null;

  const min = sorted[0];
  const max = sorted[sorted.length - 1];

  const p50 = percentile(sorted, 50);
  const p95 = percentile(sorted, 95);
  const p99 = percentile(sorted, 99);

  // A zero-width range cannot be divided into bins; report one bin holding everything.
  if (min === max) {
    return {
      bins: [sorted.length],
      min,
      max,
      binSize: 0,
      p50,
      p95,
      p99,
    };
  }

  const binCount = resolveBinCount(sorted.length, options);
  const binSize = (max - min) / binCount;

  const bins = new Array<number>(binCount).fill(0);
  for (const value of sorted) {
    // The maximum maps to index `binCount`; clamp it into the last bin.
    const binIndex = Math.min(Math.floor((value - min) / binSize), binCount - 1);
    bins[binIndex]++;
  }

  return {
    bins,
    min,
    max,
    binSize,
    p50,
    p95,
    p99,
  };
}
+10
View File
@@ -48,6 +48,16 @@ export interface APIEventEnvelope {
data: string;
}
/** Binned distribution of a set of numeric samples, with summary percentiles. */
export interface HistogramData {
/** Number of samples counted in each equal-width bin, ordered from `min` towards `max`. */
bins: number[];
/** Smallest sample value. */
min: number;
/** Largest sample value. */
max: number;
/** Width of each bin; 0 when every sample has the same value (single bin). */
binSize: number;
/** 99th percentile of the samples. */
p99: number;
/** 95th percentile of the samples. */
p95: number;
/** 50th percentile (median) of the samples. */
p50: number;
}
export interface VersionInfo {
build_date: string;
commit: string;
+35 -29
View File
@@ -1,5 +1,6 @@
<script lang="ts">
import { metrics, getCapture } from "../stores/api";
import ActivityStats from "../components/ActivityStats.svelte";
import Tooltip from "../components/Tooltip.svelte";
import CaptureDialog from "../components/CaptureDialog.svelte";
import type { ReqRespCapture } from "../lib/types";
@@ -63,33 +64,38 @@
<div class="p-2">
<h1 class="text-2xl font-bold">Activity</h1>
<div class="mt-4 mb-4">
<ActivityStats />
</div>
{#if $metrics.length === 0}
<div class="text-center py-8">
<p class="text-gray-600">No metrics data available</p>
</div>
{:else}
<div class="card overflow-auto">
<table class="min-w-full divide-y">
<thead class="border-gray-200 dark:border-white/10">
<tr class="text-left text-xs uppercase tracking-wider">
<th class="px-6 py-3">ID</th>
<th class="px-6 py-3">Time</th>
<th class="px-6 py-3">Model</th>
<th class="px-6 py-3">
Cached <Tooltip content="prompt tokens from cache" />
</th>
<th class="px-6 py-3">
Prompt <Tooltip content="new prompt tokens processed" />
</th>
<th class="px-6 py-3">Generated</th>
<th class="px-6 py-3">Prompt Processing</th>
<th class="px-6 py-3">Generation Speed</th>
<th class="px-6 py-3">Duration</th>
<th class="px-6 py-3">Capture</th>
<div class="card overflow-auto">
<table class="min-w-full divide-y">
<thead class="border-gray-200 dark:border-white/10">
<tr class="text-left text-xs uppercase tracking-wider">
<th class="px-6 py-3">ID</th>
<th class="px-6 py-3">Time</th>
<th class="px-6 py-3">Model</th>
<th class="px-6 py-3">
Cached <Tooltip content="prompt tokens from cache" />
</th>
<th class="px-6 py-3">
Prompt <Tooltip content="new prompt tokens processed" />
</th>
<th class="px-6 py-3">Generated</th>
<th class="px-6 py-3">Prompt Processing</th>
<th class="px-6 py-3">Generation Speed</th>
<th class="px-6 py-3">Duration</th>
<th class="px-6 py-3">Capture</th>
</tr>
</thead>
<tbody class="divide-y">
{#if sortedMetrics.length === 0}
<tr>
<td colspan="10" class="px-6 py-8 text-center text-sm text-gray-500 dark:text-gray-400">
No activity recorded
</td>
</tr>
</thead>
<tbody class="divide-y">
{:else}
{#each sortedMetrics as metric (metric.id)}
<tr class="whitespace-nowrap text-sm border-gray-200 dark:border-white/10">
<td class="px-4 py-4">{metric.id + 1}</td>
@@ -116,10 +122,10 @@
</td>
</tr>
{/each}
</tbody>
</table>
</div>
{/if}
{/if}
</tbody>
</table>
</div>
</div>
<CaptureDialog capture={selectedCapture} open={dialogOpen} onclose={closeDialog} />
+1 -9
View File
@@ -2,7 +2,6 @@
import { isNarrow } from "../stores/theme";
import { upstreamLogs } from "../stores/api";
import ModelsPanel from "../components/ModelsPanel.svelte";
import StatsPanel from "../components/StatsPanel.svelte";
import LogPanel from "../components/LogPanel.svelte";
import ResizablePanels from "../components/ResizablePanels.svelte";
@@ -14,13 +13,6 @@
<ModelsPanel />
{/snippet}
{#snippet rightPanel()}
<div class="flex flex-col h-full space-y-4">
{#if direction === "horizontal"}
<StatsPanel />
{/if}
<div class="flex-1 min-h-0">
<LogPanel id="modelsupstream" title="Upstream Logs" logData={$upstreamLogs} />
</div>
</div>
<LogPanel id="modelsupstream" title="Upstream Logs" logData={$upstreamLogs} />
{/snippet}
</ResizablePanels>