proxy,ui: add performance monitoring with Prometheus metrics (#743)

Add a comprehensive performance monitoring system that collects CPU, memory, swap, load average, network IO, and GPU stats. Provides both a REST API for the UI and a Prometheus /metrics endpoint.

Backend changes:
- New internal/perf package with configurable interval-based stats collection
- GPU monitoring via LACT (Unix socket) and nvidia-smi fallback on Linux
- Ring buffer (internal/ring) for time-series stat storage
- Prometheus /metrics endpoint with all system and GPU metrics
- Moved LogMonitor to internal/logmon package
- New PerformanceConfig for hot-reloadable monitoring settings
- REST /api/performance endpoint replacing SSE streaming

UI changes:
- New Performance page with real-time charts for CPU, memory, GPU, and network
- Reusable PerformanceChart component
- LLAMA_SWAP_URL environment variable support
- Improved capture dialog display

Other:
- Example Grafana dashboard for Prometheus metrics
- monitor-test standalone binary
- Config schema and example updates

fixes #596
This commit is contained in:
Benson Wong
2026-05-09 13:29:22 -07:00
committed by GitHub
parent e261745c66
commit 7e3e94a08a
49 changed files with 4322 additions and 273 deletions
+15
View File
@@ -7,6 +7,7 @@ import type {
APIEventEnvelope,
ReqRespCapture,
InFlightStats,
PerformanceResponse,
} from "../lib/types";
import { connectionState } from "./theme";
@@ -204,3 +205,17 @@ export async function getCapture(id: number): Promise<ReqRespCapture | null> {
return null;
}
}
export async function fetchPerformance(after?: string): Promise<PerformanceResponse | null> {
try {
const url = after ? `/api/performance?after=${encodeURIComponent(after)}` : "/api/performance";
const response = await fetch(url);
if (!response.ok) {
throw new Error(`HTTP error! status: ${response.status}`);
}
return await response.json();
} catch (error) {
console.error("Failed to fetch performance data:", error);
return null;
}
}