proxy,ui: add performance monitoring with Prometheus metrics (#743)

Add a comprehensive performance monitoring system that collects CPU, memory, swap, load average, network IO, and GPU stats. It provides both a REST API for the UI and a Prometheus /metrics endpoint.

Backend changes:
- New internal/perf package with configurable interval-based stats collection
- GPU monitoring via LACT (Unix socket) and nvidia-smi fallback on Linux
- Ring buffer (internal/ring) for time-series stat storage
- Prometheus /metrics endpoint with all system and GPU metrics
- Moved LogMonitor to internal/logmon package
- New PerformanceConfig for hot-reloadable monitoring settings
- REST /api/performance endpoint replacing SSE streaming

UI changes:
- New Performance page with real-time charts for CPU, memory, GPU, and network
- Reusable PerformanceChart component
- LLAMA_SWAP_URL environment variable support
- Improved capture dialog display

Other:
- Example Grafana dashboard for Prometheus metrics
- monitor-test standalone binary
- Config schema and example updates

fixes #596
This commit is contained in:
Benson Wong
2026-05-09 13:29:22 -07:00
committed by GitHub
parent e261745c66
commit 7e3e94a08a
49 changed files with 4322 additions and 273 deletions
+41 -1
View File
@@ -50,8 +50,48 @@ export interface InFlightStats {
total: number;
}
/**
 * Network IO counters for a single named entry (presumably one network
 * interface — confirm against the backend collector).
 *
 * NOTE(review): whether the byte counts are cumulative totals or
 * per-interval deltas is not visible from this declaration.
 */
export interface NetIOStat {
  /** Name identifying the entry these counters belong to. */
  name: string;
  /** Number of bytes received. */
  bytes_recv: number;
  /** Number of bytes sent. */
  bytes_sent: number;
}
/**
 * One sample of system-level performance stats: CPU, memory, swap,
 * load average and network IO.
 *
 * Units are taken from the field-name suffixes (`_mb`); confirm them
 * against the backend before relying on them for display.
 */
export interface SysStat {
  /** When the sample was taken. NOTE(review): string format not visible here. */
  timestamp: string;
  /** CPU utilisation, one entry per core. */
  cpu_util_per_core: number[];

  /** Total memory (presumably megabytes). */
  mem_total_mb: number;
  /** Used memory (presumably megabytes). */
  mem_used_mb: number;
  /** Free memory (presumably megabytes). */
  mem_free_mb: number;

  /** Total swap (presumably megabytes). */
  swap_total_mb: number;
  /** Used swap (presumably megabytes). */
  swap_used_mb: number;

  /** Load average, 1-minute window (per the field name). */
  load_avg_1: number;
  /** Load average, 5-minute window (per the field name). */
  load_avg_5: number;
  /** Load average, 15-minute window (per the field name). */
  load_avg_15: number;

  /** Network IO counters, one element per reported entry. */
  net_io: NetIOStat[];
}
/**
 * One sample of stats for a single GPU.
 *
 * Units are taken from the field-name suffixes (`_c`, `_pct`, `_mb`,
 * `_w`); confirm them against the backend before relying on them.
 */
export interface GpuStat {
  /** When the sample was taken. NOTE(review): string format not visible here. */
  timestamp: string;

  /** Numeric identifier of the GPU. */
  id: number;
  /** Name of the GPU. */
  name: string;
  /** UUID of the GPU. */
  uuid: string;

  /** GPU temperature (presumably degrees Celsius). */
  temp_c: number;
  /** VRAM temperature (presumably degrees Celsius). */
  vram_temp_c: number;

  /** GPU utilisation (presumably a percentage). */
  gpu_util_pct: number;
  /** Memory utilisation (presumably a percentage). */
  mem_util_pct: number;
  /** Memory in use (presumably megabytes). */
  mem_used_mb: number;
  /** Total memory (presumably megabytes). */
  mem_total_mb: number;

  /** Fan speed (presumably a percentage). */
  fan_speed_pct: number;
  /** Power draw (presumably watts). */
  power_draw_w: number;
}
/**
 * Collection of system and GPU stat samples returned together.
 *
 * NOTE(review): presumably the body of the REST performance endpoint;
 * ordering of the samples is not visible from this declaration.
 */
export interface PerformanceResponse {
  /** System-level samples. */
  sys_stats: SysStat[];
  /** GPU samples. */
  gpu_stats: GpuStat[];
}
/**
 * Envelope wrapping an API event.
 *
 * `type` identifies which kind of event this is; `data` carries the
 * event payload as a string.
 *
 * NOTE(review): the encoding of `data` (e.g. JSON) is not visible from
 * this declaration — confirm at the parse site.
 */
export interface APIEventEnvelope {
  // Declared exactly once: the stale, narrower declaration without
  // "perfsys" / "perfgpu" was a duplicate identifier and did not compile.
  /** Event kind discriminator. */
  type: "modelStatus" | "logData" | "metrics" | "inflight" | "perfsys" | "perfgpu";
  /** Event payload, as a string. */
  data: string;
}