proxy,ui: add performance monitoring with Prometheus metrics (#743)

Add a comprehensive performance monitoring system that collects CPU, memory, swap, load average, network IO, and GPU stats. Provides both a REST API for the UI and a Prometheus /metrics endpoint. Backend changes: - New internal/perf package with configurable interval-based stats collection - GPU monitoring via LACT (Unix socket) and nvidia-smi fallback on Linux - Ring buffer (internal/ring) for time-series stat storage - Prometheus /metrics endpoint with all system and GPU metrics - Moved LogMonitor to internal/logmon package - New PerformanceConfig for hot-reloadable monitoring settings - REST /api/performance endpoint replacing SSE streaming UI changes: - New Performance page with real-time charts for CPU, memory, GPU, and network - Reusable PerformanceChart component - LLAMA_SWAP_URL environment variable support - Improved capture dialog display Other: - Example Grafana dashboard for Prometheus metrics - monitor-test standalone binary - Config schema and example updates fixes #596
2026-05-09 13:29:22 -07:00
parent e261745c66
commit 7e3e94a08a
49 changed files with 4322 additions and 273 deletions
@@ -8,9 +8,11 @@ import (
 	"sort"
 	"strconv"
 	"strings"
+	"time"

 	"github.com/gin-gonic/gin"
 	"github.com/mostlygeek/llama-swap/event"
+	"github.com/mostlygeek/llama-swap/internal/perf"
 )

 type Model struct {
@@ -32,6 +34,7 @@ func addApiHandlers(pm *ProxyManager) {
 		apiGroup.POST("/models/unload/*model", pm.apiUnloadSingleModelHandler)
 		apiGroup.GET("/events", pm.apiSendEvents)
 		apiGroup.GET("/metrics", pm.apiGetMetrics)
+		apiGroup.GET("/performance", pm.apiGetPerformance)
 		apiGroup.GET("/version", pm.apiGetVersion)
 		apiGroup.GET("/captures/:id", pm.apiGetCapture)
 	}
@@ -247,6 +250,56 @@ func (pm *ProxyManager) apiGetMetrics(c *gin.Context) {
 	c.Data(http.StatusOK, "application/json", jsonData)
 }

+func (pm *ProxyManager) prometheusMetricsHandler(c *gin.Context) {
+	if pm.perfMonitor == nil {
+		c.String(http.StatusServiceUnavailable, "# performance monitor not available\n")
+		return
+	}
+	pm.perfMonitor.MetricsHandler().ServeHTTP(c.Writer, c.Request)
+}
+
+func (pm *ProxyManager) apiGetPerformance(c *gin.Context) {
+	if pm.perfMonitor == nil {
+		c.JSON(http.StatusServiceUnavailable, gin.H{"error": "performance monitor not available"})
+		return
+	}
+
+	sysStats, gpuStats := pm.perfMonitor.Current()
+
+	var after time.Time
+	if afterStr := c.Query("after"); afterStr != "" {
+		ts, err := time.Parse(time.RFC3339, afterStr)
+		if err != nil {
+			c.JSON(http.StatusBadRequest, gin.H{"error": "invalid 'after' timestamp, use RFC3339 format"})
+			return
+		}
+		after = ts
+	}
+
+	if !after.IsZero() {
+		filtered := make([]perf.SysStat, 0, len(sysStats))
+		for _, s := range sysStats {
+			if s.Timestamp.After(after) {
+				filtered = append(filtered, s)
+			}
+		}
+		sysStats = filtered
+
+		filteredGpu := make([]perf.GpuStat, 0, len(gpuStats))
+		for _, g := range gpuStats {
+			if g.Timestamp.After(after) {
+				filteredGpu = append(filteredGpu, g)
+			}
+		}
+		gpuStats = filteredGpu
+	}
+
+	c.JSON(http.StatusOK, gin.H{
+		"sys_stats": sysStats,
+		"gpu_stats": gpuStats,
+	})
+}
+
 func (pm *ProxyManager) apiUnloadSingleModelHandler(c *gin.Context) {
 	requestedModel := strings.TrimPrefix(c.Param("model"), "/")
 	realModelName, found := pm.config.RealModelName(requestedModel)
@@ -291,7 +344,7 @@ func (pm *ProxyManager) apiGetCapture(c *gin.Context) {
 	}

 	capture := pm.metricsMonitor.getCaptureByID(id)
-	if capture == nil {
+	if capture == nil || (capture.ReqPath == "" && capture.ReqHeaders == nil && capture.ReqBody == nil && capture.RespHeaders == nil && capture.RespBody == nil) {
 		c.JSON(http.StatusNotFound, gin.H{"error": "capture not found"})
 		return
 	}