proxy,ui: add performance monitoring with Prometheus metrics (#743)

Add a comprehensive performance monitoring system that collects CPU, memory, swap, load average, network IO, and GPU stats. Provides both a REST API for the UI and a Prometheus /metrics endpoint.

Backend changes:
- New internal/perf package with configurable interval-based stats collection
- GPU monitoring via LACT (Unix socket) and nvidia-smi fallback on Linux
- Ring buffer (internal/ring) for time-series stat storage
- Prometheus /metrics endpoint with all system and GPU metrics
- Moved LogMonitor to internal/logmon package
- New PerformanceConfig for hot-reloadable monitoring settings
- REST /api/performance endpoint replacing SSE streaming

UI changes:
- New Performance page with real-time charts for CPU, memory, GPU, and network
- Reusable PerformanceChart component
- LLAMA_SWAP_URL environment variable support
- Improved capture dialog display

Other:
- Example Grafana dashboard for Prometheus metrics
- monitor-test standalone binary
- Config schema and example updates

fixes #596
This commit is contained in:
Benson Wong
2026-05-09 13:29:22 -07:00
committed by GitHub
parent e261745c66
commit 7e3e94a08a
49 changed files with 4322 additions and 273 deletions
+54 -1
View File
@@ -8,9 +8,11 @@ import (
"sort"
"strconv"
"strings"
"time"
"github.com/gin-gonic/gin"
"github.com/mostlygeek/llama-swap/event"
"github.com/mostlygeek/llama-swap/internal/perf"
)
type Model struct {
@@ -32,6 +34,7 @@ func addApiHandlers(pm *ProxyManager) {
apiGroup.POST("/models/unload/*model", pm.apiUnloadSingleModelHandler)
apiGroup.GET("/events", pm.apiSendEvents)
apiGroup.GET("/metrics", pm.apiGetMetrics)
apiGroup.GET("/performance", pm.apiGetPerformance)
apiGroup.GET("/version", pm.apiGetVersion)
apiGroup.GET("/captures/:id", pm.apiGetCapture)
}
@@ -247,6 +250,56 @@ func (pm *ProxyManager) apiGetMetrics(c *gin.Context) {
c.Data(http.StatusOK, "application/json", jsonData)
}
func (pm *ProxyManager) prometheusMetricsHandler(c *gin.Context) {
if pm.perfMonitor == nil {
c.String(http.StatusServiceUnavailable, "# performance monitor not available\n")
return
}
pm.perfMonitor.MetricsHandler().ServeHTTP(c.Writer, c.Request)
}
func (pm *ProxyManager) apiGetPerformance(c *gin.Context) {
if pm.perfMonitor == nil {
c.JSON(http.StatusServiceUnavailable, gin.H{"error": "performance monitor not available"})
return
}
sysStats, gpuStats := pm.perfMonitor.Current()
var after time.Time
if afterStr := c.Query("after"); afterStr != "" {
ts, err := time.Parse(time.RFC3339, afterStr)
if err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid 'after' timestamp, use RFC3339 format"})
return
}
after = ts
}
if !after.IsZero() {
filtered := make([]perf.SysStat, 0, len(sysStats))
for _, s := range sysStats {
if s.Timestamp.After(after) {
filtered = append(filtered, s)
}
}
sysStats = filtered
filteredGpu := make([]perf.GpuStat, 0, len(gpuStats))
for _, g := range gpuStats {
if g.Timestamp.After(after) {
filteredGpu = append(filteredGpu, g)
}
}
gpuStats = filteredGpu
}
c.JSON(http.StatusOK, gin.H{
"sys_stats": sysStats,
"gpu_stats": gpuStats,
})
}
func (pm *ProxyManager) apiUnloadSingleModelHandler(c *gin.Context) {
requestedModel := strings.TrimPrefix(c.Param("model"), "/")
realModelName, found := pm.config.RealModelName(requestedModel)
@@ -291,7 +344,7 @@ func (pm *ProxyManager) apiGetCapture(c *gin.Context) {
}
capture := pm.metricsMonitor.getCaptureByID(id)
if capture == nil {
if capture == nil || (capture.ReqPath == "" && capture.ReqHeaders == nil && capture.ReqBody == nil && capture.RespHeaders == nil && capture.RespBody == nil) {
c.JSON(http.StatusNotFound, gin.H{"error": "capture not found"})
return
}