proxy,ui: add performance monitoring with Prometheus metrics (#743)
Add a comprehensive performance monitoring system that collects CPU, memory, swap, load average, network IO, and GPU stats. Provides both a REST API for the UI and a Prometheus /metrics endpoint. Backend changes: - New internal/perf package with configurable interval-based stats collection - GPU monitoring via LACT (Unix socket) and nvidia-smi fallback on Linux - Ring buffer (internal/ring) for time-series stat storage - Prometheus /metrics endpoint with all system and GPU metrics - Moved LogMonitor to internal/logmon package - New PerformanceConfig for hot-reloadable monitoring settings - REST /api/performance endpoint replacing SSE streaming UI changes: - New Performance page with real-time charts for CPU, memory, GPU, and network - Reusable PerformanceChart component - LLAMA_SWAP_URL environment variable support - Improved capture dialog display Other: - Example Grafana dashboard for Prometheus metrics - monitor-test standalone binary - Config schema and example updates fixes #596
This commit is contained in:
@@ -8,9 +8,11 @@ import (
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/mostlygeek/llama-swap/event"
|
||||
"github.com/mostlygeek/llama-swap/internal/perf"
|
||||
)
|
||||
|
||||
type Model struct {
|
||||
@@ -32,6 +34,7 @@ func addApiHandlers(pm *ProxyManager) {
|
||||
apiGroup.POST("/models/unload/*model", pm.apiUnloadSingleModelHandler)
|
||||
apiGroup.GET("/events", pm.apiSendEvents)
|
||||
apiGroup.GET("/metrics", pm.apiGetMetrics)
|
||||
apiGroup.GET("/performance", pm.apiGetPerformance)
|
||||
apiGroup.GET("/version", pm.apiGetVersion)
|
||||
apiGroup.GET("/captures/:id", pm.apiGetCapture)
|
||||
}
|
||||
@@ -247,6 +250,56 @@ func (pm *ProxyManager) apiGetMetrics(c *gin.Context) {
|
||||
c.Data(http.StatusOK, "application/json", jsonData)
|
||||
}
|
||||
|
||||
func (pm *ProxyManager) prometheusMetricsHandler(c *gin.Context) {
|
||||
if pm.perfMonitor == nil {
|
||||
c.String(http.StatusServiceUnavailable, "# performance monitor not available\n")
|
||||
return
|
||||
}
|
||||
pm.perfMonitor.MetricsHandler().ServeHTTP(c.Writer, c.Request)
|
||||
}
|
||||
|
||||
func (pm *ProxyManager) apiGetPerformance(c *gin.Context) {
|
||||
if pm.perfMonitor == nil {
|
||||
c.JSON(http.StatusServiceUnavailable, gin.H{"error": "performance monitor not available"})
|
||||
return
|
||||
}
|
||||
|
||||
sysStats, gpuStats := pm.perfMonitor.Current()
|
||||
|
||||
var after time.Time
|
||||
if afterStr := c.Query("after"); afterStr != "" {
|
||||
ts, err := time.Parse(time.RFC3339, afterStr)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid 'after' timestamp, use RFC3339 format"})
|
||||
return
|
||||
}
|
||||
after = ts
|
||||
}
|
||||
|
||||
if !after.IsZero() {
|
||||
filtered := make([]perf.SysStat, 0, len(sysStats))
|
||||
for _, s := range sysStats {
|
||||
if s.Timestamp.After(after) {
|
||||
filtered = append(filtered, s)
|
||||
}
|
||||
}
|
||||
sysStats = filtered
|
||||
|
||||
filteredGpu := make([]perf.GpuStat, 0, len(gpuStats))
|
||||
for _, g := range gpuStats {
|
||||
if g.Timestamp.After(after) {
|
||||
filteredGpu = append(filteredGpu, g)
|
||||
}
|
||||
}
|
||||
gpuStats = filteredGpu
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"sys_stats": sysStats,
|
||||
"gpu_stats": gpuStats,
|
||||
})
|
||||
}
|
||||
|
||||
func (pm *ProxyManager) apiUnloadSingleModelHandler(c *gin.Context) {
|
||||
requestedModel := strings.TrimPrefix(c.Param("model"), "/")
|
||||
realModelName, found := pm.config.RealModelName(requestedModel)
|
||||
@@ -291,7 +344,7 @@ func (pm *ProxyManager) apiGetCapture(c *gin.Context) {
|
||||
}
|
||||
|
||||
capture := pm.metricsMonitor.getCaptureByID(id)
|
||||
if capture == nil {
|
||||
if capture == nil || (capture.ReqPath == "" && capture.ReqHeaders == nil && capture.ReqBody == nil && capture.RespHeaders == nil && capture.RespBody == nil) {
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": "capture not found"})
|
||||
return
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user