proxy,ui: add performance monitoring with Prometheus metrics (#743)
Add a comprehensive performance monitoring system that collects CPU, memory, swap, load average, network IO, and GPU stats. It provides both a REST API for the UI and a Prometheus /metrics endpoint.

Backend changes:
- New internal/perf package with configurable interval-based stats collection
- GPU monitoring via LACT (Unix socket), with an nvidia-smi fallback on Linux
- Ring buffer (internal/ring) for time-series stat storage
- Prometheus /metrics endpoint with all system and GPU metrics
- Moved LogMonitor to the internal/logmon package
- New PerformanceConfig for hot-reloadable monitoring settings
- REST /api/performance endpoint replacing SSE streaming

UI changes:
- New Performance page with real-time charts for CPU, memory, GPU, and network
- Reusable PerformanceChart component
- LLAMA_SWAP_URL environment variable support
- Improved capture dialog display

Other:
- Example Grafana dashboard for Prometheus metrics
- monitor-test standalone binary
- Config schema and example updates

Fixes #596
This commit is contained in:
+46
-34
@@ -17,6 +17,8 @@ import (
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/mostlygeek/llama-swap/event"
|
||||
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||
"github.com/mostlygeek/llama-swap/internal/perf"
|
||||
"github.com/mostlygeek/llama-swap/proxy/config"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
@@ -69,11 +71,12 @@ type ProxyManager struct {
|
||||
ginEngine *gin.Engine
|
||||
|
||||
// logging
|
||||
proxyLogger *LogMonitor
|
||||
upstreamLogger *LogMonitor
|
||||
muxLogger *LogMonitor
|
||||
proxyLogger *logmon.Monitor
|
||||
upstreamLogger *logmon.Monitor
|
||||
muxLogger *logmon.Monitor
|
||||
|
||||
metricsMonitor *metricsMonitor
|
||||
perfMonitor *perf.Monitor
|
||||
|
||||
processGroups map[string]*ProcessGroup
|
||||
|
||||
@@ -98,27 +101,27 @@ type ProxyManager struct {
|
||||
func New(proxyConfig config.Config) *ProxyManager {
|
||||
// set up loggers
|
||||
|
||||
var muxLogger, upstreamLogger, proxyLogger *LogMonitor
|
||||
var muxLogger, upstreamLogger, proxyLogger *logmon.Monitor
|
||||
switch proxyConfig.LogToStdout {
|
||||
case config.LogToStdoutNone:
|
||||
muxLogger = NewLogMonitorWriter(io.Discard)
|
||||
upstreamLogger = NewLogMonitorWriter(io.Discard)
|
||||
proxyLogger = NewLogMonitorWriter(io.Discard)
|
||||
muxLogger = logmon.NewWriter(io.Discard)
|
||||
upstreamLogger = logmon.NewWriter(io.Discard)
|
||||
proxyLogger = logmon.NewWriter(io.Discard)
|
||||
case config.LogToStdoutBoth:
|
||||
muxLogger = NewLogMonitorWriter(os.Stdout)
|
||||
upstreamLogger = NewLogMonitorWriter(muxLogger)
|
||||
proxyLogger = NewLogMonitorWriter(muxLogger)
|
||||
muxLogger = logmon.NewWriter(os.Stdout)
|
||||
upstreamLogger = logmon.NewWriter(muxLogger)
|
||||
proxyLogger = logmon.NewWriter(muxLogger)
|
||||
case config.LogToStdoutUpstream:
|
||||
muxLogger = NewLogMonitorWriter(os.Stdout)
|
||||
upstreamLogger = NewLogMonitorWriter(muxLogger)
|
||||
proxyLogger = NewLogMonitorWriter(io.Discard)
|
||||
muxLogger = logmon.NewWriter(os.Stdout)
|
||||
upstreamLogger = logmon.NewWriter(muxLogger)
|
||||
proxyLogger = logmon.NewWriter(io.Discard)
|
||||
default:
|
||||
// same as config.LogToStdoutProxy
|
||||
// helpful because some old tests create a config.Config directly and it
|
||||
// may not have LogToStdout set explicitly
|
||||
muxLogger = NewLogMonitorWriter(os.Stdout)
|
||||
upstreamLogger = NewLogMonitorWriter(io.Discard)
|
||||
proxyLogger = NewLogMonitorWriter(muxLogger)
|
||||
muxLogger = logmon.NewWriter(os.Stdout)
|
||||
upstreamLogger = logmon.NewWriter(io.Discard)
|
||||
proxyLogger = logmon.NewWriter(muxLogger)
|
||||
}
|
||||
|
||||
if proxyConfig.LogRequests {
|
||||
@@ -127,20 +130,20 @@ func New(proxyConfig config.Config) *ProxyManager {
|
||||
|
||||
switch strings.ToLower(strings.TrimSpace(proxyConfig.LogLevel)) {
|
||||
case "debug":
|
||||
proxyLogger.SetLogLevel(LevelDebug)
|
||||
upstreamLogger.SetLogLevel(LevelDebug)
|
||||
proxyLogger.SetLogLevel(logmon.LevelDebug)
|
||||
upstreamLogger.SetLogLevel(logmon.LevelDebug)
|
||||
case "info":
|
||||
proxyLogger.SetLogLevel(LevelInfo)
|
||||
upstreamLogger.SetLogLevel(LevelInfo)
|
||||
proxyLogger.SetLogLevel(logmon.LevelInfo)
|
||||
upstreamLogger.SetLogLevel(logmon.LevelInfo)
|
||||
case "warn":
|
||||
proxyLogger.SetLogLevel(LevelWarn)
|
||||
upstreamLogger.SetLogLevel(LevelWarn)
|
||||
proxyLogger.SetLogLevel(logmon.LevelWarn)
|
||||
upstreamLogger.SetLogLevel(logmon.LevelWarn)
|
||||
case "error":
|
||||
proxyLogger.SetLogLevel(LevelError)
|
||||
upstreamLogger.SetLogLevel(LevelError)
|
||||
proxyLogger.SetLogLevel(logmon.LevelError)
|
||||
upstreamLogger.SetLogLevel(logmon.LevelError)
|
||||
default:
|
||||
proxyLogger.SetLogLevel(LevelInfo)
|
||||
upstreamLogger.SetLogLevel(LevelInfo)
|
||||
proxyLogger.SetLogLevel(logmon.LevelInfo)
|
||||
upstreamLogger.SetLogLevel(logmon.LevelInfo)
|
||||
}
|
||||
|
||||
// see: https://go.dev/src/time/format.go
|
||||
@@ -271,13 +274,17 @@ func (pm *ProxyManager) setupGinEngine() {
|
||||
|
||||
pm.ginEngine.Use(func(c *gin.Context) {
|
||||
|
||||
// don't log the Wake on Lan proxy health check
|
||||
if c.Request.URL.Path == "/wol-health" {
|
||||
c.Next()
|
||||
return
|
||||
for _, prefix := range []string{
|
||||
"/wol-health",
|
||||
"/api/performance",
|
||||
"/metrics",
|
||||
} {
|
||||
if strings.HasPrefix(c.Request.URL.Path, prefix) {
|
||||
c.Next()
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Start timer
|
||||
start := time.Now()
|
||||
|
||||
// capture these because /upstream/:model rewrites them in c.Next()
|
||||
@@ -285,12 +292,9 @@ func (pm *ProxyManager) setupGinEngine() {
|
||||
method := c.Request.Method
|
||||
path := c.Request.URL.Path
|
||||
|
||||
// Process request
|
||||
c.Next()
|
||||
|
||||
// Stop timer
|
||||
duration := time.Since(start)
|
||||
|
||||
statusCode := c.Writer.Status()
|
||||
bodySize := c.Writer.Size()
|
||||
|
||||
@@ -439,6 +443,8 @@ func (pm *ProxyManager) setupGinEngine() {
|
||||
c.String(http.StatusOK, "OK")
|
||||
})
|
||||
|
||||
pm.ginEngine.GET("/metrics", pm.prometheusMetricsHandler)
|
||||
|
||||
// see cmd/wol-proxy/wol-proxy.go, not logged
|
||||
pm.ginEngine.GET("/wol-health", func(c *gin.Context) {
|
||||
c.String(http.StatusOK, "OK")
|
||||
@@ -1218,3 +1224,9 @@ func (pm *ProxyManager) SetVersion(buildDate string, commit string, version stri
|
||||
pm.commit = commit
|
||||
pm.version = version
|
||||
}
|
||||
|
||||
func (pm *ProxyManager) SetPerfMonitor(m *perf.Monitor) {
|
||||
pm.Lock()
|
||||
defer pm.Unlock()
|
||||
pm.perfMonitor = m
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user