proxy,ui: add performance monitoring with Prometheus metrics (#743)

Add a comprehensive performance monitoring system that collects CPU, memory, swap, load average, network I/O, and GPU stats. It provides both a REST API for the UI and a Prometheus /metrics endpoint.

Backend changes:
- New internal/perf package with configurable interval-based stats collection
- GPU monitoring via LACT (Unix socket) and nvidia-smi fallback on Linux
- Ring buffer (internal/ring) for time-series stat storage
- Prometheus /metrics endpoint with all system and GPU metrics
- Moved LogMonitor to internal/logmon package
- New PerformanceConfig for hot-reloadable monitoring settings
- REST /api/performance endpoint replacing SSE streaming

UI changes:
- New Performance page with real-time charts for CPU, memory, GPU, and network
- Reusable PerformanceChart component
- LLAMA_SWAP_URL environment variable support
- Improved capture dialog display

Other:
- Example Grafana dashboard for Prometheus metrics
- monitor-test standalone binary
- Config schema and example updates

fixes #596
This commit is contained in:
Benson Wong
2026-05-09 13:29:22 -07:00
committed by GitHub
parent e261745c66
commit 7e3e94a08a
49 changed files with 4322 additions and 273 deletions
+21 -8
View File
@@ -15,6 +15,8 @@ import (
"github.com/gin-gonic/gin"
"github.com/mostlygeek/llama-swap/event"
"github.com/mostlygeek/llama-swap/internal/logmon"
"github.com/mostlygeek/llama-swap/internal/perf"
"github.com/mostlygeek/llama-swap/proxy"
"github.com/mostlygeek/llama-swap/proxy/config"
"github.com/mostlygeek/llama-swap/proxy/configwatcher"
@@ -34,7 +36,7 @@ func main() {
keyFile := flag.String("tls-key-file", "", "TLS key file")
showVersion := flag.Bool("version", false, "show version of build")
watchConfig := flag.Bool("watch-config", false, "Automatically reload config file on change")
mainLogger := proxy.NewLogMonitor()
mainLogger := logmon.New()
flag.Parse() // Parse the command-line flags
@@ -45,7 +47,7 @@ func main() {
conf, err := config.LoadConfig(*configPath)
if err != nil {
mainLogger.Errorf("Error loading config: %", err)
mainLogger.Errorf("Error loading config: %v", err)
os.Exit(1)
}
@@ -55,15 +57,15 @@ func main() {
switch strings.ToLower(strings.TrimSpace(conf.LogLevel)) {
case "debug":
mainLogger.SetLogLevel(proxy.LevelDebug)
mainLogger.SetLogLevel(logmon.LevelDebug)
case "info":
mainLogger.SetLogLevel(proxy.LevelInfo)
mainLogger.SetLogLevel(logmon.LevelInfo)
case "warn":
mainLogger.SetLogLevel(proxy.LevelWarn)
mainLogger.SetLogLevel(logmon.LevelWarn)
case "error":
mainLogger.SetLogLevel(proxy.LevelError)
mainLogger.SetLogLevel(logmon.LevelError)
default:
mainLogger.SetLogLevel(proxy.LevelInfo)
mainLogger.SetLogLevel(logmon.LevelInfo)
}
mainLogger.Debugf("PID: %d", os.Getpid())
@@ -91,6 +93,13 @@ func main() {
listenStr = &defaultPort
}
mon, err := perf.New(conf.Performance, mainLogger)
if err != nil {
mainLogger.Errorf("failed to create monitor: %s", err.Error())
os.Exit(1)
}
mon.Start()
// Setup channels for server management
exitChan := make(chan struct{})
sigChan := make(chan os.Signal, 1)
@@ -121,8 +130,8 @@ func main() {
reloadMutex.Unlock()
}()
mainLogger.Info("Reloading Configuration")
if currentPM, ok := srv.Handler.(*proxy.ProxyManager); ok {
mainLogger.Info("Reloading Configuration")
conf, err = config.LoadConfig(*configPath)
if err != nil {
mainLogger.Warnf("Unable to reload configuration: %v", err)
@@ -131,8 +140,10 @@ func main() {
mainLogger.Debug("Configuration Changed")
currentPM.Shutdown()
mon.UpdateConfig(conf.Performance)
newPM := proxy.New(conf)
newPM.SetVersion(date, commit, version)
newPM.SetPerfMonitor(mon)
srv.Handler = newPM
mainLogger.Debug("Configuration Reloaded")
@@ -150,6 +161,7 @@ func main() {
}
newPM := proxy.New(conf)
newPM.SetVersion(date, commit, version)
newPM.SetPerfMonitor(mon)
srv.Handler = newPM
}
}
@@ -185,6 +197,7 @@ func main() {
reloadProxyManager()
case syscall.SIGINT, syscall.SIGTERM:
mainLogger.Debugf("Received signal %v, shutting down...", sig)
mon.Stop()
watcherCancel()
ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
defer cancel()