proxy,ui: add performance monitoring with Prometheus metrics (#743)
Add a comprehensive performance monitoring system that collects CPU, memory, swap, load average, network IO, and GPU stats. Provides both a REST API for the UI and a Prometheus /metrics endpoint. Backend changes: - New internal/perf package with configurable interval-based stats collection - GPU monitoring via LACT (Unix socket) and nvidia-smi fallback on Linux - Ring buffer (internal/ring) for time-series stat storage - Prometheus /metrics endpoint with all system and GPU metrics - Moved LogMonitor to internal/logmon package - New PerformanceConfig for hot-reloadable monitoring settings - REST /api/performance endpoint replacing SSE streaming UI changes: - New Performance page with real-time charts for CPU, memory, GPU, and network - Reusable PerformanceChart component - LLAMA_SWAP_URL environment variable support - Improved capture dialog display Other: - Example Grafana dashboard for Prometheus metrics - monitor-test standalone binary - Config schema and example updates fixes #596
This commit is contained in:
+21
-8
@@ -15,6 +15,8 @@ import (
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/mostlygeek/llama-swap/event"
|
||||
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||
"github.com/mostlygeek/llama-swap/internal/perf"
|
||||
"github.com/mostlygeek/llama-swap/proxy"
|
||||
"github.com/mostlygeek/llama-swap/proxy/config"
|
||||
"github.com/mostlygeek/llama-swap/proxy/configwatcher"
|
||||
@@ -34,7 +36,7 @@ func main() {
|
||||
keyFile := flag.String("tls-key-file", "", "TLS key file")
|
||||
showVersion := flag.Bool("version", false, "show version of build")
|
||||
watchConfig := flag.Bool("watch-config", false, "Automatically reload config file on change")
|
||||
mainLogger := proxy.NewLogMonitor()
|
||||
mainLogger := logmon.New()
|
||||
|
||||
flag.Parse() // Parse the command-line flags
|
||||
|
||||
@@ -45,7 +47,7 @@ func main() {
|
||||
|
||||
conf, err := config.LoadConfig(*configPath)
|
||||
if err != nil {
|
||||
mainLogger.Errorf("Error loading config: %", err)
|
||||
mainLogger.Errorf("Error loading config: %v", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
@@ -55,15 +57,15 @@ func main() {
|
||||
|
||||
switch strings.ToLower(strings.TrimSpace(conf.LogLevel)) {
|
||||
case "debug":
|
||||
mainLogger.SetLogLevel(proxy.LevelDebug)
|
||||
mainLogger.SetLogLevel(logmon.LevelDebug)
|
||||
case "info":
|
||||
mainLogger.SetLogLevel(proxy.LevelInfo)
|
||||
mainLogger.SetLogLevel(logmon.LevelInfo)
|
||||
case "warn":
|
||||
mainLogger.SetLogLevel(proxy.LevelWarn)
|
||||
mainLogger.SetLogLevel(logmon.LevelWarn)
|
||||
case "error":
|
||||
mainLogger.SetLogLevel(proxy.LevelError)
|
||||
mainLogger.SetLogLevel(logmon.LevelError)
|
||||
default:
|
||||
mainLogger.SetLogLevel(proxy.LevelInfo)
|
||||
mainLogger.SetLogLevel(logmon.LevelInfo)
|
||||
}
|
||||
|
||||
mainLogger.Debugf("PID: %d", os.Getpid())
|
||||
@@ -91,6 +93,13 @@ func main() {
|
||||
listenStr = &defaultPort
|
||||
}
|
||||
|
||||
mon, err := perf.New(conf.Performance, mainLogger)
|
||||
if err != nil {
|
||||
mainLogger.Errorf("failed to create monitor: %s", err.Error())
|
||||
os.Exit(1)
|
||||
}
|
||||
mon.Start()
|
||||
|
||||
// Setup channels for server management
|
||||
exitChan := make(chan struct{})
|
||||
sigChan := make(chan os.Signal, 1)
|
||||
@@ -121,8 +130,8 @@ func main() {
|
||||
reloadMutex.Unlock()
|
||||
}()
|
||||
|
||||
mainLogger.Info("Reloading Configuration")
|
||||
if currentPM, ok := srv.Handler.(*proxy.ProxyManager); ok {
|
||||
mainLogger.Info("Reloading Configuration")
|
||||
conf, err = config.LoadConfig(*configPath)
|
||||
if err != nil {
|
||||
mainLogger.Warnf("Unable to reload configuration: %v", err)
|
||||
@@ -131,8 +140,10 @@ func main() {
|
||||
|
||||
mainLogger.Debug("Configuration Changed")
|
||||
currentPM.Shutdown()
|
||||
mon.UpdateConfig(conf.Performance)
|
||||
newPM := proxy.New(conf)
|
||||
newPM.SetVersion(date, commit, version)
|
||||
newPM.SetPerfMonitor(mon)
|
||||
srv.Handler = newPM
|
||||
mainLogger.Debug("Configuration Reloaded")
|
||||
|
||||
@@ -150,6 +161,7 @@ func main() {
|
||||
}
|
||||
newPM := proxy.New(conf)
|
||||
newPM.SetVersion(date, commit, version)
|
||||
newPM.SetPerfMonitor(mon)
|
||||
srv.Handler = newPM
|
||||
}
|
||||
}
|
||||
@@ -185,6 +197,7 @@ func main() {
|
||||
reloadProxyManager()
|
||||
case syscall.SIGINT, syscall.SIGTERM:
|
||||
mainLogger.Debugf("Received signal %v, shutting down...", sig)
|
||||
mon.Stop()
|
||||
watcherCancel()
|
||||
ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
|
||||
defer cancel()
|
||||
|
||||
Reference in New Issue
Block a user