proxy,ui: add performance monitoring with Prometheus metrics (#743)

Add a comprehensive performance monitoring system that collects CPU, memory, swap, load average, network IO, and GPU stats. It provides both a REST API for the UI and a Prometheus /metrics endpoint.

Backend changes:
- New internal/perf package with configurable interval-based stats collection
- GPU monitoring via LACT (Unix socket) and nvidia-smi fallback on Linux
- Ring buffer (internal/ring) for time-series stat storage
- Prometheus /metrics endpoint with all system and GPU metrics
- Moved LogMonitor to internal/logmon package
- New PerformanceConfig for hot-reloadable monitoring settings
- REST /api/performance endpoint replacing SSE streaming

UI changes:
- New Performance page with real-time charts for CPU, memory, GPU, and network
- Reusable PerformanceChart component
- LLAMA_SWAP_URL environment variable support
- Improved capture dialog display

Other:
- Example Grafana dashboard for Prometheus metrics
- monitor-test standalone binary
- Config schema and example updates

fixes #596
This commit is contained in:
Benson Wong
2026-05-09 13:29:22 -07:00
committed by GitHub
parent e261745c66
commit 7e3e94a08a
49 changed files with 4322 additions and 273 deletions
+13
View File
@@ -9,6 +9,7 @@ import (
"runtime"
"sort"
"strings"
"time"
"github.com/billziss-gh/golib/shlex"
"gopkg.in/yaml.v3"
@@ -124,6 +125,7 @@ type Config struct {
LogToStdout string `yaml:"logToStdout"`
MetricsMaxInMemory int `yaml:"metricsMaxInMemory"`
CaptureBuffer int `yaml:"captureBuffer"`
Performance PerformanceConfig `yaml:"performance"`
GlobalTTL int `yaml:"globalTTL"`
Models map[string]ModelConfig `yaml:"models"` /* key is model ID */
Profiles map[string][]string `yaml:"profiles"`
@@ -220,6 +222,17 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
config.HealthCheckTimeout = 15
}
// Apply defaults for performance config when section is missing
if !config.Performance.Enable && config.Performance.Every == 0 && config.Performance.MaxAge == 0 && config.Performance.GC == 0 {
config.Performance.Enable = true
config.Performance.Every = 15 * time.Second
config.Performance.MaxAge = 1 * time.Hour
config.Performance.GC = 5 * time.Minute
}
if err = config.Performance.Validate(); err != nil {
return Config{}, fmt.Errorf("performance: %w", err)
}
if config.StartPort < 1 {
return Config{}, fmt.Errorf("startPort must be greater than 1")
}
+7
View File
@@ -7,6 +7,7 @@ import (
"path/filepath"
"strings"
"testing"
"time"
"github.com/stretchr/testify/assert"
)
@@ -229,6 +230,12 @@ groups:
HealthCheckTimeout: 15,
MetricsMaxInMemory: 1000,
CaptureBuffer: 5,
Performance: PerformanceConfig{
Enable: true,
Every: 15 * time.Second,
MaxAge: 1 * time.Hour,
GC: 5 * time.Minute,
},
Profiles: map[string][]string{
"test": {"model1", "model2"},
},
+7
View File
@@ -7,6 +7,7 @@ import (
"path/filepath"
"strings"
"testing"
"time"
"github.com/stretchr/testify/assert"
)
@@ -218,6 +219,12 @@ groups:
HealthCheckTimeout: 15,
MetricsMaxInMemory: 1000,
CaptureBuffer: 5,
Performance: PerformanceConfig{
Enable: true,
Every: 15 * time.Second,
MaxAge: 1 * time.Hour,
GC: 5 * time.Minute,
},
Profiles: map[string][]string{
"test": {"model1", "model2"},
},
+45
View File
@@ -0,0 +1,45 @@
package config
import (
"fmt"
"time"
)
// PerformanceConfig carries the settings for system performance monitoring,
// decoded from the "performance" section of the YAML configuration.
type PerformanceConfig struct {
	// Enable switches monitoring on or off; it defaults to true when the
	// key is absent from a decoded section.
	Enable bool `yaml:"enable"`

	// Every must be at least one second (see Validate).
	// NOTE(review): presumably the sampling interval — confirm against the collector.
	Every time.Duration `yaml:"every"`

	// MaxAge must be positive (see Validate).
	// NOTE(review): presumably how long samples are retained — confirm.
	MaxAge time.Duration `yaml:"maxAge"`

	// GC must be positive (see Validate).
	// NOTE(review): presumably the interval between purges of old samples — confirm.
	GC time.Duration `yaml:"gc"`
}

// UnmarshalYAML decodes a performance section on top of the default values,
// so any key the section omits keeps its default (enable=true, every=15s,
// maxAge=1h, gc=5m). If decoding fails the receiver is left untouched and the
// decoder's error is returned as-is.
func (p *PerformanceConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
	// plain shares PerformanceConfig's fields but none of its methods, which
	// keeps the decoder from calling back into this method.
	type plain PerformanceConfig

	var decoded plain
	decoded.Enable = true
	decoded.Every = 15 * time.Second
	decoded.MaxAge = 1 * time.Hour
	decoded.GC = 5 * time.Minute

	err := unmarshal(&decoded)
	if err != nil {
		return err
	}

	*p = PerformanceConfig(decoded)
	return nil
}

// Validate reports the first out-of-range setting, checking every, maxAge
// and gc in that order. It returns nil when all three are acceptable. The
// Enable flag is not consulted.
func (p *PerformanceConfig) Validate() error {
	switch {
	case p.Every < time.Second:
		return fmt.Errorf("every must be at least 1s, got %v", p.Every)
	case p.MaxAge <= 0:
		return fmt.Errorf("maxAge must be greater than 0, got %v", p.MaxAge)
	case p.GC <= 0:
		return fmt.Errorf("gc must be greater than 0, got %v", p.GC)
	}
	return nil
}
+140
View File
@@ -0,0 +1,140 @@
package config
import (
"strings"
"testing"
"time"
"github.com/stretchr/testify/assert"
)
// TestPerformanceConfig_Defaults loads a config without a performance section
// and expects every performance setting to carry its default value.
func TestPerformanceConfig_Defaults(t *testing.T) {
	yamlDoc := `
models:
model1:
cmd: path/to/cmd --port ${PORT}
`
	check := assert.New(t)

	cfg, loadErr := LoadConfigFromReader(strings.NewReader(yamlDoc))
	check.NoError(loadErr)

	// No performance section was supplied, so all four fields should be defaults.
	perf := cfg.Performance
	check.True(perf.Enable)
	check.Equal(15*time.Second, perf.Every)
	check.Equal(1*time.Hour, perf.MaxAge)
	check.Equal(5*time.Minute, perf.GC)
}
// TestPerformanceConfig_CustomValues checks that explicitly configured
// performance settings are the ones reported after loading.
func TestPerformanceConfig_CustomValues(t *testing.T) {
	yamlDoc := `
performance:
enable: true
every: 30s
maxAge: 12h
gc: 10m
models:
model1:
cmd: path/to/cmd --port ${PORT}
`
	check := assert.New(t)

	cfg, loadErr := LoadConfigFromReader(strings.NewReader(yamlDoc))
	check.NoError(loadErr)

	perf := cfg.Performance
	check.True(perf.Enable)
	check.Equal(30*time.Second, perf.Every)
	check.Equal(12*time.Hour, perf.MaxAge)
	check.Equal(10*time.Minute, perf.GC)
}
// TestPerformanceConfig_Disabled checks that switching monitoring off leaves
// the duration settings at their defaults.
func TestPerformanceConfig_Disabled(t *testing.T) {
	yamlDoc := `
performance:
enable: false
models:
model1:
cmd: path/to/cmd --port ${PORT}
`
	check := assert.New(t)

	cfg, loadErr := LoadConfigFromReader(strings.NewReader(yamlDoc))
	check.NoError(loadErr)

	perf := cfg.Performance
	check.False(perf.Enable)

	// Only enable was overridden; the durations are expected to stay at defaults.
	check.Equal(15*time.Second, perf.Every)
	check.Equal(1*time.Hour, perf.MaxAge)
	check.Equal(5*time.Minute, perf.GC)
}
// TestPerformanceConfig_PartialValues sets only every and maxAge and expects
// the remaining settings (enable, gc) to take their defaults.
func TestPerformanceConfig_PartialValues(t *testing.T) {
	yamlDoc := `
performance:
every: 10s
maxAge: 6h
models:
model1:
cmd: path/to/cmd --port ${PORT}
`
	check := assert.New(t)

	cfg, loadErr := LoadConfigFromReader(strings.NewReader(yamlDoc))
	check.NoError(loadErr)

	perf := cfg.Performance

	// enable was not given, so it is expected to default to true.
	check.True(perf.Enable)
	check.Equal(10*time.Second, perf.Every)
	check.Equal(6*time.Hour, perf.MaxAge)

	// gc was not given, so it is expected to keep its default.
	check.Equal(5*time.Minute, perf.GC)
}
// TestPerformanceConfig_InvalidEvery expects loading to fail when every is
// set below one second.
func TestPerformanceConfig_InvalidEvery(t *testing.T) {
	yamlDoc := `
performance:
every: 500ms
models:
model1:
cmd: path/to/cmd --port ${PORT}
`
	check := assert.New(t)

	_, loadErr := LoadConfigFromReader(strings.NewReader(yamlDoc))
	check.Error(loadErr)
	check.Contains(loadErr.Error(), "every must be at least 1s")
}
// TestPerformanceConfig_InvalidMaxAge expects loading to fail when maxAge is
// set to zero.
func TestPerformanceConfig_InvalidMaxAge(t *testing.T) {
	yamlDoc := `
performance:
maxAge: 0s
models:
model1:
cmd: path/to/cmd --port ${PORT}
`
	check := assert.New(t)

	_, loadErr := LoadConfigFromReader(strings.NewReader(yamlDoc))
	check.Error(loadErr)
	check.Contains(loadErr.Error(), "maxAge must be greater than 0")
}
// TestPerformanceConfig_InvalidGC expects loading to fail when gc is set to
// zero.
func TestPerformanceConfig_InvalidGC(t *testing.T) {
	yamlDoc := `
performance:
gc: 0s
models:
model1:
cmd: path/to/cmd --port ${PORT}
`
	check := assert.New(t)

	_, loadErr := LoadConfigFromReader(strings.NewReader(yamlDoc))
	check.Error(loadErr)
	check.Contains(loadErr.Error(), "gc must be greater than 0")
}
// TestPerformanceConfig_ComplexDurations checks that multi-unit duration
// strings such as "1m30s" and "2h10m" are parsed to the expected values.
func TestPerformanceConfig_ComplexDurations(t *testing.T) {
	yamlDoc := `
performance:
every: 1m30s
maxAge: 2h10m
gc: 1m
models:
model1:
cmd: path/to/cmd --port ${PORT}
`
	check := assert.New(t)

	cfg, loadErr := LoadConfigFromReader(strings.NewReader(yamlDoc))
	check.NoError(loadErr)

	perf := cfg.Performance
	check.Equal(90*time.Second, perf.Every)
	check.Equal((2*time.Hour)+(10*time.Minute), perf.MaxAge)
	check.Equal(1*time.Minute, perf.GC)
}