proxy,ui: add performance monitoring with Prometheus metrics (#743)
Add a comprehensive performance monitoring system that collects CPU, memory, swap, load average, network IO, and GPU stats. Provides both a REST API for the UI and a Prometheus /metrics endpoint. Backend changes: - New internal/perf package with configurable interval-based stats collection - GPU monitoring via LACT (Unix socket) and nvidia-smi fallback on Linux - Ring buffer (internal/ring) for time-series stat storage - Prometheus /metrics endpoint with all system and GPU metrics - Moved LogMonitor to internal/logmon package - New PerformanceConfig for hot-reloadable monitoring settings - REST /api/performance endpoint replacing SSE streaming UI changes: - New Performance page with real-time charts for CPU, memory, GPU, and network - Reusable PerformanceChart component - LLAMA_SWAP_URL environment variable support - Improved capture dialog display Other: - Example Grafana dashboard for Prometheus metrics - monitor-test standalone binary - Config schema and example updates fixes #596
This commit is contained in:
@@ -9,6 +9,7 @@ import (
|
||||
"runtime"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/billziss-gh/golib/shlex"
|
||||
"gopkg.in/yaml.v3"
|
||||
@@ -124,6 +125,7 @@ type Config struct {
|
||||
LogToStdout string `yaml:"logToStdout"`
|
||||
MetricsMaxInMemory int `yaml:"metricsMaxInMemory"`
|
||||
CaptureBuffer int `yaml:"captureBuffer"`
|
||||
Performance PerformanceConfig `yaml:"performance"`
|
||||
GlobalTTL int `yaml:"globalTTL"`
|
||||
Models map[string]ModelConfig `yaml:"models"` /* key is model ID */
|
||||
Profiles map[string][]string `yaml:"profiles"`
|
||||
@@ -220,6 +222,17 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
||||
config.HealthCheckTimeout = 15
|
||||
}
|
||||
|
||||
// Apply defaults for performance config when section is missing
|
||||
if !config.Performance.Enable && config.Performance.Every == 0 && config.Performance.MaxAge == 0 && config.Performance.GC == 0 {
|
||||
config.Performance.Enable = true
|
||||
config.Performance.Every = 15 * time.Second
|
||||
config.Performance.MaxAge = 1 * time.Hour
|
||||
config.Performance.GC = 5 * time.Minute
|
||||
}
|
||||
if err = config.Performance.Validate(); err != nil {
|
||||
return Config{}, fmt.Errorf("performance: %w", err)
|
||||
}
|
||||
|
||||
if config.StartPort < 1 {
|
||||
return Config{}, fmt.Errorf("startPort must be greater than 1")
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
@@ -229,6 +230,12 @@ groups:
|
||||
HealthCheckTimeout: 15,
|
||||
MetricsMaxInMemory: 1000,
|
||||
CaptureBuffer: 5,
|
||||
Performance: PerformanceConfig{
|
||||
Enable: true,
|
||||
Every: 15 * time.Second,
|
||||
MaxAge: 1 * time.Hour,
|
||||
GC: 5 * time.Minute,
|
||||
},
|
||||
Profiles: map[string][]string{
|
||||
"test": {"model1", "model2"},
|
||||
},
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
@@ -218,6 +219,12 @@ groups:
|
||||
HealthCheckTimeout: 15,
|
||||
MetricsMaxInMemory: 1000,
|
||||
CaptureBuffer: 5,
|
||||
Performance: PerformanceConfig{
|
||||
Enable: true,
|
||||
Every: 15 * time.Second,
|
||||
MaxAge: 1 * time.Hour,
|
||||
GC: 5 * time.Minute,
|
||||
},
|
||||
Profiles: map[string][]string{
|
||||
"test": {"model1", "model2"},
|
||||
},
|
||||
|
||||
@@ -0,0 +1,45 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
)
|
||||
|
||||
// PerformanceConfig holds configuration for system performance monitoring
|
||||
type PerformanceConfig struct {
|
||||
Enable bool `yaml:"enable"`
|
||||
Every time.Duration `yaml:"every"`
|
||||
MaxAge time.Duration `yaml:"maxAge"`
|
||||
GC time.Duration `yaml:"gc"`
|
||||
}
|
||||
|
||||
func (p *PerformanceConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
type rawPerformanceConfig PerformanceConfig
|
||||
defaults := rawPerformanceConfig{
|
||||
Enable: true,
|
||||
Every: 15 * time.Second,
|
||||
MaxAge: 1 * time.Hour,
|
||||
GC: 5 * time.Minute,
|
||||
}
|
||||
|
||||
if err := unmarshal(&defaults); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
*p = PerformanceConfig(defaults)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Validate checks the PerformanceConfig values and returns an error if invalid
|
||||
func (p *PerformanceConfig) Validate() error {
|
||||
if p.Every < time.Second {
|
||||
return fmt.Errorf("every must be at least 1s, got %v", p.Every)
|
||||
}
|
||||
if p.MaxAge <= 0 {
|
||||
return fmt.Errorf("maxAge must be greater than 0, got %v", p.MaxAge)
|
||||
}
|
||||
if p.GC <= 0 {
|
||||
return fmt.Errorf("gc must be greater than 0, got %v", p.GC)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,140 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestPerformanceConfig_Defaults(t *testing.T) {
|
||||
content := `
|
||||
models:
|
||||
model1:
|
||||
cmd: path/to/cmd --port ${PORT}
|
||||
`
|
||||
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
assert.NoError(t, err)
|
||||
|
||||
// When performance section is missing, defaults should be applied
|
||||
assert.True(t, config.Performance.Enable)
|
||||
assert.Equal(t, 15*time.Second, config.Performance.Every)
|
||||
assert.Equal(t, 1*time.Hour, config.Performance.MaxAge)
|
||||
assert.Equal(t, 5*time.Minute, config.Performance.GC)
|
||||
}
|
||||
|
||||
func TestPerformanceConfig_CustomValues(t *testing.T) {
|
||||
content := `
|
||||
performance:
|
||||
enable: true
|
||||
every: 30s
|
||||
maxAge: 12h
|
||||
gc: 10m
|
||||
models:
|
||||
model1:
|
||||
cmd: path/to/cmd --port ${PORT}
|
||||
`
|
||||
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
assert.NoError(t, err)
|
||||
|
||||
assert.True(t, config.Performance.Enable)
|
||||
assert.Equal(t, 30*time.Second, config.Performance.Every)
|
||||
assert.Equal(t, 12*time.Hour, config.Performance.MaxAge)
|
||||
assert.Equal(t, 10*time.Minute, config.Performance.GC)
|
||||
}
|
||||
|
||||
func TestPerformanceConfig_Disabled(t *testing.T) {
|
||||
content := `
|
||||
performance:
|
||||
enable: false
|
||||
models:
|
||||
model1:
|
||||
cmd: path/to/cmd --port ${PORT}
|
||||
`
|
||||
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
assert.NoError(t, err)
|
||||
|
||||
assert.False(t, config.Performance.Enable)
|
||||
// Duration defaults should still apply
|
||||
assert.Equal(t, 15*time.Second, config.Performance.Every)
|
||||
assert.Equal(t, 1*time.Hour, config.Performance.MaxAge)
|
||||
assert.Equal(t, 5*time.Minute, config.Performance.GC)
|
||||
}
|
||||
|
||||
func TestPerformanceConfig_PartialValues(t *testing.T) {
|
||||
content := `
|
||||
performance:
|
||||
every: 10s
|
||||
maxAge: 6h
|
||||
models:
|
||||
model1:
|
||||
cmd: path/to/cmd --port ${PORT}
|
||||
`
|
||||
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
assert.NoError(t, err)
|
||||
|
||||
// enable should default to true
|
||||
assert.True(t, config.Performance.Enable)
|
||||
assert.Equal(t, 10*time.Second, config.Performance.Every)
|
||||
assert.Equal(t, 6*time.Hour, config.Performance.MaxAge)
|
||||
// gc should use default
|
||||
assert.Equal(t, 5*time.Minute, config.Performance.GC)
|
||||
}
|
||||
|
||||
func TestPerformanceConfig_InvalidEvery(t *testing.T) {
|
||||
content := `
|
||||
performance:
|
||||
every: 500ms
|
||||
models:
|
||||
model1:
|
||||
cmd: path/to/cmd --port ${PORT}
|
||||
`
|
||||
_, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
assert.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "every must be at least 1s")
|
||||
}
|
||||
|
||||
func TestPerformanceConfig_InvalidMaxAge(t *testing.T) {
|
||||
content := `
|
||||
performance:
|
||||
maxAge: 0s
|
||||
models:
|
||||
model1:
|
||||
cmd: path/to/cmd --port ${PORT}
|
||||
`
|
||||
_, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
assert.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "maxAge must be greater than 0")
|
||||
}
|
||||
|
||||
func TestPerformanceConfig_InvalidGC(t *testing.T) {
|
||||
content := `
|
||||
performance:
|
||||
gc: 0s
|
||||
models:
|
||||
model1:
|
||||
cmd: path/to/cmd --port ${PORT}
|
||||
`
|
||||
_, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
assert.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "gc must be greater than 0")
|
||||
}
|
||||
|
||||
func TestPerformanceConfig_ComplexDurations(t *testing.T) {
|
||||
content := `
|
||||
performance:
|
||||
every: 1m30s
|
||||
maxAge: 2h10m
|
||||
gc: 1m
|
||||
models:
|
||||
model1:
|
||||
cmd: path/to/cmd --port ${PORT}
|
||||
`
|
||||
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
assert.NoError(t, err)
|
||||
|
||||
assert.Equal(t, 90*time.Second, config.Performance.Every)
|
||||
assert.Equal(t, (2*time.Hour)+(10*time.Minute), config.Performance.MaxAge)
|
||||
assert.Equal(t, 1*time.Minute, config.Performance.GC)
|
||||
}
|
||||
@@ -5,7 +5,6 @@ package proxy
|
||||
const ProcessStateChangeEventID = 0x01
|
||||
const ChatCompletionStatsEventID = 0x02
|
||||
const ConfigFileChangedEventID = 0x03
|
||||
const LogDataEventID = 0x04
|
||||
const ActivityLogEventID = 0x05
|
||||
const ModelPreloadedEventID = 0x06
|
||||
const InFlightRequestsEventID = 0x07
|
||||
@@ -43,14 +42,6 @@ func (e ConfigFileChangedEvent) Type() uint32 {
|
||||
return ConfigFileChangedEventID
|
||||
}
|
||||
|
||||
type LogDataEvent struct {
|
||||
Data []byte
|
||||
}
|
||||
|
||||
func (e LogDataEvent) Type() uint32 {
|
||||
return LogDataEventID
|
||||
}
|
||||
|
||||
type ModelPreloadedEvent struct {
|
||||
ModelName string
|
||||
Success bool
|
||||
|
||||
@@ -15,6 +15,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||
"github.com/mostlygeek/llama-swap/proxy/config"
|
||||
"github.com/stretchr/testify/require"
|
||||
"github.com/tidwall/gjson"
|
||||
@@ -24,7 +25,7 @@ import (
|
||||
var (
|
||||
nextTestPort int = 12000
|
||||
portMutex sync.Mutex
|
||||
testLogger = NewLogMonitorWriter(os.Stdout)
|
||||
testLogger = logmon.NewWriter(os.Stdout)
|
||||
simpleResponderPath = getSimpleResponderPath()
|
||||
)
|
||||
|
||||
@@ -40,13 +41,13 @@ func TestMain(m *testing.M) {
|
||||
|
||||
switch os.Getenv("LOG_LEVEL") {
|
||||
case "debug":
|
||||
testLogger.SetLogLevel(LevelDebug)
|
||||
testLogger.SetLogLevel(logmon.LevelDebug)
|
||||
case "warn":
|
||||
testLogger.SetLogLevel(LevelWarn)
|
||||
testLogger.SetLogLevel(logmon.LevelWarn)
|
||||
case "info":
|
||||
testLogger.SetLogLevel(LevelInfo)
|
||||
testLogger.SetLogLevel(logmon.LevelInfo)
|
||||
default:
|
||||
testLogger.SetLogLevel(LevelWarn)
|
||||
testLogger.SetLogLevel(logmon.LevelWarn)
|
||||
}
|
||||
|
||||
m.Run()
|
||||
|
||||
@@ -1,269 +0,0 @@
|
||||
package proxy
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/mostlygeek/llama-swap/event"
|
||||
)
|
||||
|
||||
// circularBuffer is a fixed-size circular byte buffer that overwrites
|
||||
// oldest data when full. It provides O(1) writes and O(n) reads.
|
||||
type circularBuffer struct {
|
||||
data []byte // pre-allocated capacity
|
||||
head int // next write position
|
||||
size int // current number of bytes stored (0 to cap)
|
||||
}
|
||||
|
||||
func newCircularBuffer(capacity int) *circularBuffer {
|
||||
return &circularBuffer{
|
||||
data: make([]byte, capacity),
|
||||
head: 0,
|
||||
size: 0,
|
||||
}
|
||||
}
|
||||
|
||||
// Write appends bytes to the buffer, overwriting oldest data when full.
|
||||
// Data is copied into the internal buffer (not stored by reference).
|
||||
func (cb *circularBuffer) Write(p []byte) {
|
||||
if len(p) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
cap := len(cb.data)
|
||||
|
||||
// If input is larger than capacity, only keep the last cap bytes
|
||||
if len(p) >= cap {
|
||||
copy(cb.data, p[len(p)-cap:])
|
||||
cb.head = 0
|
||||
cb.size = cap
|
||||
return
|
||||
}
|
||||
|
||||
// Calculate how much space is available from head to end of buffer
|
||||
firstPart := cap - cb.head
|
||||
if firstPart >= len(p) {
|
||||
// All data fits without wrapping
|
||||
copy(cb.data[cb.head:], p)
|
||||
cb.head = (cb.head + len(p)) % cap
|
||||
} else {
|
||||
// Data wraps around
|
||||
copy(cb.data[cb.head:], p[:firstPart])
|
||||
copy(cb.data[:len(p)-firstPart], p[firstPart:])
|
||||
cb.head = len(p) - firstPart
|
||||
}
|
||||
|
||||
// Update size
|
||||
cb.size += len(p)
|
||||
if cb.size > cap {
|
||||
cb.size = cap
|
||||
}
|
||||
}
|
||||
|
||||
// GetHistory returns all buffered data in correct order (oldest to newest).
|
||||
// Returns a new slice (copy), not a view into internal buffer.
|
||||
func (cb *circularBuffer) GetHistory() []byte {
|
||||
if cb.size == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
result := make([]byte, cb.size)
|
||||
cap := len(cb.data)
|
||||
|
||||
// Calculate start position (oldest data)
|
||||
start := (cb.head - cb.size + cap) % cap
|
||||
|
||||
if start+cb.size <= cap {
|
||||
// Data is contiguous, single copy
|
||||
copy(result, cb.data[start:start+cb.size])
|
||||
} else {
|
||||
// Data wraps around, two copies
|
||||
firstPart := cap - start
|
||||
copy(result[:firstPart], cb.data[start:])
|
||||
copy(result[firstPart:], cb.data[:cb.size-firstPart])
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
type LogLevel int
|
||||
|
||||
const (
|
||||
LevelDebug LogLevel = iota
|
||||
LevelInfo
|
||||
LevelWarn
|
||||
LevelError
|
||||
|
||||
LogBufferSize = 100 * 1024
|
||||
)
|
||||
|
||||
type LogMonitor struct {
|
||||
eventbus *event.Dispatcher
|
||||
mu sync.RWMutex
|
||||
buffer *circularBuffer
|
||||
bufferMu sync.RWMutex
|
||||
|
||||
// typically this can be os.Stdout
|
||||
stdout io.Writer
|
||||
|
||||
// logging levels
|
||||
level LogLevel
|
||||
prefix string
|
||||
|
||||
// timestamps
|
||||
timeFormat string
|
||||
}
|
||||
|
||||
func NewLogMonitor() *LogMonitor {
|
||||
return NewLogMonitorWriter(os.Stdout)
|
||||
}
|
||||
|
||||
func NewLogMonitorWriter(stdout io.Writer) *LogMonitor {
|
||||
return &LogMonitor{
|
||||
eventbus: event.NewDispatcherConfig(1000),
|
||||
buffer: nil, // lazy initialized on first Write
|
||||
stdout: stdout,
|
||||
level: LevelInfo,
|
||||
prefix: "",
|
||||
timeFormat: "",
|
||||
}
|
||||
}
|
||||
|
||||
func (w *LogMonitor) Write(p []byte) (n int, err error) {
|
||||
if len(p) == 0 {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
n, err = w.stdout.Write(p)
|
||||
if err != nil {
|
||||
return n, err
|
||||
}
|
||||
|
||||
w.bufferMu.Lock()
|
||||
if w.buffer == nil {
|
||||
w.buffer = newCircularBuffer(LogBufferSize)
|
||||
}
|
||||
w.buffer.Write(p)
|
||||
w.bufferMu.Unlock()
|
||||
|
||||
// Make a copy for broadcast to preserve immutability
|
||||
bufferCopy := make([]byte, len(p))
|
||||
copy(bufferCopy, p)
|
||||
w.broadcast(bufferCopy)
|
||||
return n, nil
|
||||
}
|
||||
|
||||
func (w *LogMonitor) GetHistory() []byte {
|
||||
w.bufferMu.RLock()
|
||||
defer w.bufferMu.RUnlock()
|
||||
if w.buffer == nil {
|
||||
return nil
|
||||
}
|
||||
return w.buffer.GetHistory()
|
||||
}
|
||||
|
||||
// Clear releases the buffer memory, making it eligible for GC.
|
||||
// The buffer will be lazily re-allocated on the next Write.
|
||||
func (w *LogMonitor) Clear() {
|
||||
w.bufferMu.Lock()
|
||||
w.buffer = nil
|
||||
w.bufferMu.Unlock()
|
||||
}
|
||||
|
||||
func (w *LogMonitor) OnLogData(callback func(data []byte)) context.CancelFunc {
|
||||
return event.Subscribe(w.eventbus, func(e LogDataEvent) {
|
||||
callback(e.Data)
|
||||
})
|
||||
}
|
||||
|
||||
func (w *LogMonitor) broadcast(msg []byte) {
|
||||
event.Publish(w.eventbus, LogDataEvent{Data: msg})
|
||||
}
|
||||
|
||||
func (w *LogMonitor) SetPrefix(prefix string) {
|
||||
w.mu.Lock()
|
||||
defer w.mu.Unlock()
|
||||
w.prefix = prefix
|
||||
}
|
||||
|
||||
func (w *LogMonitor) SetLogLevel(level LogLevel) {
|
||||
w.mu.Lock()
|
||||
defer w.mu.Unlock()
|
||||
w.level = level
|
||||
}
|
||||
|
||||
func (w *LogMonitor) SetLogTimeFormat(timeFormat string) {
|
||||
w.mu.Lock()
|
||||
defer w.mu.Unlock()
|
||||
w.timeFormat = timeFormat
|
||||
}
|
||||
|
||||
func (w *LogMonitor) formatMessage(level string, msg string) []byte {
|
||||
prefix := ""
|
||||
if w.prefix != "" {
|
||||
prefix = fmt.Sprintf("[%s] ", w.prefix)
|
||||
}
|
||||
timestamp := ""
|
||||
if w.timeFormat != "" {
|
||||
timestamp = fmt.Sprintf("%s ", time.Now().Format(w.timeFormat))
|
||||
}
|
||||
return []byte(fmt.Sprintf("%s%s[%s] %s\n", timestamp, prefix, level, msg))
|
||||
}
|
||||
|
||||
func (w *LogMonitor) log(level LogLevel, msg string) {
|
||||
if level < w.level {
|
||||
return
|
||||
}
|
||||
w.Write(w.formatMessage(level.String(), msg))
|
||||
}
|
||||
|
||||
func (w *LogMonitor) Debug(msg string) {
|
||||
w.log(LevelDebug, msg)
|
||||
}
|
||||
|
||||
func (w *LogMonitor) Info(msg string) {
|
||||
w.log(LevelInfo, msg)
|
||||
}
|
||||
|
||||
func (w *LogMonitor) Warn(msg string) {
|
||||
w.log(LevelWarn, msg)
|
||||
}
|
||||
|
||||
func (w *LogMonitor) Error(msg string) {
|
||||
w.log(LevelError, msg)
|
||||
}
|
||||
|
||||
func (w *LogMonitor) Debugf(format string, args ...interface{}) {
|
||||
w.log(LevelDebug, fmt.Sprintf(format, args...))
|
||||
}
|
||||
|
||||
func (w *LogMonitor) Infof(format string, args ...interface{}) {
|
||||
w.log(LevelInfo, fmt.Sprintf(format, args...))
|
||||
}
|
||||
|
||||
func (w *LogMonitor) Warnf(format string, args ...interface{}) {
|
||||
w.log(LevelWarn, fmt.Sprintf(format, args...))
|
||||
}
|
||||
|
||||
func (w *LogMonitor) Errorf(format string, args ...interface{}) {
|
||||
w.log(LevelError, fmt.Sprintf(format, args...))
|
||||
}
|
||||
|
||||
func (l LogLevel) String() string {
|
||||
switch l {
|
||||
case LevelDebug:
|
||||
return "DEBUG"
|
||||
case LevelInfo:
|
||||
return "INFO"
|
||||
case LevelWarn:
|
||||
return "WARN"
|
||||
case LevelError:
|
||||
return "ERROR"
|
||||
default:
|
||||
return "UNKNOWN"
|
||||
}
|
||||
}
|
||||
@@ -1,316 +0,0 @@
|
||||
package proxy
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestLogMonitor(t *testing.T) {
|
||||
logMonitor := NewLogMonitorWriter(io.Discard)
|
||||
|
||||
// A WaitGroup is used to wait for all the expected writes to complete
|
||||
var wg sync.WaitGroup
|
||||
|
||||
client1Messages := make([]byte, 0)
|
||||
client2Messages := make([]byte, 0)
|
||||
|
||||
defer logMonitor.OnLogData(func(data []byte) {
|
||||
client1Messages = append(client1Messages, data...)
|
||||
wg.Done()
|
||||
})()
|
||||
|
||||
defer logMonitor.OnLogData(func(data []byte) {
|
||||
client2Messages = append(client2Messages, data...)
|
||||
wg.Done()
|
||||
})()
|
||||
|
||||
wg.Add(6) // 2 x 3 writes
|
||||
|
||||
logMonitor.Write([]byte("1"))
|
||||
logMonitor.Write([]byte("2"))
|
||||
logMonitor.Write([]byte("3"))
|
||||
|
||||
// wait for all writes to complete
|
||||
wg.Wait()
|
||||
|
||||
// Check the buffer
|
||||
expectedHistory := "123"
|
||||
history := string(logMonitor.GetHistory())
|
||||
|
||||
if history != expectedHistory {
|
||||
t.Errorf("Expected history: %s, got: %s", expectedHistory, history)
|
||||
}
|
||||
|
||||
c1Data := string(client1Messages)
|
||||
if c1Data != expectedHistory {
|
||||
t.Errorf("Client1 expected %s, got: %s", expectedHistory, c1Data)
|
||||
}
|
||||
|
||||
c2Data := string(client2Messages)
|
||||
if c2Data != expectedHistory {
|
||||
t.Errorf("Client2 expected %s, got: %s", expectedHistory, c2Data)
|
||||
}
|
||||
}
|
||||
|
||||
func TestWrite_ImmutableBuffer(t *testing.T) {
|
||||
// Create a new LogMonitor instance
|
||||
lm := NewLogMonitorWriter(io.Discard)
|
||||
|
||||
// Prepare a message to write
|
||||
msg := []byte("Hello, World!")
|
||||
lenmsg := len(msg)
|
||||
|
||||
// Write the message to the LogMonitor
|
||||
n, err := lm.Write(msg)
|
||||
if err != nil {
|
||||
t.Fatalf("Write failed: %v", err)
|
||||
}
|
||||
|
||||
if n != lenmsg {
|
||||
t.Errorf("Expected %d bytes written but got %d", lenmsg, n)
|
||||
}
|
||||
|
||||
// Change the original message
|
||||
msg[0] = 'B' // This should not affect the buffer
|
||||
|
||||
// Get the history from the LogMonitor
|
||||
history := lm.GetHistory()
|
||||
|
||||
// Check that the history contains the original message, not the modified one
|
||||
expected := []byte("Hello, World!")
|
||||
if !bytes.Equal(history, expected) {
|
||||
t.Errorf("Expected history to be %q, got %q", expected, history)
|
||||
}
|
||||
}
|
||||
|
||||
func TestWrite_LogTimeFormat(t *testing.T) {
|
||||
// Create a new LogMonitor instance
|
||||
lm := NewLogMonitorWriter(io.Discard)
|
||||
|
||||
// Enable timestamps
|
||||
lm.timeFormat = time.RFC3339
|
||||
|
||||
// Write the message to the LogMonitor
|
||||
lm.Info("Hello, World!")
|
||||
|
||||
// Get the history from the LogMonitor
|
||||
history := lm.GetHistory()
|
||||
|
||||
timestamp := ""
|
||||
fields := strings.Fields(string(history))
|
||||
if len(fields) > 0 {
|
||||
timestamp = fields[0]
|
||||
} else {
|
||||
t.Fatalf("Cannot extract string from history")
|
||||
}
|
||||
|
||||
_, err := time.Parse(time.RFC3339, timestamp)
|
||||
if err != nil {
|
||||
t.Fatalf("Cannot find timestamp: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCircularBuffer_WrapAround(t *testing.T) {
|
||||
// Create a small buffer to test wrap-around
|
||||
cb := newCircularBuffer(10)
|
||||
|
||||
// Write "hello" (5 bytes)
|
||||
cb.Write([]byte("hello"))
|
||||
if got := string(cb.GetHistory()); got != "hello" {
|
||||
t.Errorf("Expected 'hello', got %q", got)
|
||||
}
|
||||
|
||||
// Write "world" (5 bytes) - buffer now full
|
||||
cb.Write([]byte("world"))
|
||||
if got := string(cb.GetHistory()); got != "helloworld" {
|
||||
t.Errorf("Expected 'helloworld', got %q", got)
|
||||
}
|
||||
|
||||
// Write "12345" (5 bytes) - should overwrite "hello"
|
||||
cb.Write([]byte("12345"))
|
||||
if got := string(cb.GetHistory()); got != "world12345" {
|
||||
t.Errorf("Expected 'world12345', got %q", got)
|
||||
}
|
||||
|
||||
// Write data larger than buffer capacity
|
||||
cb.Write([]byte("abcdefghijklmnop")) // 16 bytes, only last 10 kept
|
||||
if got := string(cb.GetHistory()); got != "ghijklmnop" {
|
||||
t.Errorf("Expected 'ghijklmnop', got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCircularBuffer_BoundaryConditions(t *testing.T) {
|
||||
// Test empty buffer
|
||||
cb := newCircularBuffer(10)
|
||||
if got := cb.GetHistory(); got != nil {
|
||||
t.Errorf("Expected nil for empty buffer, got %q", got)
|
||||
}
|
||||
|
||||
// Test exact capacity
|
||||
cb.Write([]byte("1234567890"))
|
||||
if got := string(cb.GetHistory()); got != "1234567890" {
|
||||
t.Errorf("Expected '1234567890', got %q", got)
|
||||
}
|
||||
|
||||
// Test write exactly at capacity boundary
|
||||
cb = newCircularBuffer(10)
|
||||
cb.Write([]byte("12345"))
|
||||
cb.Write([]byte("67890"))
|
||||
if got := string(cb.GetHistory()); got != "1234567890" {
|
||||
t.Errorf("Expected '1234567890', got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLogMonitor_LazyInit(t *testing.T) {
|
||||
lm := NewLogMonitorWriter(io.Discard)
|
||||
|
||||
// Buffer should be nil before any writes
|
||||
if lm.buffer != nil {
|
||||
t.Error("Expected buffer to be nil before first write")
|
||||
}
|
||||
|
||||
// GetHistory should return nil when buffer is nil
|
||||
if got := lm.GetHistory(); got != nil {
|
||||
t.Errorf("Expected nil history before first write, got %q", got)
|
||||
}
|
||||
|
||||
// Write should lazily initialize the buffer
|
||||
lm.Write([]byte("test"))
|
||||
|
||||
if lm.buffer == nil {
|
||||
t.Error("Expected buffer to be initialized after write")
|
||||
}
|
||||
|
||||
if got := string(lm.GetHistory()); got != "test" {
|
||||
t.Errorf("Expected 'test', got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLogMonitor_Clear(t *testing.T) {
|
||||
lm := NewLogMonitorWriter(io.Discard)
|
||||
|
||||
// Write some data
|
||||
lm.Write([]byte("hello"))
|
||||
if got := string(lm.GetHistory()); got != "hello" {
|
||||
t.Errorf("Expected 'hello', got %q", got)
|
||||
}
|
||||
|
||||
// Clear should release the buffer
|
||||
lm.Clear()
|
||||
|
||||
if lm.buffer != nil {
|
||||
t.Error("Expected buffer to be nil after Clear")
|
||||
}
|
||||
|
||||
if got := lm.GetHistory(); got != nil {
|
||||
t.Errorf("Expected nil history after Clear, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLogMonitor_ClearAndReuse(t *testing.T) {
|
||||
lm := NewLogMonitorWriter(io.Discard)
|
||||
|
||||
// Write, clear, then write again
|
||||
lm.Write([]byte("first"))
|
||||
lm.Clear()
|
||||
lm.Write([]byte("second"))
|
||||
|
||||
if got := string(lm.GetHistory()); got != "second" {
|
||||
t.Errorf("Expected 'second' after clear and reuse, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkLogMonitorWrite(b *testing.B) {
|
||||
// Test data of varying sizes
|
||||
smallMsg := []byte("small message\n")
|
||||
mediumMsg := []byte(strings.Repeat("medium message content ", 10) + "\n")
|
||||
largeMsg := []byte(strings.Repeat("large message content for benchmarking ", 100) + "\n")
|
||||
|
||||
b.Run("SmallWrite", func(b *testing.B) {
|
||||
lm := NewLogMonitorWriter(io.Discard)
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
lm.Write(smallMsg)
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("MediumWrite", func(b *testing.B) {
|
||||
lm := NewLogMonitorWriter(io.Discard)
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
lm.Write(mediumMsg)
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("LargeWrite", func(b *testing.B) {
|
||||
lm := NewLogMonitorWriter(io.Discard)
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
lm.Write(largeMsg)
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("WithSubscribers", func(b *testing.B) {
|
||||
lm := NewLogMonitorWriter(io.Discard)
|
||||
// Add some subscribers
|
||||
for i := 0; i < 5; i++ {
|
||||
lm.OnLogData(func(data []byte) {})
|
||||
}
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
lm.Write(mediumMsg)
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("GetHistory", func(b *testing.B) {
|
||||
lm := NewLogMonitorWriter(io.Discard)
|
||||
// Pre-populate with data
|
||||
for i := 0; i < 1000; i++ {
|
||||
lm.Write(mediumMsg)
|
||||
}
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
lm.GetHistory()
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/*
|
||||
Benchmark Results - MBP M1 Pro
|
||||
|
||||
Before (ring.Ring):
|
||||
| Benchmark | ns/op | bytes/op | allocs/op |
|
||||
|---------------------------------|------------|----------|-----------|
|
||||
| SmallWrite (14B) | 43 ns | 40 B | 2 |
|
||||
| MediumWrite (241B) | 76 ns | 264 B | 2 |
|
||||
| LargeWrite (4KB) | 504 ns | 4,120 B | 2 |
|
||||
| WithSubscribers (5 subs) | 355 ns | 264 B | 2 |
|
||||
| GetHistory (after 1000 writes) | 145,000 ns | 1.2 MB | 22 |
|
||||
|
||||
After (circularBuffer 10KB):
|
||||
| Benchmark | ns/op | bytes/op | allocs/op |
|
||||
|---------------------------------|------------|----------|-----------|
|
||||
| SmallWrite (14B) | 26 ns | 16 B | 1 |
|
||||
| MediumWrite (241B) | 67 ns | 240 B | 1 |
|
||||
| LargeWrite (4KB) | 774 ns | 4,096 B | 1 |
|
||||
| WithSubscribers (5 subs) | 325 ns | 240 B | 1 |
|
||||
| GetHistory (after 1000 writes) | 1,042 ns | 10,240 B | 1 |
|
||||
|
||||
After (circularBuffer 100KB):
|
||||
| Benchmark | ns/op | bytes/op | allocs/op |
|
||||
|---------------------------------|------------|-----------|-----------|
|
||||
| SmallWrite (14B) | 26 ns | 16 B | 1 |
|
||||
| MediumWrite (241B) | 66 ns | 240 B | 1 |
|
||||
| LargeWrite (4KB) | 753 ns | 4,096 B | 1 |
|
||||
| WithSubscribers (5 subs) | 309 ns | 240 B | 1 |
|
||||
| GetHistory (after 1000 writes) | 7,788 ns | 106,496 B | 1 |
|
||||
|
||||
Summary:
|
||||
- GetHistory: 139x faster (10KB), 18x faster (100KB)
|
||||
- Allocations: reduced from 2 to 1 across all operations
|
||||
- Small/medium writes: ~1.1-1.6x faster
|
||||
*/
|
||||
+5
-4
@@ -7,6 +7,7 @@ import (
|
||||
"sort"
|
||||
"sync"
|
||||
|
||||
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||
"github.com/mostlygeek/llama-swap/proxy/config"
|
||||
)
|
||||
|
||||
@@ -145,8 +146,8 @@ type Matrix struct {
|
||||
solver *MatrixSolver
|
||||
processes map[string]*Process // all processes keyed by real model name
|
||||
config config.Config
|
||||
proxyLogger *LogMonitor
|
||||
upstreamLogger *LogMonitor
|
||||
proxyLogger *logmon.Monitor
|
||||
upstreamLogger *logmon.Monitor
|
||||
|
||||
// inflight tracks ProxyRequest calls that have released m.Lock but may
|
||||
// not yet have incremented Process.inFlightRequests. A concurrent
|
||||
@@ -165,10 +166,10 @@ type Matrix struct {
|
||||
|
||||
// NewMatrix creates a Matrix from config. It creates a Process for every
|
||||
// model defined in the config (any model can run alone even if not in a set).
|
||||
func NewMatrix(cfg config.Config, proxyLogger, upstreamLogger *LogMonitor) *Matrix {
|
||||
func NewMatrix(cfg config.Config, proxyLogger, upstreamLogger *logmon.Monitor) *Matrix {
|
||||
processes := make(map[string]*Process)
|
||||
for modelID, modelConfig := range cfg.Models {
|
||||
processLogger := NewLogMonitorWriter(upstreamLogger)
|
||||
processLogger := logmon.NewWriter(upstreamLogger)
|
||||
process := NewProcess(modelID, cfg.HealthCheckTimeout, modelConfig, processLogger, proxyLogger)
|
||||
processes[modelID] = process
|
||||
}
|
||||
|
||||
+27
-26
@@ -16,6 +16,8 @@ import (
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/klauspost/compress/zstd"
|
||||
"github.com/mostlygeek/llama-swap/event"
|
||||
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||
"github.com/mostlygeek/llama-swap/internal/ring"
|
||||
"github.com/mostlygeek/llama-swap/proxy/cache"
|
||||
"github.com/tidwall/gjson"
|
||||
)
|
||||
@@ -113,11 +115,10 @@ func (e ActivityLogEvent) Type() uint32 {
|
||||
|
||||
// metricsMonitor parses llama-server output for token statistics
|
||||
type metricsMonitor struct {
|
||||
mu sync.RWMutex
|
||||
metrics []ActivityLogEntry
|
||||
maxMetrics int
|
||||
nextID int
|
||||
logger *LogMonitor
|
||||
mu sync.RWMutex
|
||||
metrics ring.Buffer[ActivityLogEntry]
|
||||
nextID int
|
||||
logger *logmon.Monitor
|
||||
|
||||
// capture fields
|
||||
enableCaptures bool
|
||||
@@ -126,10 +127,10 @@ type metricsMonitor struct {
|
||||
|
||||
// newMetricsMonitor creates a new metricsMonitor. captureBufferMB is the
|
||||
// capture buffer size in megabytes; 0 disables captures.
|
||||
func newMetricsMonitor(logger *LogMonitor, maxMetrics int, captureBufferMB int) *metricsMonitor {
|
||||
func newMetricsMonitor(logger *logmon.Monitor, maxMetrics int, captureBufferMB int) *metricsMonitor {
|
||||
mm := &metricsMonitor{
|
||||
logger: logger,
|
||||
maxMetrics: maxMetrics,
|
||||
metrics: ring.NewBuffer[ActivityLogEntry](maxMetrics),
|
||||
enableCaptures: captureBufferMB > 0,
|
||||
}
|
||||
if captureBufferMB > 0 {
|
||||
@@ -146,10 +147,7 @@ func (mp *metricsMonitor) queueMetrics(metric ActivityLogEntry) int {
|
||||
|
||||
metric.ID = mp.nextID
|
||||
mp.nextID++
|
||||
mp.metrics = append(mp.metrics, metric)
|
||||
if len(mp.metrics) > mp.maxMetrics {
|
||||
mp.metrics = mp.metrics[len(mp.metrics)-mp.maxMetrics:]
|
||||
}
|
||||
mp.metrics.Push(metric)
|
||||
return metric.ID
|
||||
}
|
||||
|
||||
@@ -213,30 +211,36 @@ func (mp *metricsMonitor) getCaptureByID(id int) *ReqRespCapture {
|
||||
return capture
|
||||
}
|
||||
|
||||
// getMetrics returns a copy of the current metrics
|
||||
// getMetrics returns a copy of the current metrics with HasCapture resolved from cache.
|
||||
func (mp *metricsMonitor) getMetrics() []ActivityLogEntry {
|
||||
mp.mu.RLock()
|
||||
defer mp.mu.RUnlock()
|
||||
|
||||
result := make([]ActivityLogEntry, len(mp.metrics))
|
||||
copy(result, mp.metrics)
|
||||
result := mp.metrics.Slice()
|
||||
if result == nil {
|
||||
return []ActivityLogEntry{}
|
||||
}
|
||||
if mp.captureCache != nil {
|
||||
for i := range result {
|
||||
result[i].HasCapture = mp.captureCache.Has(result[i].ID)
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// getMetricsJSON returns metrics as JSON
|
||||
// getMetricsJSON returns metrics as JSON with HasCapture resolved from cache.
|
||||
func (mp *metricsMonitor) getMetricsJSON() ([]byte, error) {
|
||||
mp.mu.RLock()
|
||||
defer mp.mu.RUnlock()
|
||||
|
||||
if mp.captureCache == nil {
|
||||
return json.Marshal(mp.metrics)
|
||||
result := mp.metrics.Slice()
|
||||
if result == nil {
|
||||
return json.Marshal([]ActivityLogEntry{})
|
||||
}
|
||||
|
||||
// Make a copy with up-to-date has_capture from cache
|
||||
result := make([]ActivityLogEntry, len(mp.metrics))
|
||||
for i, m := range mp.metrics {
|
||||
m.HasCapture = mp.captureCache.Has(m.ID)
|
||||
result[i] = m
|
||||
if mp.captureCache != nil {
|
||||
for i := range result {
|
||||
result[i].HasCapture = mp.captureCache.Has(result[i].ID)
|
||||
}
|
||||
}
|
||||
return json.Marshal(result)
|
||||
}
|
||||
@@ -412,9 +416,6 @@ func (mp *metricsMonitor) wrapHandler(
|
||||
capture.ID = metricID
|
||||
if mp.addCapture(*capture) {
|
||||
tm.HasCapture = true
|
||||
mp.mu.Lock()
|
||||
mp.metrics[len(mp.metrics)-1].HasCapture = true
|
||||
mp.mu.Unlock()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
+2
-1
@@ -10,6 +10,7 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||
"github.com/mostlygeek/llama-swap/proxy/config"
|
||||
)
|
||||
|
||||
@@ -24,7 +25,7 @@ type PeerProxy struct {
|
||||
proxyMap map[string]*peerProxyMember
|
||||
}
|
||||
|
||||
func NewPeerProxy(peers config.PeerDictionaryConfig, proxyLogger *LogMonitor) (*PeerProxy, error) {
|
||||
func NewPeerProxy(peers config.PeerDictionaryConfig, proxyLogger *logmon.Monitor) (*PeerProxy, error) {
|
||||
proxyMap := make(map[string]*peerProxyMember)
|
||||
|
||||
// Sort peer IDs for consistent iteration order
|
||||
|
||||
+6
-5
@@ -18,6 +18,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/mostlygeek/llama-swap/event"
|
||||
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||
"github.com/mostlygeek/llama-swap/proxy/config"
|
||||
)
|
||||
|
||||
@@ -53,8 +54,8 @@ type Process struct {
|
||||
// closed when command exits
|
||||
cmdWaitChan chan struct{}
|
||||
|
||||
processLogger *LogMonitor
|
||||
proxyLogger *LogMonitor
|
||||
processLogger *logmon.Monitor
|
||||
proxyLogger *logmon.Monitor
|
||||
|
||||
healthCheckTimeout int
|
||||
healthCheckLoopInterval time.Duration
|
||||
@@ -84,7 +85,7 @@ type Process struct {
|
||||
failedStartCount int
|
||||
}
|
||||
|
||||
func NewProcess(ID string, healthCheckTimeout int, config config.ModelConfig, processLogger *LogMonitor, proxyLogger *LogMonitor) *Process {
|
||||
func NewProcess(ID string, healthCheckTimeout int, config config.ModelConfig, processLogger *logmon.Monitor, proxyLogger *logmon.Monitor) *Process {
|
||||
concurrentLimit := 10
|
||||
if config.ConcurrencyLimit > 0 {
|
||||
concurrentLimit = config.ConcurrencyLimit
|
||||
@@ -149,7 +150,7 @@ func NewProcess(ID string, healthCheckTimeout int, config config.ModelConfig, pr
|
||||
}
|
||||
|
||||
// LogMonitor returns the log monitor associated with the process.
|
||||
func (p *Process) LogMonitor() *LogMonitor {
|
||||
func (p *Process) LogMonitor() *logmon.Monitor {
|
||||
return p.processLogger
|
||||
}
|
||||
|
||||
@@ -726,7 +727,7 @@ func (p *Process) cmdStopUpstreamProcess() error {
|
||||
}
|
||||
|
||||
// Logger returns the logger for this process.
|
||||
func (p *Process) Logger() *LogMonitor {
|
||||
func (p *Process) Logger() *logmon.Monitor {
|
||||
return p.processLogger
|
||||
}
|
||||
|
||||
|
||||
@@ -11,20 +11,21 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||
"github.com/mostlygeek/llama-swap/proxy/config"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
var (
|
||||
debugLogger = NewLogMonitorWriter(os.Stdout)
|
||||
debugLogger = logmon.NewWriter(os.Stdout)
|
||||
)
|
||||
|
||||
func init() {
|
||||
// flip to help with debugging tests
|
||||
if false {
|
||||
debugLogger.SetLogLevel(LevelDebug)
|
||||
debugLogger.SetLogLevel(logmon.LevelDebug)
|
||||
} else {
|
||||
debugLogger.SetLogLevel(LevelError)
|
||||
debugLogger.SetLogLevel(logmon.LevelError)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -585,7 +586,7 @@ func TestProcess_CustomTimeouts(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
debugLogger := NewLogMonitorWriter(io.Discard)
|
||||
debugLogger := logmon.NewWriter(io.Discard)
|
||||
process := NewProcess("test-model", 30, modelConfig, debugLogger, debugLogger)
|
||||
|
||||
// Verify the process was created successfully
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"slices"
|
||||
"sync"
|
||||
|
||||
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||
"github.com/mostlygeek/llama-swap/proxy/config"
|
||||
)
|
||||
|
||||
@@ -18,8 +19,8 @@ type ProcessGroup struct {
|
||||
exclusive bool
|
||||
persistent bool
|
||||
|
||||
proxyLogger *LogMonitor
|
||||
upstreamLogger *LogMonitor
|
||||
proxyLogger *logmon.Monitor
|
||||
upstreamLogger *logmon.Monitor
|
||||
|
||||
// map of current processes
|
||||
processes map[string]*Process
|
||||
@@ -42,7 +43,7 @@ type ProcessGroup struct {
|
||||
testDelayFastPath func()
|
||||
}
|
||||
|
||||
func NewProcessGroup(id string, config config.Config, proxyLogger *LogMonitor, upstreamLogger *LogMonitor) *ProcessGroup {
|
||||
func NewProcessGroup(id string, config config.Config, proxyLogger *logmon.Monitor, upstreamLogger *logmon.Monitor) *ProcessGroup {
|
||||
groupConfig, ok := config.Groups[id]
|
||||
if !ok {
|
||||
panic("Unable to find configuration for group id: " + id)
|
||||
@@ -62,7 +63,7 @@ func NewProcessGroup(id string, config config.Config, proxyLogger *LogMonitor, u
|
||||
// Create a Process for each member in the group
|
||||
for _, modelID := range groupConfig.Members {
|
||||
modelConfig, modelID, _ := pg.config.FindConfig(modelID)
|
||||
processLogger := NewLogMonitorWriter(upstreamLogger)
|
||||
processLogger := logmon.NewWriter(upstreamLogger)
|
||||
process := NewProcess(modelID, pg.config.HealthCheckTimeout, modelConfig, processLogger, pg.proxyLogger)
|
||||
pg.processes[modelID] = process
|
||||
}
|
||||
|
||||
+46
-34
@@ -17,6 +17,8 @@ import (
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/mostlygeek/llama-swap/event"
|
||||
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||
"github.com/mostlygeek/llama-swap/internal/perf"
|
||||
"github.com/mostlygeek/llama-swap/proxy/config"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
@@ -69,11 +71,12 @@ type ProxyManager struct {
|
||||
ginEngine *gin.Engine
|
||||
|
||||
// logging
|
||||
proxyLogger *LogMonitor
|
||||
upstreamLogger *LogMonitor
|
||||
muxLogger *LogMonitor
|
||||
proxyLogger *logmon.Monitor
|
||||
upstreamLogger *logmon.Monitor
|
||||
muxLogger *logmon.Monitor
|
||||
|
||||
metricsMonitor *metricsMonitor
|
||||
perfMonitor *perf.Monitor
|
||||
|
||||
processGroups map[string]*ProcessGroup
|
||||
|
||||
@@ -98,27 +101,27 @@ type ProxyManager struct {
|
||||
func New(proxyConfig config.Config) *ProxyManager {
|
||||
// set up loggers
|
||||
|
||||
var muxLogger, upstreamLogger, proxyLogger *LogMonitor
|
||||
var muxLogger, upstreamLogger, proxyLogger *logmon.Monitor
|
||||
switch proxyConfig.LogToStdout {
|
||||
case config.LogToStdoutNone:
|
||||
muxLogger = NewLogMonitorWriter(io.Discard)
|
||||
upstreamLogger = NewLogMonitorWriter(io.Discard)
|
||||
proxyLogger = NewLogMonitorWriter(io.Discard)
|
||||
muxLogger = logmon.NewWriter(io.Discard)
|
||||
upstreamLogger = logmon.NewWriter(io.Discard)
|
||||
proxyLogger = logmon.NewWriter(io.Discard)
|
||||
case config.LogToStdoutBoth:
|
||||
muxLogger = NewLogMonitorWriter(os.Stdout)
|
||||
upstreamLogger = NewLogMonitorWriter(muxLogger)
|
||||
proxyLogger = NewLogMonitorWriter(muxLogger)
|
||||
muxLogger = logmon.NewWriter(os.Stdout)
|
||||
upstreamLogger = logmon.NewWriter(muxLogger)
|
||||
proxyLogger = logmon.NewWriter(muxLogger)
|
||||
case config.LogToStdoutUpstream:
|
||||
muxLogger = NewLogMonitorWriter(os.Stdout)
|
||||
upstreamLogger = NewLogMonitorWriter(muxLogger)
|
||||
proxyLogger = NewLogMonitorWriter(io.Discard)
|
||||
muxLogger = logmon.NewWriter(os.Stdout)
|
||||
upstreamLogger = logmon.NewWriter(muxLogger)
|
||||
proxyLogger = logmon.NewWriter(io.Discard)
|
||||
default:
|
||||
// same as config.LogToStdoutProxy
|
||||
// helpful because some old tests create a config.Config directly and it
|
||||
// may not have LogToStdout set explicitly
|
||||
muxLogger = NewLogMonitorWriter(os.Stdout)
|
||||
upstreamLogger = NewLogMonitorWriter(io.Discard)
|
||||
proxyLogger = NewLogMonitorWriter(muxLogger)
|
||||
muxLogger = logmon.NewWriter(os.Stdout)
|
||||
upstreamLogger = logmon.NewWriter(io.Discard)
|
||||
proxyLogger = logmon.NewWriter(muxLogger)
|
||||
}
|
||||
|
||||
if proxyConfig.LogRequests {
|
||||
@@ -127,20 +130,20 @@ func New(proxyConfig config.Config) *ProxyManager {
|
||||
|
||||
switch strings.ToLower(strings.TrimSpace(proxyConfig.LogLevel)) {
|
||||
case "debug":
|
||||
proxyLogger.SetLogLevel(LevelDebug)
|
||||
upstreamLogger.SetLogLevel(LevelDebug)
|
||||
proxyLogger.SetLogLevel(logmon.LevelDebug)
|
||||
upstreamLogger.SetLogLevel(logmon.LevelDebug)
|
||||
case "info":
|
||||
proxyLogger.SetLogLevel(LevelInfo)
|
||||
upstreamLogger.SetLogLevel(LevelInfo)
|
||||
proxyLogger.SetLogLevel(logmon.LevelInfo)
|
||||
upstreamLogger.SetLogLevel(logmon.LevelInfo)
|
||||
case "warn":
|
||||
proxyLogger.SetLogLevel(LevelWarn)
|
||||
upstreamLogger.SetLogLevel(LevelWarn)
|
||||
proxyLogger.SetLogLevel(logmon.LevelWarn)
|
||||
upstreamLogger.SetLogLevel(logmon.LevelWarn)
|
||||
case "error":
|
||||
proxyLogger.SetLogLevel(LevelError)
|
||||
upstreamLogger.SetLogLevel(LevelError)
|
||||
proxyLogger.SetLogLevel(logmon.LevelError)
|
||||
upstreamLogger.SetLogLevel(logmon.LevelError)
|
||||
default:
|
||||
proxyLogger.SetLogLevel(LevelInfo)
|
||||
upstreamLogger.SetLogLevel(LevelInfo)
|
||||
proxyLogger.SetLogLevel(logmon.LevelInfo)
|
||||
upstreamLogger.SetLogLevel(logmon.LevelInfo)
|
||||
}
|
||||
|
||||
// see: https://go.dev/src/time/format.go
|
||||
@@ -271,13 +274,17 @@ func (pm *ProxyManager) setupGinEngine() {
|
||||
|
||||
pm.ginEngine.Use(func(c *gin.Context) {
|
||||
|
||||
// don't log the Wake on Lan proxy health check
|
||||
if c.Request.URL.Path == "/wol-health" {
|
||||
c.Next()
|
||||
return
|
||||
for _, prefix := range []string{
|
||||
"/wol-health",
|
||||
"/api/performance",
|
||||
"/metrics",
|
||||
} {
|
||||
if strings.HasPrefix(c.Request.URL.Path, prefix) {
|
||||
c.Next()
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Start timer
|
||||
start := time.Now()
|
||||
|
||||
// capture these because /upstream/:model rewrites them in c.Next()
|
||||
@@ -285,12 +292,9 @@ func (pm *ProxyManager) setupGinEngine() {
|
||||
method := c.Request.Method
|
||||
path := c.Request.URL.Path
|
||||
|
||||
// Process request
|
||||
c.Next()
|
||||
|
||||
// Stop timer
|
||||
duration := time.Since(start)
|
||||
|
||||
statusCode := c.Writer.Status()
|
||||
bodySize := c.Writer.Size()
|
||||
|
||||
@@ -439,6 +443,8 @@ func (pm *ProxyManager) setupGinEngine() {
|
||||
c.String(http.StatusOK, "OK")
|
||||
})
|
||||
|
||||
pm.ginEngine.GET("/metrics", pm.prometheusMetricsHandler)
|
||||
|
||||
// see cmd/wol-proxy/wol-proxy.go, not logged
|
||||
pm.ginEngine.GET("/wol-health", func(c *gin.Context) {
|
||||
c.String(http.StatusOK, "OK")
|
||||
@@ -1218,3 +1224,9 @@ func (pm *ProxyManager) SetVersion(buildDate string, commit string, version stri
|
||||
pm.commit = commit
|
||||
pm.version = version
|
||||
}
|
||||
|
||||
func (pm *ProxyManager) SetPerfMonitor(m *perf.Monitor) {
|
||||
pm.Lock()
|
||||
defer pm.Unlock()
|
||||
pm.perfMonitor = m
|
||||
}
|
||||
|
||||
@@ -8,9 +8,11 @@ import (
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/mostlygeek/llama-swap/event"
|
||||
"github.com/mostlygeek/llama-swap/internal/perf"
|
||||
)
|
||||
|
||||
type Model struct {
|
||||
@@ -32,6 +34,7 @@ func addApiHandlers(pm *ProxyManager) {
|
||||
apiGroup.POST("/models/unload/*model", pm.apiUnloadSingleModelHandler)
|
||||
apiGroup.GET("/events", pm.apiSendEvents)
|
||||
apiGroup.GET("/metrics", pm.apiGetMetrics)
|
||||
apiGroup.GET("/performance", pm.apiGetPerformance)
|
||||
apiGroup.GET("/version", pm.apiGetVersion)
|
||||
apiGroup.GET("/captures/:id", pm.apiGetCapture)
|
||||
}
|
||||
@@ -247,6 +250,56 @@ func (pm *ProxyManager) apiGetMetrics(c *gin.Context) {
|
||||
c.Data(http.StatusOK, "application/json", jsonData)
|
||||
}
|
||||
|
||||
func (pm *ProxyManager) prometheusMetricsHandler(c *gin.Context) {
|
||||
if pm.perfMonitor == nil {
|
||||
c.String(http.StatusServiceUnavailable, "# performance monitor not available\n")
|
||||
return
|
||||
}
|
||||
pm.perfMonitor.MetricsHandler().ServeHTTP(c.Writer, c.Request)
|
||||
}
|
||||
|
||||
func (pm *ProxyManager) apiGetPerformance(c *gin.Context) {
|
||||
if pm.perfMonitor == nil {
|
||||
c.JSON(http.StatusServiceUnavailable, gin.H{"error": "performance monitor not available"})
|
||||
return
|
||||
}
|
||||
|
||||
sysStats, gpuStats := pm.perfMonitor.Current()
|
||||
|
||||
var after time.Time
|
||||
if afterStr := c.Query("after"); afterStr != "" {
|
||||
ts, err := time.Parse(time.RFC3339, afterStr)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid 'after' timestamp, use RFC3339 format"})
|
||||
return
|
||||
}
|
||||
after = ts
|
||||
}
|
||||
|
||||
if !after.IsZero() {
|
||||
filtered := make([]perf.SysStat, 0, len(sysStats))
|
||||
for _, s := range sysStats {
|
||||
if s.Timestamp.After(after) {
|
||||
filtered = append(filtered, s)
|
||||
}
|
||||
}
|
||||
sysStats = filtered
|
||||
|
||||
filteredGpu := make([]perf.GpuStat, 0, len(gpuStats))
|
||||
for _, g := range gpuStats {
|
||||
if g.Timestamp.After(after) {
|
||||
filteredGpu = append(filteredGpu, g)
|
||||
}
|
||||
}
|
||||
gpuStats = filteredGpu
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"sys_stats": sysStats,
|
||||
"gpu_stats": gpuStats,
|
||||
})
|
||||
}
|
||||
|
||||
func (pm *ProxyManager) apiUnloadSingleModelHandler(c *gin.Context) {
|
||||
requestedModel := strings.TrimPrefix(c.Param("model"), "/")
|
||||
realModelName, found := pm.config.RealModelName(requestedModel)
|
||||
@@ -291,7 +344,7 @@ func (pm *ProxyManager) apiGetCapture(c *gin.Context) {
|
||||
}
|
||||
|
||||
capture := pm.metricsMonitor.getCaptureByID(id)
|
||||
if capture == nil {
|
||||
if capture == nil || (capture.ReqPath == "" && capture.ReqHeaders == nil && capture.ReqBody == nil && capture.RespHeaders == nil && capture.RespBody == nil) {
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": "capture not found"})
|
||||
return
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"strings"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||
)
|
||||
|
||||
func (pm *ProxyManager) sendLogsHandlers(c *gin.Context) {
|
||||
@@ -89,7 +90,7 @@ func (pm *ProxyManager) streamLogsHandler(c *gin.Context) {
|
||||
}
|
||||
|
||||
// getLogger searches for the appropriate logger based on the logMonitorId
|
||||
func (pm *ProxyManager) getLogger(logMonitorId string) (*LogMonitor, error) {
|
||||
func (pm *ProxyManager) getLogger(logMonitorId string) (*logmon.Monitor, error) {
|
||||
switch logMonitorId {
|
||||
case "":
|
||||
// maintain the default
|
||||
|
||||
Reference in New Issue
Block a user