proxy,ui: add performance monitoring with Prometheus metrics (#743)

Add a comprehensive performance monitoring system that collects CPU, memory, swap, load average, network IO, and GPU stats. Provides both a REST API for the UI and a Prometheus /metrics endpoint.

Backend changes:
- New internal/perf package with configurable interval-based stats collection
- GPU monitoring via LACT (Unix socket) and nvidia-smi fallback on Linux
- Ring buffer (internal/ring) for time-series stat storage
- Prometheus /metrics endpoint with all system and GPU metrics
- Moved LogMonitor to internal/logmon package
- New PerformanceConfig for hot-reloadable monitoring settings
- REST /api/performance endpoint replacing SSE streaming

UI changes:
- New Performance page with real-time charts for CPU, memory, GPU, and network
- Reusable PerformanceChart component
- LLAMA_SWAP_URL environment variable support
- Improved capture dialog display

Other:
- Example Grafana dashboard for Prometheus metrics
- monitor-test standalone binary
- Config schema and example updates

fixes #596
This commit is contained in:
Benson Wong
2026-05-09 13:29:22 -07:00
committed by GitHub
parent e261745c66
commit 7e3e94a08a
49 changed files with 4322 additions and 273 deletions
+251
View File
@@ -0,0 +1,251 @@
package logmon
import (
"context"
"fmt"
"io"
"os"
"sync"
"time"
"github.com/mostlygeek/llama-swap/event"
)
// DataEventID is the event-type identifier reported by DataEvent.Type.
const DataEventID = 0x04

// DataEvent is the event payload published for every chunk of log data
// written to a Monitor.
type DataEvent struct {
	Data []byte
}

// Type reports the event-type identifier of a DataEvent, which is always
// DataEventID.
func (DataEvent) Type() uint32 { return DataEventID }
// circularBuffer is a fixed-capacity byte ring. Once full, new bytes
// overwrite the oldest ones. Writes never allocate; GetHistory copies the
// stored bytes out in chronological order.
type circularBuffer struct {
	data []byte // backing storage; its length is the capacity
	head int    // index at which the next byte will be written
	size int    // number of valid bytes currently stored (<= len(data))
}

// newCircularBuffer allocates an empty ring able to hold capacity bytes.
func newCircularBuffer(capacity int) *circularBuffer {
	return &circularBuffer{data: make([]byte, capacity)}
}

// Write appends p to the ring, discarding the oldest bytes when there is
// not enough room. If p is at least as large as the ring, only its tail
// is kept.
func (cb *circularBuffer) Write(p []byte) {
	n := len(p)
	if n == 0 {
		return
	}

	total := len(cb.data)
	if n >= total {
		// p alone fills (or overflows) the ring: keep its last total bytes.
		copy(cb.data, p[n-total:])
		cb.head, cb.size = 0, total
		return
	}

	// copy stops at the end of the backing array; whatever did not fit
	// wraps around to the front.
	written := copy(cb.data[cb.head:], p)
	if written < n {
		copy(cb.data, p[written:])
	}
	cb.head = (cb.head + n) % total

	if cb.size += n; cb.size > total {
		cb.size = total
	}
}

// GetHistory returns a freshly allocated copy of the stored bytes, oldest
// first. It returns nil when nothing has been written.
func (cb *circularBuffer) GetHistory() []byte {
	if cb.size == 0 {
		return nil
	}

	total := len(cb.data)
	out := make([]byte, cb.size)
	oldest := (cb.head - cb.size + total) % total

	// First the run from the oldest byte up to the end of the array, then
	// (if the data wraps) the remainder from the front.
	n := copy(out, cb.data[oldest:])
	copy(out[n:], cb.data)
	return out
}
// Level is the severity of a log message. Higher values are more severe.
type Level int

// Severity levels, in increasing order.
const (
	LevelDebug Level = iota
	LevelInfo
	LevelWarn
	LevelError
)

// BufferSize is the capacity, in bytes, of a Monitor's history buffer.
const BufferSize = 100 * 1024
// Monitor is an io.Writer-style log sink. Each Write is forwarded to stdout,
// retained in a bounded history buffer, and published on eventbus as a
// DataEvent. It also offers leveled logging helpers (Debug, Info, ...).
type Monitor struct {
	// eventbus delivers DataEvent values to OnLogData subscribers.
	eventbus *event.Dispatcher
	// mu is held by SetPrefix, SetLogLevel and SetLogTimeFormat while they
	// update level, prefix and timeFormat.
	mu sync.RWMutex
	// buffer holds recent output; nil until the first Write and after Clear.
	buffer *circularBuffer
	// bufferMu guards buffer.
	bufferMu sync.RWMutex
	// stdout receives every byte slice passed to Write.
	stdout io.Writer
	// level is the minimum severity emitted by the logging helpers.
	level Level
	// prefix, when non-empty, is rendered as "[prefix] " in formatted messages.
	prefix string
	// timeFormat, when non-empty, is the time layout used to timestamp
	// formatted messages.
	timeFormat string
}
// New returns a Monitor that echoes its output to os.Stdout.
func New() *Monitor {
	return NewWriter(os.Stdout)
}

// NewWriter returns a Monitor that echoes its output to stdout. The monitor
// starts at LevelInfo with no prefix and no timestamp; its history buffer is
// left unallocated until the first Write.
func NewWriter(stdout io.Writer) *Monitor {
	// buffer, prefix and timeFormat are intentionally left at their zero
	// values.
	m := &Monitor{
		eventbus: event.NewDispatcherConfig(1000),
		stdout:   stdout,
		level:    LevelInfo,
	}
	return m
}
// Write forwards p to the underlying writer, records it in the history
// buffer and publishes a private copy to subscribers. If the underlying
// write fails, the error is returned and p is neither buffered nor
// published. An empty p is a no-op.
func (w *Monitor) Write(p []byte) (n int, err error) {
	if len(p) == 0 {
		return 0, nil
	}

	if n, err = w.stdout.Write(p); err != nil {
		return n, err
	}

	w.bufferMu.Lock()
	if w.buffer == nil {
		// Lazily (re)allocate: the buffer is nil before first use and
		// after Clear.
		w.buffer = newCircularBuffer(BufferSize)
	}
	w.buffer.Write(p)
	w.bufferMu.Unlock()

	// Subscribers get their own copy so the caller may reuse p.
	snapshot := append([]byte(nil), p...)
	w.broadcast(snapshot)
	return n, nil
}
// GetHistory returns a copy of the buffered log output, oldest bytes first,
// or nil when no buffer is currently allocated or it is empty.
func (w *Monitor) GetHistory() []byte {
	w.bufferMu.RLock()
	defer w.bufferMu.RUnlock()

	if buf := w.buffer; buf != nil {
		return buf.GetHistory()
	}
	return nil
}
// Clear drops the history buffer so its memory can be garbage collected.
// A new buffer is allocated lazily by the next Write.
func (w *Monitor) Clear() {
	w.bufferMu.Lock()
	defer w.bufferMu.Unlock()
	w.buffer = nil
}
// OnLogData registers callback to be invoked with the bytes of every
// DataEvent published by this Monitor. The returned function cancels the
// subscription.
func (w *Monitor) OnLogData(callback func(data []byte)) context.CancelFunc {
	handler := func(evt DataEvent) {
		callback(evt.Data)
	}
	return event.Subscribe(w.eventbus, handler)
}
// broadcast publishes msg on the monitor's event bus wrapped in a DataEvent.
func (w *Monitor) broadcast(msg []byte) {
	event.Publish(w.eventbus, DataEvent{Data: msg})
}
// SetPrefix sets the tag rendered as "[prefix] " in formatted log messages.
// An empty prefix disables the tag.
func (w *Monitor) SetPrefix(prefix string) {
	w.mu.Lock()
	w.prefix = prefix
	w.mu.Unlock()
}

// SetLogLevel sets the minimum severity emitted by the logging helpers.
func (w *Monitor) SetLogLevel(level Level) {
	w.mu.Lock()
	w.level = level
	w.mu.Unlock()
}

// SetLogTimeFormat sets the time layout used to timestamp formatted log
// messages. An empty layout disables timestamps.
func (w *Monitor) SetLogTimeFormat(timeFormat string) {
	w.mu.Lock()
	w.timeFormat = timeFormat
	w.mu.Unlock()
}
// formatMessage renders msg as "<timestamp> [<prefix>] [<level>] <msg>\n".
// The timestamp and prefix parts are omitted when the corresponding setting
// is empty.
//
// prefix and timeFormat are written by SetPrefix and SetLogTimeFormat while
// holding w.mu, so they are snapshotted under the read lock here; reading
// them unlocked would race with those setters.
func (w *Monitor) formatMessage(level string, msg string) []byte {
	w.mu.RLock()
	prefixSetting, timeFormat := w.prefix, w.timeFormat
	w.mu.RUnlock()

	prefix := ""
	if prefixSetting != "" {
		prefix = fmt.Sprintf("[%s] ", prefixSetting)
	}
	timestamp := ""
	if timeFormat != "" {
		timestamp = fmt.Sprintf("%s ", time.Now().Format(timeFormat))
	}
	return fmt.Appendf(nil, "%s%s[%s] %s\n", timestamp, prefix, level, msg)
}
// log formats msg and writes it through Write, unless level is below the
// configured minimum.
//
// The minimum level is written by SetLogLevel under w.mu, so it is read
// under the read lock here to avoid a data race. The lock is released
// before formatting and writing.
func (w *Monitor) log(level Level, msg string) {
	w.mu.RLock()
	minLevel := w.level
	w.mu.RUnlock()

	if level < minLevel {
		return
	}
	w.Write(w.formatMessage(level.String(), msg))
}
// Debug logs msg at LevelDebug.
func (w *Monitor) Debug(msg string) {
	w.log(LevelDebug, msg)
}

// Info logs msg at LevelInfo.
func (w *Monitor) Info(msg string) {
	w.log(LevelInfo, msg)
}

// Warn logs msg at LevelWarn.
func (w *Monitor) Warn(msg string) {
	w.log(LevelWarn, msg)
}

// Error logs msg at LevelError.
func (w *Monitor) Error(msg string) {
	w.log(LevelError, msg)
}

// Debugf logs a fmt.Sprintf-formatted message at LevelDebug.
func (w *Monitor) Debugf(format string, args ...any) {
	text := fmt.Sprintf(format, args...)
	w.log(LevelDebug, text)
}

// Infof logs a fmt.Sprintf-formatted message at LevelInfo.
func (w *Monitor) Infof(format string, args ...any) {
	text := fmt.Sprintf(format, args...)
	w.log(LevelInfo, text)
}

// Warnf logs a fmt.Sprintf-formatted message at LevelWarn.
func (w *Monitor) Warnf(format string, args ...any) {
	text := fmt.Sprintf(format, args...)
	w.log(LevelWarn, text)
}

// Errorf logs a fmt.Sprintf-formatted message at LevelError.
func (w *Monitor) Errorf(format string, args ...any) {
	text := fmt.Sprintf(format, args...)
	w.log(LevelError, text)
}
// String returns the upper-case name of the level, or "UNKNOWN" for a value
// outside the defined range.
func (l Level) String() string {
	names := [...]string{
		LevelDebug: "DEBUG",
		LevelInfo:  "INFO",
		LevelWarn:  "WARN",
		LevelError: "ERROR",
	}
	if l < 0 || int(l) >= len(names) {
		return "UNKNOWN"
	}
	return names[l]
}
+250
View File
@@ -0,0 +1,250 @@
package logmon
import (
"bytes"
"io"
"strings"
"sync"
"testing"
"time"
)
// TestLogMonitor checks that three writes are delivered, in order, to two
// subscribers and are also retained in the monitor's history.
func TestLogMonitor(t *testing.T) {
	logMonitor := NewWriter(io.Discard)
	// wg counts subscriber callbacks so the assertions below only run after
	// every callback has fired.
	var wg sync.WaitGroup
	client1Messages := make([]byte, 0)
	client2Messages := make([]byte, 0)
	// OnLogData returns a cancel func; deferring the call to it unsubscribes
	// when the test returns.
	defer logMonitor.OnLogData(func(data []byte) {
		client1Messages = append(client1Messages, data...)
		wg.Done()
	})()
	defer logMonitor.OnLogData(func(data []byte) {
		client2Messages = append(client2Messages, data...)
		wg.Done()
	})()
	wg.Add(6) // 2 x 3 writes
	logMonitor.Write([]byte("1"))
	logMonitor.Write([]byte("2"))
	logMonitor.Write([]byte("3"))
	wg.Wait()
	expectedHistory := "123"
	history := string(logMonitor.GetHistory())
	if history != expectedHistory {
		t.Errorf("Expected history: %s, got: %s", expectedHistory, history)
	}
	c1Data := string(client1Messages)
	if c1Data != expectedHistory {
		t.Errorf("Client1 expected %s, got: %s", expectedHistory, c1Data)
	}
	c2Data := string(client2Messages)
	if c2Data != expectedHistory {
		t.Errorf("Client2 expected %s, got: %s", expectedHistory, c2Data)
	}
}
// TestWrite_ImmutableBuffer checks that mutating the caller's slice after
// Write does not alter what the monitor stored.
func TestWrite_ImmutableBuffer(t *testing.T) {
	const original = "Hello, World!"

	lm := NewWriter(io.Discard)
	msg := []byte(original)

	n, err := lm.Write(msg)
	if err != nil {
		t.Fatalf("Write failed: %v", err)
	}
	if want := len(original); n != want {
		t.Errorf("Expected %d bytes written but got %d", want, n)
	}

	// Tamper with the source slice; the stored history must be unaffected.
	msg[0] = 'B'

	history := lm.GetHistory()
	expected := []byte(original)
	if !bytes.Equal(history, expected) {
		t.Errorf("Expected history to be %q, got %q", expected, history)
	}
}
// TestWrite_LogTimeFormat checks that a configured time layout produces a
// parseable timestamp as the first field of a logged line.
func TestWrite_LogTimeFormat(t *testing.T) {
	lm := NewWriter(io.Discard)
	lm.timeFormat = time.RFC3339
	lm.Info("Hello, World!")

	fields := strings.Fields(string(lm.GetHistory()))
	if len(fields) == 0 {
		t.Fatalf("Cannot extract string from history")
	}

	if _, err := time.Parse(time.RFC3339, fields[0]); err != nil {
		t.Fatalf("Cannot find timestamp: %v", err)
	}
}
// TestCircularBuffer_WrapAround feeds successive writes into a 10-byte ring
// and checks the retained history after each one, including writes that wrap
// and a write larger than the ring.
func TestCircularBuffer_WrapAround(t *testing.T) {
	cb := newCircularBuffer(10)

	steps := []struct {
		input string
		want  string
	}{
		{"hello", "hello"},
		{"world", "helloworld"},
		{"12345", "world12345"},
		{"abcdefghijklmnop", "ghijklmnop"},
	}

	for _, step := range steps {
		cb.Write([]byte(step.input))
		if got := string(cb.GetHistory()); got != step.want {
			t.Errorf("Expected '%s', got %q", step.want, got)
		}
	}
}
// TestCircularBuffer_BoundaryConditions covers the empty ring and the two
// ways of filling it exactly to capacity.
func TestCircularBuffer_BoundaryConditions(t *testing.T) {
	const full = "1234567890"

	empty := newCircularBuffer(10)
	if got := empty.GetHistory(); got != nil {
		t.Errorf("Expected nil for empty buffer, got %q", got)
	}

	// A single write of exactly the capacity.
	empty.Write([]byte(full))
	if got := string(empty.GetHistory()); got != full {
		t.Errorf("Expected '1234567890', got %q", got)
	}

	// Two writes that together reach exactly the capacity.
	halves := newCircularBuffer(10)
	halves.Write([]byte("12345"))
	halves.Write([]byte("67890"))
	if got := string(halves.GetHistory()); got != full {
		t.Errorf("Expected '1234567890', got %q", got)
	}
}
// TestLogMonitor_LazyInit checks that the history buffer is only allocated
// by the first Write.
func TestLogMonitor_LazyInit(t *testing.T) {
	lm := NewWriter(io.Discard)

	// Before any write: no buffer, no history.
	if lm.buffer != nil {
		t.Error("Expected buffer to be nil before first write")
	}
	if got := lm.GetHistory(); got != nil {
		t.Errorf("Expected nil history before first write, got %q", got)
	}

	lm.Write([]byte("test"))

	// After the write: buffer exists and holds the data.
	if lm.buffer == nil {
		t.Error("Expected buffer to be initialized after write")
	}
	if got := string(lm.GetHistory()); got != "test" {
		t.Errorf("Expected 'test', got %q", got)
	}
}
// TestLogMonitor_Clear checks that Clear drops both the buffer and the
// history it held.
func TestLogMonitor_Clear(t *testing.T) {
	lm := NewWriter(io.Discard)
	lm.Write([]byte("hello"))

	if got := string(lm.GetHistory()); got != "hello" {
		t.Errorf("Expected 'hello', got %q", got)
	}

	lm.Clear()

	if lm.buffer != nil {
		t.Error("Expected buffer to be nil after Clear")
	}
	if got := lm.GetHistory(); got != nil {
		t.Errorf("Expected nil history after Clear, got %q", got)
	}
}
// TestLogMonitor_ClearAndReuse checks that a monitor keeps working after
// Clear and that only post-Clear data is retained.
func TestLogMonitor_ClearAndReuse(t *testing.T) {
	lm := NewWriter(io.Discard)

	for _, chunk := range []string{"first"} {
		lm.Write([]byte(chunk))
	}
	lm.Clear()
	lm.Write([]byte("second"))

	got := string(lm.GetHistory())
	if got != "second" {
		t.Errorf("Expected 'second' after clear and reuse, got %q", got)
	}
}
// BenchmarkLogMonitorWrite measures Monitor.Write for several message sizes,
// Write with active subscribers, and GetHistory on a populated buffer.
//
// The WithSubscribers case keeps the cancel funcs returned by OnLogData and
// releases them when the sub-benchmark ends; previously they were discarded,
// so the subscriptions were never cancelled.
func BenchmarkLogMonitorWrite(b *testing.B) {
	smallMsg := []byte("small message\n")
	mediumMsg := []byte(strings.Repeat("medium message content ", 10) + "\n")
	largeMsg := []byte(strings.Repeat("large message content for benchmarking ", 100) + "\n")

	writeCases := []struct {
		name string
		msg  []byte
	}{
		{"SmallWrite", smallMsg},
		{"MediumWrite", mediumMsg},
		{"LargeWrite", largeMsg},
	}
	for _, wc := range writeCases {
		msg := wc.msg
		b.Run(wc.name, func(b *testing.B) {
			lm := NewWriter(io.Discard)
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				lm.Write(msg)
			}
		})
	}

	b.Run("WithSubscribers", func(b *testing.B) {
		lm := NewWriter(io.Discard)
		for i := 0; i < 5; i++ {
			cancel := lm.OnLogData(func(data []byte) {})
			// Runs when this sub-benchmark returns, after the timed loop.
			defer cancel()
		}
		b.ResetTimer()
		for i := 0; i < b.N; i++ {
			lm.Write(mediumMsg)
		}
	})

	b.Run("GetHistory", func(b *testing.B) {
		lm := NewWriter(io.Discard)
		// Populate the buffer before timing starts.
		for i := 0; i < 1000; i++ {
			lm.Write(mediumMsg)
		}
		b.ResetTimer()
		for i := 0; i < b.N; i++ {
			lm.GetHistory()
		}
	})
}
+210
View File
@@ -0,0 +1,210 @@
package perf
import (
"context"
"errors"
"sync"
"time"
"github.com/mostlygeek/llama-swap/internal/logmon"
"github.com/mostlygeek/llama-swap/internal/ring"
"github.com/mostlygeek/llama-swap/proxy/config"
)
var (
	// ErrNotImplemented is returned by platform stubs (for example
	// getGpuStats and trySysfs) that have no implementation.
	ErrNotImplemented = errors.New("Not Implemented")
	// ErrNoGpuTool is returned when none of the GPU monitoring back ends
	// could be used.
	ErrNoGpuTool = errors.New("no GPU monitoring tool available")
)
// Monitor periodically samples system and GPU statistics, keeps the most
// recent samples in ring buffers and fans new samples out to subscribers.
type Monitor struct {
	// mutex is held by Start, Stop, Subscribe, Current and the collector
	// goroutines while they touch the fields below.
	mutex sync.RWMutex
	// log receives diagnostics from the collector goroutines.
	log *logmon.Monitor
	// conf is the active configuration (clamped to minimums by New).
	conf config.PerformanceConfig
	// sysRing stores recent system samples.
	sysRing ring.Buffer[SysStat]
	// gpuRing stores recent GPU snapshots; each entry is one batch of GpuStat.
	gpuRing ring.Buffer[[]GpuStat]
	// stopCtx and stopCancel control the collector goroutines; stopCancel is
	// nil while the monitor is stopped.
	stopCtx context.Context
	stopCancel context.CancelFunc
	// sysListeners and gpuListeners are the sets of subscriber channels
	// registered through Subscribe.
	sysListeners map[chan SysStat]struct{}
	gpuListeners map[chan []GpuStat]struct{}
}
// ringCapacity returns how many samples are needed to cover c.MaxAge when one
// sample is taken every c.Every. The result is never less than 1.
//
// A non-positive c.Every yields 1 instead of dividing by zero; UpdateConfig
// can pass a configuration that has not been clamped the way New does.
func ringCapacity(c config.PerformanceConfig) int {
	if c.Every <= 0 {
		return 1
	}
	n := int(c.MaxAge / c.Every)
	if n < 1 {
		n = 1
	}
	return n
}
// New builds a stopped Monitor from c. Values below the supported minimums
// are raised: Every to 100ms, GC to 1s and MaxAge to 1 minute. A nil logger
// is rejected with an error.
func New(c config.PerformanceConfig, logger *logmon.Monitor) (*Monitor, error) {
	if logger == nil {
		return nil, errors.New("logger is required")
	}

	const (
		minEvery  = 100 * time.Millisecond
		minGC     = 1 * time.Second
		minMaxAge = 1 * time.Minute
	)
	if c.Every < minEvery {
		c.Every = minEvery
	}
	if c.GC < minGC {
		c.GC = minGC
	}
	if c.MaxAge < minMaxAge {
		c.MaxAge = minMaxAge
	}

	size := ringCapacity(c)
	m := &Monitor{
		conf:         c,
		log:          logger,
		sysRing:      ring.NewBuffer[SysStat](size),
		gpuRing:      ring.NewBuffer[[]GpuStat](size),
		sysListeners: make(map[chan SysStat]struct{}),
		gpuListeners: make(map[chan []GpuStat]struct{}),
	}
	return m, nil
}
// Stop cancels the collector goroutines started by Start. Calling Stop on a
// monitor that is not running is a no-op.
func (m *Monitor) Stop() {
	m.mutex.Lock()
	defer m.mutex.Unlock()

	if cancel := m.stopCancel; cancel != nil {
		m.stopCancel = nil
		cancel()
	}
}
// UpdateConfig replaces the monitor configuration. When the (normalised)
// configuration differs from the current one, the monitor is stopped, its
// ring buffers are re-created for the new capacity, and it is started again.
//
// newConf is first raised to the same minimums New enforces (Every >= 100ms,
// GC >= 1s, MaxAge >= 1 minute). Without this a zero or negative interval
// would reach ringCapacity and time.NewTicker, and a below-minimum
// configuration would always compare as "changed" against the clamped
// configuration stored by New, forcing a restart on every call.
func (m *Monitor) UpdateConfig(newConf config.PerformanceConfig) {
	if newConf.Every < 100*time.Millisecond {
		newConf.Every = 100 * time.Millisecond
	}
	if newConf.GC < 1*time.Second {
		newConf.GC = 1 * time.Second
	}
	if newConf.MaxAge < 1*time.Minute {
		newConf.MaxAge = 1 * time.Minute
	}

	m.mutex.RLock()
	changed := m.conf != newConf
	m.mutex.RUnlock()
	if !changed {
		return
	}

	// Stop and Start take the lock themselves, so it is only held here
	// while swapping the configuration and buffers.
	m.Stop()

	m.mutex.Lock()
	m.conf = newConf
	capacity := ringCapacity(newConf)
	m.sysRing = ring.NewBuffer[SysStat](capacity)
	m.gpuRing = ring.NewBuffer[[]GpuStat](capacity)
	m.mutex.Unlock()

	m.Start()
}
// Subscribe registers a new listener and returns its system-stat channel,
// its GPU-stat channel and a function that unregisters both. Each channel
// has a buffer of one element.
func (m *Monitor) Subscribe() (chan SysStat, chan []GpuStat, func()) {
	sysChan := make(chan SysStat, 1)
	gpuChan := make(chan []GpuStat, 1)

	m.mutex.Lock()
	m.sysListeners[sysChan] = struct{}{}
	m.gpuListeners[gpuChan] = struct{}{}
	m.mutex.Unlock()

	return sysChan, gpuChan, func() {
		m.mutex.Lock()
		delete(m.sysListeners, sysChan)
		delete(m.gpuListeners, gpuChan)
		m.mutex.Unlock()
	}
}
// Start launches the collector goroutines: one samples system stats on a
// ticker, the other consumes GPU snapshots from getGpuStats. Every sample is
// pushed into the matching ring buffer and offered to each subscriber with a
// non-blocking send. Calling Start on a running monitor is a no-op.
//
// The context, interval and logger are captured in locals before the
// goroutines start. The goroutines previously re-read m.stopCtx and m.conf
// on every iteration without holding the lock; after a Stop/Start cycle an
// old goroutine that was mid-sample could pick up the new context and never
// exit, and those reads raced with the writes made here and in UpdateConfig.
func (m *Monitor) Start() {
	m.mutex.Lock()
	defer m.mutex.Unlock()
	if m.stopCancel != nil {
		return
	}

	ctx, cancel := context.WithCancel(context.Background())
	m.stopCtx, m.stopCancel = ctx, cancel

	logger := m.log
	every := m.conf.Every
	if every <= 0 {
		// time.NewTicker panics on a non-positive interval; fall back to
		// the minimum New enforces.
		every = 100 * time.Millisecond
	}

	go func() {
		tick := time.NewTicker(every)
		defer tick.Stop()
		for {
			select {
			case <-ctx.Done():
				return
			case <-tick.C:
				s, err := ReadSysStats()
				if err != nil {
					if !errors.Is(err, ErrNotImplemented) {
						logger.Errorf("failed to read sys stats: %s", err.Error())
					}
					continue
				}
				m.mutex.Lock()
				if ctx.Err() != nil {
					// Stopped while sampling: do not write a stale sample
					// into buffers that may already belong to a new run.
					m.mutex.Unlock()
					return
				}
				m.sysRing.Push(s)
				for l := range m.sysListeners {
					select {
					case l <- s:
					default: // slow subscriber: drop rather than block
					}
				}
				m.mutex.Unlock()
			}
		}
	}()

	go func() {
		gpuCh, err := getGpuStats(ctx, every, logger)
		if err != nil {
			if errors.Is(err, ErrNotImplemented) || errors.Is(err, ErrNoGpuTool) {
				logger.Infof("GPU monitoring not available: %s", err.Error())
			} else {
				logger.Errorf("failed to initialize GPU monitoring: %s", err.Error())
			}
			return
		}
		for {
			select {
			case <-ctx.Done():
				return
			case g, ok := <-gpuCh:
				if !ok {
					// A closed channel during shutdown is expected; only
					// report it while the monitor is still running.
					if ctx.Err() == nil {
						logger.Errorf("failed reading from gpuCh - stopping read goroutine")
					}
					return
				}
				m.mutex.Lock()
				if ctx.Err() != nil {
					m.mutex.Unlock()
					return
				}
				m.gpuRing.Push(g)
				for l := range m.gpuListeners {
					select {
					case l <- g:
					default: // slow subscriber: drop rather than block
					}
				}
				m.mutex.Unlock()
			}
		}
	}()
}
// Current returns the buffered system samples together with all buffered
// GPU snapshots flattened into a single slice, in buffer order.
func (m *Monitor) Current() ([]SysStat, []GpuStat) {
	m.mutex.RLock()
	defer m.mutex.RUnlock()

	var flat []GpuStat
	for _, batch := range m.gpuRing.Slice() {
		flat = append(flat, batch...)
	}
	return m.sysRing.Slice(), flat
}
// ReadSysStats is the exported entry point to the platform-specific
// readSysStats; it returns that function's result unchanged.
func ReadSysStats() (SysStat, error) {
	return readSysStats()
}

// GetGpuStats is the exported entry point to the platform-specific
// getGpuStats; it forwards ctx, every and logger and returns that function's
// result unchanged.
func GetGpuStats(ctx context.Context, every time.Duration, logger *logmon.Monitor) (chan []GpuStat, error) {
	return getGpuStats(ctx, every, logger)
}
+55
View File
@@ -0,0 +1,55 @@
package perf
import (
"context"
"time"
"github.com/mostlygeek/llama-swap/internal/logmon"
"github.com/shirou/gopsutil/v4/cpu"
"github.com/shirou/gopsutil/v4/load"
"github.com/shirou/gopsutil/v4/mem"
)
// getGpuStats is a stub in this build: it ignores its arguments and always
// returns ErrNotImplemented.
func getGpuStats(ctx context.Context, every time.Duration, logger *logmon.Monitor) (chan []GpuStat, error) {
	return nil, ErrNotImplemented
}
// readSysStats takes one sample of per-core CPU utilisation, memory, swap
// and load average. CPU and memory failures abort the sample; swap and load
// average are best effort and stay zero when unavailable.
func readSysStats() (SysStat, error) {
	const bytesPerMB = 1024 * 1024

	perCore, err := cpu.Percent(0, true)
	if err != nil {
		return SysStat{}, err
	}
	vm, err := mem.VirtualMemory()
	if err != nil {
		return SysStat{}, err
	}

	sample := SysStat{
		CpuUtilPerCore: perCore,
		MemTotalMB:     int(vm.Total / bytesPerMB),
		MemUsedMB:      int(vm.Used / bytesPerMB),
		MemFreeMB:      int(vm.Free / bytesPerMB),
	}

	if swap, err := mem.SwapMemory(); err == nil {
		sample.SwapTotalMB = int(swap.Total / bytesPerMB)
		sample.SwapUsedMB = int(swap.Used / bytesPerMB)
	}

	if avg, err := load.Avg(); err == nil {
		sample.LoadAvg1 = avg.Load1
		sample.LoadAvg5 = avg.Load5
		sample.LoadAvg15 = avg.Load15
	}

	// Stamp last, once every reading has been taken.
	sample.Timestamp = time.Now()
	return sample, nil
}
+238
View File
@@ -0,0 +1,238 @@
package perf
import (
"io"
"sync"
"testing"
"time"
"github.com/mostlygeek/llama-swap/internal/logmon"
"github.com/mostlygeek/llama-swap/proxy/config"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// newTestLogger returns a logmon.Monitor whose output goes to io.Discard.
func newTestLogger() *logmon.Monitor {
	return logmon.NewWriter(io.Discard)
}
func TestNew_DefaultConfig(t *testing.T) {
logger := newTestLogger()
m, err := New(config.PerformanceConfig{}, logger)
require.NoError(t, err)
require.NotNil(t, m)
assert.Equal(t, 100*time.Millisecond, m.conf.Every)
assert.Equal(t, 1*time.Second, m.conf.GC)
assert.Equal(t, 1*time.Minute, m.conf.MaxAge)
}
func TestNew_CustomConfig(t *testing.T) {
logger := newTestLogger()
cfg := config.PerformanceConfig{
Enable: true,
Every: 500 * time.Millisecond,
GC: 5 * time.Second,
MaxAge: 10 * time.Minute,
}
m, err := New(cfg, logger)
require.NoError(t, err)
assert.Equal(t, 500*time.Millisecond, m.conf.Every)
assert.Equal(t, 5*time.Second, m.conf.GC)
assert.Equal(t, 10*time.Minute, m.conf.MaxAge)
}
func TestNew_NilLogger(t *testing.T) {
m, err := New(config.PerformanceConfig{}, nil)
assert.Error(t, err)
assert.Nil(t, m)
}
func TestNew_BelowMinimumConfig(t *testing.T) {
logger := newTestLogger()
cfg := config.PerformanceConfig{
Enable: true,
Every: 1 * time.Millisecond,
GC: 100 * time.Millisecond,
MaxAge: 1 * time.Second,
}
m, err := New(cfg, logger)
require.NoError(t, err)
assert.Equal(t, 100*time.Millisecond, m.conf.Every)
assert.Equal(t, 1*time.Second, m.conf.GC)
assert.Equal(t, 1*time.Minute, m.conf.MaxAge)
}
// TestSubscribe_ReturnsChannels checks that Subscribe hands back two
// channels and an unsubscribe function.
func TestSubscribe_ReturnsChannels(t *testing.T) {
	m, err := New(config.PerformanceConfig{}, newTestLogger())
	require.NoError(t, err)

	sys, gpu, cancel := m.Subscribe()
	defer cancel()

	assert.NotNil(t, sys)
	assert.NotNil(t, gpu)
	assert.NotNil(t, cancel)
}
// TestSubscribe_UnsubscribeRemovesListeners checks that the unsubscribe
// function removes both listener channels.
func TestSubscribe_UnsubscribeRemovesListeners(t *testing.T) {
	m, err := New(config.PerformanceConfig{}, newTestLogger())
	require.NoError(t, err)

	// expectListeners asserts the size of both listener sets under the
	// read lock.
	expectListeners := func(want int) {
		m.mutex.RLock()
		defer m.mutex.RUnlock()
		assert.Len(t, m.sysListeners, want)
		assert.Len(t, m.gpuListeners, want)
	}

	_, _, unsub := m.Subscribe()
	expectListeners(1)

	unsub()
	expectListeners(0)
}
// TestSubscribe_MultipleSubscriptions checks that each Subscribe call gets
// its own channels and that all of them are tracked.
func TestSubscribe_MultipleSubscriptions(t *testing.T) {
	m, err := New(config.PerformanceConfig{}, newTestLogger())
	require.NoError(t, err)

	firstSys, firstGpu, cancelFirst := m.Subscribe()
	secondSys, secondGpu, cancelSecond := m.Subscribe()
	defer cancelFirst()
	defer cancelSecond()

	assert.NotEqual(t, firstSys, secondSys)
	assert.NotEqual(t, firstGpu, secondGpu)

	m.mutex.RLock()
	defer m.mutex.RUnlock()
	assert.Len(t, m.sysListeners, 2)
	assert.Len(t, m.gpuListeners, 2)
}
// TestCurrent_EmptyByDefault checks that a fresh monitor reports no samples.
func TestCurrent_EmptyByDefault(t *testing.T) {
	m, err := New(config.PerformanceConfig{}, newTestLogger())
	require.NoError(t, err)

	sys, gpu := m.Current()

	assert.Empty(t, sys)
	assert.Empty(t, gpu)
}
// TestCurrent_ReturnsCopies checks that Current returns the pushed samples
// and that mutating the returned slice leaves the monitor's data intact.
func TestCurrent_ReturnsCopies(t *testing.T) {
	m, err := New(config.PerformanceConfig{}, newTestLogger())
	require.NoError(t, err)

	stamp := time.Now()
	m.sysRing.Push(SysStat{Timestamp: stamp, MemTotalMB: 1024})
	m.gpuRing.Push([]GpuStat{{Timestamp: stamp, ID: 0, Name: "gpu0"}})

	sys, gpu := m.Current()
	assert.Len(t, sys, 1)
	assert.Len(t, gpu, 1)
	assert.Equal(t, 1024, sys[0].MemTotalMB)
	assert.Equal(t, "gpu0", gpu[0].Name)

	// Mutate the returned slice, then fetch again: the stored value must
	// be unchanged.
	sys[0].MemTotalMB = 999
	fresh, _ := m.Current()
	assert.Equal(t, 1024, fresh[0].MemTotalMB)
}
func TestStart_CollectsSysStats(t *testing.T) {
if testing.Short() {
t.Skip("skipping slow test")
}
m, err := New(config.PerformanceConfig{Every: 100 * time.Millisecond}, newTestLogger())
require.NoError(t, err)
m.Start()
time.Sleep(350 * time.Millisecond)
m.Stop()
sysStats, _ := m.Current()
assert.NotEmpty(t, sysStats, "expected sys stats to be collected")
}
func TestStart_StopStopsGoroutines(t *testing.T) {
if testing.Short() {
t.Skip("skipping slow test")
}
m, err := New(config.PerformanceConfig{Every: 100 * time.Millisecond}, newTestLogger())
require.NoError(t, err)
m.Start()
if m.stopCancel == nil {
t.Error("stopCancel should not be nil after Start()")
}
m.Stop()
if m.stopCancel != nil {
t.Error("stopCancel should be nil after Stop()")
}
}
func TestStart_SubscriberReceivesStats(t *testing.T) {
if testing.Short() {
t.Skip("skipping slow test")
}
m, err := New(config.PerformanceConfig{Every: 100 * time.Millisecond}, newTestLogger())
require.NoError(t, err)
sysCh, _, unsub := m.Subscribe()
defer unsub()
m.Start()
defer m.Stop()
select {
case s := <-sysCh:
assert.False(t, s.Timestamp.IsZero())
assert.NotEmpty(t, s.CpuUtilPerCore)
case <-time.After(500 * time.Millisecond):
t.Fatal("timed out waiting for sys stats")
}
}
// TestReadSysStats checks that a direct sample carries a timestamp, per-core
// CPU data and a positive memory total.
func TestReadSysStats(t *testing.T) {
	sample, err := ReadSysStats()
	require.NoError(t, err)

	assert.False(t, sample.Timestamp.IsZero())
	assert.NotEmpty(t, sample.CpuUtilPerCore)
	assert.Greater(t, sample.MemTotalMB, 0)
}
// TestCurrent_ConcurrentAccess calls Current from ten goroutines at once and
// checks that each sees the single pushed sample of each kind.
func TestCurrent_ConcurrentAccess(t *testing.T) {
	m, err := New(config.PerformanceConfig{}, newTestLogger())
	require.NoError(t, err)

	m.sysRing.Push(SysStat{Timestamp: time.Now(), MemTotalMB: 1024})
	m.gpuRing.Push([]GpuStat{{Timestamp: time.Now(), ID: 0}})

	const readers = 10
	var wg sync.WaitGroup
	wg.Add(readers)
	for i := 0; i < readers; i++ {
		go func() {
			defer wg.Done()
			sys, gpu := m.Current()
			assert.Len(t, sys, 1)
			assert.Len(t, gpu, 1)
		}()
	}
	wg.Wait()
}
+461
View File
@@ -0,0 +1,461 @@
//go:build unix && !darwin
package perf
import (
"bufio"
"context"
"encoding/json"
"fmt"
"net"
"os"
"os/exec"
"os/user"
"path/filepath"
"strconv"
"strings"
"time"
"github.com/mostlygeek/llama-swap/internal/logmon"
"github.com/shirou/gopsutil/v4/cpu"
"github.com/shirou/gopsutil/v4/load"
"github.com/shirou/gopsutil/v4/mem"
psnet "github.com/shirou/gopsutil/v4/net"
)
// getGpuStats picks the first working GPU back end, trying LACT, then
// nvidia-smi, then sysfs. The winning back end is logged at info level and
// each failed attempt at debug level. ErrNoGpuTool is returned when none of
// them can be used.
func getGpuStats(ctx context.Context, every time.Duration, logger *logmon.Monitor) (chan []GpuStat, error) {
	ch, err := tryLACT(ctx, every, logger)
	if err == nil {
		logger.Info("using LACT for GPU monitoring")
		return ch, nil
	}
	logger.Debugf("LACT: %s", err.Error())

	ch, err = tryNvidiaSmi(ctx, every, logger)
	if err == nil {
		logger.Info("using nvidia-smi for GPU monitoring")
		return ch, nil
	}
	logger.Debugf("nvidia-smi: %s", err.Error())

	ch, err = trySysfs(ctx, every, logger)
	if err == nil {
		logger.Info("using sysfs for GPU monitoring")
		return ch, nil
	}
	logger.Debugf("sysfs: %s", err.Error())

	return nil, ErrNoGpuTool
}
// tryLACT checks that a LACT daemon socket is reachable and lists at least
// one device, then starts a goroutine that polls the daemon every `every`
// and sends each non-empty batch of stats on the returned channel. A batch
// is dropped when the channel's single buffer slot is occupied. The channel
// is closed when ctx is cancelled.
//
// Each poll opens a fresh connection; a poll that fails to connect or list
// devices is skipped silently, and devices whose stats cannot be read are
// left out of that batch.
func tryLACT(ctx context.Context, every time.Duration, logger *logmon.Monitor) (chan []GpuStat, error) {
	probePath := lactSocketPath()
	if probePath == "" {
		return nil, ErrNoGpuTool
	}

	probe, err := net.DialTimeout("unix", probePath, 2*time.Second)
	if err != nil {
		return nil, fmt.Errorf("cannot connect to LACT socket: %w", err)
	}
	defer probe.Close()
	probe.SetDeadline(time.Now().Add(5 * time.Second))

	found, err := lactListDevices(probe)
	if err != nil {
		return nil, fmt.Errorf("LACT ListDevices failed: %w", err)
	}
	if len(found) == 0 {
		return nil, fmt.Errorf("LACT returned no devices")
	}

	// sample performs one poll and returns the stats gathered, or nil when
	// the daemon could not be queried.
	sample := func() []GpuStat {
		path := lactSocketPath()
		if path == "" {
			return nil
		}
		conn, err := net.DialTimeout("unix", path, 2*time.Second)
		if err != nil {
			return nil
		}
		defer conn.Close()
		conn.SetDeadline(time.Now().Add(5 * time.Second))

		devices, err := lactListDevices(conn)
		if err != nil {
			return nil
		}
		batch := make([]GpuStat, 0, len(devices))
		for idx, dev := range devices {
			if stat, err := lactGetDeviceStats(conn, dev.ID, dev.Name, idx); err == nil {
				batch = append(batch, stat)
			}
		}
		return batch
	}

	out := make(chan []GpuStat, 1)
	go func() {
		defer close(out)
		ticker := time.NewTicker(every)
		defer ticker.Stop()
		for {
			select {
			case <-ctx.Done():
				return
			case <-ticker.C:
				batch := sample()
				if len(batch) == 0 {
					continue
				}
				select {
				case out <- batch:
				default: // consumer is behind: drop this batch
				}
			}
		}
	}()
	return out, nil
}
// tryNvidiaSmi starts `nvidia-smi` in loop mode and streams its CSV output.
// Each parsed line is sent on the returned channel as a one-element slice;
// a line is dropped when the channel's single buffer slot is occupied. The
// channel is closed when the process's output ends. ErrNoGpuTool is returned
// when nvidia-smi is not on PATH.
//
// The loop interval is `every` truncated to whole seconds, with a minimum of
// one second. The process is started with exec.CommandContext, so cancelling
// ctx terminates it.
//
// Scanner and process-exit errors are logged instead of being discarded. On
// a scanner error the child is killed so Wait cannot block on a process
// whose output nobody reads any more. Errors seen after ctx is cancelled are
// expected and are not logged.
//
// NOTE(review): with several GPUs, each output line is delivered as its own
// snapshot and may be dropped while the previous one is still unread —
// confirm whether lines from one sample should be batched.
func tryNvidiaSmi(ctx context.Context, every time.Duration, logger *logmon.Monitor) (chan []GpuStat, error) {
	if _, err := exec.LookPath("nvidia-smi"); err != nil {
		return nil, ErrNoGpuTool
	}

	sec := int(every.Seconds())
	if sec < 1 {
		sec = 1
	}

	cmd := exec.CommandContext(ctx, "nvidia-smi",
		"--query-gpu=index,name,uuid,temperature.gpu,utilization.gpu,memory.used,memory.total,fan.speed,power.draw",
		"--format=csv,noheader,nounits",
		"-loop", strconv.Itoa(sec),
	)
	stdout, err := cmd.StdoutPipe()
	if err != nil {
		return nil, fmt.Errorf("nvidia-smi stdout pipe failed: %w", err)
	}
	if err := cmd.Start(); err != nil {
		return nil, fmt.Errorf("nvidia-smi start failed: %w", err)
	}

	ch := make(chan []GpuStat, 1)
	go func() {
		defer close(ch)

		scanner := bufio.NewScanner(stdout)
		for scanner.Scan() {
			line := strings.TrimSpace(scanner.Text())
			if line == "" {
				continue
			}
			stat := parseNvidiaSmiLine(line)
			if stat == nil {
				continue
			}
			select {
			case ch <- []GpuStat{*stat}:
			default: // consumer is behind: drop this line
			}
		}

		if err := scanner.Err(); err != nil {
			if ctx.Err() == nil {
				logger.Errorf("nvidia-smi: reading output failed: %s", err.Error())
			}
			// Best effort: the process may already have exited.
			_ = cmd.Process.Kill()
		}
		if err := cmd.Wait(); err != nil && ctx.Err() == nil {
			logger.Errorf("nvidia-smi exited: %s", err.Error())
		}
	}()
	return ch, nil
}
// parseNvidiaSmiLine converts one ", "-separated nvidia-smi CSV line into a
// GpuStat. The expected column order is index, name, uuid, temperature, GPU
// utilisation, memory used, memory total, fan speed, power draw. It returns
// nil when the line has fewer than nine columns. Numeric columns that fail
// to parse keep whatever value strconv returned (zero for malformed text);
// the parse error itself is ignored.
func parseNvidiaSmiLine(line string) *GpuStat {
	cols := strings.Split(line, ", ")
	if len(cols) < 9 {
		return nil
	}

	field := func(i int) string { return strings.TrimSpace(cols[i]) }
	asInt := func(i int) int {
		v, _ := strconv.Atoi(field(i))
		return v
	}
	asFloat := func(i int) float64 {
		v, _ := strconv.ParseFloat(field(i), 64)
		return v
	}

	usedMB, totalMB := asInt(5), asInt(6)
	memPct := 0.0
	if totalMB > 0 {
		memPct = float64(usedMB) / float64(totalMB) * 100
	}

	return &GpuStat{
		Timestamp:   time.Now(),
		ID:          asInt(0),
		Name:        field(1),
		UUID:        field(2),
		TempC:       asInt(3),
		GpuUtilPct:  asFloat(4),
		MemUtilPct:  memPct,
		MemUsedMB:   usedMB,
		MemTotalMB:  totalMB,
		FanSpeedPct: asFloat(7),
		PowerDrawW:  asFloat(8),
	}
}
// trySysfs is a placeholder for a sysfs-based GPU back end: it ignores its
// arguments and always returns ErrNotImplemented.
func trySysfs(ctx context.Context, every time.Duration, logger *logmon.Monitor) (chan []GpuStat, error) {
	return nil, ErrNotImplemented
}
// lactSocketPath locates the LACT daemon socket. Candidates are checked in
// order and the first path that exists is returned:
//
//  1. $LACT_DAEMON_SOCKET_PATH, when set
//  2. /run/lactd.sock
//  3. /run/user/<uid>/lactd.sock for the current user
//
// It returns "" when none exists or the current user cannot be determined.
func lactSocketPath() string {
	exists := func(path string) bool {
		_, err := os.Stat(path)
		return err == nil
	}

	if fromEnv := os.Getenv("LACT_DAEMON_SOCKET_PATH"); fromEnv != "" && exists(fromEnv) {
		return fromEnv
	}

	const systemSocket = "/run/lactd.sock"
	if exists(systemSocket) {
		return systemSocket
	}

	current, err := user.Current()
	if err != nil {
		return ""
	}
	if perUser := filepath.Join("/run/user", current.Uid, "lactd.sock"); exists(perUser) {
		return perUser
	}
	return ""
}
// lactRequest is the JSON request sent to the LACT daemon: a command name
// and optional arguments.
type lactRequest struct {
	Command string `json:"command"`
	Args interface{} `json:"args,omitempty"`
}

// lactResponse is the JSON reply envelope; Data is left undecoded so each
// caller can unmarshal it into the type it expects.
type lactResponse struct {
	Status string `json:"status"`
	Data json.RawMessage `json:"data"`
}

// lactDeviceEntry is one element of the "list_devices" reply.
type lactDeviceEntry struct {
	ID string `json:"id"`
	Name string `json:"name"`
}

// lactDeviceStats is the subset of the "device_stats" reply that is read by
// lactGetDeviceStats. Pointer fields are nil when the key is absent or null.
type lactDeviceStats struct {
	Fan struct {
		// PwmCurrent is scaled by 1/255 into a percentage by lactGetDeviceStats.
		PwmCurrent *uint8 `json:"pwm_current"`
	} `json:"fan"`
	Vram struct {
		// Total and Used are divided by 1024*1024 to obtain MB.
		Total *uint64 `json:"total"`
		Used *uint64 `json:"used"`
	} `json:"vram"`
	Power struct {
		Average *float64 `json:"average"`
		Current *float64 `json:"current"`
	} `json:"power"`
	// Temps is keyed by sensor name (e.g. "edge", "junction", "mem", "VRAM").
	Temps map[string]lactTempEntry `json:"temps"`
	BusyPercent *uint8 `json:"busy_percent"`
}

// lactTempEntry is one temperature sensor reading.
type lactTempEntry struct {
	Current *float64 `json:"current"`
}
// lactSendRequest writes req to conn as a single newline-terminated JSON
// line, reads one newline-terminated JSON reply and returns its "data"
// payload. A reply whose status is not "ok" is turned into an error carrying
// the payload text.
func lactSendRequest(conn net.Conn, req lactRequest) (json.RawMessage, error) {
	payload, err := json.Marshal(req)
	if err != nil {
		return nil, err
	}
	if _, err = conn.Write(append(payload, '\n')); err != nil {
		return nil, err
	}

	replyLine, err := bufio.NewReader(conn).ReadBytes('\n')
	if err != nil {
		return nil, err
	}

	var reply lactResponse
	if err = json.Unmarshal(replyLine, &reply); err != nil {
		return nil, err
	}
	if reply.Status != "ok" {
		return nil, fmt.Errorf("LACT error: %s", string(reply.Data))
	}
	return reply.Data, nil
}
// lactListDevices issues the "list_devices" command on conn and decodes the
// reply into device entries.
func lactListDevices(conn net.Conn) ([]lactDeviceEntry, error) {
	raw, err := lactSendRequest(conn, lactRequest{Command: "list_devices"})
	if err != nil {
		return nil, err
	}

	var entries []lactDeviceEntry
	if err = json.Unmarshal(raw, &entries); err != nil {
		return nil, err
	}
	return entries, nil
}
// lactGetDeviceStats issues "device_stats" for the device with the given id
// and maps the reply onto a GpuStat. name and index are copied into the
// result as Name and ID; id becomes UUID. Values missing from the reply stay
// zero.
//
// Selection rules:
//   - power: the average reading when present and positive, otherwise the
//     current reading;
//   - temperature: the "edge" sensor, else "junction", else any sensor that
//     has a reading;
//   - VRAM temperature: the first of "mem", "VRAM" with a positive reading.
func lactGetDeviceStats(conn net.Conn, id string, name string, index int) (GpuStat, error) {
	type deviceArgs struct {
		ID string `json:"id"`
	}

	raw, err := lactSendRequest(conn, lactRequest{
		Command: "device_stats",
		Args:    deviceArgs{ID: id},
	})
	if err != nil {
		return GpuStat{}, err
	}

	var ds lactDeviceStats
	if err := json.Unmarshal(raw, &ds); err != nil {
		return GpuStat{}, err
	}

	out := GpuStat{ID: index, Name: name, UUID: id}

	// Memory: bytes -> MB, then utilisation from the rounded MB figures.
	if used := ds.Vram.Used; used != nil {
		out.MemUsedMB = int(*used / 1024 / 1024)
	}
	if total := ds.Vram.Total; total != nil {
		out.MemTotalMB = int(*total / 1024 / 1024)
	}
	if out.MemTotalMB > 0 {
		out.MemUtilPct = float64(out.MemUsedMB) / float64(out.MemTotalMB) * 100
	}

	if busy := ds.BusyPercent; busy != nil {
		out.GpuUtilPct = float64(*busy)
	}
	if pwm := ds.Fan.PwmCurrent; pwm != nil {
		out.FanSpeedPct = float64(*pwm) / 255.0 * 100.0
	}

	switch avg, cur := ds.Power.Average, ds.Power.Current; {
	case avg != nil && *avg > 0:
		out.PowerDrawW = *avg
	case cur != nil:
		out.PowerDrawW = *cur
	}

	edge, hasEdge := ds.Temps["edge"]
	junction, hasJunction := ds.Temps["junction"]
	switch {
	case hasEdge && edge.Current != nil:
		out.TempC = int(*edge.Current)
	case hasJunction && junction.Current != nil:
		out.TempC = int(*junction.Current)
	default:
		for _, sensor := range ds.Temps {
			if sensor.Current != nil {
				out.TempC = int(*sensor.Current)
				break
			}
		}
	}

	// nvidia uses "VRAM", amd "mem"
	for _, key := range []string{"mem", "VRAM"} {
		sensor, ok := ds.Temps[key]
		if ok && sensor.Current != nil && *sensor.Current > 0 {
			out.VramTempC = int(*sensor.Current)
			break
		}
	}

	out.Timestamp = time.Now()
	return out, nil
}
// readSysfs is a placeholder for reading GPU stats from sysfs: it always
// returns ErrNotImplemented.
func readSysfs() ([]GpuStat, error) {
	return nil, ErrNotImplemented
}
// readSysStats takes one sample of per-core CPU utilisation, memory, swap,
// load average and per-interface network counters. CPU and memory failures
// abort the sample; swap, load average and network counters are best effort
// and stay zero/empty when unavailable. The "lo" interface is excluded.
func readSysStats() (SysStat, error) {
	const bytesPerMB = 1024 * 1024

	perCore, err := cpu.Percent(0, true)
	if err != nil {
		return SysStat{}, err
	}
	vm, err := mem.VirtualMemory()
	if err != nil {
		return SysStat{}, err
	}

	sample := SysStat{
		CpuUtilPerCore: perCore,
		MemTotalMB:     int(vm.Total / bytesPerMB),
		MemUsedMB:      int(vm.Used / bytesPerMB),
		MemFreeMB:      int(vm.Free / bytesPerMB),
	}

	if swap, err := mem.SwapMemory(); err == nil {
		sample.SwapTotalMB = int(swap.Total / bytesPerMB)
		sample.SwapUsedMB = int(swap.Used / bytesPerMB)
	}

	if avg, err := load.Avg(); err == nil {
		sample.LoadAvg1 = avg.Load1
		sample.LoadAvg5 = avg.Load5
		sample.LoadAvg15 = avg.Load15
	}

	// Always a non-nil slice, even when no counters could be read.
	interfaces := make([]NetIOStat, 0)
	if counters, err := psnet.IOCounters(true); err == nil {
		for _, c := range counters {
			if c.Name != "lo" {
				interfaces = append(interfaces, NetIOStat{
					Name:      c.Name,
					BytesRecv: c.BytesRecv,
					BytesSent: c.BytesSent,
				})
			}
		}
	}
	sample.NetIO = interfaces

	// Stamp last, once every reading has been taken.
	sample.Timestamp = time.Now()
	return sample, nil
}
+49
View File
@@ -0,0 +1,49 @@
package perf
import (
"context"
"time"
"github.com/mostlygeek/llama-swap/internal/logmon"
"github.com/shirou/gopsutil/v4/cpu"
"github.com/shirou/gopsutil/v4/mem"
"github.com/shirou/gopsutil/v4/net"
)
// getGpuStats is a stub in this build: it ignores its arguments and always
// returns ErrNotImplemented.
func getGpuStats(ctx context.Context, every time.Duration, logger *logmon.Monitor) (chan []GpuStat, error) {
	return nil, ErrNotImplemented
}
// readSysStats takes one sample of per-core CPU utilisation, memory and
// per-interface network counters. CPU and memory failures abort the sample;
// network counters are best effort and the list stays empty when they cannot
// be read. Swap and load average are not collected by this variant.
func readSysStats() (SysStat, error) {
	const bytesPerMB = 1024 * 1024

	perCore, err := cpu.Percent(0, true)
	if err != nil {
		return SysStat{}, err
	}
	vm, err := mem.VirtualMemory()
	if err != nil {
		return SysStat{}, err
	}

	// Always a non-nil slice, even when no counters could be read.
	interfaces := make([]NetIOStat, 0)
	if counters, err := net.IOCounters(true); err == nil {
		for _, c := range counters {
			interfaces = append(interfaces, NetIOStat{
				Name:      c.Name,
				BytesRecv: c.BytesRecv,
				BytesSent: c.BytesSent,
			})
		}
	}

	sample := SysStat{
		Timestamp:      time.Now(),
		CpuUtilPerCore: perCore,
		MemTotalMB:     int(vm.Total / bytesPerMB),
		MemUsedMB:      int(vm.Used / bytesPerMB),
		MemFreeMB:      int(vm.Free / bytesPerMB),
		NetIO:          interfaces,
	}
	return sample, nil
}
+129
View File
@@ -0,0 +1,129 @@
package perf
import (
"fmt"
"net/http"
"sort"
"strings"
)
// mbToBytes is the number of bytes in one MB (1024*1024). It converts the
// MB-denominated stat fields into the byte values exported as metrics.
const mbToBytes = int64(1024 * 1024)
// MetricsHandler builds the HTTP handler for the Prometheus scrape endpoint.
// Each request renders, in Prometheus text format, the newest system sample
// and the newest sample of every GPU; when no samples exist yet the response
// body is empty.
func (m *Monitor) MetricsHandler() http.HandlerFunc {
	serve := func(w http.ResponseWriter, _ *http.Request) {
		sys, gpu := m.Current()
		w.Header().Set("Content-Type", "text/plain; version=0.0.4; charset=utf-8")

		// Only the last (most recently pushed) system sample is exported.
		if n := len(sys); n > 0 {
			writeSysMetrics(w, sys[n-1])
		}
		if len(gpu) != 0 {
			writeGpuMetrics(w, latestPerGPU(gpu))
		}
	}
	return serve
}
// writeSysMetrics renders one system sample to w in Prometheus text format:
// per-core CPU utilisation, memory and swap sizes (converted from MB to
// bytes), the three load averages and, when present, per-interface network
// byte counters.
func writeSysMetrics(w http.ResponseWriter, s SysStat) {
	// family emits the HELP/TYPE preamble that precedes a metric's samples.
	family := func(name, help, kind string) {
		fmt.Fprintf(w, "# HELP %s %s\n", name, help)
		fmt.Fprintf(w, "# TYPE %s %s\n", name, kind)
	}

	family("llamaswap_cpu_util_percent", "CPU utilization per core (0-100)", "gauge")
	for core, util := range s.CpuUtilPerCore {
		fmt.Fprintf(w, "llamaswap_cpu_util_percent{core=\"%d\"} %g\n", core, util)
	}

	// Unlabelled size gauges, stored in MB and exported in bytes.
	sizes := []struct {
		name string
		help string
		mb   int
	}{
		{"llamaswap_memory_total_bytes", "Total memory in bytes", s.MemTotalMB},
		{"llamaswap_memory_used_bytes", "Used memory in bytes", s.MemUsedMB},
		{"llamaswap_memory_free_bytes", "Free memory in bytes", s.MemFreeMB},
		{"llamaswap_swap_total_bytes", "Total swap in bytes", s.SwapTotalMB},
		{"llamaswap_swap_used_bytes", "Used swap in bytes", s.SwapUsedMB},
	}
	for _, g := range sizes {
		family(g.name, g.help, "gauge")
		fmt.Fprintf(w, "%s %d\n", g.name, int64(g.mb)*mbToBytes)
	}

	family("llamaswap_load_average", "Load average", "gauge")
	fmt.Fprintf(w, "llamaswap_load_average{interval=\"1m\"} %g\n", s.LoadAvg1)
	fmt.Fprintf(w, "llamaswap_load_average{interval=\"5m\"} %g\n", s.LoadAvg5)
	fmt.Fprintf(w, "llamaswap_load_average{interval=\"15m\"} %g\n", s.LoadAvg15)

	// The network family is omitted entirely when there are no interfaces.
	if len(s.NetIO) == 0 {
		return
	}
	family("llamaswap_network_bytes_total", "Total network bytes transferred", "counter")
	for _, nic := range s.NetIO {
		label := sanitizeLabel(nic.Name)
		fmt.Fprintf(w, "llamaswap_network_bytes_total{interface=\"%s\",direction=\"recv\"} %d\n", label, nic.BytesRecv)
		fmt.Fprintf(w, "llamaswap_network_bytes_total{interface=\"%s\",direction=\"sent\"} %d\n", label, nic.BytesSent)
	}
}
// writeGpuMetrics renders the given GPU samples to w in Prometheus text
// format, one gauge family per measurement and one sample line per GPU.
// Every sample carries id and name labels; a uuid label is added only for
// GPUs whose UUID is non-empty. Nothing is written for an empty slice.
func writeGpuMetrics(w http.ResponseWriter, gpus []GpuStat) {
	if len(gpus) == 0 {
		return
	}

	// A GPU's label set is the same for all of its metrics, so build it once.
	labels := make([]string, len(gpus))
	for i, g := range gpus {
		set := fmt.Sprintf("id=\"%d\",name=\"%s\"", g.ID, sanitizeLabel(g.Name))
		if g.UUID != "" {
			set += fmt.Sprintf(",uuid=\"%s\"", sanitizeLabel(g.UUID))
		}
		labels[i] = set
	}

	families := []struct {
		name string
		help string
		read func(GpuStat) float64
	}{
		{"llamaswap_gpu_temperature_celsius", "GPU temperature in Celsius", func(g GpuStat) float64 { return float64(g.TempC) }},
		{"llamaswap_gpu_vram_temperature_celsius", "GPU VRAM temperature in Celsius", func(g GpuStat) float64 { return float64(g.VramTempC) }},
		{"llamaswap_gpu_util_percent", "GPU utilization percent (0-100)", func(g GpuStat) float64 { return g.GpuUtilPct }},
		{"llamaswap_gpu_memory_util_percent", "GPU memory utilization percent (0-100)", func(g GpuStat) float64 { return g.MemUtilPct }},
		{"llamaswap_gpu_memory_used_bytes", "GPU memory used in bytes", func(g GpuStat) float64 { return float64(g.MemUsedMB) * float64(mbToBytes) }},
		{"llamaswap_gpu_memory_total_bytes", "GPU memory total in bytes", func(g GpuStat) float64 { return float64(g.MemTotalMB) * float64(mbToBytes) }},
		{"llamaswap_gpu_fan_speed_percent", "GPU fan speed percent (0-100)", func(g GpuStat) float64 { return g.FanSpeedPct }},
		{"llamaswap_gpu_power_draw_watts", "GPU power draw in watts", func(g GpuStat) float64 { return g.PowerDrawW }},
	}

	for _, fam := range families {
		fmt.Fprintf(w, "# HELP %s %s\n", fam.name, fam.help)
		fmt.Fprintf(w, "# TYPE %s gauge\n", fam.name)
		for i, g := range gpus {
			fmt.Fprintf(w, "%s{%s} %g\n", fam.name, labels[i], fam.read(g))
		}
	}
}
// latestPerGPU reduces a series of GPU samples to one sample per GPU ID,
// keeping the one with the newest Timestamp (the first seen wins a tie), and
// returns them ordered by ascending ID.
func latestPerGPU(stats []GpuStat) []GpuStat {
	newest := make(map[int]GpuStat)
	for _, sample := range stats {
		seen, found := newest[sample.ID]
		if found && !sample.Timestamp.After(seen.Timestamp) {
			continue // an equally new or newer sample is already recorded
		}
		newest[sample.ID] = sample
	}

	// Map iteration order is random, so sort for stable output.
	out := make([]GpuStat, 0, len(newest))
	for _, sample := range newest {
		out = append(out, sample)
	}
	sort.Slice(out, func(a, b int) bool { return out[a].ID < out[b].ID })
	return out
}
// labelEscaper performs the escaping required inside a Prometheus label
// value: double quote, backslash and line feed. It is built once because
// sanitizeLabel runs for every label of every sample on each scrape; a
// strings.Replacer is safe for concurrent use.
var labelEscaper = strings.NewReplacer(`"`, `\"`, `\`, `\\`, "\n", `\n`)

// sanitizeLabel escapes characters that are invalid in Prometheus label values.
// The replacement is done in a single pass, so backslashes introduced by the
// escaping itself are never escaped again.
func sanitizeLabel(s string) string {
	return labelEscaper.Replace(s)
}
+248
View File
@@ -0,0 +1,248 @@
package perf
import (
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
"github.com/mostlygeek/llama-swap/proxy/config"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestSanitizeLabel verifies that quotes, backslashes and newlines are
// escaped and that ordinary and empty strings pass through unchanged.
func TestSanitizeLabel(t *testing.T) {
	cases := []struct {
		raw     string
		escaped string
	}{
		{raw: "normal", escaped: "normal"},
		{raw: "", escaped: ""},
		{raw: `with"quote`, escaped: `with\"quote`},
		{raw: `with\backslash`, escaped: `with\\backslash`},
		{raw: "with\nnewline", escaped: `with\nnewline`},
		{raw: `"both\n"`, escaped: `\"both\\n\"`},
	}
	for _, c := range cases {
		assert.Equal(t, c.escaped, sanitizeLabel(c.raw), "input: %q", c.raw)
	}
}
func TestLatestPerGPU_Empty(t *testing.T) {
result := latestPerGPU(nil)
assert.Empty(t, result)
}
// TestLatestPerGPU_Single verifies that a lone sample is returned as is.
func TestLatestPerGPU_Single(t *testing.T) {
	only := GpuStat{ID: 0, Name: "gpu0", Timestamp: time.Now()}

	got := latestPerGPU([]GpuStat{only})

	require.Len(t, got, 1)
	assert.Equal(t, "gpu0", got[0].Name)
}
func TestLatestPerGPU_PicksLatest(t *testing.T) {
earlier := time.Now().Add(-time.Second)
later := time.Now()
stats := []GpuStat{
{ID: 0, Name: "old", TempC: 50, Timestamp: earlier},
{ID: 0, Name: "new", TempC: 70, Timestamp: later},
}
result := latestPerGPU(stats)
require.Len(t, result, 1)
assert.Equal(t, "new", result[0].Name)
assert.Equal(t, 70, result[0].TempC)
}
// TestLatestPerGPU_MultipleGPUsSortedByID verifies that samples of distinct
// GPUs all survive and come back ordered by ascending ID.
func TestLatestPerGPU_MultipleGPUsSortedByID(t *testing.T) {
	ts := time.Now()
	unordered := []GpuStat{
		{ID: 2, Name: "gpu2", Timestamp: ts},
		{ID: 0, Name: "gpu0", Timestamp: ts},
		{ID: 1, Name: "gpu1", Timestamp: ts},
	}

	got := latestPerGPU(unordered)

	require.Len(t, got, 3)
	for pos, wantID := range []int{0, 1, 2} {
		assert.Equal(t, wantID, got[pos].ID)
	}
}
// TestWriteSysMetrics verifies the sample lines emitted for CPU, memory,
// swap, load average and network counters, including MB-to-byte conversion.
func TestWriteSysMetrics(t *testing.T) {
	sample := SysStat{
		CpuUtilPerCore: []float64{10.5, 20.0},
		MemTotalMB:     8192,
		MemUsedMB:      4096,
		MemFreeMB:      4096,
		SwapTotalMB:    2048,
		SwapUsedMB:     512,
		LoadAvg1:       1.5,
		LoadAvg5:       1.2,
		LoadAvg15:      0.9,
		NetIO: []NetIOStat{
			{Name: "eth0", BytesRecv: 1000, BytesSent: 2000},
		},
	}

	rec := httptest.NewRecorder()
	writeSysMetrics(rec, sample)
	out := rec.Body.String()

	expected := []string{
		`llamaswap_cpu_util_percent{core="0"} 10.5`,
		`llamaswap_cpu_util_percent{core="1"} 20`,
		"llamaswap_memory_total_bytes 8589934592",
		"llamaswap_memory_used_bytes 4294967296",
		"llamaswap_memory_free_bytes 4294967296",
		"llamaswap_swap_total_bytes 2147483648",
		"llamaswap_swap_used_bytes 536870912",
		`llamaswap_load_average{interval="1m"} 1.5`,
		`llamaswap_load_average{interval="5m"} 1.2`,
		`llamaswap_load_average{interval="15m"} 0.9`,
		`llamaswap_network_bytes_total{interface="eth0",direction="recv"} 1000`,
		`llamaswap_network_bytes_total{interface="eth0",direction="sent"} 2000`,
	}
	for _, line := range expected {
		assert.Contains(t, out, line)
	}
}
func TestWriteSysMetrics_NoNetIO(t *testing.T) {
rec := httptest.NewRecorder()
writeSysMetrics(rec, SysStat{CpuUtilPerCore: []float64{5.0}})
body := rec.Body.String()
assert.NotContains(t, body, "llamaswap_network_bytes_total")
}
// TestWriteGpuMetrics_Empty verifies that nothing is written without GPUs.
func TestWriteGpuMetrics_Empty(t *testing.T) {
	rec := httptest.NewRecorder()

	writeGpuMetrics(rec, nil)

	out := rec.Body.String()
	assert.Empty(t, out)
}
// TestWriteGpuMetrics verifies that every GPU metric family is emitted with
// the id/name/uuid label set and, where asserted, the expected value.
func TestWriteGpuMetrics(t *testing.T) {
	card := GpuStat{
		ID:          0,
		Name:        "NVIDIA RTX 4090",
		UUID:        "GPU-1234",
		TempC:       75,
		GpuUtilPct:  85.5,
		MemUtilPct:  60.0,
		MemUsedMB:   8192,
		MemTotalMB:  24576,
		FanSpeedPct: 55.0,
		PowerDrawW:  300.5,
	}

	rec := httptest.NewRecorder()
	writeGpuMetrics(rec, []GpuStat{card})
	out := rec.Body.String()

	// All samples of this GPU share one label set.
	const labels = `{id="0",name="NVIDIA RTX 4090",uuid="GPU-1234"}`
	expected := []string{
		"llamaswap_gpu_temperature_celsius" + labels + " 75",
		// VramTempC was not set, so it is reported as 0.
		"llamaswap_gpu_vram_temperature_celsius" + labels + " 0",
		"llamaswap_gpu_util_percent" + labels + " 85.5",
		"llamaswap_gpu_memory_util_percent" + labels + " 60",
		// The byte-valued memory gauges are checked for presence only.
		"llamaswap_gpu_memory_used_bytes" + labels,
		"llamaswap_gpu_memory_total_bytes" + labels,
		"llamaswap_gpu_fan_speed_percent" + labels + " 55",
		"llamaswap_gpu_power_draw_watts" + labels + " 300.5",
	}
	for _, line := range expected {
		assert.Contains(t, out, line)
	}
}
// TestWriteGpuMetrics_VramTemp verifies that core and VRAM temperatures are
// exported as separate metrics with their own values.
func TestWriteGpuMetrics_VramTemp(t *testing.T) {
	card := GpuStat{ID: 0, Name: "AMD RX 7900", UUID: "GPU-5678", TempC: 70, VramTempC: 85}

	rec := httptest.NewRecorder()
	writeGpuMetrics(rec, []GpuStat{card})
	out := rec.Body.String()

	const labels = `{id="0",name="AMD RX 7900",uuid="GPU-5678"}`
	assert.Contains(t, out, "llamaswap_gpu_temperature_celsius"+labels+" 70")
	assert.Contains(t, out, "llamaswap_gpu_vram_temperature_celsius"+labels+" 85")
}
// TestWriteGpuMetrics_EmptyUUID verifies that the uuid label is dropped for
// a GPU without a UUID while the name label is still present.
func TestWriteGpuMetrics_EmptyUUID(t *testing.T) {
	noUUID := GpuStat{ID: 3, Name: "AMD RX 7900", UUID: ""}

	rec := httptest.NewRecorder()
	writeGpuMetrics(rec, []GpuStat{noUUID})
	out := rec.Body.String()

	assert.NotContains(t, out, "uuid=")
	assert.Contains(t, out, `name="AMD RX 7900"`)
}
// TestWriteGpuMetrics_LabelSanitization verifies that quotes and newlines in
// GPU names and UUIDs are escaped in the emitted label values.
func TestWriteGpuMetrics_LabelSanitization(t *testing.T) {
	awkward := GpuStat{ID: 0, Name: `GPU "special"`, UUID: "uuid\nline"}

	rec := httptest.NewRecorder()
	writeGpuMetrics(rec, []GpuStat{awkward})
	out := rec.Body.String()

	assert.Contains(t, out, `name="GPU \"special\""`)
	assert.Contains(t, out, `uuid="uuid\nline"`)
}
// TestMetricsHandler_ContentType verifies that the handler declares the
// Prometheus text exposition content type.
func TestMetricsHandler_ContentType(t *testing.T) {
	mon, err := New(config.PerformanceConfig{}, newTestLogger())
	require.NoError(t, err)

	rec := httptest.NewRecorder()
	mon.MetricsHandler().ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/metrics", nil))

	got := rec.Header().Get("Content-Type")
	assert.Equal(t, "text/plain; version=0.0.4; charset=utf-8", got)
}
// TestMetricsHandler_EmptyStats verifies that a monitor with no recorded
// samples answers 200 with an empty body.
func TestMetricsHandler_EmptyStats(t *testing.T) {
	mon, err := New(config.PerformanceConfig{}, newTestLogger())
	require.NoError(t, err)

	rec := httptest.NewRecorder()
	mon.MetricsHandler().ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/metrics", nil))

	assert.Equal(t, http.StatusOK, rec.Code)
	body := strings.TrimSpace(rec.Body.String())
	assert.Empty(t, body)
}
// TestMetricsHandler_WithSysStats verifies that a recorded system sample
// shows up as CPU and memory metrics in the response.
func TestMetricsHandler_WithSysStats(t *testing.T) {
	mon, err := New(config.PerformanceConfig{}, newTestLogger())
	require.NoError(t, err)

	sample := SysStat{
		Timestamp:      time.Now(),
		CpuUtilPerCore: []float64{25.0},
		MemTotalMB:     4096,
		MemUsedMB:      2048,
		MemFreeMB:      2048,
	}
	mon.sysRing.Push(sample)

	rec := httptest.NewRecorder()
	mon.MetricsHandler().ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/metrics", nil))

	out := rec.Body.String()
	assert.Contains(t, out, "llamaswap_cpu_util_percent")
	assert.Contains(t, out, "llamaswap_memory_total_bytes")
}
// TestMetricsHandler_UsesLatestSysStat verifies that, with several system
// samples recorded, the most recently pushed one is exported.
func TestMetricsHandler_UsesLatestSysStat(t *testing.T) {
	mon, err := New(config.PerformanceConfig{}, newTestLogger())
	require.NoError(t, err)

	ts := time.Now()
	older := SysStat{Timestamp: ts.Add(-time.Second), MemTotalMB: 1000}
	newer := SysStat{Timestamp: ts, MemTotalMB: 8192}
	mon.sysRing.Push(older)
	mon.sysRing.Push(newer)

	rec := httptest.NewRecorder()
	mon.MetricsHandler().ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/metrics", nil))

	// 8192 MB * 1024 * 1024 = 8589934592 bytes.
	assert.Contains(t, rec.Body.String(), "llamaswap_memory_total_bytes 8589934592")
}
// TestMetricsHandler_WithGpuStats verifies that a recorded GPU sample shows
// up as GPU metrics carrying the GPU's name label.
func TestMetricsHandler_WithGpuStats(t *testing.T) {
	mon, err := New(config.PerformanceConfig{}, newTestLogger())
	require.NoError(t, err)

	card := GpuStat{ID: 0, Name: "TestGPU", UUID: "uuid-0", TempC: 65, Timestamp: time.Now()}
	mon.gpuRing.Push([]GpuStat{card})

	rec := httptest.NewRecorder()
	mon.MetricsHandler().ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/metrics", nil))

	out := rec.Body.String()
	assert.Contains(t, out, "llamaswap_gpu_temperature_celsius")
	assert.Contains(t, out, `name="TestGPU"`)
}
+40
View File
@@ -0,0 +1,40 @@
package perf
import "time"
// GpuStat is one sample of a single GPU's state. It is serialised to JSON
// with the field tags below and exported as Prometheus gauges.
type GpuStat struct {
	// Timestamp is when the sample was taken; the newest sample per ID is the
	// one exported as metrics.
	Timestamp time.Time `json:"timestamp"`
	// ID identifies the GPU; samples are grouped and sorted by it.
	ID int `json:"id"`
	// Name is the GPU's display name, exported as the "name" label.
	Name string `json:"name"`
	// UUID is optional; the "uuid" label is omitted when it is empty.
	UUID string `json:"uuid"`
	// TempC is the GPU temperature in degrees Celsius.
	TempC int `json:"temp_c"`
	// VramTempC is the VRAM temperature in degrees Celsius.
	VramTempC int `json:"vram_temp_c"`
	// GpuUtilPct is GPU utilisation as a percentage (0-100).
	GpuUtilPct float64 `json:"gpu_util_pct"`
	// MemUtilPct is GPU memory utilisation as a percentage (0-100).
	MemUtilPct float64 `json:"mem_util_pct"`
	// MemUsedMB is used GPU memory in MB (1 MB = 1024*1024 bytes).
	MemUsedMB int `json:"mem_used_mb"`
	// MemTotalMB is total GPU memory in MB (1 MB = 1024*1024 bytes).
	MemTotalMB int `json:"mem_total_mb"`
	// FanSpeedPct is the fan speed as a percentage (0-100).
	FanSpeedPct float64 `json:"fan_speed_pct"`
	// PowerDrawW is the power draw in watts.
	PowerDrawW float64 `json:"power_draw_w"`
}
// NetIOStat holds the byte counters of one network interface as reported by
// the OS at sampling time. The counters are exported as a Prometheus counter,
// i.e. they are running totals rather than per-interval deltas.
type NetIOStat struct {
	// Name is the interface name, exported as the "interface" label.
	Name string `json:"name"`
	// BytesRecv is the total number of bytes received on the interface.
	BytesRecv uint64 `json:"bytes_recv"`
	// BytesSent is the total number of bytes sent on the interface.
	BytesSent uint64 `json:"bytes_sent"`
}
// SysStat is one sample of system-wide resource usage. Memory and swap sizes
// are stored in MB (1 MB = 1024*1024 bytes) and converted to bytes when
// exported as metrics.
type SysStat struct {
	// Timestamp is when the sample was taken.
	Timestamp time.Time `json:"timestamp"`
	// CpuUtilPerCore is CPU utilisation in percent (0-100), one entry per
	// core; the slice index is used as the core number.
	CpuUtilPerCore []float64 `json:"cpu_util_per_core"`
	// MemTotalMB is total physical memory in MB.
	MemTotalMB int `json:"mem_total_mb"`
	// MemUsedMB is used memory in MB.
	MemUsedMB int `json:"mem_used_mb"`
	// MemFreeMB is free memory in MB.
	MemFreeMB int `json:"mem_free_mb"`
	// SwapTotalMB is total swap space in MB; zero when it was not collected.
	SwapTotalMB int `json:"swap_total_mb"`
	// SwapUsedMB is used swap space in MB; zero when it was not collected.
	SwapUsedMB int `json:"swap_used_mb"`
	// LoadAvg1 is the 1-minute load average; zero when it was not collected.
	LoadAvg1 float64 `json:"load_avg_1"`
	// LoadAvg5 is the 5-minute load average; zero when it was not collected.
	LoadAvg5 float64 `json:"load_avg_5"`
	// LoadAvg15 is the 15-minute load average; zero when it was not collected.
	LoadAvg15 float64 `json:"load_avg_15"`
	// NetIO holds the byte counters of each sampled network interface.
	NetIO []NetIOStat `json:"net_io"`
}
+39
View File
@@ -0,0 +1,39 @@
package ring
// Buffer is a fixed-capacity ring buffer that keeps the most recent values
// pushed into it. It does no locking of its own, so concurrent use must be
// synchronised by the caller.
//
// The zero value is usable and behaves like a buffer of capacity 1, matching
// the minimum capacity NewBuffer enforces.
type Buffer[T any] struct {
	buf  []T // backing storage; len(buf) is the capacity
	head int // index of the oldest stored entry
	size int // number of stored entries, at most len(buf)
}

// NewBuffer returns an empty Buffer holding at most capacity entries.
// Capacities below 1 are raised to 1.
func NewBuffer[T any](capacity int) Buffer[T] {
	if capacity < 1 {
		capacity = 1
	}
	return Buffer[T]{buf: make([]T, capacity)}
}

// Push adds v, overwriting the oldest entry when the buffer is full.
func (r *Buffer[T]) Push(v T) {
	// A zero-value Buffer has no storage yet; allocate the minimum instead
	// of panicking on the index/modulo operations below.
	if len(r.buf) == 0 {
		r.buf = make([]T, 1)
	}
	n := len(r.buf)

	if r.size < n {
		r.buf[(r.head+r.size)%n] = v
		r.size++
		return
	}

	// Full: the slot of the oldest entry receives the new value and the
	// next-oldest entry becomes the head.
	r.buf[r.head] = v
	r.head = (r.head + 1) % n
}

// Slice returns all entries in insertion order as a new slice. The result
// does not alias the buffer's storage. It is nil when the buffer is empty.
func (r *Buffer[T]) Slice() []T {
	if r.size == 0 {
		return nil
	}

	out := make([]T, r.size)
	end := r.head + r.size
	if end <= len(r.buf) {
		// Entries are contiguous.
		copy(out, r.buf[r.head:end])
		return out
	}

	// Entries wrap: the oldest run reaches the end of the storage and the
	// newest run continues from its start.
	n := copy(out, r.buf[r.head:])
	copy(out[n:], r.buf[:end-len(r.buf)])
	return out
}
+44
View File
@@ -0,0 +1,44 @@
package ring
import "testing"
// benchCap is the buffer capacity used by the benchmarks in this file.
const benchCap = 600 // matches default MaxAge/Every (1min / 100ms)
// BenchmarkBuffer_PushNoWrap measures filling a fresh buffer without ever
// wrapping: each iteration allocates a buffer with room for benchCap+1
// entries and pushes benchCap values, so the reported time is per batch of
// benchCap pushes (allocation included).
//
// The per-iteration work is deliberately a constant: a b.Loop benchmark must
// not also loop over b.N, because b.N changes while the loop is running and
// would make the amount of work per iteration vary.
func BenchmarkBuffer_PushNoWrap(b *testing.B) {
	for b.Loop() {
		buf := NewBuffer[int](benchCap + 1)
		for i := range benchCap {
			buf.Push(i)
		}
	}
}
// BenchmarkBuffer_PushWrap measures Push on a buffer of benchCap entries
// that is pushed to b.N times, so later pushes overwrite older entries once
// the buffer has filled.
func BenchmarkBuffer_PushWrap(b *testing.B) {
	ring := NewBuffer[int](benchCap)
	b.ResetTimer() // exclude the allocation above
	for n := 0; n < b.N; n++ {
		ring.Push(n)
	}
}
// BenchmarkBuffer_Slice measures Slice on a buffer that has been filled to
// its full capacity of benchCap entries.
func BenchmarkBuffer_Slice(b *testing.B) {
	ring := NewBuffer[int](benchCap)
	for v := 0; v < benchCap; v++ {
		ring.Push(v)
	}
	b.ResetTimer() // exclude the fill above
	for n := 0; n < b.N; n++ {
		_ = ring.Slice()
	}
}
// BenchmarkBuffer_PushAndSlice measures a mixed workload: every iteration
// pushes one value and every benchCap-th iteration (starting with the first)
// also takes a Slice snapshot.
func BenchmarkBuffer_PushAndSlice(b *testing.B) {
	ring := NewBuffer[int](benchCap)
	b.ResetTimer() // exclude the allocation above
	for n := 0; n < b.N; n++ {
		ring.Push(n)
		if n%benchCap != 0 {
			continue
		}
		_ = ring.Slice()
	}
}
+65
View File
@@ -0,0 +1,65 @@
package ring
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestBuffer_EmptySliceIsNil verifies that Slice on a buffer with no
// entries returns nil.
func TestBuffer_EmptySliceIsNil(t *testing.T) {
	ring := NewBuffer[int](4)
	got := ring.Slice()
	assert.Nil(t, got)
}
// TestBuffer_PushBelowCapacity verifies that a partly filled buffer returns
// exactly the pushed values in order.
func TestBuffer_PushBelowCapacity(t *testing.T) {
	ring := NewBuffer[int](4)
	for _, v := range []int{1, 2} {
		ring.Push(v)
	}
	assert.Equal(t, []int{1, 2}, ring.Slice())
}
// TestBuffer_PushAtCapacity verifies that filling the buffer exactly to
// capacity keeps every value.
func TestBuffer_PushAtCapacity(t *testing.T) {
	ring := NewBuffer[int](3)
	for _, v := range []int{1, 2, 3} {
		ring.Push(v)
	}
	assert.Equal(t, []int{1, 2, 3}, ring.Slice())
}
// TestBuffer_PushOverCapacityEvictsOldest verifies that one push beyond
// capacity drops the oldest value.
func TestBuffer_PushOverCapacityEvictsOldest(t *testing.T) {
	ring := NewBuffer[int](3)
	for _, v := range []int{1, 2, 3, 4} {
		ring.Push(v)
	}
	assert.Equal(t, []int{2, 3, 4}, ring.Slice())
}
// TestBuffer_CapacityOne verifies that a single-slot buffer keeps only the
// most recently pushed value.
func TestBuffer_CapacityOne(t *testing.T) {
	ring := NewBuffer[int](1)
	for _, v := range []int{1, 2} {
		ring.Push(v)
	}
	assert.Equal(t, []int{2}, ring.Slice())
}
// TestBuffer_ZeroCapacityDefaultsToOne verifies that a requested capacity of
// zero still yields a buffer able to hold one value.
func TestBuffer_ZeroCapacityDefaultsToOne(t *testing.T) {
	ring := NewBuffer[int](0)
	ring.Push(42)
	got := ring.Slice()
	assert.Equal(t, []int{42}, got)
}
// TestBuffer_SliceReturnsCopy verifies that modifying the slice returned by
// Slice does not change the buffer's contents.
func TestBuffer_SliceReturnsCopy(t *testing.T) {
	ring := NewBuffer[int](4)
	ring.Push(10)

	snapshot := ring.Slice()
	snapshot[0] = 99 // touches the snapshot only

	assert.Equal(t, []int{10}, ring.Slice())
}
// TestBuffer_InsertionOrderPreservedAfterWrap verifies that after pushing
// twice the capacity, the surviving values are the newest ones in push order.
func TestBuffer_InsertionOrderPreservedAfterWrap(t *testing.T) {
	ring := NewBuffer[int](4)
	for _, v := range []int{1, 2, 3, 4, 5, 6, 7, 8} {
		ring.Push(v)
	}
	assert.Equal(t, []int{5, 6, 7, 8}, ring.Slice())
}