Refactor Activity Page (#710)

- inference handles to store an activity record for all inference endpoints
- add path, status code, and content type to Activities page
- toggle on/off columns no Activities page 
- add configurable capture level for inference endpoints so large binary blobs are not stored in memory
- store captures in compressed binary format
This commit is contained in:
Benson Wong
2026-04-28 20:33:03 -07:00
committed by GitHub
parent a846c4f18c
commit fd3c28ffc5
16 changed files with 1397 additions and 651 deletions
+343 -170
View File
@@ -12,8 +12,10 @@ import (
"testing"
"time"
"github.com/fxamacker/cbor/v2"
"github.com/gin-gonic/gin"
"github.com/mostlygeek/llama-swap/event"
"github.com/mostlygeek/llama-swap/proxy/cache"
"github.com/stretchr/testify/assert"
"github.com/tidwall/gjson"
)
@@ -22,27 +24,29 @@ func TestMetricsMonitor_AddMetrics(t *testing.T) {
t.Run("adds metrics and assigns ID", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10, 0)
metric := TokenMetrics{
Model: "test-model",
InputTokens: 100,
OutputTokens: 50,
metric := ActivityLogEntry{
Model: "test-model",
Tokens: TokenMetrics{
InputTokens: 100,
OutputTokens: 50,
},
}
mm.addMetrics(metric)
mm.queueMetrics(metric)
metrics := mm.getMetrics()
assert.Equal(t, 1, len(metrics))
assert.Equal(t, 0, metrics[0].ID)
assert.Equal(t, "test-model", metrics[0].Model)
assert.Equal(t, 100, metrics[0].InputTokens)
assert.Equal(t, 50, metrics[0].OutputTokens)
assert.Equal(t, 100, metrics[0].Tokens.InputTokens)
assert.Equal(t, 50, metrics[0].Tokens.OutputTokens)
})
t.Run("increments ID for each metric", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10, 0)
for i := 0; i < 5; i++ {
mm.addMetrics(TokenMetrics{Model: "model"})
mm.queueMetrics(ActivityLogEntry{Model: "model"})
}
metrics := mm.getMetrics()
@@ -57,9 +61,11 @@ func TestMetricsMonitor_AddMetrics(t *testing.T) {
// Add 5 metrics
for i := 0; i < 5; i++ {
mm.addMetrics(TokenMetrics{
Model: "model",
InputTokens: i,
mm.queueMetrics(ActivityLogEntry{
Model: "model",
Tokens: TokenMetrics{
InputTokens: i,
},
})
}
@@ -72,29 +78,32 @@ func TestMetricsMonitor_AddMetrics(t *testing.T) {
assert.Equal(t, 4, metrics[2].ID)
})
t.Run("emits TokenMetricsEvent", func(t *testing.T) {
t.Run("emits ActivityLogEvent", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10, 0)
receivedEvent := make(chan TokenMetricsEvent, 1)
cancel := event.On(func(e TokenMetricsEvent) {
receivedEvent := make(chan ActivityLogEvent, 1)
cancel := event.On(func(e ActivityLogEvent) {
receivedEvent <- e
})
defer cancel()
metric := TokenMetrics{
Model: "test-model",
InputTokens: 100,
OutputTokens: 50,
metric := ActivityLogEntry{
Model: "test-model",
Tokens: TokenMetrics{
InputTokens: 100,
OutputTokens: 50,
},
}
mm.addMetrics(metric)
mm.queueMetrics(metric)
mm.emitMetric(metric)
select {
case evt := <-receivedEvent:
assert.Equal(t, 0, evt.Metrics.ID)
assert.Equal(t, "test-model", evt.Metrics.Model)
assert.Equal(t, 100, evt.Metrics.InputTokens)
assert.Equal(t, 50, evt.Metrics.OutputTokens)
assert.Equal(t, 100, evt.Metrics.Tokens.InputTokens)
assert.Equal(t, 50, evt.Metrics.Tokens.OutputTokens)
case <-time.After(1 * time.Second):
t.Fatal("timeout waiting for event")
}
@@ -111,8 +120,8 @@ func TestMetricsMonitor_GetMetrics(t *testing.T) {
t.Run("returns copy of metrics", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10, 0)
mm.addMetrics(TokenMetrics{Model: "model1"})
mm.addMetrics(TokenMetrics{Model: "model2"})
mm.queueMetrics(ActivityLogEntry{Model: "model1"})
mm.queueMetrics(ActivityLogEntry{Model: "model2"})
metrics1 := mm.getMetrics()
metrics2 := mm.getMetrics()
@@ -135,7 +144,7 @@ func TestMetricsMonitor_GetMetricsJSON(t *testing.T) {
assert.NoError(t, err)
assert.NotNil(t, jsonData)
var metrics []TokenMetrics
var metrics []ActivityLogEntry
err = json.Unmarshal(jsonData, &metrics)
assert.NoError(t, err)
assert.Equal(t, 0, len(metrics))
@@ -143,23 +152,27 @@ func TestMetricsMonitor_GetMetricsJSON(t *testing.T) {
t.Run("returns valid JSON with metrics", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10, 0)
mm.addMetrics(TokenMetrics{
Model: "model1",
InputTokens: 100,
OutputTokens: 50,
TokensPerSecond: 25.5,
mm.queueMetrics(ActivityLogEntry{
Model: "model1",
Tokens: TokenMetrics{
InputTokens: 100,
OutputTokens: 50,
TokensPerSecond: 25.5,
},
})
mm.addMetrics(TokenMetrics{
Model: "model2",
InputTokens: 200,
OutputTokens: 100,
TokensPerSecond: 30.0,
mm.queueMetrics(ActivityLogEntry{
Model: "model2",
Tokens: TokenMetrics{
InputTokens: 200,
OutputTokens: 100,
TokensPerSecond: 30.0,
},
})
jsonData, err := mm.getMetricsJSON()
assert.NoError(t, err)
var metrics []TokenMetrics
var metrics []ActivityLogEntry
err = json.Unmarshal(jsonData, &metrics)
assert.NoError(t, err)
assert.Equal(t, 2, len(metrics))
@@ -190,14 +203,14 @@ func TestMetricsMonitor_WrapHandler(t *testing.T) {
rec := httptest.NewRecorder()
ginCtx, _ := gin.CreateTestContext(rec)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, captureAll, nextHandler)
assert.NoError(t, err)
metrics := mm.getMetrics()
assert.Equal(t, 1, len(metrics))
assert.Equal(t, "test-model", metrics[0].Model)
assert.Equal(t, 100, metrics[0].InputTokens)
assert.Equal(t, 50, metrics[0].OutputTokens)
assert.Equal(t, 100, metrics[0].Tokens.InputTokens)
assert.Equal(t, 50, metrics[0].Tokens.OutputTokens)
})
t.Run("successful request with timings data", func(t *testing.T) {
@@ -226,17 +239,17 @@ func TestMetricsMonitor_WrapHandler(t *testing.T) {
rec := httptest.NewRecorder()
ginCtx, _ := gin.CreateTestContext(rec)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, captureAll, nextHandler)
assert.NoError(t, err)
metrics := mm.getMetrics()
assert.Equal(t, 1, len(metrics))
assert.Equal(t, "test-model", metrics[0].Model)
assert.Equal(t, 100, metrics[0].InputTokens)
assert.Equal(t, 50, metrics[0].OutputTokens)
assert.Equal(t, 20, metrics[0].CachedTokens)
assert.Equal(t, 150.5, metrics[0].PromptPerSecond)
assert.Equal(t, 25.5, metrics[0].TokensPerSecond)
assert.Equal(t, 100, metrics[0].Tokens.InputTokens)
assert.Equal(t, 50, metrics[0].Tokens.OutputTokens)
assert.Equal(t, 20, metrics[0].Tokens.CachedTokens)
assert.Equal(t, 150.5, metrics[0].Tokens.PromptPerSecond)
assert.Equal(t, 25.5, metrics[0].Tokens.TokensPerSecond)
assert.Equal(t, 2000, metrics[0].DurationMs) // 500 + 1500
})
@@ -265,18 +278,18 @@ data: [DONE]
rec := httptest.NewRecorder()
ginCtx, _ := gin.CreateTestContext(rec)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, captureAll, nextHandler)
assert.NoError(t, err)
metrics := mm.getMetrics()
assert.Equal(t, 1, len(metrics))
assert.Equal(t, "test-model", metrics[0].Model)
// When timings data is present, it takes precedence
assert.Equal(t, 10, metrics[0].InputTokens)
assert.Equal(t, 20, metrics[0].OutputTokens)
assert.Equal(t, 10, metrics[0].Tokens.InputTokens)
assert.Equal(t, 20, metrics[0].Tokens.OutputTokens)
})
t.Run("non-OK status code does not record metrics", func(t *testing.T) {
t.Run("non-OK status code records partial metrics", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10, 0)
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
@@ -289,11 +302,16 @@ data: [DONE]
rec := httptest.NewRecorder()
ginCtx, _ := gin.CreateTestContext(rec)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, captureAll, nextHandler)
assert.NoError(t, err)
metrics := mm.getMetrics()
assert.Equal(t, 0, len(metrics))
assert.Equal(t, 1, len(metrics))
assert.Equal(t, "test-model", metrics[0].Model)
assert.Equal(t, "/test", metrics[0].ReqPath)
assert.Equal(t, http.StatusBadRequest, metrics[0].RespStatusCode)
assert.Equal(t, 0, metrics[0].Tokens.InputTokens)
assert.Equal(t, 0, metrics[0].Tokens.OutputTokens)
})
t.Run("empty response body records minimal metrics", func(t *testing.T) {
@@ -308,14 +326,14 @@ data: [DONE]
rec := httptest.NewRecorder()
ginCtx, _ := gin.CreateTestContext(rec)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, captureAll, nextHandler)
assert.NoError(t, err)
metrics := mm.getMetrics()
assert.Equal(t, 1, len(metrics))
assert.Equal(t, "test-model", metrics[0].Model)
assert.Equal(t, 0, metrics[0].InputTokens)
assert.Equal(t, 0, metrics[0].OutputTokens)
assert.Equal(t, 0, metrics[0].Tokens.InputTokens)
assert.Equal(t, 0, metrics[0].Tokens.OutputTokens)
})
t.Run("invalid JSON records minimal metrics", func(t *testing.T) {
@@ -332,14 +350,14 @@ data: [DONE]
rec := httptest.NewRecorder()
ginCtx, _ := gin.CreateTestContext(rec)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, captureAll, nextHandler)
assert.NoError(t, err) // Errors after response is sent are logged, not returned
metrics := mm.getMetrics()
assert.Equal(t, 1, len(metrics))
assert.Equal(t, "test-model", metrics[0].Model)
assert.Equal(t, 0, metrics[0].InputTokens)
assert.Equal(t, 0, metrics[0].OutputTokens)
assert.Equal(t, 0, metrics[0].Tokens.InputTokens)
assert.Equal(t, 0, metrics[0].Tokens.OutputTokens)
})
t.Run("next handler error is propagated", func(t *testing.T) {
@@ -354,7 +372,7 @@ data: [DONE]
rec := httptest.NewRecorder()
ginCtx, _ := gin.CreateTestContext(rec)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, captureAll, nextHandler)
assert.Equal(t, expectedErr, err)
metrics := mm.getMetrics()
@@ -377,14 +395,14 @@ data: [DONE]
rec := httptest.NewRecorder()
ginCtx, _ := gin.CreateTestContext(rec)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, captureAll, nextHandler)
assert.NoError(t, err)
metrics := mm.getMetrics()
assert.Equal(t, 1, len(metrics))
assert.Equal(t, "test-model", metrics[0].Model)
assert.Equal(t, 0, metrics[0].InputTokens)
assert.Equal(t, 0, metrics[0].OutputTokens)
assert.Equal(t, 0, metrics[0].Tokens.InputTokens)
assert.Equal(t, 0, metrics[0].Tokens.OutputTokens)
})
t.Run("infill request extracts timings from last array element", func(t *testing.T) {
@@ -416,17 +434,17 @@ data: [DONE]
rec := httptest.NewRecorder()
ginCtx, _ := gin.CreateTestContext(rec)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, captureAll, nextHandler)
assert.NoError(t, err)
metrics := mm.getMetrics()
assert.Equal(t, 1, len(metrics))
assert.Equal(t, "test-model", metrics[0].Model)
assert.Equal(t, 150, metrics[0].InputTokens)
assert.Equal(t, 75, metrics[0].OutputTokens)
assert.Equal(t, 30, metrics[0].CachedTokens)
assert.Equal(t, 200.5, metrics[0].PromptPerSecond)
assert.Equal(t, 35.5, metrics[0].TokensPerSecond)
assert.Equal(t, 150, metrics[0].Tokens.InputTokens)
assert.Equal(t, 75, metrics[0].Tokens.OutputTokens)
assert.Equal(t, 30, metrics[0].Tokens.CachedTokens)
assert.Equal(t, 200.5, metrics[0].Tokens.PromptPerSecond)
assert.Equal(t, 35.5, metrics[0].Tokens.TokensPerSecond)
assert.Equal(t, 2400, metrics[0].DurationMs) // 600 + 1800
})
@@ -446,14 +464,14 @@ data: [DONE]
rec := httptest.NewRecorder()
ginCtx, _ := gin.CreateTestContext(rec)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, captureAll, nextHandler)
assert.NoError(t, err)
metrics := mm.getMetrics()
assert.Equal(t, 1, len(metrics))
assert.Equal(t, "test-model", metrics[0].Model)
assert.Equal(t, 0, metrics[0].InputTokens)
assert.Equal(t, 0, metrics[0].OutputTokens)
assert.Equal(t, 0, metrics[0].Tokens.InputTokens)
assert.Equal(t, 0, metrics[0].Tokens.OutputTokens)
})
}
@@ -507,7 +525,7 @@ func TestMetricsMonitor_ResponseBodyCopier(t *testing.T) {
}
func TestMetricsMonitor_Concurrent(t *testing.T) {
t.Run("concurrent addMetrics is safe", func(t *testing.T) {
t.Run("concurrent queueMetrics is safe", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 1000, 0)
var wg sync.WaitGroup
@@ -519,10 +537,12 @@ func TestMetricsMonitor_Concurrent(t *testing.T) {
go func(id int) {
defer wg.Done()
for j := 0; j < metricsPerGoroutine; j++ {
mm.addMetrics(TokenMetrics{
Model: "test-model",
InputTokens: id*1000 + j,
OutputTokens: j,
mm.queueMetrics(ActivityLogEntry{
Model: "test-model",
Tokens: TokenMetrics{
InputTokens: id*1000 + j,
OutputTokens: j,
},
})
}
}(i)
@@ -542,7 +562,7 @@ func TestMetricsMonitor_Concurrent(t *testing.T) {
// Writer goroutine
go func() {
for i := 0; i < 50; i++ {
mm.addMetrics(TokenMetrics{Model: "test-model"})
mm.queueMetrics(ActivityLogEntry{Model: "test-model"})
time.Sleep(1 * time.Millisecond)
}
done <- true
@@ -586,10 +606,10 @@ func TestMetricsMonitor_ParseMetrics(t *testing.T) {
metrics, err := parseMetrics("test-model", start, usage, timings)
assert.NoError(t, err)
assert.Equal(t, 5, metrics.InputTokens)
assert.Equal(t, 1, metrics.OutputTokens)
assert.Equal(t, 10.0, metrics.PromptPerSecond)
assert.Equal(t, 2.0, metrics.TokensPerSecond)
assert.Equal(t, 5, metrics.Tokens.InputTokens)
assert.Equal(t, 1, metrics.Tokens.OutputTokens)
assert.Equal(t, 10.0, metrics.Tokens.PromptPerSecond)
assert.Equal(t, 2.0, metrics.Tokens.TokensPerSecond)
assert.GreaterOrEqual(t, metrics.DurationMs, 5000)
})
@@ -623,14 +643,14 @@ func TestMetricsMonitor_ParseMetrics(t *testing.T) {
rec := httptest.NewRecorder()
ginCtx, _ := gin.CreateTestContext(rec)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, captureAll, nextHandler)
assert.NoError(t, err)
metrics := mm.getMetrics()
assert.Equal(t, 1, len(metrics))
// Should use timings values, not usage values
assert.Equal(t, 100, metrics[0].InputTokens)
assert.Equal(t, 50, metrics[0].OutputTokens)
assert.Equal(t, 100, metrics[0].Tokens.InputTokens)
assert.Equal(t, 50, metrics[0].Tokens.OutputTokens)
})
t.Run("handles missing cache_n in timings", func(t *testing.T) {
@@ -658,12 +678,12 @@ func TestMetricsMonitor_ParseMetrics(t *testing.T) {
rec := httptest.NewRecorder()
ginCtx, _ := gin.CreateTestContext(rec)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, captureAll, nextHandler)
assert.NoError(t, err)
metrics := mm.getMetrics()
assert.Equal(t, 1, len(metrics))
assert.Equal(t, -1, metrics[0].CachedTokens) // Default value when not present
assert.Equal(t, -1, metrics[0].Tokens.CachedTokens) // Default value when not present
})
}
@@ -693,13 +713,13 @@ data: [DONE]
rec := httptest.NewRecorder()
ginCtx, _ := gin.CreateTestContext(rec)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, captureAll, nextHandler)
assert.NoError(t, err)
metrics := mm.getMetrics()
assert.Equal(t, 1, len(metrics))
assert.Equal(t, 100, metrics[0].InputTokens)
assert.Equal(t, 50, metrics[0].OutputTokens)
assert.Equal(t, 100, metrics[0].Tokens.InputTokens)
assert.Equal(t, 50, metrics[0].Tokens.OutputTokens)
})
t.Run("handles streaming with no valid JSON records minimal metrics", func(t *testing.T) {
@@ -722,14 +742,14 @@ data: [DONE]
rec := httptest.NewRecorder()
ginCtx, _ := gin.CreateTestContext(rec)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, captureAll, nextHandler)
assert.NoError(t, err)
metrics := mm.getMetrics()
assert.Equal(t, 1, len(metrics))
assert.Equal(t, "test-model", metrics[0].Model)
assert.Equal(t, 0, metrics[0].InputTokens)
assert.Equal(t, 0, metrics[0].OutputTokens)
assert.Equal(t, 0, metrics[0].Tokens.InputTokens)
assert.Equal(t, 0, metrics[0].Tokens.OutputTokens)
})
t.Run("v1/responses format with nested response.usage", func(t *testing.T) {
@@ -751,14 +771,14 @@ data: [DONE]
rec := httptest.NewRecorder()
ginCtx, _ := gin.CreateTestContext(rec)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, captureAll, nextHandler)
assert.NoError(t, err)
metrics := mm.getMetrics()
assert.Equal(t, 1, len(metrics))
assert.Equal(t, "test-model", metrics[0].Model)
assert.Equal(t, 17, metrics[0].InputTokens)
assert.Equal(t, 23, metrics[0].OutputTokens)
assert.Equal(t, 17, metrics[0].Tokens.InputTokens)
assert.Equal(t, 23, metrics[0].Tokens.OutputTokens)
})
t.Run("handles empty streaming response records minimal metrics", func(t *testing.T) {
@@ -777,14 +797,14 @@ data: [DONE]
rec := httptest.NewRecorder()
ginCtx, _ := gin.CreateTestContext(rec)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, captureAll, nextHandler)
assert.NoError(t, err)
metrics := mm.getMetrics()
assert.Equal(t, 1, len(metrics))
assert.Equal(t, "test-model", metrics[0].Model)
assert.Equal(t, 0, metrics[0].InputTokens)
assert.Equal(t, 0, metrics[0].OutputTokens)
assert.Equal(t, 0, metrics[0].Tokens.InputTokens)
assert.Equal(t, 0, metrics[0].Tokens.OutputTokens)
})
}
@@ -792,20 +812,22 @@ data: [DONE]
func BenchmarkMetricsMonitor_AddMetrics(b *testing.B) {
mm := newMetricsMonitor(testLogger, 1000, 0)
metric := TokenMetrics{
Model: "test-model",
CachedTokens: 100,
InputTokens: 500,
OutputTokens: 250,
PromptPerSecond: 1200.5,
TokensPerSecond: 45.8,
DurationMs: 5000,
Timestamp: time.Now(),
metric := ActivityLogEntry{
Model: "test-model",
Tokens: TokenMetrics{
CachedTokens: 100,
InputTokens: 500,
OutputTokens: 250,
PromptPerSecond: 1200.5,
TokensPerSecond: 45.8,
},
DurationMs: 5000,
Timestamp: time.Now(),
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
mm.addMetrics(metric)
mm.queueMetrics(metric)
}
}
@@ -813,20 +835,22 @@ func BenchmarkMetricsMonitor_AddMetrics_SmallBuffer(b *testing.B) {
// Test performance with a smaller buffer where wrapping occurs more frequently
mm := newMetricsMonitor(testLogger, 100, 0)
metric := TokenMetrics{
Model: "test-model",
CachedTokens: 100,
InputTokens: 500,
OutputTokens: 250,
PromptPerSecond: 1200.5,
TokensPerSecond: 45.8,
DurationMs: 5000,
Timestamp: time.Now(),
metric := ActivityLogEntry{
Model: "test-model",
Tokens: TokenMetrics{
CachedTokens: 100,
InputTokens: 500,
OutputTokens: 250,
PromptPerSecond: 1200.5,
TokensPerSecond: 45.8,
},
DurationMs: 5000,
Timestamp: time.Now(),
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
mm.addMetrics(metric)
mm.queueMetrics(metric)
}
}
@@ -855,14 +879,14 @@ func TestMetricsMonitor_WrapHandler_Compression(t *testing.T) {
rec := httptest.NewRecorder()
ginCtx, _ := gin.CreateTestContext(rec)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, captureAll, nextHandler)
assert.NoError(t, err)
metrics := mm.getMetrics()
assert.Equal(t, 1, len(metrics))
assert.Equal(t, "test-model", metrics[0].Model)
assert.Equal(t, 100, metrics[0].InputTokens)
assert.Equal(t, 50, metrics[0].OutputTokens)
assert.Equal(t, 100, metrics[0].Tokens.InputTokens)
assert.Equal(t, 50, metrics[0].Tokens.OutputTokens)
})
t.Run("deflate encoded response", func(t *testing.T) {
@@ -889,14 +913,14 @@ func TestMetricsMonitor_WrapHandler_Compression(t *testing.T) {
rec := httptest.NewRecorder()
ginCtx, _ := gin.CreateTestContext(rec)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, captureAll, nextHandler)
assert.NoError(t, err)
metrics := mm.getMetrics()
assert.Equal(t, 1, len(metrics))
assert.Equal(t, "test-model", metrics[0].Model)
assert.Equal(t, 200, metrics[0].InputTokens)
assert.Equal(t, 75, metrics[0].OutputTokens)
assert.Equal(t, 200, metrics[0].Tokens.InputTokens)
assert.Equal(t, 75, metrics[0].Tokens.OutputTokens)
})
t.Run("invalid gzip data records minimal metrics", func(t *testing.T) {
@@ -917,14 +941,14 @@ func TestMetricsMonitor_WrapHandler_Compression(t *testing.T) {
rec := httptest.NewRecorder()
ginCtx, _ := gin.CreateTestContext(rec)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, captureAll, nextHandler)
assert.NoError(t, err) // Should not return error, just log warning
metrics := mm.getMetrics()
assert.Equal(t, 1, len(metrics))
assert.Equal(t, "test-model", metrics[0].Model)
assert.Equal(t, 0, metrics[0].InputTokens)
assert.Equal(t, 0, metrics[0].OutputTokens)
assert.Equal(t, 0, metrics[0].Tokens.InputTokens)
assert.Equal(t, 0, metrics[0].Tokens.OutputTokens)
})
t.Run("unknown encoding treated as uncompressed", func(t *testing.T) {
@@ -944,13 +968,13 @@ func TestMetricsMonitor_WrapHandler_Compression(t *testing.T) {
rec := httptest.NewRecorder()
ginCtx, _ := gin.CreateTestContext(rec)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, captureAll, nextHandler)
assert.NoError(t, err)
metrics := mm.getMetrics()
assert.Equal(t, 1, len(metrics))
assert.Equal(t, 300, metrics[0].InputTokens)
assert.Equal(t, 100, metrics[0].OutputTokens)
assert.Equal(t, 300, metrics[0].Tokens.InputTokens)
assert.Equal(t, 100, metrics[0].Tokens.OutputTokens)
})
}
@@ -989,7 +1013,7 @@ func TestMetricsMonitor_AddCapture(t *testing.T) {
mm.addCapture(capture)
// Should not store capture
assert.Nil(t, mm.getCaptureByID(0, false))
assert.Nil(t, mm.getCaptureByID(0))
})
t.Run("adds capture when enabled", func(t *testing.T) {
@@ -1002,22 +1026,18 @@ func TestMetricsMonitor_AddCapture(t *testing.T) {
}
mm.addCapture(capture)
retrieved := mm.getCaptureByID(0, true)
assert.NotNil(t, retrieved)
var decoded ReqRespCapture
err := json.Unmarshal(retrieved, &decoded)
assert.NoError(t, err)
assert.Equal(t, 0, decoded.ID)
assert.Equal(t, []byte("test request"), decoded.ReqBody)
assert.Equal(t, []byte("test response"), decoded.RespBody)
captured := mm.getCaptureByID(0)
assert.NotNil(t, captured)
assert.Equal(t, 0, captured.ID)
assert.Equal(t, []byte("test request"), captured.ReqBody)
assert.Equal(t, []byte("test response"), captured.RespBody)
})
t.Run("evicts oldest when exceeding max size", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10, 5)
// Each full ReqRespCapture with 80 bytes random data compresses to ~185 bytes.
// 2 captures = ~370 bytes, 3 captures = ~555 bytes. Set limit so only 2 fit.
mm.maxCaptureSize = 450
mm.captureCache = cache.New(450)
// Use random-looking data that doesn't compress well with zstd
rng := rand.New(rand.NewSource(42))
@@ -1033,16 +1053,14 @@ func TestMetricsMonitor_AddCapture(t *testing.T) {
// Adding capture3 should evict capture1
mm.addCapture(capture3)
assert.Nil(t, mm.getCaptureByID(0, true), "capture 0 should be evicted")
retrieved := mm.getCaptureByID(1, true)
assert.NotNil(t, retrieved, "capture 1 should exist")
retrieved = mm.getCaptureByID(2, true)
assert.NotNil(t, retrieved, "capture 2 should exist")
assert.Nil(t, mm.getCaptureByID(0), "capture 0 should be evicted")
assert.NotNil(t, mm.getCaptureByID(1), "capture 1 should exist")
assert.NotNil(t, mm.getCaptureByID(2), "capture 2 should exist")
})
t.Run("skips capture larger than max size", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10, 5)
mm.maxCaptureSize = 100
mm.captureCache = cache.New(100)
// Use random data that doesn't compress well to create an oversized capture
rng := rand.New(rand.NewSource(99))
@@ -1050,7 +1068,7 @@ func TestMetricsMonitor_AddCapture(t *testing.T) {
rng.Read(largeCapture.ReqBody)
mm.addCapture(largeCapture)
assert.Nil(t, mm.getCaptureByID(0, false), "oversized capture should not be stored")
assert.Nil(t, mm.getCaptureByID(0), "oversized capture should not be stored")
})
}
@@ -1058,7 +1076,7 @@ func TestMetricsMonitor_GetCaptureByID(t *testing.T) {
t.Run("returns nil for non-existent ID", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10, 5)
assert.Nil(t, mm.getCaptureByID(999, false))
assert.Nil(t, mm.getCaptureByID(999))
})
t.Run("returns decompressed capture by ID", func(t *testing.T) {
@@ -1071,18 +1089,14 @@ func TestMetricsMonitor_GetCaptureByID(t *testing.T) {
}
mm.addCapture(capture)
retrieved := mm.getCaptureByID(42, true)
assert.NotNil(t, retrieved)
var decoded ReqRespCapture
err := json.Unmarshal(retrieved, &decoded)
assert.NoError(t, err)
assert.Equal(t, 42, decoded.ID)
assert.Equal(t, []byte("test request"), decoded.ReqBody)
assert.Equal(t, []byte("test response"), decoded.RespBody)
captured := mm.getCaptureByID(42)
assert.NotNil(t, captured)
assert.Equal(t, 42, captured.ID)
assert.Equal(t, []byte("test request"), captured.ReqBody)
assert.Equal(t, []byte("test response"), captured.RespBody)
})
t.Run("returns compressed bytes when decompress=false", func(t *testing.T) {
t.Run("stores data as compressed bytes", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10, 5)
capture := ReqRespCapture{
@@ -1092,10 +1106,12 @@ func TestMetricsMonitor_GetCaptureByID(t *testing.T) {
}
mm.addCapture(capture)
compressed := mm.getCaptureByID(42, false)
compressed, exists := mm.getCompressedBytes(42)
assert.True(t, exists)
assert.NotNil(t, compressed)
// Compressed data should not be valid JSON (it's zstd-compressed)
assert.False(t, gjson.ValidBytes(compressed))
// Compressed data should not be valid CBOR (it's zstd-compressed)
var decoded ReqRespCapture
assert.Error(t, cbor.Unmarshal(compressed, &decoded))
})
}
@@ -1164,7 +1180,7 @@ func TestMetricsMonitor_WrapHandler_Capture(t *testing.T) {
rec := httptest.NewRecorder()
ginCtx, _ := gin.CreateTestContext(rec)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, captureAll, nextHandler)
assert.NoError(t, err)
// Check metric was recorded
@@ -1173,12 +1189,8 @@ func TestMetricsMonitor_WrapHandler_Capture(t *testing.T) {
metricID := metrics[0].ID
// Check capture was stored with same ID (decompressed)
captureData := mm.getCaptureByID(metricID, true)
assert.NotNil(t, captureData)
var capture ReqRespCapture
err = json.Unmarshal(captureData, &capture)
assert.NoError(t, err)
capture := mm.getCaptureByID(metricID)
assert.NotNil(t, capture)
assert.Equal(t, metricID, capture.ID)
assert.Equal(t, []byte(requestBody), capture.ReqBody)
assert.Equal(t, []byte(responseBody), capture.RespBody)
@@ -1206,7 +1218,7 @@ func TestMetricsMonitor_WrapHandler_Capture(t *testing.T) {
rec := httptest.NewRecorder()
ginCtx, _ := gin.CreateTestContext(rec)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, captureAll, nextHandler)
assert.NoError(t, err)
// Metrics should still be recorded
@@ -1214,7 +1226,168 @@ func TestMetricsMonitor_WrapHandler_Capture(t *testing.T) {
assert.Equal(t, 1, len(metrics))
// But no capture
capture := mm.getCaptureByID(metrics[0].ID, false)
assert.Nil(t, capture)
assert.Nil(t, mm.getCaptureByID(metrics[0].ID))
})
}
func TestMetricsMonitor_WrapHandler_PartialCaptures(t *testing.T) {
requestBody := `{"model": "test"}`
responseBody := `{"usage": {"prompt_tokens": 100, "completion_tokens": 50}}`
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
w.Header().Set("Content-Type", "application/json")
w.Header().Set("X-Custom", "header-value")
w.WriteHeader(http.StatusOK)
w.Write([]byte(responseBody))
return nil
}
t.Run("only request headers", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10, 100)
req := httptest.NewRequest("POST", "/test", bytes.NewBufferString(requestBody))
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Authorization", "Bearer secret")
rec := httptest.NewRecorder()
ginCtx, _ := gin.CreateTestContext(rec)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, captureReqHeaders, nextHandler)
assert.NoError(t, err)
capture := mm.getCaptureByID(mm.getMetrics()[0].ID)
assert.NotNil(t, capture)
assert.Equal(t, "application/json", capture.ReqHeaders["Content-Type"])
assert.Equal(t, "[REDACTED]", capture.ReqHeaders["Authorization"])
assert.Nil(t, capture.ReqBody)
assert.Nil(t, capture.RespHeaders)
assert.Nil(t, capture.RespBody)
})
t.Run("only request body", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10, 100)
req := httptest.NewRequest("POST", "/test", bytes.NewBufferString(requestBody))
req.Header.Set("Content-Type", "application/json")
rec := httptest.NewRecorder()
ginCtx, _ := gin.CreateTestContext(rec)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, captureReqBody, nextHandler)
assert.NoError(t, err)
capture := mm.getCaptureByID(mm.getMetrics()[0].ID)
assert.NotNil(t, capture)
assert.Nil(t, capture.ReqHeaders)
assert.Equal(t, []byte(requestBody), capture.ReqBody)
assert.Nil(t, capture.RespHeaders)
assert.Nil(t, capture.RespBody)
})
t.Run("only response headers", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10, 100)
req := httptest.NewRequest("POST", "/test", bytes.NewBufferString(requestBody))
rec := httptest.NewRecorder()
ginCtx, _ := gin.CreateTestContext(rec)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, captureRespHeaders, nextHandler)
assert.NoError(t, err)
capture := mm.getCaptureByID(mm.getMetrics()[0].ID)
assert.NotNil(t, capture)
assert.Nil(t, capture.ReqHeaders)
assert.Nil(t, capture.ReqBody)
assert.Equal(t, "application/json", capture.RespHeaders["Content-Type"])
assert.Equal(t, "header-value", capture.RespHeaders["X-Custom"])
assert.Nil(t, capture.RespBody)
})
t.Run("only response body", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10, 100)
req := httptest.NewRequest("POST", "/test", bytes.NewBufferString(requestBody))
rec := httptest.NewRecorder()
ginCtx, _ := gin.CreateTestContext(rec)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, captureRespBody, nextHandler)
assert.NoError(t, err)
capture := mm.getCaptureByID(mm.getMetrics()[0].ID)
assert.NotNil(t, capture)
assert.Nil(t, capture.ReqHeaders)
assert.Nil(t, capture.ReqBody)
assert.Nil(t, capture.RespHeaders)
assert.Equal(t, []byte(responseBody), capture.RespBody)
})
t.Run("captureReqAll", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10, 100)
req := httptest.NewRequest("POST", "/test", bytes.NewBufferString(requestBody))
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Authorization", "Bearer secret")
rec := httptest.NewRecorder()
ginCtx, _ := gin.CreateTestContext(rec)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, captureReqAll, nextHandler)
assert.NoError(t, err)
capture := mm.getCaptureByID(mm.getMetrics()[0].ID)
assert.NotNil(t, capture)
assert.Equal(t, "application/json", capture.ReqHeaders["Content-Type"])
assert.Equal(t, "[REDACTED]", capture.ReqHeaders["Authorization"])
assert.Equal(t, []byte(requestBody), capture.ReqBody)
assert.Nil(t, capture.RespHeaders)
assert.Nil(t, capture.RespBody)
})
t.Run("captureRespAll", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10, 100)
req := httptest.NewRequest("POST", "/test", bytes.NewBufferString(requestBody))
rec := httptest.NewRecorder()
ginCtx, _ := gin.CreateTestContext(rec)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, captureRespAll, nextHandler)
assert.NoError(t, err)
capture := mm.getCaptureByID(mm.getMetrics()[0].ID)
assert.NotNil(t, capture)
assert.Nil(t, capture.ReqHeaders)
assert.Nil(t, capture.ReqBody)
assert.Equal(t, "application/json", capture.RespHeaders["Content-Type"])
assert.Equal(t, "header-value", capture.RespHeaders["X-Custom"])
assert.Equal(t, []byte(responseBody), capture.RespBody)
})
t.Run("no flags", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10, 100)
req := httptest.NewRequest("POST", "/test", bytes.NewBufferString(requestBody))
req.Header.Set("Content-Type", "application/json")
rec := httptest.NewRecorder()
ginCtx, _ := gin.CreateTestContext(rec)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, captureFields(0), nextHandler)
assert.NoError(t, err)
capture := mm.getCaptureByID(mm.getMetrics()[0].ID)
assert.NotNil(t, capture)
assert.Nil(t, capture.ReqHeaders)
assert.Nil(t, capture.ReqBody)
assert.Nil(t, capture.RespHeaders)
assert.Nil(t, capture.RespBody)
})
t.Run("mixed flags req headers and resp body", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10, 100)
req := httptest.NewRequest("POST", "/test", bytes.NewBufferString(requestBody))
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Authorization", "Bearer secret")
rec := httptest.NewRecorder()
ginCtx, _ := gin.CreateTestContext(rec)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, captureReqHeaders|captureRespBody, nextHandler)
assert.NoError(t, err)
capture := mm.getCaptureByID(mm.getMetrics()[0].ID)
assert.NotNil(t, capture)
assert.Equal(t, "application/json", capture.ReqHeaders["Content-Type"])
assert.Equal(t, "[REDACTED]", capture.ReqHeaders["Authorization"])
assert.Nil(t, capture.ReqBody)
assert.Nil(t, capture.RespHeaders)
assert.Equal(t, []byte(responseBody), capture.RespBody)
})
}