internal/server,shared: support request metadata (#850)

- add support for http handlers in the request chain to append metadata
to the request
- metrics middleware will include metadata in the activity log 
- update Activity UI to support metadata, drag sort columns
- update Activity UI capture dialog to use more screen space

Updates #834
This commit is contained in:
Benson Wong
2026-06-16 21:44:55 -07:00
committed by GitHub
parent 826210dac9
commit d07b063ab6
11 changed files with 398 additions and 133 deletions
+2 -2
View File
@@ -271,7 +271,7 @@ func (s *Server) startPreload() {
if err != nil {
continue
}
req = req.WithContext(shared.SetContext(req.Context(), shared.ReqContextData{Model: modelID, ModelID: modelID}))
req = req.WithContext(shared.SetContext(req.Context(), shared.ReqContextData{Model: modelID, ModelID: modelID, Metadata: make(map[string]string)}))
dw := &discardResponseWriter{status: http.StatusOK}
s.local.ServeHTTP(dw, req)
@@ -338,7 +338,7 @@ func (s *Server) handleUpstream(w http.ResponseWriter, r *http.Request) {
// Strip the /upstream/<model> prefix before forwarding.
r.URL.Path = remainingPath
// Pin the resolved model so the router skips body/query extraction.
*r = *r.WithContext(shared.SetContext(r.Context(), shared.ReqContextData{Model: searchName, ModelID: modelID}))
*r = *r.WithContext(shared.SetContext(r.Context(), shared.ReqContextData{Model: searchName, ModelID: modelID, Metadata: make(map[string]string)}))
switch {
case s.local.Handles(modelID):
+17 -9
View File
@@ -33,15 +33,16 @@ type TokenMetrics struct {
// ActivityLogEntry represents parsed token statistics from llama-server logs.
type ActivityLogEntry struct {
ID int `json:"id"`
Timestamp time.Time `json:"timestamp"`
Model string `json:"model"`
ReqPath string `json:"req_path"`
RespContentType string `json:"resp_content_type"`
RespStatusCode int `json:"resp_status_code"`
Tokens TokenMetrics `json:"tokens"`
DurationMs int `json:"duration_ms"`
HasCapture bool `json:"has_capture"`
ID int `json:"id"`
Timestamp time.Time `json:"timestamp"`
Model string `json:"model"`
ReqPath string `json:"req_path"`
RespContentType string `json:"resp_content_type"`
RespStatusCode int `json:"resp_status_code"`
Tokens TokenMetrics `json:"tokens"`
DurationMs int `json:"duration_ms"`
HasCapture bool `json:"has_capture"`
Metadata map[string]string `json:"metadata,omitempty"`
}
// ActivityLogEvent carries a single activity log entry to event subscribers.
@@ -135,6 +136,13 @@ func (mp *metricsMonitor) record(modelID string, r *http.Request, recorder *resp
DurationMs: int(time.Since(recorder.StartTime()).Milliseconds()),
}
if ctxData, ok := shared.ReadContext(r.Context()); ok && len(ctxData.Metadata) > 0 {
tm.Metadata = make(map[string]string, len(ctxData.Metadata))
for k, v := range ctxData.Metadata {
tm.Metadata[k] = v
}
}
queueAndEmit := func() {
tm.ID = mp.queueMetrics(tm)
mp.emitMetric(tm)
+31
View File
@@ -1,9 +1,13 @@
package server
import (
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
"github.com/mostlygeek/llama-swap/internal/shared"
"github.com/tidwall/gjson"
)
@@ -56,6 +60,33 @@ func TestServer_ProcessStreamingResponse_NoData(t *testing.T) {
}
}
func TestMetricsMonitor_RecordMetadata(t *testing.T) {
mm := newMetricsMonitor(nil, 10, 0)
r := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(`{"usage":{}}`))
r = r.WithContext(shared.SetContext(r.Context(), shared.ReqContextData{
ModelID: "m",
Metadata: map[string]string{"client": "web", "trace": "abc"},
}))
w := httptest.NewRecorder()
copier := newBodyCopier(w)
copier.WriteHeader(http.StatusOK)
copier.Write([]byte(`{"usage":{"prompt_tokens":1,"completion_tokens":2}}`))
mm.record("m", r, copier, 0, nil, nil)
entries := mm.getMetrics()
if len(entries) != 1 {
t.Fatalf("want 1 entry, got %d", len(entries))
}
if entries[0].Metadata["client"] != "web" {
t.Errorf("client = %q, want web", entries[0].Metadata["client"])
}
if entries[0].Metadata["trace"] != "abc" {
t.Errorf("trace = %q, want abc", entries[0].Metadata["trace"])
}
}
func TestServer_ParseMetrics_Infill(t *testing.T) {
// /infill responses are arrays; timings live in the last element.
body := `[{"content":"a"},{"content":"b","timings":{"prompt_n":5,"predicted_n":9,"prompt_ms":10,"predicted_ms":20}}]`