internal/server,shared: support request metadata (#850)

- add support for http handlers in the request chain to append metadata
to the request
- metrics middleware will include metadata in the activity log 
- update Activity UI to support metadata, drag sort columns
- update Activity UI capture dialog to use more screen space

Updates #834
This commit is contained in:
Benson Wong
2026-06-16 21:44:55 -07:00
committed by GitHub
parent 826210dac9
commit d07b063ab6
11 changed files with 398 additions and 133 deletions
+6
View File
@@ -3,6 +3,7 @@ package scheduler
import (
"fmt"
"sort"
"strconv"
"time"
"github.com/mostlygeek/llama-swap/internal/config"
@@ -278,6 +279,11 @@ func (s *FIFO) grantHandler(req HandlerReq, modelID string) {
s.effects.GrantError(req, shared.ConcurrencyLimitError{})
return
}
if err := shared.SetReqData(req.Ctx, "fifo_priority", strconv.Itoa(s.cfg.Priority[req.Model])); err != nil {
s.logger.Debugf("failed to set fifo_priority metadata: %v", err)
}
if s.effects.GrantServe(req, modelID) {
s.inFlight[modelID]++
}
+26 -2
View File
@@ -1,6 +1,7 @@
package scheduler
import (
"context"
"errors"
"io"
"net/http"
@@ -54,8 +55,9 @@ type stopRec struct {
// fakeEffects is an in-memory scheduler.Effects. Tests program process states
// and GrantServe outcomes, then assert on the recorded calls.
type fakeEffects struct {
states map[string]process.ProcessState // model -> state; missing => not handled
serveResult map[string]bool // GrantServe return per model (default true)
states map[string]process.ProcessState // model -> state; missing => not handled
serveResult map[string]bool // GrantServe return per model (default true)
lastServeReq HandlerReq
starts []startRec
grants []grantRec
@@ -98,6 +100,7 @@ func (f *fakeEffects) GrantServe(req HandlerReq, modelID string) bool {
if v, set := f.serveResult[modelID]; set {
ok = v
}
f.lastServeReq = req
f.grants = append(f.grants, grantRec{model: modelID, serve: ok})
return ok
}
@@ -169,6 +172,27 @@ func TestFIFO_FastPath(t *testing.T) {
}
}
func TestFIFO_GrantSetsPriorityMetadata(t *testing.T) {
eff := newFakeEffects()
eff.states["a"] = process.StateReady
cfg := config.FifoConfig{Priority: map[string]int{"a": 7}}
s := NewFIFO("test", logmon.NewWriter(io.Discard), &stubPlanner{}, cfg, nil, eff)
ctx := shared.SetContext(context.Background(), shared.ReqContextData{ModelID: "a", Metadata: make(map[string]string)})
s.OnRequest(HandlerReq{Model: "a", Ctx: ctx})
if got := eff.served("a"); got != 1 {
t.Fatalf("served(a)=%d want 1", got)
}
data, ok := shared.ReadContext(eff.lastServeReq.Ctx)
if !ok {
t.Fatal("context data missing from granted request")
}
if data.Metadata["fifo_priority"] != "7" {
t.Errorf("fifo_priority = %q, want 7", data.Metadata["fifo_priority"])
}
}
func TestFIFO_ModelNotFound(t *testing.T) {
eff := newFakeEffects() // no states => model unknown
s := newFIFO(&stubPlanner{}, eff)