internal/server,shared: support request metadata (#850)
- Add support for HTTP handlers in the request chain to append metadata to the request.
- Metrics middleware will include metadata in the activity log.
- Update Activity UI to support metadata and drag-to-sort columns.
- Update Activity UI capture dialog to use more screen space.

Updates #834
This commit is contained in:
@@ -3,6 +3,7 @@ package scheduler
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/mostlygeek/llama-swap/internal/config"
|
||||
@@ -278,6 +279,11 @@ func (s *FIFO) grantHandler(req HandlerReq, modelID string) {
|
||||
s.effects.GrantError(req, shared.ConcurrencyLimitError{})
|
||||
return
|
||||
}
|
||||
|
||||
if err := shared.SetReqData(req.Ctx, "fifo_priority", strconv.Itoa(s.cfg.Priority[req.Model])); err != nil {
|
||||
s.logger.Debugf("failed to set fifo_priority metadata: %v", err)
|
||||
}
|
||||
|
||||
if s.effects.GrantServe(req, modelID) {
|
||||
s.inFlight[modelID]++
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package scheduler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"io"
|
||||
"net/http"
|
||||
@@ -54,8 +55,9 @@ type stopRec struct {
|
||||
// fakeEffects is an in-memory scheduler.Effects. Tests program process states
|
||||
// and GrantServe outcomes, then assert on the recorded calls.
|
||||
type fakeEffects struct {
|
||||
states map[string]process.ProcessState // model -> state; missing => not handled
|
||||
serveResult map[string]bool // GrantServe return per model (default true)
|
||||
states map[string]process.ProcessState // model -> state; missing => not handled
|
||||
serveResult map[string]bool // GrantServe return per model (default true)
|
||||
lastServeReq HandlerReq
|
||||
|
||||
starts []startRec
|
||||
grants []grantRec
|
||||
@@ -98,6 +100,7 @@ func (f *fakeEffects) GrantServe(req HandlerReq, modelID string) bool {
|
||||
if v, set := f.serveResult[modelID]; set {
|
||||
ok = v
|
||||
}
|
||||
f.lastServeReq = req
|
||||
f.grants = append(f.grants, grantRec{model: modelID, serve: ok})
|
||||
return ok
|
||||
}
|
||||
@@ -169,6 +172,27 @@ func TestFIFO_FastPath(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestFIFO_GrantSetsPriorityMetadata checks that a request granted by the FIFO
// scheduler carries a "fifo_priority" metadata entry equal to the priority
// configured for its model (7 for model "a" here), rendered as a decimal string.
func TestFIFO_GrantSetsPriorityMetadata(t *testing.T) {
|
||||
eff := newFakeEffects()
|
||||
eff.states["a"] = process.StateReady
|
||||
cfg := config.FifoConfig{Priority: map[string]int{"a": 7}}
|
||||
s := NewFIFO("test", logmon.NewWriter(io.Discard), &stubPlanner{}, cfg, nil, eff)
|
||||
|
||||
// Attach request context data with an initialized (non-nil) Metadata map.
ctx := shared.SetContext(context.Background(), shared.ReqContextData{ModelID: "a", Metadata: make(map[string]string)})
|
||||
s.OnRequest(HandlerReq{Model: "a", Ctx: ctx})
|
||||
|
||||
// The request must have been granted exactly once before inspecting metadata.
if got := eff.served("a"); got != 1 {
|
||||
t.Fatalf("served(a)=%d want 1", got)
|
||||
}
|
||||
// lastServeReq is the request the fake recorded in GrantServe; read the
// metadata back from that request's context.
data, ok := shared.ReadContext(eff.lastServeReq.Ctx)
|
||||
if !ok {
|
||||
t.Fatal("context data missing from granted request")
|
||||
}
|
||||
if data.Metadata["fifo_priority"] != "7" {
|
||||
t.Errorf("fifo_priority = %q, want 7", data.Metadata["fifo_priority"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestFIFO_ModelNotFound(t *testing.T) {
|
||||
eff := newFakeEffects() // no states => model unknown
|
||||
s := newFIFO(&stubPlanner{}, eff)
|
||||
|
||||
Reference in New Issue
Block a user