ed77385d08
- When a model is manually loaded show a cancel button and a queued status - Implement cancellation in scheduler.Scheduler interface and FIFO scheduler - Add cache bust query parameter to bypass browser cache Fixes #844
634 lines
20 KiB
Go
634 lines
20 KiB
Go
package scheduler
|
|
|
|
import (
|
|
"errors"
|
|
"io"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/mostlygeek/llama-swap/internal/config"
|
|
"github.com/mostlygeek/llama-swap/internal/logmon"
|
|
"github.com/mostlygeek/llama-swap/internal/process"
|
|
)
|
|
|
|
// FIFO methods all run on the router's single run-loop goroutine, so these
|
|
// tests drive them directly and synchronously. A swap is "completed" by calling
|
|
// OnSwapDone, a served request "finishes" by calling OnServeDone — exactly the
|
|
// events the run loop would deliver. fakeEffects records every side-effect and
|
|
// stubPlanner supplies a fixed eviction set per target.
|
|
|
|
// stubPlanner is a canned planner for tests: every target model maps to a
// pre-programmed list of models to evict.
type stubPlanner struct {
	evict map[string][]string // target model -> models to evict for it
}

// EvictionFor looks up the eviction list programmed for target. The second
// argument is ignored. A target with no entry, or a planner with no table at
// all, yields nil.
func (s *stubPlanner) EvictionFor(target string, _ []string) []string {
	if s.evict != nil {
		return s.evict[target]
	}
	return nil
}

// OnSwapStart does nothing; the stub keeps no per-swap bookkeeping.
func (s *stubPlanner) OnSwapStart(string, []string) {}
|
|
|
|
// grantRec captures a single GrantError or GrantServe invocation. A non-nil
// err marks an error grant. With a nil err the record is a serve grant, and
// serve holds the value GrantServe returned (whether the caller received it).
type grantRec struct {
	model string // model the grant was recorded against
	err   error  // set only for error grants
	serve bool   // serve grants only: true when the grant was received
}
|
|
|
|
// startRec captures the arguments of one StartSwap call.
type startRec struct {
	model string   // model the swap was started for
	evict []string // eviction list passed along with it
}
|
|
|
|
// stopRec captures the arguments of one StopProcesses call.
type stopRec struct {
	timeout time.Duration // timeout handed to StopProcesses
	ids     []string      // process/model IDs that were asked to stop
}
|
|
|
|
// fakeEffects is an in-memory scheduler.Effects. Tests program process states
|
|
// and GrantServe outcomes, then assert on the recorded calls.
|
|
type fakeEffects struct {
|
|
states map[string]process.ProcessState // model -> state; missing => not handled
|
|
serveResult map[string]bool // GrantServe return per model (default true)
|
|
|
|
starts []startRec
|
|
grants []grantRec
|
|
stops []stopRec
|
|
}
|
|
|
|
func newFakeEffects() *fakeEffects {
|
|
return &fakeEffects{
|
|
states: map[string]process.ProcessState{},
|
|
serveResult: map[string]bool{},
|
|
}
|
|
}
|
|
|
|
func (f *fakeEffects) ModelState(modelID string) (process.ProcessState, bool) {
|
|
st, ok := f.states[modelID]
|
|
return st, ok
|
|
}
|
|
|
|
func (f *fakeEffects) RunningModels() map[string]process.ProcessState {
|
|
out := make(map[string]process.ProcessState)
|
|
for id, st := range f.states {
|
|
if st == process.StateStopped || st == process.StateShutdown {
|
|
continue
|
|
}
|
|
out[id] = st
|
|
}
|
|
return out
|
|
}
|
|
|
|
func (f *fakeEffects) StartSwap(modelID string, evict []string) {
|
|
f.starts = append(f.starts, startRec{model: modelID, evict: evict})
|
|
}
|
|
|
|
func (f *fakeEffects) GrantError(req HandlerReq, err error) {
|
|
f.grants = append(f.grants, grantRec{model: req.Model, err: err})
|
|
}
|
|
|
|
func (f *fakeEffects) GrantServe(req HandlerReq, modelID string) bool {
|
|
ok := true
|
|
if v, set := f.serveResult[modelID]; set {
|
|
ok = v
|
|
}
|
|
f.grants = append(f.grants, grantRec{model: modelID, serve: ok})
|
|
return ok
|
|
}
|
|
|
|
func (f *fakeEffects) StopProcesses(timeout time.Duration, ids []string) {
|
|
f.stops = append(f.stops, stopRec{timeout: timeout, ids: ids})
|
|
}
|
|
|
|
// served counts grants that handed modelID a handler and were received.
|
|
func (f *fakeEffects) served(modelID string) int {
|
|
n := 0
|
|
for _, g := range f.grants {
|
|
if g.err == nil && g.serve && g.model == modelID {
|
|
n++
|
|
}
|
|
}
|
|
return n
|
|
}
|
|
|
|
// errored counts error grants, optionally filtered by model ("" = any).
|
|
func (f *fakeEffects) errored(model string) int {
|
|
n := 0
|
|
for _, g := range f.grants {
|
|
if g.err != nil && (model == "" || g.model == model) {
|
|
n++
|
|
}
|
|
}
|
|
return n
|
|
}
|
|
|
|
// startsFor counts StartSwap calls for modelID.
|
|
func (f *fakeEffects) startsFor(modelID string) int {
|
|
n := 0
|
|
for _, s := range f.starts {
|
|
if s.model == modelID {
|
|
n++
|
|
}
|
|
}
|
|
return n
|
|
}
|
|
|
|
func newFIFO(planner Swapper, eff Effects) *FIFO {
|
|
return NewFIFO("test", logmon.NewWriter(io.Discard), planner, config.FifoConfig{}, eff)
|
|
}
|
|
|
|
// req builds a HandlerReq for model with every other field left at its zero
// value.
func req(model string) HandlerReq {
	return HandlerReq{Model: model}
}
|
|
|
|
// reqCh creates a HandlerReq with a unique Respond channel so OnCancel can
|
|
// identify it among queued requests and swap waiters.
|
|
func reqCh(model string) HandlerReq {
|
|
return HandlerReq{
|
|
Model: model,
|
|
Respond: make(chan HandlerResp, 1),
|
|
}
|
|
}
|
|
|
|
func TestFIFO_FastPath(t *testing.T) {
|
|
eff := newFakeEffects()
|
|
eff.states["a"] = process.StateReady
|
|
s := newFIFO(&stubPlanner{}, eff)
|
|
|
|
s.OnRequest(req("a"))
|
|
|
|
if got := eff.startsFor("a"); got != 0 {
|
|
t.Errorf("StartSwap calls=%d want 0 (fast path should not swap)", got)
|
|
}
|
|
if got := eff.served("a"); got != 1 {
|
|
t.Errorf("served(a)=%d want 1", got)
|
|
}
|
|
}
|
|
|
|
func TestFIFO_ModelNotFound(t *testing.T) {
|
|
eff := newFakeEffects() // no states => model unknown
|
|
s := newFIFO(&stubPlanner{}, eff)
|
|
|
|
s.OnRequest(req("ghost"))
|
|
|
|
if got := len(eff.starts); got != 0 {
|
|
t.Errorf("StartSwap calls=%d want 0", got)
|
|
}
|
|
if eff.errored("ghost") != 1 {
|
|
t.Fatalf("want 1 error grant for ghost, grants=%+v", eff.grants)
|
|
}
|
|
if !errors.Is(eff.grants[0].err, ErrModelNotFound) {
|
|
t.Errorf("err=%v want ErrModelNotFound", eff.grants[0].err)
|
|
}
|
|
}
|
|
|
|
func TestFIFO_OnDemandStartThenServe(t *testing.T) {
|
|
eff := newFakeEffects()
|
|
eff.states["a"] = process.StateStopped
|
|
s := newFIFO(&stubPlanner{}, eff)
|
|
|
|
s.OnRequest(req("a"))
|
|
if got := eff.startsFor("a"); got != 1 {
|
|
t.Fatalf("StartSwap(a)=%d want 1", got)
|
|
}
|
|
if got := eff.served("a"); got != 0 {
|
|
t.Errorf("served(a)=%d want 0 before swap completes", got)
|
|
}
|
|
|
|
// Swap finishes, model is now ready.
|
|
eff.states["a"] = process.StateReady
|
|
s.OnSwapDone(SwapDone{ModelID: "a"})
|
|
|
|
if got := eff.served("a"); got != 1 {
|
|
t.Errorf("served(a)=%d want 1 after swap done", got)
|
|
}
|
|
}
|
|
|
|
func TestFIFO_JoinInFlightSwap(t *testing.T) {
|
|
eff := newFakeEffects()
|
|
eff.states["a"] = process.StateStopped
|
|
s := newFIFO(&stubPlanner{}, eff)
|
|
|
|
s.OnRequest(req("a")) // starts swap
|
|
s.OnRequest(req("a")) // joins
|
|
s.OnRequest(req("a")) // joins
|
|
|
|
if got := eff.startsFor("a"); got != 1 {
|
|
t.Fatalf("StartSwap(a)=%d want 1 (all three share one swap)", got)
|
|
}
|
|
|
|
eff.states["a"] = process.StateReady
|
|
s.OnSwapDone(SwapDone{ModelID: "a"})
|
|
|
|
if got := eff.served("a"); got != 3 {
|
|
t.Errorf("served(a)=%d want 3 (one swap serves all waiters)", got)
|
|
}
|
|
}
|
|
|
|
func TestFIFO_SwapDoneError_FailsAllWaiters(t *testing.T) {
|
|
eff := newFakeEffects()
|
|
eff.states["a"] = process.StateStopped
|
|
s := newFIFO(&stubPlanner{}, eff)
|
|
|
|
s.OnRequest(req("a"))
|
|
s.OnRequest(req("a"))
|
|
|
|
s.OnSwapDone(SwapDone{ModelID: "a", Err: errors.New("boom")})
|
|
|
|
if eff.served("a") != 0 {
|
|
t.Errorf("served(a)=%d want 0 on swap error", eff.served("a"))
|
|
}
|
|
if eff.errored("a") != 2 {
|
|
t.Errorf("errored(a)=%d want 2 (both waiters fail)", eff.errored("a"))
|
|
}
|
|
}
|
|
|
|
// TestFIFO_QueueOnEvictionCollision covers a request whose target evicts the
|
|
// model currently being swapped: it must queue until that swap finishes AND its
|
|
// served request drains, because starting it would stop a busy process.
|
|
func TestFIFO_QueueOnEvictionCollision(t *testing.T) {
|
|
eff := newFakeEffects()
|
|
eff.states["a"] = process.StateStopped
|
|
eff.states["b"] = process.StateStopped
|
|
// Loading b evicts a.
|
|
s := newFIFO(&stubPlanner{evict: map[string][]string{"b": {"a"}}}, eff)
|
|
|
|
s.OnRequest(req("a")) // StartSwap(a)
|
|
s.OnRequest(req("b")) // collides with a's in-flight swap -> queue
|
|
if got := eff.startsFor("b"); got != 0 {
|
|
t.Fatalf("b started early: StartSwap(b)=%d want 0", got)
|
|
}
|
|
|
|
// a becomes ready and is granted (now serving, inFlight[a]=1).
|
|
eff.states["a"] = process.StateReady
|
|
s.OnSwapDone(SwapDone{ModelID: "a"})
|
|
if got := eff.startsFor("b"); got != 0 {
|
|
t.Fatalf("b started while a is serving: StartSwap(b)=%d want 0", got)
|
|
}
|
|
|
|
// a's request finishes -> a no longer in-flight -> b may now swap.
|
|
s.OnServeDone(ServeDoneEvent{ModelID: "a"})
|
|
if got := eff.startsFor("b"); got != 1 {
|
|
t.Fatalf("StartSwap(b)=%d want 1 after a drained", got)
|
|
}
|
|
if got := eff.starts[len(eff.starts)-1].evict; len(got) != 1 || got[0] != "a" {
|
|
t.Errorf("b swap evict=%v want [a]", got)
|
|
}
|
|
}
|
|
|
|
// TestFIFO_DisjointSwapsRunInParallel verifies two requests with
|
|
// non-conflicting evict sets both start without waiting for each other.
|
|
func TestFIFO_DisjointSwapsRunInParallel(t *testing.T) {
|
|
eff := newFakeEffects()
|
|
eff.states["a"] = process.StateStopped
|
|
eff.states["b"] = process.StateStopped
|
|
s := newFIFO(&stubPlanner{}, eff) // empty evicts
|
|
|
|
s.OnRequest(req("a"))
|
|
s.OnRequest(req("b"))
|
|
|
|
if eff.startsFor("a") != 1 || eff.startsFor("b") != 1 {
|
|
t.Fatalf("StartSwap a=%d b=%d want 1 each (parallel)", eff.startsFor("a"), eff.startsFor("b"))
|
|
}
|
|
}
|
|
|
|
// TestFIFO_OverlappingEvictSetsDoNotRunInParallel verifies two swaps with
|
|
// different targets that evict the *same* model do not run concurrently: the
|
|
// second must queue rather than double-evict the shared model. Neither target is
|
|
// in the other's evict set, so this is only caught by the evict-set overlap
|
|
// check in collidesWith.
|
|
func TestFIFO_OverlappingEvictSetsDoNotRunInParallel(t *testing.T) {
|
|
eff := newFakeEffects()
|
|
eff.states["a"] = process.StateStopped
|
|
eff.states["b"] = process.StateStopped
|
|
eff.states["x"] = process.StateReady // shared eviction target, running
|
|
// Loading a or b both require evicting x.
|
|
s := newFIFO(&stubPlanner{evict: map[string][]string{"a": {"x"}, "b": {"x"}}}, eff)
|
|
|
|
s.OnRequest(req("a")) // StartSwap(a, [x])
|
|
s.OnRequest(req("b")) // overlaps a's evict set ([x]) -> queue
|
|
if eff.startsFor("a") != 1 {
|
|
t.Fatalf("StartSwap(a)=%d want 1", eff.startsFor("a"))
|
|
}
|
|
if got := eff.startsFor("b"); got != 0 {
|
|
t.Fatalf("b started in parallel while a evicts x: StartSwap(b)=%d want 0", got)
|
|
}
|
|
|
|
// a's swap completes and x is gone; b can now evict nothing and start.
|
|
eff.states["a"] = process.StateReady
|
|
eff.states["x"] = process.StateStopped
|
|
s.OnSwapDone(SwapDone{ModelID: "a"})
|
|
if got := eff.startsFor("b"); got != 1 {
|
|
t.Fatalf("StartSwap(b)=%d want 1 after a's swap drained", got)
|
|
}
|
|
}
|
|
|
|
// TestFIFO_QueueDrainPromotesMultiple verifies completing one swap unblocks
|
|
// every queued request that no longer collides — they all start together.
|
|
func TestFIFO_QueueDrainPromotesMultiple(t *testing.T) {
|
|
eff := newFakeEffects()
|
|
eff.states["a"] = process.StateStopped
|
|
eff.states["b"] = process.StateStopped
|
|
eff.states["c"] = process.StateStopped
|
|
// a's swap evicts both b and c; b and c evict nothing.
|
|
s := newFIFO(&stubPlanner{evict: map[string][]string{"a": {"b", "c"}}}, eff)
|
|
|
|
s.OnRequest(req("a")) // StartSwap(a, [b,c])
|
|
s.OnRequest(req("b")) // collides (in a's evict set) -> queue
|
|
s.OnRequest(req("c")) // collides -> queue
|
|
if eff.startsFor("b") != 0 || eff.startsFor("c") != 0 {
|
|
t.Fatalf("b/c started early")
|
|
}
|
|
|
|
eff.states["a"] = process.StateReady
|
|
s.OnSwapDone(SwapDone{ModelID: "a"})
|
|
|
|
// b and c have empty evict sets and don't evict a, so both start now.
|
|
if eff.startsFor("b") != 1 || eff.startsFor("c") != 1 {
|
|
t.Fatalf("StartSwap b=%d c=%d want 1 each after a done", eff.startsFor("b"), eff.startsFor("c"))
|
|
}
|
|
if eff.served("a") != 1 {
|
|
t.Errorf("served(a)=%d want 1", eff.served("a"))
|
|
}
|
|
}
|
|
|
|
// TestFIFO_QueueCollation verifies duplicate requests collapse into one swap
|
|
// per model: the second request for each model joins the active swap (at arrival
|
|
// or at drain time) rather than triggering its own swap.
|
|
func TestFIFO_QueueCollation(t *testing.T) {
|
|
eff := newFakeEffects()
|
|
for _, id := range []string{"a", "b", "c"} {
|
|
eff.states[id] = process.StateStopped
|
|
}
|
|
// Each model evicts the other two: all swaps are mutually exclusive.
|
|
s := newFIFO(&stubPlanner{evict: map[string][]string{
|
|
"a": {"b", "c"},
|
|
"b": {"a", "c"},
|
|
"c": {"a", "b"},
|
|
}}, eff)
|
|
|
|
for _, id := range []string{"a", "b", "c", "a", "b", "c"} {
|
|
s.OnRequest(req(id))
|
|
}
|
|
|
|
// Drain a, then its served requests, which promotes b; repeat for b -> c.
|
|
drain := func(model string, waiters int) {
|
|
eff.states[model] = process.StateReady
|
|
s.OnSwapDone(SwapDone{ModelID: model})
|
|
for i := 0; i < waiters; i++ {
|
|
s.OnServeDone(ServeDoneEvent{ModelID: model})
|
|
}
|
|
}
|
|
drain("a", 2)
|
|
drain("b", 2)
|
|
drain("c", 2)
|
|
|
|
for _, id := range []string{"a", "b", "c"} {
|
|
if got := eff.startsFor(id); got != 1 {
|
|
t.Errorf("StartSwap(%s)=%d want 1 (collation)", id, got)
|
|
}
|
|
if got := eff.served(id); got != 2 {
|
|
t.Errorf("served(%s)=%d want 2", id, got)
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestFIFO_NoSwapWhileServing verifies a model still handling requests is not
|
|
// evicted: the evicting request waits until every in-flight request drains.
|
|
func TestFIFO_NoSwapWhileServing(t *testing.T) {
|
|
eff := newFakeEffects()
|
|
eff.states["a"] = process.StateReady
|
|
eff.states["b"] = process.StateStopped
|
|
s := newFIFO(&stubPlanner{evict: map[string][]string{"b": {"a"}}}, eff)
|
|
|
|
s.OnRequest(req("a")) // fast path, inFlight[a]=1
|
|
s.OnRequest(req("a")) // fast path, inFlight[a]=2
|
|
s.OnRequest(req("b")) // would evict busy a -> queue
|
|
if eff.startsFor("b") != 0 {
|
|
t.Fatalf("b started while a serving")
|
|
}
|
|
|
|
s.OnServeDone(ServeDoneEvent{ModelID: "a"}) // inFlight[a]=1
|
|
if eff.startsFor("b") != 0 {
|
|
t.Fatalf("b started while a still serving one request")
|
|
}
|
|
|
|
s.OnServeDone(ServeDoneEvent{ModelID: "a"}) // inFlight[a]=0
|
|
if eff.startsFor("b") != 1 {
|
|
t.Fatalf("StartSwap(b)=%d want 1 after a fully drained", eff.startsFor("b"))
|
|
}
|
|
}
|
|
|
|
// TestFIFO_GrantServeFalseDoesNotLeakInFlight verifies that when a caller has
|
|
// walked away (GrantServe returns false) the in-flight count is not bumped, so a
|
|
// later evicting request is not blocked forever.
|
|
func TestFIFO_GrantServeFalseDoesNotLeakInFlight(t *testing.T) {
|
|
eff := newFakeEffects()
|
|
eff.states["a"] = process.StateStopped
|
|
eff.states["b"] = process.StateStopped
|
|
eff.serveResult["a"] = false // a's waiter is gone by grant time
|
|
s := newFIFO(&stubPlanner{evict: map[string][]string{"b": {"a"}}}, eff)
|
|
|
|
s.OnRequest(req("a"))
|
|
eff.states["a"] = process.StateReady
|
|
s.OnSwapDone(SwapDone{ModelID: "a"}) // grant fails, inFlight[a] stays 0
|
|
|
|
// b evicts a; since a is not in-flight, b should start immediately.
|
|
s.OnRequest(req("b"))
|
|
if eff.startsFor("b") != 1 {
|
|
t.Fatalf("StartSwap(b)=%d want 1 (no leaked in-flight on a)", eff.startsFor("b"))
|
|
}
|
|
}
|
|
|
|
// TestFIFO_OnShutdown_FailsAllWaiters verifies shutdown errors every waiter the
|
|
// scheduler holds: active-swap waiters and queued requests alike.
|
|
func TestFIFO_OnShutdown_FailsAllWaiters(t *testing.T) {
|
|
eff := newFakeEffects()
|
|
for _, id := range []string{"a", "b", "c"} {
|
|
eff.states[id] = process.StateStopped
|
|
}
|
|
// a and b load in parallel; c collides with both and queues.
|
|
s := newFIFO(&stubPlanner{evict: map[string][]string{"c": {"a", "b"}}}, eff)
|
|
|
|
s.OnRequest(req("a")) // StartSwap(a)
|
|
s.OnRequest(req("a")) // join a
|
|
s.OnRequest(req("b")) // StartSwap(b)
|
|
s.OnRequest(req("b")) // join b
|
|
s.OnRequest(req("c")) // queued
|
|
|
|
s.OnShutdown(errors.New("shutting down"))
|
|
|
|
if got := eff.errored(""); got != 5 {
|
|
t.Errorf("error grants=%d want 5 (2 a + 2 b + 1 c)", got)
|
|
}
|
|
}
|
|
|
|
func TestFIFO_OnUnload_ReleasesActiveWaiters(t *testing.T) {
|
|
eff := newFakeEffects()
|
|
eff.states["a"] = process.StateStopped
|
|
s := newFIFO(&stubPlanner{}, eff)
|
|
|
|
s.OnRequest(req("a")) // active swap a with one waiter
|
|
s.OnRequest(req("a")) // join
|
|
|
|
s.OnUnload([]string{"a"}, time.Second)
|
|
|
|
if got := eff.errored("a"); got != 2 {
|
|
t.Errorf("errored(a)=%d want 2 (active swap waiters released)", got)
|
|
}
|
|
if len(eff.stops) != 1 || len(eff.stops[0].ids) != 1 || eff.stops[0].ids[0] != "a" {
|
|
t.Errorf("StopProcesses=%+v want one call stopping [a]", eff.stops)
|
|
}
|
|
if eff.stops[0].timeout != time.Second {
|
|
t.Errorf("StopProcesses timeout=%v want 1s", eff.stops[0].timeout)
|
|
}
|
|
}
|
|
|
|
func TestFIFO_OnUnload_DropsQueuedRequests(t *testing.T) {
|
|
eff := newFakeEffects()
|
|
eff.states["a"] = process.StateStopped
|
|
eff.states["b"] = process.StateStopped
|
|
// b evicts a, so a request for b queues while a is loading.
|
|
s := newFIFO(&stubPlanner{evict: map[string][]string{"b": {"a"}}}, eff)
|
|
|
|
s.OnRequest(req("a")) // StartSwap(a)
|
|
s.OnRequest(req("b")) // queued
|
|
|
|
s.OnUnload([]string{"b"}, time.Second)
|
|
|
|
if got := eff.errored("b"); got != 1 {
|
|
t.Errorf("errored(b)=%d want 1 (queued request dropped)", got)
|
|
}
|
|
if got := eff.startsFor("b"); got != 0 {
|
|
t.Errorf("StartSwap(b)=%d want 0 (b should never start)", got)
|
|
}
|
|
// a's swap is untouched: its waiter is neither served nor errored yet.
|
|
if eff.served("a") != 0 || eff.errored("a") != 0 {
|
|
t.Errorf("a swap should be untouched: served=%d errored=%d", eff.served("a"), eff.errored("a"))
|
|
}
|
|
}
|
|
|
|
// TestFIFO_PriorityQueueOrder verifies queued requests are ordered by descending
|
|
// priority, with arrival (FIFO) order preserved among equal-priority models.
|
|
func TestFIFO_PriorityQueueOrder(t *testing.T) {
|
|
eff := newFakeEffects()
|
|
for _, m := range []string{"z", "A", "B", "C", "D"} {
|
|
eff.states[m] = process.StateStopped
|
|
}
|
|
// z's swap evicts every other model, so any request that arrives while z is
|
|
// loading collides with z's in-flight swap and parks in the queue.
|
|
planner := &stubPlanner{evict: map[string][]string{"z": {"A", "B", "C", "D"}}}
|
|
cfg := config.FifoConfig{Priority: map[string]int{"A": 10, "B": 5, "C": 5, "D": 1}}
|
|
s := NewFIFO("test", logmon.NewWriter(io.Discard), planner, cfg, eff)
|
|
|
|
s.OnRequest(req("z")) // StartSwap(z, [A,B,C,D])
|
|
|
|
// Arrive out of priority order; B before C exercises FIFO tie-breaking.
|
|
for _, m := range []string{"B", "D", "C", "A"} {
|
|
s.OnRequest(req(m))
|
|
}
|
|
|
|
got := make([]string, len(s.queued))
|
|
for i, q := range s.queued {
|
|
got[i] = q.Model
|
|
}
|
|
want := []string{"A", "B", "C", "D"}
|
|
if len(got) != len(want) {
|
|
t.Fatalf("queue=%v want %v", got, want)
|
|
}
|
|
for i := range want {
|
|
if got[i] != want[i] {
|
|
t.Fatalf("queue=%v want %v", got, want)
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestFIFO_OnCancel_QueuedRequest verifies that cancelling a queued request
|
|
// prevents drainQueue from ever starting a model load for it. Without OnCancel
|
|
// the dead request would sit in the queue until a drain triggers a wasted swap.
|
|
func TestFIFO_OnCancel_QueuedRequest(t *testing.T) {
|
|
eff := newFakeEffects()
|
|
eff.states["a"] = process.StateStopped
|
|
eff.states["b"] = process.StateStopped
|
|
// b evicts a, so a request for b queues while a is loading.
|
|
s := newFIFO(&stubPlanner{evict: map[string][]string{"b": {"a"}}}, eff)
|
|
|
|
s.OnRequest(req("a")) // StartSwap(a)
|
|
|
|
cancelledReq := reqCh("b")
|
|
s.OnRequest(cancelledReq) // queued (collides with a's in-flight swap)
|
|
if len(s.queued) != 1 {
|
|
t.Fatalf("queue len=%d want 1 before cancel", len(s.queued))
|
|
}
|
|
|
|
// Client disconnects.
|
|
s.OnCancel(cancelledReq)
|
|
|
|
if len(s.queued) != 0 {
|
|
t.Fatalf("queue len=%d want 0 after cancel", len(s.queued))
|
|
}
|
|
|
|
// a's swap finishes; drainQueue runs but b is gone — no swap for b.
|
|
eff.states["a"] = process.StateReady
|
|
s.OnSwapDone(SwapDone{ModelID: "a"})
|
|
|
|
if got := eff.startsFor("b"); got != 0 {
|
|
t.Errorf("StartSwap(b)=%d want 0 (cancelled request should not trigger a load)", got)
|
|
}
|
|
}
|
|
|
|
// TestFIFO_OnCancel_SwapWaiter verifies that cancelling a request that joined an
|
|
// in-flight swap removes it from the waiter list. When the swap completes, the
|
|
// cancelled waiter receives no grant and does not bump the in-flight count.
|
|
func TestFIFO_OnCancel_SwapWaiter(t *testing.T) {
|
|
eff := newFakeEffects()
|
|
eff.states["a"] = process.StateStopped
|
|
s := newFIFO(&stubPlanner{}, eff)
|
|
|
|
liveReq := reqCh("a")
|
|
cancelledReq := reqCh("a")
|
|
s.OnRequest(liveReq) // starts swap
|
|
s.OnRequest(cancelledReq) // joins
|
|
|
|
if sw := s.active["a"]; len(sw.waiters) != 2 {
|
|
t.Fatalf("waiters=%d want 2", len(sw.waiters))
|
|
}
|
|
|
|
s.OnCancel(cancelledReq)
|
|
|
|
if sw := s.active["a"]; len(sw.waiters) != 1 {
|
|
t.Fatalf("waiters=%d want 1 after cancel", len(sw.waiters))
|
|
}
|
|
|
|
// Swap finishes: only the live waiter is granted.
|
|
eff.states["a"] = process.StateReady
|
|
s.OnSwapDone(SwapDone{ModelID: "a"})
|
|
|
|
if got := eff.served("a"); got != 1 {
|
|
t.Errorf("served(a)=%d want 1 (only the non-cancelled waiter)", got)
|
|
}
|
|
}
|
|
|
|
// TestFIFO_OnCancel_NotPresent is a no-op: cancelling a request that was already
|
|
// granted (and is no longer queued or waiting) must not affect anything.
|
|
func TestFIFO_OnCancel_NotPresent(t *testing.T) {
|
|
eff := newFakeEffects()
|
|
eff.states["a"] = process.StateReady
|
|
s := newFIFO(&stubPlanner{}, eff)
|
|
|
|
r := reqCh("a")
|
|
s.OnRequest(r) // fast-path served immediately
|
|
|
|
// Cancel after grant — should be a harmless no-op.
|
|
s.OnCancel(r)
|
|
|
|
if got := eff.served("a"); got != 1 {
|
|
t.Errorf("served(a)=%d want 1 (cancel of granted request is a no-op)", got)
|
|
}
|
|
if len(s.queued) != 0 {
|
|
t.Errorf("queue should be empty, len=%d", len(s.queued))
|
|
}
|
|
}
|