Files
llama-swap/internal/router/helpers_test.go
T
Benson Wong 9b3a33d7b9 Implement new scheduler (#823)
- introduce internal/router/scheduler to decouple routing, swapping and
queuing into interface contracts.
- introduce a new `routing` configuration section that supersedes
`matrix` and `group` while maintaining backwards compatibility
- add FIFO scheduler with prioritized queuing 
- add internal/router/design.md as developer documentation on
implementing new schedulers and routers

Fixes #797
2026-06-10 20:34:25 -07:00

219 lines
5.2 KiB
Go

package router
import (
"context"
"fmt"
"io"
"net/http"
"net/http/httptest"
"strings"
"sync"
"sync/atomic"
"testing"
"time"
"github.com/mostlygeek/llama-swap/internal/config"
"github.com/mostlygeek/llama-swap/internal/logmon"
"github.com/mostlygeek/llama-swap/internal/process"
)
// groupRouting builds a normalized RoutingConfig for the group router, mirroring
// what config.LoadConfigFromReader produces. Tests use it to populate
// config.Config.Routing without going through LoadConfig.
func groupRouting(groups map[string]config.GroupConfig) config.RoutingConfig {
return config.RoutingConfig{
Router: config.RouterConfig{
Use: "group",
Settings: config.RouterSettings{Groups: groups},
},
}
}
// fakeProcess is an in-memory implementation of process.Process used to drive
// the routers through their state machine without spawning real upstreams.
type fakeProcess struct {
id string
mu sync.Mutex
state process.ProcessState
readyCh chan struct{}
stopCh chan struct{}
runStarted chan struct{} // closed on the first Run call
stopStarted chan struct{} // closed on the first Stop call
autoReady bool
// serveBlock, when non-nil, makes ServeHTTP receive from it before
// writing its response. Tests use this to hold a request in-flight.
// Closing the channel releases every blocked ServeHTTP caller.
serveBlock chan struct{}
// serveStarted is closed on the first ServeHTTP entry, letting tests
// wait deterministically for the handler to begin executing.
serveStarted chan struct{}
// stopBlock, when non-nil, makes Stop receive from it (after signalling
// stopStarted) before completing. Tests use this to prove that several
// Stop calls can be in flight simultaneously.
stopBlock chan struct{}
runCalls atomic.Int32
stopCalls atomic.Int32
serveCalls atomic.Int32
// inFlightServe counts ServeHTTP calls currently inside the handler.
// stoppedWhileServing flips true if Stop is ever called while that
// counter is non-zero — a direct, race-free observation of the
// "swap mid-request" anti-property.
inFlightServe atomic.Int32
stoppedWhileServing atomic.Bool
}
func newFakeProcess(id string) *fakeProcess {
return &fakeProcess{
id: id,
state: process.StateStopped,
readyCh: make(chan struct{}),
stopCh: make(chan struct{}),
runStarted: make(chan struct{}),
stopStarted: make(chan struct{}),
serveStarted: make(chan struct{}),
}
}
func (f *fakeProcess) setState(s process.ProcessState) {
f.mu.Lock()
defer f.mu.Unlock()
f.state = s
if s == process.StateReady {
select {
case <-f.readyCh:
default:
close(f.readyCh)
}
}
}
func (f *fakeProcess) State() process.ProcessState {
f.mu.Lock()
defer f.mu.Unlock()
return f.state
}
func (f *fakeProcess) markReady() { f.setState(process.StateReady) }
func (f *fakeProcess) Run(_ time.Duration) error {
f.runCalls.Add(1)
f.mu.Lock()
if f.state != process.StateStopped {
s := f.state
f.mu.Unlock()
return fmt.Errorf("fakeProcess %s: Run called while %s", f.id, s)
}
f.state = process.StateStarting
sc := f.stopCh
select {
case <-f.runStarted:
default:
close(f.runStarted)
}
f.mu.Unlock()
if f.autoReady {
f.setState(process.StateReady)
}
<-sc
return nil
}
func (f *fakeProcess) Stop(_ time.Duration) error {
f.stopCalls.Add(1)
if f.inFlightServe.Load() > 0 {
f.stoppedWhileServing.Store(true)
}
f.mu.Lock()
select {
case <-f.stopStarted:
default:
close(f.stopStarted)
}
f.mu.Unlock()
// Test hook: hold Stop here so the test can prove multiple Stops are
// in flight at the same time before any of them complete.
if f.stopBlock != nil {
<-f.stopBlock
}
f.mu.Lock()
defer f.mu.Unlock()
if f.state == process.StateStopped {
return nil
}
f.state = process.StateStopped
select {
case <-f.stopCh:
default:
close(f.stopCh)
}
return nil
}
func (f *fakeProcess) WaitReady(ctx context.Context) error {
f.mu.Lock()
if f.state == process.StateReady {
f.mu.Unlock()
return nil
}
rc := f.readyCh
f.mu.Unlock()
select {
case <-rc:
return nil
case <-ctx.Done():
return ctx.Err()
}
}
func (f *fakeProcess) Logger() *logmon.Monitor { return logmon.NewWriter(io.Discard) }
func (f *fakeProcess) ServeHTTP(w http.ResponseWriter, _ *http.Request) {
f.serveCalls.Add(1)
f.inFlightServe.Add(1)
defer f.inFlightServe.Add(-1)
f.mu.Lock()
select {
case <-f.serveStarted:
default:
close(f.serveStarted)
}
f.mu.Unlock()
if f.serveBlock != nil {
<-f.serveBlock
}
w.WriteHeader(http.StatusOK)
fmt.Fprintf(w, "ok:%s", f.id)
}
// waitProcessed drains n events from ch, fataling on timeout. One event fires
// per handlerReq or swapDone fully absorbed by run().
func waitProcessed(t *testing.T, ch chan struct{}, n int) {
t.Helper()
for i := 0; i < n; i++ {
select {
case <-ch:
case <-time.After(2 * time.Second):
t.Fatalf("waitProcessed: only %d/%d events received", i, n)
}
}
}
func newRequest(model string) *http.Request {
body := fmt.Sprintf(`{"model":%q}`, model)
r := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(body))
r.Header.Set("Content-Type", "application/json")
return r
}
func newRequestCtx(ctx context.Context, model string) *http.Request {
return newRequest(model).WithContext(ctx)
}