9b3a33d7b9
- introduce internal/router/scheduler to decouple routing, swapping and queuing into interface contracts. - introduce a new `routing` configuration section that supersedes `matrix` and `group` while maintaining backwards compatibility - add FIFO scheduler with prioritized queuing - add internal/router/design.md as developer documentation on implementing new schedulers and routers Fixes #797
219 lines
5.2 KiB
Go
219 lines
5.2 KiB
Go
package router
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"strings"
|
|
"sync"
|
|
"sync/atomic"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/mostlygeek/llama-swap/internal/config"
|
|
"github.com/mostlygeek/llama-swap/internal/logmon"
|
|
"github.com/mostlygeek/llama-swap/internal/process"
|
|
)
|
|
|
|
// groupRouting builds a normalized RoutingConfig for the group router, mirroring
|
|
// what config.LoadConfigFromReader produces. Tests use it to populate
|
|
// config.Config.Routing without going through LoadConfig.
|
|
func groupRouting(groups map[string]config.GroupConfig) config.RoutingConfig {
|
|
return config.RoutingConfig{
|
|
Router: config.RouterConfig{
|
|
Use: "group",
|
|
Settings: config.RouterSettings{Groups: groups},
|
|
},
|
|
}
|
|
}
|
|
|
|
// fakeProcess is an in-memory implementation of process.Process used to drive
|
|
// the routers through their state machine without spawning real upstreams.
|
|
type fakeProcess struct {
|
|
id string
|
|
|
|
mu sync.Mutex
|
|
state process.ProcessState
|
|
readyCh chan struct{}
|
|
stopCh chan struct{}
|
|
runStarted chan struct{} // closed on the first Run call
|
|
stopStarted chan struct{} // closed on the first Stop call
|
|
|
|
autoReady bool
|
|
|
|
// serveBlock, when non-nil, makes ServeHTTP receive from it before
|
|
// writing its response. Tests use this to hold a request in-flight.
|
|
// Closing the channel releases every blocked ServeHTTP caller.
|
|
serveBlock chan struct{}
|
|
// serveStarted is closed on the first ServeHTTP entry, letting tests
|
|
// wait deterministically for the handler to begin executing.
|
|
serveStarted chan struct{}
|
|
// stopBlock, when non-nil, makes Stop receive from it (after signalling
|
|
// stopStarted) before completing. Tests use this to prove that several
|
|
// Stop calls can be in flight simultaneously.
|
|
stopBlock chan struct{}
|
|
|
|
runCalls atomic.Int32
|
|
stopCalls atomic.Int32
|
|
serveCalls atomic.Int32
|
|
|
|
// inFlightServe counts ServeHTTP calls currently inside the handler.
|
|
// stoppedWhileServing flips true if Stop is ever called while that
|
|
// counter is non-zero — a direct, race-free observation of the
|
|
// "swap mid-request" anti-property.
|
|
inFlightServe atomic.Int32
|
|
stoppedWhileServing atomic.Bool
|
|
}
|
|
|
|
func newFakeProcess(id string) *fakeProcess {
|
|
return &fakeProcess{
|
|
id: id,
|
|
state: process.StateStopped,
|
|
readyCh: make(chan struct{}),
|
|
stopCh: make(chan struct{}),
|
|
runStarted: make(chan struct{}),
|
|
stopStarted: make(chan struct{}),
|
|
serveStarted: make(chan struct{}),
|
|
}
|
|
}
|
|
|
|
func (f *fakeProcess) setState(s process.ProcessState) {
|
|
f.mu.Lock()
|
|
defer f.mu.Unlock()
|
|
f.state = s
|
|
if s == process.StateReady {
|
|
select {
|
|
case <-f.readyCh:
|
|
default:
|
|
close(f.readyCh)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (f *fakeProcess) State() process.ProcessState {
|
|
f.mu.Lock()
|
|
defer f.mu.Unlock()
|
|
return f.state
|
|
}
|
|
|
|
// markReady puts the fake into process.StateReady, which also closes readyCh
// the first time it happens.
func (f *fakeProcess) markReady() {
	f.setState(process.StateReady)
}
|
|
|
|
func (f *fakeProcess) Run(_ time.Duration) error {
|
|
f.runCalls.Add(1)
|
|
f.mu.Lock()
|
|
if f.state != process.StateStopped {
|
|
s := f.state
|
|
f.mu.Unlock()
|
|
return fmt.Errorf("fakeProcess %s: Run called while %s", f.id, s)
|
|
}
|
|
f.state = process.StateStarting
|
|
sc := f.stopCh
|
|
select {
|
|
case <-f.runStarted:
|
|
default:
|
|
close(f.runStarted)
|
|
}
|
|
f.mu.Unlock()
|
|
|
|
if f.autoReady {
|
|
f.setState(process.StateReady)
|
|
}
|
|
<-sc
|
|
return nil
|
|
}
|
|
|
|
func (f *fakeProcess) Stop(_ time.Duration) error {
|
|
f.stopCalls.Add(1)
|
|
if f.inFlightServe.Load() > 0 {
|
|
f.stoppedWhileServing.Store(true)
|
|
}
|
|
f.mu.Lock()
|
|
select {
|
|
case <-f.stopStarted:
|
|
default:
|
|
close(f.stopStarted)
|
|
}
|
|
f.mu.Unlock()
|
|
|
|
// Test hook: hold Stop here so the test can prove multiple Stops are
|
|
// in flight at the same time before any of them complete.
|
|
if f.stopBlock != nil {
|
|
<-f.stopBlock
|
|
}
|
|
|
|
f.mu.Lock()
|
|
defer f.mu.Unlock()
|
|
if f.state == process.StateStopped {
|
|
return nil
|
|
}
|
|
f.state = process.StateStopped
|
|
select {
|
|
case <-f.stopCh:
|
|
default:
|
|
close(f.stopCh)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (f *fakeProcess) WaitReady(ctx context.Context) error {
|
|
f.mu.Lock()
|
|
if f.state == process.StateReady {
|
|
f.mu.Unlock()
|
|
return nil
|
|
}
|
|
rc := f.readyCh
|
|
f.mu.Unlock()
|
|
select {
|
|
case <-rc:
|
|
return nil
|
|
case <-ctx.Done():
|
|
return ctx.Err()
|
|
}
|
|
}
|
|
|
|
// Logger returns a new monitor built by logmon.NewWriter over io.Discard on
// every call.
func (f *fakeProcess) Logger() *logmon.Monitor {
	sink := logmon.NewWriter(io.Discard)
	return sink
}
|
|
|
|
func (f *fakeProcess) ServeHTTP(w http.ResponseWriter, _ *http.Request) {
|
|
f.serveCalls.Add(1)
|
|
f.inFlightServe.Add(1)
|
|
defer f.inFlightServe.Add(-1)
|
|
f.mu.Lock()
|
|
select {
|
|
case <-f.serveStarted:
|
|
default:
|
|
close(f.serveStarted)
|
|
}
|
|
f.mu.Unlock()
|
|
if f.serveBlock != nil {
|
|
<-f.serveBlock
|
|
}
|
|
w.WriteHeader(http.StatusOK)
|
|
fmt.Fprintf(w, "ok:%s", f.id)
|
|
}
|
|
|
|
// waitProcessed receives n events from ch and fails the test with t.Fatalf if
// any single event takes longer than two seconds to arrive. Per the original
// note, one event fires for each handlerReq or swapDone fully absorbed by
// run().
func waitProcessed(t *testing.T, ch chan struct{}, n int) {
	t.Helper()

	const perEvent = 2 * time.Second

	received := 0
	for received < n {
		// A fresh deadline is armed for every event, not for the whole batch.
		deadline := time.After(perEvent)
		select {
		case <-deadline:
			t.Fatalf("waitProcessed: only %d/%d events received", received, n)
		case <-ch:
			received++
		}
	}
}
|
|
|
|
// newRequest builds a POST /v1/chat/completions test request whose JSON body
// names the given model and whose Content-Type is application/json.
//
// The model is quoted with %q, i.e. Go string syntax. That coincides with
// JSON string syntax for ordinary printable names; names containing control
// characters or invalid UTF-8 would not yield valid JSON.
func newRequest(model string) *http.Request {
	payload := strings.NewReader(fmt.Sprintf(`{"model":%q}`, model))
	req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", payload)
	req.Header.Set("Content-Type", "application/json")
	return req
}
|
|
|
|
func newRequestCtx(ctx context.Context, model string) *http.Request {
|
|
return newRequest(model).WithContext(ctx)
|
|
}
|