Implement new scheduler (#823)

- introduce internal/router/scheduler to decouple routing, swapping, and
queuing behind interface contracts.
- introduce a new `routing` configuration section that supersedes
`matrix` and `group` while maintaining backward compatibility.
- add a FIFO scheduler with prioritized queuing.
- add internal/router/design.md as developer documentation on
implementing new schedulers and routers

Fixes #797
This commit is contained in:
Benson Wong
2026-06-10 20:34:25 -07:00
committed by GitHub
parent 0cfe5a6639
commit 9b3a33d7b9
26 changed files with 2398 additions and 1330 deletions
+17 -45
View File
@@ -2,11 +2,11 @@ package router
import (
"fmt"
"sort"
"github.com/mostlygeek/llama-swap/internal/config"
"github.com/mostlygeek/llama-swap/internal/logmon"
"github.com/mostlygeek/llama-swap/internal/process"
"github.com/mostlygeek/llama-swap/internal/router/scheduler"
)
type Matrix struct {
@@ -14,20 +14,23 @@ type Matrix struct {
}
func NewMatrix(conf config.Config, proxylog, upstreamlog *logmon.Monitor) (*Matrix, error) {
if conf.Matrix == nil {
mtx := conf.Routing.Router.Settings.Matrix
if mtx == nil {
return nil, fmt.Errorf("matrix router requires a matrix configuration")
}
planner := &matrixPlanner{
solver: newMatrixSolver(conf.ExpandedSets, conf.Matrix.ResolvedEvictCosts()),
swapper := &matrixSwapper{
solver: newMatrixSolver(mtx.ExpandedSets, mtx.ResolvedEvictCosts()),
logger: proxylog,
}
// Build a process for every model in the config. Any model can run alone
// even if it is not part of a set; this mirrors proxy.NewMatrix.
processes := make(map[string]process.Process, len(conf.Models))
base := newBaseRouter("matrix", conf, processes, planner, proxylog)
planner.processes = processes
base := newBaseRouter("matrix", conf, processes, proxylog,
func(name string, logger *logmon.Monitor, eff scheduler.Effects) scheduler.Scheduler {
return scheduler.NewFIFO(name, logger, swapper, conf.Routing.Scheduler.Settings.Fifo, eff)
})
for mid, modelCfg := range conf.Models {
procLog := logmon.NewWriter(upstreamlog)
@@ -45,20 +48,18 @@ func NewMatrix(conf config.Config, proxylog, upstreamlog *logmon.Monitor) (*Matr
return r, nil
}
// matrixPlanner decides evictions by asking the matrix solver against the
// current running set.
type matrixPlanner struct {
solver *matrixSolver
processes map[string]process.Process
logger *logmon.Monitor
// matrixSwapper decides evictions by asking the matrix solver against the
// running set the scheduler hands it.
type matrixSwapper struct {
solver *matrixSolver
logger *logmon.Monitor
}
func (p *matrixPlanner) EvictionFor(target string, alsoRunning []string) []string {
return p.solver.Solve(target, p.runningSet(alsoRunning)).Evict
func (p *matrixSwapper) EvictionFor(target string, running []string) []string {
return p.solver.Solve(target, running).Evict
}
func (p *matrixPlanner) OnSwapStart(target string) {
running := p.runningModels()
func (p *matrixSwapper) OnSwapStart(target string, running []string) {
result := p.solver.Solve(target, running)
switch {
case len(result.Evict) > 0:
@@ -70,32 +71,3 @@ func (p *matrixPlanner) OnSwapStart(target string) {
p.logger.Debugf("matrix: model=%s already running in set=%s dsl=%q", target, result.SetName, result.DSL)
}
}
// runningModels is the no-extras form of runningSet: it forwards a nil
// alsoRunning list and returns whatever runningSet produces.
func (p *matrixPlanner) runningModels() []string {
	live := p.runningSet(nil)
	return live
}
// runningSet builds the sorted list of model IDs that should be treated as
// running. It combines two sources:
//
//   - every entry in p.processes whose State() is neither StateStopped nor
//     StateShutdown, and
//   - each ID in alsoRunning, which callers use for models the baseRouter has
//     already committed to loading but whose process state has not caught up.
//
// Each ID appears at most once. The result is sorted with sort.Strings and is
// nil when neither source contributes an ID.
func (p *matrixPlanner) runningSet(alsoRunning []string) []string {
	var ids []string
	included := make(map[string]struct{}, len(p.processes))

	// include appends id to the result the first time it is seen.
	include := func(id string) {
		if _, ok := included[id]; ok {
			return
		}
		included[id] = struct{}{}
		ids = append(ids, id)
	}

	for id, proc := range p.processes {
		switch proc.State() {
		case process.StateStopped, process.StateShutdown:
			// Not live; leave it out of the running set.
		default:
			include(id)
		}
	}

	for _, id := range alsoRunning {
		include(id)
	}

	// Map iteration order is random, so sort for a deterministic result.
	sort.Strings(ids)
	return ids
}