Implement new scheduler (#823)

- introduce internal/router/scheduler to decouple routing, swapping and
queuing into interface contracts.
- introduce a new `routing` configuration section that supersedes
`matrix` and `group` while maintaining backwards compatibility
- add FIFO scheduler with prioritized queuing 
- add internal/router/design.md as developer documentation on
implementing new schedulers and routers

Fixes #797
This commit is contained in:
Benson Wong
2026-06-10 20:34:25 -07:00
committed by GitHub
parent 0cfe5a6639
commit 9b3a33d7b9
26 changed files with 2398 additions and 1330 deletions
+171
View File
@@ -1544,3 +1544,174 @@ peers:
assert.Equal(t, 1, peerConfig.Timeouts.ExpectContinue)
assert.Equal(t, 90, peerConfig.Timeouts.IdleConn)
}
// twoModels is a minimal models block reused by the routing tests below.
const twoModels = `
models:
gemma:
cmd: echo gemma
proxy: http://localhost:8080
qwen:
cmd: echo qwen
proxy: http://localhost:8081
`
func TestConfig_Routing_LegacyTopLevelGroups(t *testing.T) {
yaml := twoModels + `
groups:
g1:
members: [gemma, qwen]
`
cfg, err := LoadConfigFromReader(strings.NewReader(yaml))
require.NoError(t, err)
assert.Equal(t, "group", cfg.Routing.Router.Use)
// default group injected for orphaned models (none here) still leaves g1
assert.Contains(t, cfg.Routing.Router.Settings.Groups, "g1")
assert.Equal(t, "fifo", cfg.Routing.Scheduler.Use)
}
func TestConfig_Routing_LegacyTopLevelMatrix(t *testing.T) {
yaml := twoModels + `
matrix:
vars:
g: gemma
q: qwen
sets:
combo: "g | q"
`
cfg, err := LoadConfigFromReader(strings.NewReader(yaml))
require.NoError(t, err)
assert.Equal(t, "matrix", cfg.Routing.Router.Use)
require.NotNil(t, cfg.Routing.Router.Settings.Matrix)
assert.Len(t, cfg.Routing.Router.Settings.Matrix.ExpandedSets, 2)
}
func TestConfig_Routing_RouterUseMatrix(t *testing.T) {
yaml := twoModels + `
routing:
router:
use: matrix
settings:
matrix:
vars:
g: gemma
q: qwen
sets:
combo: "g | q"
`
cfg, err := LoadConfigFromReader(strings.NewReader(yaml))
require.NoError(t, err)
assert.Equal(t, "matrix", cfg.Routing.Router.Use)
require.NotNil(t, cfg.Routing.Router.Settings.Matrix)
assert.Len(t, cfg.Routing.Router.Settings.Matrix.ExpandedSets, 2)
}
func TestConfig_Routing_RouterUseGroup(t *testing.T) {
yaml := twoModels + `
routing:
router:
use: group
settings:
groups:
g1:
members: [gemma, qwen]
`
cfg, err := LoadConfigFromReader(strings.NewReader(yaml))
require.NoError(t, err)
assert.Equal(t, "group", cfg.Routing.Router.Use)
assert.Contains(t, cfg.Routing.Router.Settings.Groups, "g1")
}
func TestConfig_Routing_DefaultsToGroup(t *testing.T) {
cfg, err := LoadConfigFromReader(strings.NewReader(twoModels))
require.NoError(t, err)
assert.Equal(t, "group", cfg.Routing.Router.Use)
assert.Equal(t, "fifo", cfg.Routing.Scheduler.Use)
}
func TestConfig_Routing_LegacyAndRoutingConflict(t *testing.T) {
yaml := twoModels + `
groups:
g1:
members: [gemma, qwen]
routing:
router:
use: group
`
_, err := LoadConfigFromReader(strings.NewReader(yaml))
require.Error(t, err)
assert.Contains(t, err.Error(), "migrate")
}
func TestConfig_Routing_RouterUseMatrixWithoutSettings(t *testing.T) {
yaml := twoModels + `
routing:
router:
use: matrix
`
_, err := LoadConfigFromReader(strings.NewReader(yaml))
require.Error(t, err)
assert.Contains(t, err.Error(), "routing.router.settings.matrix is not set")
}
// Both groups and matrix may be defined under routing.router.settings;
// routing.router.use selects which one is active.
func TestConfig_Routing_RouterSettingsBothGroupsAndMatrix(t *testing.T) {
yaml := twoModels + `
routing:
router:
use: group
settings:
groups:
g1:
members: [gemma, qwen]
matrix:
sets:
s: "gemma"
`
config, err := LoadConfigFromReader(strings.NewReader(yaml))
require.NoError(t, err)
// use: group means groups are active and matrix is ignored
assert.Equal(t, "group", config.Routing.Router.Use)
assert.Nil(t, config.Matrix)
assert.Contains(t, config.Groups, "g1")
}
func TestConfig_Routing_UnknownRouter(t *testing.T) {
yaml := twoModels + `
routing:
router:
use: bogus
`
_, err := LoadConfigFromReader(strings.NewReader(yaml))
require.Error(t, err)
assert.Contains(t, err.Error(), "unknown router")
}
func TestConfig_Routing_FifoPriorityUnknownModel(t *testing.T) {
yaml := twoModels + `
routing:
scheduler:
settings:
fifo:
priority:
nope: 5
`
_, err := LoadConfigFromReader(strings.NewReader(yaml))
require.Error(t, err)
assert.Contains(t, err.Error(), "unknown model")
}
func TestConfig_Routing_FifoPriorityKnownModel(t *testing.T) {
yaml := twoModels + `
routing:
scheduler:
settings:
fifo:
priority:
gemma: 5
`
cfg, err := LoadConfigFromReader(strings.NewReader(yaml))
require.NoError(t, err)
assert.Equal(t, 5, cfg.Routing.Scheduler.Settings.Fifo.Priority["gemma"])
}