internal/router/scheduler: add serial scheduler, default on this fork
Add a strict one-model-at-a-time scheduler. Requests run in exact arrival order, and at most one runs at a time; switching to a different model first evicts every other running model, so only a single model occupies memory at any time. Unlike fifo, it never reorders or batches same-model requests, and it ignores group/matrix co-residency entirely, making the single-model guarantee a property of the scheduler rather than of the config.

- new Serial scheduler implementing the Scheduler interface
- register "serial" in scheduler.New; default routing.scheduler.use to "serial" at config load (fifo still selectable for upstream behavior)
- update config schema, example config, and config defaults tests

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1572,7 +1572,7 @@ groups:
|
||||
assert.Equal(t, "group", cfg.Routing.Router.Use)
|
||||
// default group injected for orphaned models (none here) still leaves g1
|
||||
assert.Contains(t, cfg.Routing.Router.Settings.Groups, "g1")
|
||||
- assert.Equal(t, "fifo", cfg.Routing.Scheduler.Use)
|
||||
+ assert.Equal(t, "serial", cfg.Routing.Scheduler.Use)
|
||||
}
|
||||
|
||||
func TestConfig_Routing_LegacyTopLevelMatrix(t *testing.T) {
|
||||
@@ -1631,7 +1631,7 @@ func TestConfig_Routing_DefaultsToGroup(t *testing.T) {
|
||||
cfg, err := LoadConfigFromReader(strings.NewReader(twoModels))
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "group", cfg.Routing.Router.Use)
|
||||
- assert.Equal(t, "fifo", cfg.Routing.Scheduler.Use)
|
||||
+ assert.Equal(t, "serial", cfg.Routing.Scheduler.Use)
|
||||
}
|
||||
|
||||
func TestConfig_Routing_LegacyAndRoutingConflict(t *testing.T) {
|
||||
|
||||
Reference in New Issue
Block a user