internal/router/scheduler: add serial scheduler, default on this fork
Validate JSON Schema / validate-schema (push) Successful in 9m53s
Linux CI / run-tests (push) Failing after 15m57s
Windows CI / run-tests (push) Has been cancelled

Add a strict one-model-at-a-time scheduler. Requests run in exact
arrival order; at most one request runs at a time; switching to a
different model first evicts every other running model, so only a
single model occupies memory at a time. Unlike fifo, it never reorders
or batches same-model requests, and it ignores group/matrix
co-residency entirely, making the single-model guarantee a property of
the scheduler rather than the config.

- new Serial scheduler implementing the Scheduler interface
- register "serial" in scheduler.New; default routing.scheduler.use to
  "serial" at config load (fifo still selectable for upstream behavior)
- update config schema, example config, and config defaults tests

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-28 12:17:32 -04:00
parent 0a25b3bd31
commit 542b79dacf
9 changed files with 683 additions and 15 deletions
+1 -1
View File
@@ -277,7 +277,7 @@ groups:
},
},
Scheduler: SchedulerConfig{
Use: "fifo",
Use: "serial",
},
},
}
+2 -2
View File
@@ -1572,7 +1572,7 @@ groups:
assert.Equal(t, "group", cfg.Routing.Router.Use)
// default group injected for orphaned models (none here) still leaves g1
assert.Contains(t, cfg.Routing.Router.Settings.Groups, "g1")
assert.Equal(t, "fifo", cfg.Routing.Scheduler.Use)
assert.Equal(t, "serial", cfg.Routing.Scheduler.Use)
}
func TestConfig_Routing_LegacyTopLevelMatrix(t *testing.T) {
@@ -1631,7 +1631,7 @@ func TestConfig_Routing_DefaultsToGroup(t *testing.T) {
cfg, err := LoadConfigFromReader(strings.NewReader(twoModels))
require.NoError(t, err)
assert.Equal(t, "group", cfg.Routing.Router.Use)
assert.Equal(t, "fifo", cfg.Routing.Scheduler.Use)
assert.Equal(t, "serial", cfg.Routing.Scheduler.Use)
}
func TestConfig_Routing_LegacyAndRoutingConflict(t *testing.T) {
+1 -1
View File
@@ -266,7 +266,7 @@ groups:
},
},
Scheduler: SchedulerConfig{
Use: "fifo",
Use: "serial",
},
},
}
+8 -3
View File
@@ -358,11 +358,16 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
config.Routing.Router.Settings.Matrix = config.Matrix
config.Routing.Router.Settings.Groups = config.Groups
// This fork defaults to the "serial" scheduler: one model loaded at a time,
// requests served in strict arrival order. Set use: fifo for the upstream
// throughput-oriented behavior that batches same-model requests.
if config.Routing.Scheduler.Use == "" {
config.Routing.Scheduler.Use = "fifo"
config.Routing.Scheduler.Use = "serial"
}
if config.Routing.Scheduler.Use != "fifo" {
return Config{}, fmt.Errorf("routing.scheduler.use: unknown scheduler %q (valid: fifo)", config.Routing.Scheduler.Use)
switch config.Routing.Scheduler.Use {
case "fifo", "serial":
default:
return Config{}, fmt.Errorf("routing.scheduler.use: unknown scheduler %q (valid: fifo, serial)", config.Routing.Scheduler.Use)
}
for modelID := range config.Routing.Scheduler.Settings.Fifo.Priority {
if _, found := config.RealModelName(modelID); !found {