internal/router/scheduler: add serial scheduler, default on this fork
Add a strict one-model-at-a-time scheduler. Requests run in exact arrival order; at most one runs at a time; switching to a different model evicts every other running model first so a single model occupies memory at a time. Unlike fifo it never reorders or batches same-model requests, and it ignores group/matrix co-residency entirely, making the single-model guarantee a property of the scheduler rather than the config. - new Serial scheduler implementing the Scheduler interface - register "serial" in scheduler.New; default routing.scheduler.use to "serial" at config load (fifo still selectable for upstream behavior) - update config schema, example config, and config defaults tests Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -277,7 +277,7 @@ groups:
|
||||
},
|
||||
},
|
||||
Scheduler: SchedulerConfig{
|
||||
Use: "fifo",
|
||||
Use: "serial",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
@@ -1572,7 +1572,7 @@ groups:
|
||||
assert.Equal(t, "group", cfg.Routing.Router.Use)
|
||||
// default group injected for orphaned models (none here) still leaves g1
|
||||
assert.Contains(t, cfg.Routing.Router.Settings.Groups, "g1")
|
||||
assert.Equal(t, "fifo", cfg.Routing.Scheduler.Use)
|
||||
assert.Equal(t, "serial", cfg.Routing.Scheduler.Use)
|
||||
}
|
||||
|
||||
func TestConfig_Routing_LegacyTopLevelMatrix(t *testing.T) {
|
||||
@@ -1631,7 +1631,7 @@ func TestConfig_Routing_DefaultsToGroup(t *testing.T) {
|
||||
cfg, err := LoadConfigFromReader(strings.NewReader(twoModels))
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "group", cfg.Routing.Router.Use)
|
||||
assert.Equal(t, "fifo", cfg.Routing.Scheduler.Use)
|
||||
assert.Equal(t, "serial", cfg.Routing.Scheduler.Use)
|
||||
}
|
||||
|
||||
func TestConfig_Routing_LegacyAndRoutingConflict(t *testing.T) {
|
||||
|
||||
@@ -266,7 +266,7 @@ groups:
|
||||
},
|
||||
},
|
||||
Scheduler: SchedulerConfig{
|
||||
Use: "fifo",
|
||||
Use: "serial",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
@@ -358,11 +358,16 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
||||
config.Routing.Router.Settings.Matrix = config.Matrix
|
||||
config.Routing.Router.Settings.Groups = config.Groups
|
||||
|
||||
// This fork defaults to the "serial" scheduler: one model loaded at a time,
|
||||
// requests served in strict arrival order. Set use: fifo for the upstream
|
||||
// throughput-oriented behavior that batches same-model requests.
|
||||
if config.Routing.Scheduler.Use == "" {
|
||||
config.Routing.Scheduler.Use = "fifo"
|
||||
config.Routing.Scheduler.Use = "serial"
|
||||
}
|
||||
if config.Routing.Scheduler.Use != "fifo" {
|
||||
return Config{}, fmt.Errorf("routing.scheduler.use: unknown scheduler %q (valid: fifo)", config.Routing.Scheduler.Use)
|
||||
switch config.Routing.Scheduler.Use {
|
||||
case "fifo", "serial":
|
||||
default:
|
||||
return Config{}, fmt.Errorf("routing.scheduler.use: unknown scheduler %q (valid: fifo, serial)", config.Routing.Scheduler.Use)
|
||||
}
|
||||
for modelID := range config.Routing.Scheduler.Settings.Fifo.Priority {
|
||||
if _, found := config.RealModelName(modelID); !found {
|
||||
|
||||
Reference in New Issue
Block a user