Implement new scheduler (#823)

- introduce internal/router/scheduler to decouple routing, swapping and
queuing into interface contracts.
- introduce a new `routing` configuration section that supersedes
`matrix` and `group` while maintaining backwards compatibility
- add FIFO scheduler with prioritized queuing 
- add internal/router/design.md as developer documentation on
implementing new schedulers and routers

Fixes #797
This commit is contained in:
Benson Wong
2026-06-10 20:34:25 -07:00
committed by GitHub
parent 0cfe5a6639
commit 9b3a33d7b9
26 changed files with 2398 additions and 1330 deletions
+89 -6
View File
@@ -129,13 +129,16 @@ type Config struct {
GlobalTTL int `yaml:"globalTTL"`
Models map[string]ModelConfig `yaml:"models"` /* key is model ID */
Profiles map[string][]string `yaml:"profiles"`
Groups map[string]GroupConfig `yaml:"groups"` /* key is group ID */
// swap matrix: solver-based alternative to groups
Matrix *MatrixConfig `yaml:"matrix"`
// routing is the canonical source for swap/scheduling configuration.
// New code must read Routing, never the backwards-compat fields below.
Routing RoutingConfig `yaml:"routing"`
// populated during validation when matrix is configured
ExpandedSets []ExpandedSet `yaml:"-"`
// Groups and Matrix are permanent backwards-compat input fields for the
// legacy top-level `groups:`/`matrix:` keys. They are normalized into
// Routing by LoadConfigFromReader. New code must not read them directly.
Groups map[string]GroupConfig `yaml:"groups"` /* key is group ID */
Matrix *MatrixConfig `yaml:"matrix"`
// for key/value replacements in model's cmd, cmdStop, proxy, checkEndPoint
Macros MacroList `yaml:"macros"`
@@ -162,6 +165,35 @@ type Config struct {
Peers PeerDictionaryConfig `yaml:"peers"`
}
// RoutingConfig is the canonical, normalized routing/scheduling configuration.
type RoutingConfig struct {
Scheduler SchedulerConfig `yaml:"scheduler"`
Router RouterConfig `yaml:"router"`
}
type SchedulerConfig struct {
Use string `yaml:"use"` // default "fifo"
Settings SchedulerSettings `yaml:"settings"`
}
type SchedulerSettings struct {
Fifo FifoConfig `yaml:"fifo"`
}
type FifoConfig struct {
Priority map[string]int `yaml:"priority"` // model ID -> priority, default 0
}
type RouterConfig struct {
Use string `yaml:"use"` // "group" (default) | "matrix"
Settings RouterSettings `yaml:"settings"`
}
type RouterSettings struct {
Groups map[string]GroupConfig `yaml:"groups"`
Matrix *MatrixConfig `yaml:"matrix"`
}
func (c *Config) RealModelName(search string) (string, bool) {
if _, found := c.Models[search]; found {
return search, true
@@ -455,6 +487,34 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
config.Models[modelId] = modelConfig
}
// Normalize routing config. The legacy top-level `matrix`/`groups` keys and
// the new `routing.router` block are mutually exclusive: a config may use
// either style, never both.
hasTopLevel := config.Matrix != nil || len(config.Groups) > 0
rtr := config.Routing.Router
hasRouting := rtr.Use != "" || rtr.Settings.Matrix != nil || len(rtr.Settings.Groups) > 0
if hasTopLevel && hasRouting {
return Config{}, fmt.Errorf("config uses both the legacy top-level 'matrix'/'groups' keys and the new 'routing.router' block; please migrate the top-level keys into 'routing.router' and remove them")
}
if !hasTopLevel {
// Both groups and matrix may be defined under routing.router.settings;
// routing.router.use selects which one is active, so there is no conflict.
rs := config.Routing.Router.Settings
switch config.Routing.Router.Use {
case "matrix":
if rs.Matrix == nil {
return Config{}, fmt.Errorf("routing.router.use is 'matrix' but routing.router.settings.matrix is not set")
}
config.Matrix = rs.Matrix
case "group", "":
config.Groups = rs.Groups
default:
return Config{}, fmt.Errorf("routing.router.use: unknown router %q (valid: group, matrix)", config.Routing.Router.Use)
}
}
// groups XOR matrix
if config.Matrix != nil && len(config.Groups) > 0 {
return Config{}, fmt.Errorf("config cannot use both 'groups' and 'matrix'")
@@ -465,7 +525,7 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
if err != nil {
return Config{}, fmt.Errorf("matrix: %w", err)
}
config.ExpandedSets = expandedSets
config.Matrix.ExpandedSets = expandedSets
} else {
config = AddDefaultGroupToConfig(config)
@@ -487,6 +547,29 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
}
}
// Build the canonical Config.Routing from the effective result. Both legacy
// and new-style configs converge here. The Matrix pointer is shared so
// ExpandedSets stays in one place.
if config.Matrix != nil {
config.Routing.Router.Use = "matrix"
} else {
config.Routing.Router.Use = "group"
}
config.Routing.Router.Settings.Matrix = config.Matrix
config.Routing.Router.Settings.Groups = config.Groups
if config.Routing.Scheduler.Use == "" {
config.Routing.Scheduler.Use = "fifo"
}
if config.Routing.Scheduler.Use != "fifo" {
return Config{}, fmt.Errorf("routing.scheduler.use: unknown scheduler %q (valid: fifo)", config.Routing.Scheduler.Use)
}
for modelID := range config.Routing.Scheduler.Settings.Fifo.Priority {
if _, found := config.RealModelName(modelID); !found {
return Config{}, fmt.Errorf("routing.scheduler.settings.fifo.priority references unknown model %q", modelID)
}
}
// Clean up hooks preload
if len(config.Hooks.OnStartup.Preload) > 0 {
var toPreload []string
+28 -15
View File
@@ -173,6 +173,25 @@ groups:
IdleConn: 90,
}
expectedGroups := map[string]GroupConfig{
DEFAULT_GROUP_ID: {
Swap: true,
Exclusive: true,
Members: []string{"model1", "model3"},
},
"group1": {
Swap: true,
Exclusive: false,
Members: []string{"model2"},
},
"forever": {
Swap: true,
Exclusive: false,
Persistent: true,
Members: []string{"model4"},
},
}
expected := Config{
LogLevel: "info",
LogTimeFormat: "",
@@ -246,22 +265,16 @@ groups:
"m2": "model2",
"mthree": "model3",
},
Groups: map[string]GroupConfig{
DEFAULT_GROUP_ID: {
Swap: true,
Exclusive: true,
Members: []string{"model1", "model3"},
Groups: expectedGroups,
Routing: RoutingConfig{
Router: RouterConfig{
Use: "group",
Settings: RouterSettings{
Groups: expectedGroups,
},
},
"group1": {
Swap: true,
Exclusive: false,
Members: []string{"model2"},
},
"forever": {
Swap: true,
Exclusive: false,
Persistent: true,
Members: []string{"model4"},
Scheduler: SchedulerConfig{
Use: "fifo",
},
},
}
+60
View File
@@ -0,0 +1,60 @@
package config
import (
"encoding/json"
"os"
"testing"
"github.com/google/jsonschema-go/jsonschema"
"gopkg.in/yaml.v3"
)
// TestConfig_ExampleMatchesSchema validates that config.example.yaml conforms to
// config-schema.json. Both files live at the repository root.
func TestConfig_ExampleMatchesSchema(t *testing.T) {
const (
schemaPath = "../../config-schema.json"
examplePath = "../../config.example.yaml"
)
schemaBytes, err := os.ReadFile(schemaPath)
if err != nil {
t.Fatalf("reading %s: %v", schemaPath, err)
}
var schema jsonschema.Schema
if err := json.Unmarshal(schemaBytes, &schema); err != nil {
t.Fatalf("unmarshalling schema: %v", err)
}
resolved, err := schema.Resolve(&jsonschema.ResolveOptions{
BaseURI: "https://github.com/mostlygeek/llama-swap/",
})
if err != nil {
t.Fatalf("resolving schema: %v", err)
}
exampleBytes, err := os.ReadFile(examplePath)
if err != nil {
t.Fatalf("reading %s: %v", examplePath, err)
}
// Convert YAML to a JSON-like value so numbers and keys match what the
// validator expects.
var yamlValue any
if err := yaml.Unmarshal(exampleBytes, &yamlValue); err != nil {
t.Fatalf("unmarshalling example yaml: %v", err)
}
jsonBytes, err := json.Marshal(yamlValue)
if err != nil {
t.Fatalf("converting example to json: %v", err)
}
var instance any
if err := json.Unmarshal(jsonBytes, &instance); err != nil {
t.Fatalf("unmarshalling example json: %v", err)
}
if err := resolved.Validate(instance); err != nil {
t.Fatalf("config.example.yaml does not match config-schema.json:\n%v", err)
}
}
+171
View File
@@ -1544,3 +1544,174 @@ peers:
assert.Equal(t, 1, peerConfig.Timeouts.ExpectContinue)
assert.Equal(t, 90, peerConfig.Timeouts.IdleConn)
}
// twoModels is a minimal models block reused by the routing tests below.
const twoModels = `
models:
gemma:
cmd: echo gemma
proxy: http://localhost:8080
qwen:
cmd: echo qwen
proxy: http://localhost:8081
`
func TestConfig_Routing_LegacyTopLevelGroups(t *testing.T) {
yaml := twoModels + `
groups:
g1:
members: [gemma, qwen]
`
cfg, err := LoadConfigFromReader(strings.NewReader(yaml))
require.NoError(t, err)
assert.Equal(t, "group", cfg.Routing.Router.Use)
// default group injected for orphaned models (none here) still leaves g1
assert.Contains(t, cfg.Routing.Router.Settings.Groups, "g1")
assert.Equal(t, "fifo", cfg.Routing.Scheduler.Use)
}
func TestConfig_Routing_LegacyTopLevelMatrix(t *testing.T) {
yaml := twoModels + `
matrix:
vars:
g: gemma
q: qwen
sets:
combo: "g | q"
`
cfg, err := LoadConfigFromReader(strings.NewReader(yaml))
require.NoError(t, err)
assert.Equal(t, "matrix", cfg.Routing.Router.Use)
require.NotNil(t, cfg.Routing.Router.Settings.Matrix)
assert.Len(t, cfg.Routing.Router.Settings.Matrix.ExpandedSets, 2)
}
func TestConfig_Routing_RouterUseMatrix(t *testing.T) {
yaml := twoModels + `
routing:
router:
use: matrix
settings:
matrix:
vars:
g: gemma
q: qwen
sets:
combo: "g | q"
`
cfg, err := LoadConfigFromReader(strings.NewReader(yaml))
require.NoError(t, err)
assert.Equal(t, "matrix", cfg.Routing.Router.Use)
require.NotNil(t, cfg.Routing.Router.Settings.Matrix)
assert.Len(t, cfg.Routing.Router.Settings.Matrix.ExpandedSets, 2)
}
func TestConfig_Routing_RouterUseGroup(t *testing.T) {
yaml := twoModels + `
routing:
router:
use: group
settings:
groups:
g1:
members: [gemma, qwen]
`
cfg, err := LoadConfigFromReader(strings.NewReader(yaml))
require.NoError(t, err)
assert.Equal(t, "group", cfg.Routing.Router.Use)
assert.Contains(t, cfg.Routing.Router.Settings.Groups, "g1")
}
func TestConfig_Routing_DefaultsToGroup(t *testing.T) {
cfg, err := LoadConfigFromReader(strings.NewReader(twoModels))
require.NoError(t, err)
assert.Equal(t, "group", cfg.Routing.Router.Use)
assert.Equal(t, "fifo", cfg.Routing.Scheduler.Use)
}
func TestConfig_Routing_LegacyAndRoutingConflict(t *testing.T) {
yaml := twoModels + `
groups:
g1:
members: [gemma, qwen]
routing:
router:
use: group
`
_, err := LoadConfigFromReader(strings.NewReader(yaml))
require.Error(t, err)
assert.Contains(t, err.Error(), "migrate")
}
func TestConfig_Routing_RouterUseMatrixWithoutSettings(t *testing.T) {
yaml := twoModels + `
routing:
router:
use: matrix
`
_, err := LoadConfigFromReader(strings.NewReader(yaml))
require.Error(t, err)
assert.Contains(t, err.Error(), "routing.router.settings.matrix is not set")
}
// Both groups and matrix may be defined under routing.router.settings;
// routing.router.use selects which one is active.
func TestConfig_Routing_RouterSettingsBothGroupsAndMatrix(t *testing.T) {
yaml := twoModels + `
routing:
router:
use: group
settings:
groups:
g1:
members: [gemma, qwen]
matrix:
sets:
s: "gemma"
`
config, err := LoadConfigFromReader(strings.NewReader(yaml))
require.NoError(t, err)
// use: group means groups are active and matrix is ignored
assert.Equal(t, "group", config.Routing.Router.Use)
assert.Nil(t, config.Matrix)
assert.Contains(t, config.Groups, "g1")
}
func TestConfig_Routing_UnknownRouter(t *testing.T) {
yaml := twoModels + `
routing:
router:
use: bogus
`
_, err := LoadConfigFromReader(strings.NewReader(yaml))
require.Error(t, err)
assert.Contains(t, err.Error(), "unknown router")
}
func TestConfig_Routing_FifoPriorityUnknownModel(t *testing.T) {
yaml := twoModels + `
routing:
scheduler:
settings:
fifo:
priority:
nope: 5
`
_, err := LoadConfigFromReader(strings.NewReader(yaml))
require.Error(t, err)
assert.Contains(t, err.Error(), "unknown model")
}
func TestConfig_Routing_FifoPriorityKnownModel(t *testing.T) {
yaml := twoModels + `
routing:
scheduler:
settings:
fifo:
priority:
gemma: 5
`
cfg, err := LoadConfigFromReader(strings.NewReader(yaml))
require.NoError(t, err)
assert.Equal(t, 5, cfg.Routing.Scheduler.Settings.Fifo.Priority["gemma"])
}
+28 -15
View File
@@ -165,6 +165,25 @@ groups:
IdleConn: 90,
}
expectedGroups := map[string]GroupConfig{
DEFAULT_GROUP_ID: {
Swap: true,
Exclusive: true,
Members: []string{"model1", "model3"},
},
"group1": {
Swap: true,
Exclusive: false,
Members: []string{"model2"},
},
"forever": {
Swap: true,
Exclusive: false,
Persistent: true,
Members: []string{"model4"},
},
}
expected := Config{
LogLevel: "info",
LogTimeFormat: "",
@@ -235,22 +254,16 @@ groups:
"m2": "model2",
"mthree": "model3",
},
Groups: map[string]GroupConfig{
DEFAULT_GROUP_ID: {
Swap: true,
Exclusive: true,
Members: []string{"model1", "model3"},
Groups: expectedGroups,
Routing: RoutingConfig{
Router: RouterConfig{
Use: "group",
Settings: RouterSettings{
Groups: expectedGroups,
},
},
"group1": {
Swap: true,
Exclusive: false,
Members: []string{"model2"},
},
"forever": {
Swap: true,
Exclusive: false,
Persistent: true,
Members: []string{"model4"},
Scheduler: SchedulerConfig{
Use: "fifo",
},
},
}
+3
View File
@@ -15,6 +15,9 @@ type MatrixConfig struct {
Var map[string]string `yaml:"vars"`
EvictCosts map[string]int `yaml:"evict_costs"`
Sets OrderedSets `yaml:"sets"`
// populated by ValidateMatrix; not settable from yaml
ExpandedSets []ExpandedSet `yaml:"-"`
}
// SetEntry is a single named set with its DSL expression.
+3 -1
View File
@@ -289,7 +289,9 @@ matrix:
cfg, err := LoadConfigFromReader(strings.NewReader(yaml))
require.NoError(t, err)
assert.NotNil(t, cfg.Matrix)
assert.Len(t, cfg.ExpandedSets, 2)
assert.Len(t, cfg.Matrix.ExpandedSets, 2)
assert.Equal(t, "matrix", cfg.Routing.Router.Use)
assert.Len(t, cfg.Routing.Router.Settings.Matrix.ExpandedSets, 2)
// Groups should be empty when matrix is used
assert.Empty(t, cfg.Groups)
}