schedule,shared: move concurrency-limit (HTTP 429) handling into scheduler code (#849)

- make concurrency limiting the scheduler.Scheduler's responsibility
- eliminate the separate concurrency limit middleware
- move concurrencyLimit logic into scheduler.FIFO to maintain backwards compatibility
- add the HTTPError type from #834

Updates #834
This commit is contained in:
Benson Wong
2026-06-15 22:35:12 -07:00
committed by GitHub
parent 8e84b2ec4f
commit 6cf1317341
14 changed files with 278 additions and 171 deletions
+4 -5
View File
@@ -10,7 +10,6 @@ import (
"github.com/mostlygeek/llama-swap/internal/config"
"github.com/mostlygeek/llama-swap/internal/logmon"
"github.com/mostlygeek/llama-swap/internal/process"
"github.com/mostlygeek/llama-swap/internal/router/scheduler"
)
// newTestMatrix builds a Matrix router from supplied processes, bypassing
@@ -22,10 +21,10 @@ func newTestMatrix(t *testing.T, conf config.Config, expanded []config.ExpandedS
solver: newMatrixSolver(expanded, evictCosts),
logger: logger,
}
base := newBaseRouter("matrix", conf, processes, logger,
func(name string, l *logmon.Monitor, eff scheduler.Effects) scheduler.Scheduler {
return scheduler.NewFIFO(name, l, swapper, conf.Routing.Scheduler.Settings.Fifo, eff)
})
base, err := newBaseRouter("matrix", conf, processes, logger, swapper)
if err != nil {
t.Fatalf("newBaseRouter: %v", err)
}
base.testProcessed = make(chan struct{}, 64)
r := &Matrix{baseRouter: base}
go base.run()