schedule,shared: move concurrency 429 limits into scheduler code (#849)

- make concurrency limiting the scheduler.Scheduler's responsibility
- eliminate the separate concurrency limit middleware 
- move concurrencyLimit logic into scheduler.FIFO to maintain backwards compatibility
- add HTTPError from #834 

Updates #834
This commit is contained in:
Benson Wong
2026-06-15 22:35:12 -07:00
committed by GitHub
parent 8e84b2ec4f
commit 6cf1317341
14 changed files with 278 additions and 171 deletions
+4 -5
View File
@@ -6,7 +6,6 @@ import (
"github.com/mostlygeek/llama-swap/internal/config"
"github.com/mostlygeek/llama-swap/internal/logmon"
"github.com/mostlygeek/llama-swap/internal/process"
"github.com/mostlygeek/llama-swap/internal/router/scheduler"
)
type Group struct {
@@ -30,10 +29,10 @@ func NewGroup(conf config.Config, proxylog, upstreamlog *logmon.Monitor) (*Group
}
processes := make(map[string]process.Process, len(modelToGroup))
base := newBaseRouter("group", conf, processes, proxylog,
func(name string, logger *logmon.Monitor, eff scheduler.Effects) scheduler.Scheduler {
return scheduler.NewFIFO(name, logger, swapper, conf.Routing.Scheduler.Settings.Fifo, eff)
})
base, err := newBaseRouter("group", conf, processes, proxylog, swapper)
if err != nil {
return nil, fmt.Errorf("creating base router: %w", err)
}
for mid := range modelToGroup {
modelCfg, _, ok := conf.FindConfig(mid)