schedule,shared: move concurrency 429 limits into scheduler code (#849)
- make concurrency limiting the scheduler.Scheduler's responsibility - eliminate the separate concurrency limit middleware - move concurrencyLimit logic into scheduler.FIFO to maintain backwards compatibility - add HTTPError from #834 Updates #834
This commit is contained in:
@@ -6,7 +6,6 @@ import (
|
||||
"github.com/mostlygeek/llama-swap/internal/config"
|
||||
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||
"github.com/mostlygeek/llama-swap/internal/process"
|
||||
"github.com/mostlygeek/llama-swap/internal/router/scheduler"
|
||||
)
|
||||
|
||||
type Group struct {
|
||||
@@ -30,10 +29,10 @@ func NewGroup(conf config.Config, proxylog, upstreamlog *logmon.Monitor) (*Group
|
||||
}
|
||||
|
||||
processes := make(map[string]process.Process, len(modelToGroup))
|
||||
base := newBaseRouter("group", conf, processes, proxylog,
|
||||
func(name string, logger *logmon.Monitor, eff scheduler.Effects) scheduler.Scheduler {
|
||||
return scheduler.NewFIFO(name, logger, swapper, conf.Routing.Scheduler.Settings.Fifo, eff)
|
||||
})
|
||||
base, err := newBaseRouter("group", conf, processes, proxylog, swapper)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating base router: %w", err)
|
||||
}
|
||||
|
||||
for mid := range modelToGroup {
|
||||
modelCfg, _, ok := conf.FindConfig(mid)
|
||||
|
||||
Reference in New Issue
Block a user