b424261aca
Lifts mort's pkg/logic/llms into executus/model, decoupled from mort: - tiers.go: the tier resolver now reads a host-supplied config.Source under "model.tier.<name>" with host-supplied fallbacks (Configure(cfg, defaults, ttl)), instead of convar.Manager. Tier NAMES + specs are host config; the resolution mechanism (cache, reasoning-suffix dialect, chain validation) is generic. No tier names hard-coded in the harness. - sink.go: usage/trace recording inverted off mort's llmusage/llmtrace into UsageSink / TraceSink seams + a model-owned Span, with nil-safe context attribution helpers (WithModel/WithTraceID/WithUsageTool/WithUsageUser). Both sinks optional (nil = off) so a light host records nothing. - lane decoration repointed to executus/lane; utils.Errorf -> fmt.Errorf. - call.go keeps GenerateWith[T] (instrumented structured output) — this is the structured-output primitive; no separate structured/ package. - llmmeta moved over model/ (the meta-LLM helper: tier allowlist + JSON retry + ledger). Its tests configure a minimal tier table via TestMain. New tests cover the inversion: config overrides fallback, tier registration, reasoning-suffix survival, nested-tier rejection, nil-sink no-ops. Full module: go build/vet/test -race green; core go.sum still free of gorm/redis/discordgo/sqlite. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
111 lines
4.2 KiB
Go
111 lines
4.2 KiB
Go
// Package model — wiring.go: the production boot hook that rebuilds the
// package registry with the lane registry, the failover convars, and the
// failover-event observer.
//
// Why a dedicated helper (vs spreading registry construction through
// mort.go): the chatbot regression test in lane_chatbot_test.go and the
// production boot path must call the SAME wiring code. Historically
// mort.go skipped the lane wiring entirely (lanes were defined but never
// installed — 30+ skill_runs in production with 0 skill_queue_jobs rows);
// concentrating the install here means a regression in one caller's
// wiring fails the test for the other.
|
|
package model
|
|
|
|
import (
|
|
"context"
|
|
"log/slog"
|
|
"time"
|
|
|
|
majordomo "gitea.stevedudenhoeffer.com/steve/majordomo"
|
|
)
|
|
|
|
// WireOptions configures Wire. The zero value rebuilds the registry with
|
|
// no lanes and default failover behavior.
|
|
type WireOptions struct {
|
|
// Lanes is the lane registry every provider is decorated with. nil
|
|
// disables lane queueing (calls pass straight through) but keeps
|
|
// error attribution for the failover log.
|
|
Lanes LaneRegistry
|
|
|
|
// FailoverMaxRetries maps the llms.failover.max_retries convar onto
|
|
// majordomo's ChainConfig.TransientRetries (same-target retries after
|
|
// a transient error). <= 0 keeps majordomo's default (1).
|
|
FailoverMaxRetries int
|
|
|
|
// FailoverCooldown maps the llms.failover.cooldown_seconds convar
|
|
// onto health.Config.BaseCooldown. majordomo grows the cooldown
|
|
// exponentially from this base per consecutive bench; the cap is
|
|
// max(FailoverCooldown, 5m) so the operator's dial dominates.
|
|
// <= 0 keeps the mort default (300s).
|
|
FailoverCooldown time.Duration
|
|
|
|
// FailoverObserver receives one event per failover decision (failed
|
|
// attempt, bench, benched-skip). Wire it to failoverlog.NewObserver.
|
|
// Attribution (caller/run/prompts) rides on the event's error — see
|
|
// CallInfoFromError.
|
|
FailoverObserver func(majordomo.FailoverEvent)
|
|
}
|
|
|
|
// Wire rebuilds the package registry from opts and installs it. Call once
|
|
// at boot, after the lane registry and the failover convars exist (and
|
|
// after Init for DB-backed tiers — though Init and Wire are order-
|
|
// independent: the tier resolver is consulted through a delegating
|
|
// indirection).
|
|
//
|
|
// Rebuilding discards in-memory health/bench state — Wire is a boot-time
|
|
// operation, not a runtime toggle.
|
|
//
|
|
// When Lanes is non-nil, the well-known lanes (KnownLanes) are eagerly
|
|
// registered so admin dashboards have baseline state from the moment mort
|
|
// starts instead of "no lanes registered" until the first LLM call.
|
|
//
|
|
// Returns the installed registry for inspection (tests, health surfaces).
|
|
func Wire(ctx context.Context, opts WireOptions) *majordomo.Registry {
|
|
r := buildRegistry(buildConfig{
|
|
lanes: opts.Lanes,
|
|
maxRetries: opts.FailoverMaxRetries,
|
|
cooldown: opts.FailoverCooldown,
|
|
observer: opts.FailoverObserver,
|
|
})
|
|
setRegistry(r)
|
|
|
|
if opts.Lanes != nil {
|
|
names := KnownLanes()
|
|
for _, name := range names {
|
|
opts.Lanes.GetOrCreate(ctx, name)
|
|
}
|
|
slog.Info("llms: wired lane-aware registry", "lanes", len(names))
|
|
} else {
|
|
slog.Warn("llms: Wire called without a lane registry — lane queueing is inert")
|
|
}
|
|
return r
|
|
}
|
|
|
|
// KnownLanes returns the well-known lane names the LLM transport resolves
|
|
// to. Eager-registering these at boot gives admin dashboards
|
|
// (`/skills/admin/queues`, `.skill admin queue`) a baseline view from the
|
|
// moment mort starts — without this, the dashboard reads "no lanes
|
|
// registered" until the first chatbot/skill call materialises the lane
|
|
// via lazy GetOrCreate.
|
|
//
|
|
// Why this list (and not "every lane name ever"): these are the ones
|
|
// LaneFor in lane_mapping.go can produce for a real model spec. Future
|
|
// non-LLM lanes (e.g. a future image-generation lane) should be eagerly
|
|
// registered by their owning subsystem, not here.
|
|
//
|
|
// LaneSkillDefault is included even though it isn't an LLM-routing
|
|
// lane: skills run through it via skillexec.WithLaneRegistry, and the
|
|
// skills admin dashboard needs to see it from boot.
|
|
//
|
|
// Test: wiring_test.go::TestKnownLanes_NonEmpty.
|
|
func KnownLanes() []string {
|
|
return []string{
|
|
LaneOllama,
|
|
LaneAnthropicThinking,
|
|
LaneAnthropicDefault,
|
|
LaneM1,
|
|
LaneLLMDefault,
|
|
"skill-default",
|
|
}
|
|
}
|