// NOTE(review): the package comment below used to open with "Package
// skillexec", but the clause declares package budget. The opening
// sentence now follows the declared name, as Go doc convention
// expects — confirm which name is actually intended.

// Package budget holds the budget trackers consulted when running
// saved Skill definitions via majordomo's agent loop
// (gitea.stevedudenhoeffer.com/steve/majordomo/agent).
//
// Why: a Skill is data; the executor turns data into a running agent
// (resolve model, build toolbox, start audit, run the agent loop,
// finish audit, deliver).
package budget

import (
	"context"
	"errors"
	"fmt"
	"log/slog"
	"time"
)

// BudgetTracker enforces per-user GPU budgets in v2. v1 ships
// NoOpBudget, which always allows. The interface exists now so the v2
// migration is a single line in the executor.
//
// Why interface now: the executor's Check/Commit calls would need to
// be added in v2 anyway; doing it now means v2 only swaps NoOp for
// DBBudget without touching call sites.
type BudgetTracker interface {
	// Check reports whether the caller identified by callerID has
	// remaining budget. It returns nil for "yes" or an error
	// describing the exhaustion.
	Check(ctx context.Context, callerID string) error

	// Commit records that the caller spent runtimeSeconds of budget on
	// this run. Called after the agent completes (success or error).
	// It has no return value, so implementations cannot report a
	// failed write to the caller.
	Commit(ctx context.Context, callerID string, runtimeSeconds float64)
}

// NoOpBudget always allows and never records. v1 default.
type NoOpBudget struct{}

// NewNoOpBudget constructs the no-op tracker and returns it as a
// BudgetTracker.
func NewNoOpBudget() BudgetTracker { return NoOpBudget{} }

// Check always returns nil; both arguments are ignored.
func (NoOpBudget) Check(_ context.Context, _ string) error { return nil }

// Commit is a no-op; all arguments are ignored.
func (NoOpBudget) Commit(_ context.Context, _ string, _ float64) {}

// ErrBudgetExceeded is returned by DBBudget.Check when the caller's
// 7-day rolling window has hit the convar-configured cap.
//
// Why a sentinel: callers (executor, audit writer) need to distinguish
// budget rejection from generic errors so they can record
// status="budget_exceeded" instead of "error" and skip user-visible
// delivery side-effects.
var ErrBudgetExceeded = errors.New("weekly skill budget exceeded") // BudgetNotifier is the optional callback DBBudget invokes when a // Check rejects a caller. Production wires a Discord-DM hook so the // user knows why their skill failed; tests inject a recorder. // // nil is allowed and is silently skipped. type BudgetNotifier func(ctx context.Context, userID string, secondsUsed, cap float64) // DBBudget enforces per-user weekly GPU budgets via the BudgetStorage // interface. The "weekly" cap is a rolling 7-day window — see // SkillBudget for the rollover semantics. // // Why a closure for the limit instead of an int field: the cap comes // from a runtime convar. Reading it on every Check means a `.convar // set skills.user_budget_seconds_per_week 7200` takes effect on the // next call without restarting the bot or rewiring the executor. type DBBudget struct { storage BudgetStorage // weeklyLimit returns the current cap in seconds. Reads convar at // every Check so a runtime convar bump takes effect on the next // call. weeklyLimit func() float64 // notify is called when a Check rejects a caller. Optional — // production wires a Discord-DM hook so the user knows why their // skill failed. nil-safe. notify BudgetNotifier // now is the time source. Test injects a fake clock; production // uses time.Now. now func() time.Time } // NewDBBudget constructs a DBBudget. now may be nil — defaults to // time.Now. // // Why time injection: budget rollover is time-sensitive; tests need to // fast-forward past the 7-day boundary deterministically. now=nil // means production callers (mort.go) don't have to think about it. // // Test: pass a closure that returns a fixed instant; assert rollover // only happens when (now - WindowStart) >= 7 days. 
func NewDBBudget(storage BudgetStorage, weeklyLimit func() float64, notify BudgetNotifier, now func() time.Time) *DBBudget { if now == nil { now = time.Now } return &DBBudget{ storage: storage, weeklyLimit: weeklyLimit, notify: notify, now: now, } } // Check returns ErrBudgetExceeded if the caller has spent at least // weeklyLimit seconds in the current rolling 7-day window. // // Why anonymous callerID="" is unbudgeted: scheduler-driven and // system-initiated runs don't have a Discord user to bill; charging // "system" would conflate them with a real user. The scheduler sets // CallerID to the skill owner where applicable, so cron-loop // abusiveness still consumes the owner's budget. // // Why cap<=0 means "disabled": operator wants a runtime kill-switch. // Setting the convar to "0" turns enforcement off without restart. // // Test: Get returns nil → Check returns nil; Get returns row with // SecondsUsed >= cap → Check returns ErrBudgetExceeded and notify is // invoked; window expired (>=7d) → Check returns nil regardless of // SecondsUsed. func (b *DBBudget) Check(ctx context.Context, callerID string) error { if callerID == "" { return nil } bud, err := b.storage.Get(ctx, callerID) if err != nil { return fmt.Errorf("budget: %w", err) } if bud != nil { if b.now().Sub(bud.WindowStart) < 7*24*time.Hour { cap := b.weeklyLimit() if cap > 0 && bud.SecondsUsed >= cap { if b.notify != nil { b.notify(ctx, callerID, bud.SecondsUsed, cap) } return ErrBudgetExceeded } } } return nil } // Commit records the run's runtime against the caller's budget. // Failures are logged but never returned — budget accounting must // not break user-visible execution. // // Why callerID="" is a no-op: matches Check's anonymous-caller // shortcut; system runs don't get billed. // // Why runtimeSeconds<=0 is a no-op: a run that errored before // resolving a model has wallSecs near 0 in floating-point terms but // can also be exactly 0 (synthetic test fixtures). 
Skipping avoids // spurious 0-runs rows from short-lived failures. // // Test: Commit(50) → Get reports SecondsUsed=50; storage failure // surfaces only as a slog.Warn (no panic, no return). func (b *DBBudget) Commit(ctx context.Context, callerID string, runtimeSeconds float64) { if callerID == "" || runtimeSeconds <= 0 { return } if err := b.storage.Add(ctx, callerID, runtimeSeconds, b.now()); err != nil { slog.Warn("skills budget: commit failed", "user", callerID, "error", err) } }