package run

import (
	"context"
	"fmt"
	"sync"
	"time"

	"gitea.stevedudenhoeffer.com/steve/majordomo/agent"
	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
)

// criticDeadlineCheck is how often the deadline-watch goroutine polls the
// critic's hard deadline. Small relative to any realistic soft timeout.
const criticDeadlineCheck = time.Second

// criticBinding wires a CriticHandle into a run: the executor forwards activity
// (steps + tool starts) to it, binds the run's hard cancellation to the critic's
// extendable deadline, and exposes the critic's Steer messages as an agent
// RunOption. All methods are nil-safe so the executor can call them
// unconditionally when no critic is configured.
type criticBinding struct {
	h CriticHandle
}

// criticOwnsDeadline reports whether a critic is configured AND this run enables
// it — the single predicate that decides the two-tier-timeout path. Used by BOTH
// Run (to choose the generous runaway ceiling over the literal MaxRuntime cap) and
// startCritic (the arm/no-op gate), so the two can never drift.
func (e *Executor) criticOwnsDeadline(ra RunnableAgent) bool {
	return e.cfg.Ports.Critic != nil && ra.Critic.Enabled
}

// startCritic begins critic monitoring for this run when one is configured and
// the agent enables it. It launches a goroutine that cancels runCtx (via
// cancelCause) the moment the critic's hard deadline passes — the critic may
// extend that deadline, so a healthy-but-slow run is given room while a hung one
// is killed. When the deadline passes because the critic KILLED the run
// (KillCause() != nil), the cancellation cause is ErrCriticKill (→ status
// "killed"); when the backstop simply expired, it is context.DeadlineExceeded (→
// "timeout"). Returns (nil, no-op stop) when there is no critic. The caller MUST
// defer the returned stop. The returned stop is idempotent: only its first call
// ends the watch goroutine and stops the handle; later calls are no-ops.
//
// softTrigger is the run's resolved MaxRuntime: for a critic-owned run MaxRuntime
// is the soft wake (mort's two-tier semantics — the critic first reviews once the
// run exceeds its nominal budget, and its backstop = softTrigger × multiplier).
// The caller (Run) always passes the resolved MaxRuntime, which withFallbacks
// guarantees is > 0; the 90s floor below is purely a defensive guard for a
// hypothetical caller that passes a non-positive value.
func (e *Executor) startCritic(runCtx context.Context, cancelCause context.CancelCauseFunc, ra RunnableAgent, info RunInfo, softTrigger time.Duration) (*criticBinding, func()) {
	noop := func() {}
	if !e.criticOwnsDeadline(ra) {
		return nil, noop
	}

	soft := softTrigger
	if soft <= 0 {
		soft = 90 * time.Second // defensive only; the sole caller passes MaxRuntime (>0)
	}

	h := e.cfg.Ports.Critic.Monitor(runCtx, info, soft)
	if h == nil {
		return nil, noop
	}

	// done is closed by stop to end the watch goroutine when the run finishes
	// before the deadline is reached.
	done := make(chan struct{})
	go func() {
		// A host CriticHandle.Deadline() that panics must not crash the process
		// (this runs on its own goroutine, so the executor's top-level recover
		// can't catch it). Log-free best-effort: just stop watching.
		defer func() { _ = recover() }()

		t := time.NewTicker(criticDeadlineCheck)
		defer t.Stop()

		for {
			select {
			case <-done:
				return
			case <-runCtx.Done():
				return
			case <-t.C:
				// A zero deadline = no hard cap (not yet set); otherwise cancel
				// once we're at or past it, distinguishing an explicit kill from a
				// natural backstop expiry so the run gets the right status.
				if d := h.Deadline(); !d.IsZero() && !time.Now().Before(d) {
					if cause := h.KillCause(); cause != nil {
						cancelCause(fmt.Errorf("%w: %s", ErrCriticKill, cause.Error()))
					} else {
						cancelCause(context.DeadlineExceeded)
					}
					return
				}
			}
		}
	}()

	// Guard the teardown with a Once: closing an already-closed channel panics,
	// so an accidental second stop() (e.g. an explicit call plus the deferred
	// one) must be harmless.
	var once sync.Once
	stop := func() {
		once.Do(func() {
			close(done)
			h.Stop()
		})
	}
	return &criticBinding{h: h}, stop
}

// recordStep forwards a completed step (its iteration index and response) to
// the critic handle. It is a no-op on a nil binding.
func (b *criticBinding) recordStep(iter int, resp *llm.Response) {
	if b != nil {
		b.h.RecordStep(iter, resp)
	}
}

// recordToolStart forwards a tool call to the critic. NOTE: majordomo's step
// observer only fires AFTER an iteration completes, so this currently lands
// post-tool, not at dispatch — the activity clock is refreshed once per
// iteration, not mid-tool. A single very long tool call (e.g. a 30-min render)
// therefore won't refresh the clock until it returns; a host that runs such
// tools should feed interim progress to its Critic (mort's InstallProgressBridge
// pattern). A true pre-dispatch refresh needs a majordomo hook (follow-up).
func (b *criticBinding) recordToolStart(name, args string) {
	if b != nil {
		b.h.RecordToolStart(name, args)
	}
}

// maxStepsOption returns the agent step-ceiling Option. With no critic it's a
// fixed WithMaxSteps(base); with a critic it's a DYNAMIC WithMaxStepsFunc that
// polls the handle each step (so the critic can raise a long run's budget),
// falling back to base when the handle defers (MaxSteps() <= 0).
func (b *criticBinding) maxStepsOption(base int) agent.Option {
	if b == nil {
		return agent.WithMaxSteps(base)
	}
	return agent.WithMaxStepsFunc(func() int {
		if n := b.h.MaxSteps(); n > 0 {
			return n
		}
		return base
	})
}

// drainSteer returns the critic's queued steer messages (nil-safe), so the
// executor can merge them with the session steer mailbox into one WithSteer.
func (b *criticBinding) drainSteer() []llm.Message {
	if b == nil {
		return nil
	}
	return b.h.Steer()
}