Files
executus/run/critic.go
T
steve cb4c612461
executus CI / test (pull_request) Successful in 1m45s
fix(run): address gadfly review of the critic-deadline PR
All 11 findings were real (3 clusters):

- Failsafe ceiling could pre-empt the critic's backstop (e9c9483f, 9109317b,
  d5a9bf0d, 76ad171e): CriticAbsoluteMax was 6h, but the host's backstop
  (MaxRuntime × multiplier, or its own absolute max) can reach 6h+, so the
  ceiling fired first and reintroduced a premature hard cap. Now CriticAbsoluteMax
  is a 24h RUNAWAY guard set far beyond any realistic backstop (the host clamps
  its own backstop to a much smaller absolute max, e.g. mort's 6h convar), so it
  never pre-empts a healthy supervised run. Comments corrected.

- nil Monitor handle lost the MaxRuntime cap (df016a6f, 9dd42827): a critic-enabled
  run whose host Monitor returned no handle had no deadline-watch and was bounded
  only by the generous ceiling. Added an unsupervised-run failsafe that re-wraps
  runCtx to the nominal MaxRuntime when the critic is enabled but didn't arm.
  New test TestCriticOwnsDeadline_NilHandleFallsBackToMaxRuntime.

- CriticSoftTimeout vestigial / dead fallback (f7764919, 9805bebe, 6864086f,
  b2b11721): the soft trigger is now always the resolved MaxRuntime (> 0), so the
  CriticSoftTimeout field + its startCritic fallback were unreachable. Removed the
  field entirely; the remaining 90s floor is documented as defensive-only.

- DRY (f30ce827): extracted e.criticOwnsDeadline(ra), now the single predicate used
  by both Run and startCritic so they can't drift.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01Jo75sqmeVPgFUWZQBn179X
2026-06-30 11:32:46 -04:00

140 lines
5.2 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package run
import (
"context"
"fmt"
"time"
"gitea.stevedudenhoeffer.com/steve/majordomo/agent"
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
)
// criticDeadlineCheck is how often the deadline-watch goroutine polls the
// critic's hard deadline. Small relative to any realistic soft timeout.
const criticDeadlineCheck = time.Second
// criticBinding wires a CriticHandle into a run: the executor forwards activity
// (steps + tool starts) to it, binds the run's hard cancellation to the critic's
// extendable deadline, and exposes the critic's Steer messages as an agent
// RunOption. All methods are nil-safe so the executor can call them
// unconditionally when no critic is configured.
type criticBinding struct {
h CriticHandle
}
// criticOwnsDeadline reports whether a critic is configured AND this run enables
// it — the single predicate that decides the two-tier-timeout path. Used by BOTH
// Run (to choose the generous runaway ceiling over the literal MaxRuntime cap) and
// startCritic (the arm/no-op gate), so the two can never drift.
func (e *Executor) criticOwnsDeadline(ra RunnableAgent) bool {
return e.cfg.Ports.Critic != nil && ra.Critic.Enabled
}
// startCritic begins critic monitoring for this run when one is configured and
// the agent enables it. It launches a goroutine that cancels runCtx (via
// cancelCause) the moment the critic's hard deadline passes — the critic may
// extend that deadline, so a healthy-but-slow run is given room while a hung one
// is killed. When the deadline passes because the critic KILLED the run
// (KillCause() != nil), the cancellation cause is ErrCriticKill (→ status
// "killed"); when the backstop simply expired, it is context.DeadlineExceeded (→
// "timeout"). Returns (nil, no-op stop) when there is no critic. The caller MUST
// defer the returned stop.
//
// softTrigger is the run's resolved MaxRuntime: for a critic-owned run MaxRuntime
// is the soft wake (mort's two-tier semantics — the critic first reviews once the
// run exceeds its nominal budget, and its backstop = softTrigger × multiplier).
// The caller (Run) always passes the resolved MaxRuntime, which withFallbacks
// guarantees is > 0; the 90s floor below is purely a defensive guard for a
// hypothetical caller that passes a non-positive value.
func (e *Executor) startCritic(runCtx context.Context, cancelCause context.CancelCauseFunc, ra RunnableAgent, info RunInfo, softTrigger time.Duration) (*criticBinding, func()) {
noop := func() {}
if !e.criticOwnsDeadline(ra) {
return nil, noop
}
soft := softTrigger
if soft <= 0 {
soft = 90 * time.Second // defensive only; the sole caller passes MaxRuntime (>0)
}
h := e.cfg.Ports.Critic.Monitor(runCtx, info, soft)
if h == nil {
return nil, noop
}
done := make(chan struct{})
go func() {
// A host CriticHandle.Deadline() that panics must not crash the process
// (this runs on its own goroutine, so the executor's top-level recover
// can't catch it). Log-free best-effort: just stop watching.
defer func() { _ = recover() }()
t := time.NewTicker(criticDeadlineCheck)
defer t.Stop()
for {
select {
case <-done:
return
case <-runCtx.Done():
return
case <-t.C:
// A zero deadline = no hard cap (not yet set); otherwise cancel
// once we're at or past it, distinguishing an explicit kill from a
// natural backstop expiry so the run gets the right status.
if d := h.Deadline(); !d.IsZero() && !time.Now().Before(d) {
if cause := h.KillCause(); cause != nil {
cancelCause(fmt.Errorf("%w: %s", ErrCriticKill, cause.Error()))
} else {
cancelCause(context.DeadlineExceeded)
}
return
}
}
}
}()
return &criticBinding{h: h}, func() {
close(done)
h.Stop()
}
}
func (b *criticBinding) recordStep(iter int, resp *llm.Response) {
if b != nil {
b.h.RecordStep(iter, resp)
}
}
// recordToolStart forwards a tool call to the critic. NOTE: majordomo's step
// observer only fires AFTER an iteration completes, so this currently lands
// post-tool, not at dispatch — the activity clock is refreshed once per
// iteration, not mid-tool. A single very long tool call (e.g. a 30-min render)
// therefore won't refresh the clock until it returns; a host that runs such
// tools should feed interim progress to its Critic (mort's InstallProgressBridge
// pattern). A true pre-dispatch refresh needs a majordomo hook (follow-up).
func (b *criticBinding) recordToolStart(name, args string) {
if b != nil {
b.h.RecordToolStart(name, args)
}
}
// maxStepsOption returns the agent step-ceiling Option. With no critic it's a
// fixed WithMaxSteps(base); with a critic it's a DYNAMIC WithMaxStepsFunc that
// polls the handle each step (so the critic can raise a long run's budget),
// falling back to base when the handle defers (MaxSteps() <= 0).
func (b *criticBinding) maxStepsOption(base int) agent.Option {
if b == nil {
return agent.WithMaxSteps(base)
}
return agent.WithMaxStepsFunc(func() int {
if n := b.h.MaxSteps(); n > 0 {
return n
}
return base
})
}
// drainSteer returns the critic's queued steer messages (nil-safe), so the
// executor can merge them with the session steer mailbox into one WithSteer.
func (b *criticBinding) drainSteer() []llm.Message {
if b == nil {
return nil
}
return b.h.Steer()
}