cb4c612461
executus CI / test (pull_request) Successful in 1m45s
All 11 findings were real (3 clusters): - Failsafe ceiling could pre-empt the critic's backstop (e9c9483f, 9109317b, d5a9bf0d, 76ad171e): CriticAbsoluteMax was 6h, but the host's backstop (MaxRuntime × multiplier, or its own absolute max) can reach 6h+, so the ceiling fired first and reintroduced a premature hard cap. Now CriticAbsoluteMax is a 24h RUNAWAY guard set far beyond any realistic backstop (the host clamps its own backstop to a much smaller absolute max, e.g. mort's 6h convar), so it never pre-empts a healthy supervised run. Comments corrected. - nil Monitor handle lost the MaxRuntime cap (df016a6f, 9dd42827): a critic-enabled run whose host Monitor returned no handle had no deadline-watch and was bounded only by the generous ceiling. Added an unsupervised-run failsafe that re-wraps runCtx to the nominal MaxRuntime when the critic is enabled but didn't arm. New test TestCriticOwnsDeadline_NilHandleFallsBackToMaxRuntime. - CriticSoftTimeout vestigial / dead fallback (f7764919, 9805bebe, 6864086f, b2b11721): the soft trigger is now always the resolved MaxRuntime (> 0), so the CriticSoftTimeout field + its startCritic fallback were unreachable. Removed the field entirely; the remaining 90s floor is documented as defensive-only. - DRY (f30ce827): extracted e.criticOwnsDeadline(ra), now the single predicate used by both Run and startCritic so they can't drift. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> Claude-Session: https://claude.ai/code/session_01Jo75sqmeVPgFUWZQBn179X
140 lines
5.2 KiB
Go
140 lines
5.2 KiB
Go
package run
|
||
|
||
import (
|
||
"context"
|
||
"fmt"
|
||
"time"
|
||
|
||
"gitea.stevedudenhoeffer.com/steve/majordomo/agent"
|
||
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
||
)
|
||
|
||
// criticDeadlineCheck is how often the deadline-watch goroutine polls the
|
||
// critic's hard deadline. Small relative to any realistic soft timeout.
|
||
const criticDeadlineCheck time.Duration = time.Second // one deadline poll per second
|
||
|
||
// criticBinding wires a CriticHandle into a run: the executor forwards activity
|
||
// (steps + tool starts) to it, binds the run's hard cancellation to the critic's
|
||
// extendable deadline, and exposes the critic's Steer messages as an agent
|
||
// RunOption. All methods are nil-safe so the executor can call them
|
||
// unconditionally when no critic is configured.
|
||
type criticBinding struct {
|
||
h CriticHandle
|
||
}
|
||
|
||
// criticOwnsDeadline reports whether a critic is configured AND this run enables
|
||
// it — the single predicate that decides the two-tier-timeout path. Used by BOTH
|
||
// Run (to choose the generous runaway ceiling over the literal MaxRuntime cap) and
|
||
// startCritic (the arm/no-op gate), so the two can never drift.
|
||
func (e *Executor) criticOwnsDeadline(ra RunnableAgent) bool {
|
||
return e.cfg.Ports.Critic != nil && ra.Critic.Enabled
|
||
}
|
||
|
||
// startCritic begins critic monitoring for this run when one is configured and
|
||
// the agent enables it. It launches a goroutine that cancels runCtx (via
|
||
// cancelCause) the moment the critic's hard deadline passes — the critic may
|
||
// extend that deadline, so a healthy-but-slow run is given room while a hung one
|
||
// is killed. When the deadline passes because the critic KILLED the run
|
||
// (KillCause() != nil), the cancellation cause is ErrCriticKill (→ status
|
||
// "killed"); when the backstop simply expired, it is context.DeadlineExceeded (→
|
||
// "timeout"). Returns (nil, no-op stop) when there is no critic. The caller MUST
|
||
// defer the returned stop.
|
||
//
|
||
// softTrigger is the run's resolved MaxRuntime: for a critic-owned run MaxRuntime
|
||
// is the soft wake (mort's two-tier semantics — the critic first reviews once the
|
||
// run exceeds its nominal budget, and its backstop = softTrigger × multiplier).
|
||
// The caller (Run) always passes the resolved MaxRuntime, which withFallbacks
|
||
// guarantees is > 0; the 90s floor below is purely a defensive guard for a
|
||
// hypothetical caller that passes a non-positive value.
|
||
func (e *Executor) startCritic(runCtx context.Context, cancelCause context.CancelCauseFunc, ra RunnableAgent, info RunInfo, softTrigger time.Duration) (*criticBinding, func()) {
|
||
noop := func() {}
|
||
if !e.criticOwnsDeadline(ra) {
|
||
return nil, noop
|
||
}
|
||
soft := softTrigger
|
||
if soft <= 0 {
|
||
soft = 90 * time.Second // defensive only; the sole caller passes MaxRuntime (>0)
|
||
}
|
||
h := e.cfg.Ports.Critic.Monitor(runCtx, info, soft)
|
||
if h == nil {
|
||
return nil, noop
|
||
}
|
||
done := make(chan struct{})
|
||
go func() {
|
||
// A host CriticHandle.Deadline() that panics must not crash the process
|
||
// (this runs on its own goroutine, so the executor's top-level recover
|
||
// can't catch it). Log-free best-effort: just stop watching.
|
||
defer func() { _ = recover() }()
|
||
t := time.NewTicker(criticDeadlineCheck)
|
||
defer t.Stop()
|
||
for {
|
||
select {
|
||
case <-done:
|
||
return
|
||
case <-runCtx.Done():
|
||
return
|
||
case <-t.C:
|
||
// A zero deadline = no hard cap (not yet set); otherwise cancel
|
||
// once we're at or past it, distinguishing an explicit kill from a
|
||
// natural backstop expiry so the run gets the right status.
|
||
if d := h.Deadline(); !d.IsZero() && !time.Now().Before(d) {
|
||
if cause := h.KillCause(); cause != nil {
|
||
cancelCause(fmt.Errorf("%w: %s", ErrCriticKill, cause.Error()))
|
||
} else {
|
||
cancelCause(context.DeadlineExceeded)
|
||
}
|
||
return
|
||
}
|
||
}
|
||
}
|
||
}()
|
||
return &criticBinding{h: h}, func() {
|
||
close(done)
|
||
h.Stop()
|
||
}
|
||
}
|
||
|
||
func (b *criticBinding) recordStep(iter int, resp *llm.Response) {
|
||
if b != nil {
|
||
b.h.RecordStep(iter, resp)
|
||
}
|
||
}
|
||
|
||
// recordToolStart forwards a tool call to the critic. NOTE: majordomo's step
|
||
// observer only fires AFTER an iteration completes, so this currently lands
|
||
// post-tool, not at dispatch — the activity clock is refreshed once per
|
||
// iteration, not mid-tool. A single very long tool call (e.g. a 30-min render)
|
||
// therefore won't refresh the clock until it returns; a host that runs such
|
||
// tools should feed interim progress to its Critic (mort's InstallProgressBridge
|
||
// pattern). A true pre-dispatch refresh needs a majordomo hook (follow-up).
|
||
func (b *criticBinding) recordToolStart(name, args string) {
|
||
if b != nil {
|
||
b.h.RecordToolStart(name, args)
|
||
}
|
||
}
|
||
|
||
// maxStepsOption returns the agent step-ceiling Option. With no critic it's a
|
||
// fixed WithMaxSteps(base); with a critic it's a DYNAMIC WithMaxStepsFunc that
|
||
// polls the handle each step (so the critic can raise a long run's budget),
|
||
// falling back to base when the handle defers (MaxSteps() <= 0).
|
||
func (b *criticBinding) maxStepsOption(base int) agent.Option {
|
||
if b == nil {
|
||
return agent.WithMaxSteps(base)
|
||
}
|
||
return agent.WithMaxStepsFunc(func() int {
|
||
if n := b.h.MaxSteps(); n > 0 {
|
||
return n
|
||
}
|
||
return base
|
||
})
|
||
}
|
||
|
||
// drainSteer returns the critic's queued steer messages (nil-safe), so the
|
||
// executor can merge them with the session steer mailbox into one WithSteer.
|
||
func (b *criticBinding) drainSteer() []llm.Message {
|
||
if b == nil {
|
||
return nil
|
||
}
|
||
return b.h.Steer()
|
||
}
|