feat(run): critic owns the deadline — MaxRuntime becomes the soft trigger #21

Merged
steve merged 3 commits from feat/critic-owns-deadline into main 2026-06-30 15:56:31 +00:00
4 changed files with 81 additions and 31 deletions
Showing only changes of commit cb4c612461 - Show all commits
+13 -6
View File
@@ -22,6 +22,14 @@ type criticBinding struct {
h CriticHandle
}
// criticOwnsDeadline is the one shared predicate for the two-tier-timeout path:
// it is true only when a critic port is configured on the executor AND the
// agent's own critic config is enabled. Run consults it to pick the runaway
// ceiling instead of the literal MaxRuntime cap, and startCritic consults it as
// its arm/no-op gate; routing both through this method keeps them in agreement.
func (e *Executor) criticOwnsDeadline(ra RunnableAgent) bool {
	// No critic port wired in: nothing can own the deadline, whatever the agent asks for.
	if e.cfg.Ports.Critic == nil {
		return false
	}
	return ra.Critic.Enabled
}
// startCritic begins critic monitoring for this run when one is configured and
// the agent enables it. It launches a goroutine that cancels runCtx (via
// cancelCause) the moment the critic's hard deadline passes — the critic may
@@ -35,18 +43,17 @@ type criticBinding struct {
// softTrigger is the run's resolved MaxRuntime: for a critic-owned run MaxRuntime
// is the soft wake (mort's two-tier semantics — the critic first reviews once the
Review

🟡 softTrigger=MaxRuntime is overloaded as the default battery's idle-stall window (critic.go:262), delaying hang detection from ~90s to a full MaxRuntime; comment 'reviews once the run exceeds its nominal budget' misdescribes the idle-based default behavior

correctness · flagged by 1 model

🪰 Gadfly · advisory

🟡 **softTrigger=MaxRuntime is overloaded as the default battery's idle-stall window (critic.go:262), delaying hang detection from ~90s to a full MaxRuntime; comment 'reviews once the run exceeds its nominal budget' misdescribes the idle-based default behavior** _correctness · flagged by 1 model_ <sub>🪰 Gadfly · advisory</sub>
// run exceeds its nominal budget, and its backstop = softTrigger × multiplier).
// It falls back to the configured CriticSoftTimeout when the run set no MaxRuntime.
// The caller (Run) always passes the resolved MaxRuntime, which withFallbacks
// guarantees is > 0; the 90s floor below is purely a defensive guard for a
// hypothetical caller that passes a non-positive value.
func (e *Executor) startCritic(runCtx context.Context, cancelCause context.CancelCauseFunc, ra RunnableAgent, info RunInfo, softTrigger time.Duration) (*criticBinding, func()) {
noop := func() {}
if e.cfg.Ports.Critic == nil || !ra.Critic.Enabled {
if !e.criticOwnsDeadline(ra) {
return nil, noop
}
soft := softTrigger
if soft <= 0 {
soft = e.cfg.Defaults.CriticSoftTimeout
}
if soft <= 0 {
soft = 90 * time.Second // defensive: withFallbacks normally guarantees >0
soft = 90 * time.Second // defensive only; the sole caller passes MaxRuntime (>0)
Outdated
Review

Unreachable 90s defensive fallback reintroduces a bare magic literal that was previously a named default (CriticSoftTimeout)

maintainability · flagged by 1 model

🪰 Gadfly · advisory

⚪ **Unreachable 90s defensive fallback reintroduces a bare magic literal that was previously a named default (CriticSoftTimeout)** _maintainability · flagged by 1 model_ <sub>🪰 Gadfly · advisory</sub>
}
h := e.cfg.Ports.Critic.Monitor(runCtx, info, soft)
if h == nil {
+25 -3
View File
@@ -106,7 +106,7 @@ func (c *capturingCritic) Monitor(_ context.Context, _ run.RunInfo, soft time.Du
// TestCriticSoftTriggerIsMaxRuntime: the soft trigger handed to the host critic is
// the run's resolved MaxRuntime (mort's two-tier model — the critic first wakes once
// the run exceeds its nominal budget), NOT the global Defaults.CriticSoftTimeout.
// the run exceeds its nominal budget), not some global/default value.
func TestCriticSoftTriggerIsMaxRuntime(t *testing.T) {
fp := fake.New("fake")
fp.Enqueue("m", fake.Reply("done"))
@@ -116,7 +116,6 @@ func TestCriticSoftTriggerIsMaxRuntime(t *testing.T) {
Registry: tool.NewRegistry(),
Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) { return ctx, m, nil },
Ports: run.Ports{Critic: cc},
Defaults: run.Defaults{CriticSoftTimeout: 90 * time.Second}, // distinct from MaxRuntime below
})
const wantSoft = 7 * time.Minute
ex.Run(context.Background(),
@@ -126,6 +125,29 @@ func TestCriticSoftTriggerIsMaxRuntime(t *testing.T) {
got := cc.soft
cc.mu.Unlock()
if got != wantSoft {
t.Errorf("soft trigger = %v, want the agent's MaxRuntime %v (not Defaults.CriticSoftTimeout)", got, wantSoft)
t.Errorf("soft trigger = %v, want the agent's MaxRuntime %v", got, wantSoft)
}
}
// TestCriticOwnsDeadline_NilHandleFallsBackToMaxRuntime exercises the
// unsupervised-run case: the agent turns the critic on, yet the host Monitor
// hands back a nil handle, so nothing watches the deadline. The run is expected
// to be bounded by its nominal MaxRuntime (20ms, shorter than the 200ms tool
// call, so the run must end in an error) rather than by the much larger
// CriticAbsoluteMax runaway ceiling.
func TestCriticOwnsDeadline_NilHandleFallsBackToMaxRuntime(t *testing.T) {
	const (
		nominalBudget = 20 * time.Millisecond  // the agent's MaxRuntime
		toolDuration  = 200 * time.Millisecond // deliberately outlasts the budget
	)

	model := slowModel()
	// The zero-value capturingCritic carries a nil handle, so Monitor returns nil.
	critic := &capturingCritic{}

	cfg := run.Config{
		Registry: tool.NewRegistry(),
		Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) {
			return ctx, model, nil
		},
		Ports: run.Ports{Critic: critic},
		// Generous ceiling: if this were what bounded the run, the test would not error.
		Defaults: run.Defaults{CriticAbsoluteMax: time.Hour},
	}
	executor := run.New(cfg)

	agent := run.RunnableAgent{
		Name:          "x",
		ModelTier:     "m",
		MaxIterations: 5,
		MaxRuntime:    nominalBudget,
		Critic:        run.CriticConfig{Enabled: true},
	}

	res := executor.Run(context.Background(), agent, slowToolInvocation("r", toolDuration), "go")
	if res.Err == nil {
		t.Fatalf("critic-enabled run with a nil Monitor handle must fall back to the MaxRuntime hard cap; got output=%q err=nil", res.Output)
	}
}
+2 -2
View File
@@ -61,8 +61,8 @@ func TestCriticRaisesStepCeiling(t *testing.T) {
Registry: tool.NewRegistry(),
Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) { return ctx, m, nil },
Ports: run.Ports{Critic: &fakeCritic{h: h}},
// large soft timeout so the deadline-watch never interferes in the test
Defaults: run.Defaults{CriticSoftTimeout: time.Hour},
// The fake handle's Deadline() is zero (no hard deadline), so the
// deadline-watch never interferes regardless of the soft trigger.
})
res := ex.Run(context.Background(),
run.RunnableAgent{Name: "x", ModelTier: "m", MaxIterations: 1, Critic: run.CriticConfig{Enabled: true}},
+41 -20
View File
@@ -29,13 +29,17 @@ type Defaults struct {
MaxConsecutiveToolErrors int // loop guard; default 3
MaxSameToolCallRepeats int // retry-storm guard; default 3
CompactionThresholdRatio float64 // fraction of model context to compact at; default 0.7
CriticSoftTimeout time.Duration // idle window before the critic wakes; default 90s
// CriticAbsoluteMax is the failsafe wall-clock ceiling for a critic-OWNED run
// (Ports.Critic set AND the agent enables it). For such a run MaxRuntime is the
// SOFT trigger, not a hard cap, and the critic's extendable backstop is the
// normal deadline — so this ceiling only fires if the critic never acts (a
// broken/nil host handle). Default 6h; never shorter than the run's MaxRuntime.
// Non-critic runs ignore it (they keep the literal MaxRuntime kill).
// CriticAbsoluteMax is the RUNAWAY ceiling for a critic-OWNED run (Ports.Critic
// set AND the agent enables it). For such a run MaxRuntime is the SOFT trigger,
// not a hard cap, and the critic's own extendable backstop is the normal
// deadline. This ceiling exists ONLY to stop a critic that never advances its
// deadline (a broken host handle) from running forever, so it is deliberately
// set FAR beyond any realistic backstop (default 24h): the host clamps its own
Review

Comment references external system convar value

maintainability · flagged by 1 model

  • run/executor.go:37-39 — External system reference in comment: The CriticAbsoluteMax field comment references mort's agents.critic.absolute_max_seconds = 6h — an external convar from a different codebase. While contextually helpful, embedding external system configuration details in kernel comments creates a maintenance coupling. Suggested fix: Rephrase to describe the relationship without hardcoding external values (e.g., "the host clamps its own backstop to a smaller absolute max").

🪰 Gadfly · advisory

⚪ **Comment references external system convar value** _maintainability · flagged by 1 model_ - **`run/executor.go:37-39` — External system reference in comment**: The `CriticAbsoluteMax` field comment references `mort's agents.critic.absolute_max_seconds = 6h` — an external convar from a different codebase. While contextually helpful, embedding external system configuration details in kernel comments creates a maintenance coupling. Suggested fix: Rephrase to describe the relationship without hardcoding external values (e.g., "the host clamps its own backstop to a smaller absolute max"). <sub>🪰 Gadfly · advisory</sub>
// backstop to a much smaller absolute max (e.g. mort's agents.critic.
// absolute_max_seconds = 6h), so the ceiling never pre-empts a healthy
// supervised run. Keep it well above the host's absolute max. Never shorter than
// the run's MaxRuntime. Non-critic runs ignore it (they keep the literal
// MaxRuntime kill).
CriticAbsoluteMax time.Duration
}
@@ -58,11 +62,8 @@ func (d Defaults) withFallbacks() Defaults {
if d.CompactionThresholdRatio <= 0 {
d.CompactionThresholdRatio = 0.7
}
if d.CriticSoftTimeout <= 0 {
d.CriticSoftTimeout = 90 * time.Second
}
if d.CriticAbsoluteMax <= 0 {
d.CriticAbsoluteMax = 6 * time.Hour
d.CriticAbsoluteMax = 24 * time.Hour
}
return d
}
@@ -289,19 +290,26 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
// MaxRuntime becomes the SOFT trigger (passed to startCritic), and the
// critic's extendable backstop — watched in startCritic, which cancels via
// cancelCause — is the real deadline. A slow-but-progressing run is given
// room up to the backstop; only a stalled one is killed. We still wrap a
// GENEROUS WithTimeout at CriticAbsoluteMax so a broken/nil critic handle
// can't run unbounded; that ceiling never fires before the critic's backstop.
// room up to that backstop; only a stalled one is killed. The base context
// gets a WithTimeout at CriticAbsoluteMax (default 24h) purely as a RUNAWAY
// guard for a critic that never advances its deadline: it is set FAR beyond
// any realistic backstop (the host clamps its own backstop to a much smaller
// absolute max, e.g. mort's 6h convar), so it does NOT pre-empt a healthy
// supervised run. If the host critic fails to ARM (nil handle), the run is
// unsupervised and we tighten the cap back down to MaxRuntime below.
// A NESTED cause-carrying layer (cancelCause) lets a critic kill surface as a
// distinct "killed": only an ErrCriticKill cause is consulted in statusFor; a
// generic run error, a backstop expiry, or a caller cancel is classified by the
// run error itself.
criticOwnsDeadline := e.cfg.Ports.Critic != nil && ra.Critic.Enabled
criticOwns := e.criticOwnsDeadline(ra)
hardCap := maxRuntime
if criticOwnsDeadline {
if criticOwns {
// Runaway guard only — the critic's own (extendable) deadline-watch is the
// normal cap. Never shorter than the nominal budget, in case an operator
// sets MaxRuntime above the runaway ceiling (a degenerate config).
hardCap = e.cfg.Defaults.CriticAbsoluteMax
if hardCap < maxRuntime {
hardCap = maxRuntime // the failsafe ceiling is never shorter than the nominal budget
hardCap = maxRuntime
}
}
timeoutCtx, cancelTimeout := context.WithTimeout(context.WithoutCancel(ctx), hardCap)
Outdated
Review

🟠 Nested 24h timeout wastes timer resources in unsupervised-run failsafe path

performance · flagged by 1 model

  • run/executor.go:315-340 — Inefficient nested timeout in unsupervised-run failsafe. When criticOwns && critic == nil (critic configured/enabled but host returned nil handle), the code first creates timeoutCtx with the generous 24h CriticAbsoluteMax (line 315), then wraps it with a nested WithTimeout(runCtx, maxRuntime) (line 338). The outer 24h timer remains scheduled even though it will never fire (the inner maxRuntime timeout fires first). In a high-throughput system where cri…

🪰 Gadfly · advisory

🟠 **Nested 24h timeout wastes timer resources in unsupervised-run failsafe path** _performance · flagged by 1 model_ - **run/executor.go:315-340** — *Inefficient nested timeout in unsupervised-run failsafe.* When `criticOwns && critic == nil` (critic configured/enabled but host returned nil handle), the code first creates `timeoutCtx` with the generous 24h `CriticAbsoluteMax` (line 315), then wraps it with a nested `WithTimeout(runCtx, maxRuntime)` (line 338). The outer 24h timer remains scheduled even though it will never fire (the inner `maxRuntime` timeout fires first). In a high-throughput system where cri… <sub>🪰 Gadfly · advisory</sub>
@@ -310,9 +318,6 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
defer cancelCause(nil)
runCtx, mergeCancel := MergeCancellation(runCtx, ctx)
defer mergeCancel()
// The finalize defer (top of Run) now has a run context to read the
// cancellation cause from (shutdown vs critic-kill vs deadline vs cancel).
checkpointCause = func() error { return context.Cause(runCtx) }
// Critic (optional): monitors the run for a stall, can nudge/extend/kill via
// its host Escalator. When it owns the deadline, MaxRuntime is its soft trigger
@@ -322,6 +327,22 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
critic, stopCritic := e.startCritic(runCtx, cancelCause, ra, info, maxRuntime)
defer stopCritic()
// Unsupervised-run failsafe: the agent enabled the critic (so the base context
// got the generous runaway ceiling instead of MaxRuntime), but the host Monitor
// returned no handle — there is no deadline-watch. Without this the run would be
// bounded only by the 24h ceiling. Tighten it back to the nominal MaxRuntime so
// an unsupervised run can't hold its slot far past budget. mort's adapter always
// arms when the flag is set, so this is pure defence in depth.
if criticOwns && critic == nil {
Review

🟡 Unusual defer-in-conditional pattern in unsupervised failsafe

maintainability · flagged by 1 model

  • run/executor.go:336-340 — Unusual defer-in-conditional pattern: The unsupervised-run failsafe wraps runCtx with a new timeout and defers cancellation inside the if block. While this works in Go (the defer is registered conditionally and captures the variable), this pattern is less common and slightly harder to read than the alternative of declaring the variable outside and using a named cleanup function. Suggested fix: Extract to a small helper or use a more conventional pattern: ```…

🪰 Gadfly · advisory

🟡 **Unusual defer-in-conditional pattern in unsupervised failsafe** _maintainability · flagged by 1 model_ - **`run/executor.go:336-340` — Unusual defer-in-conditional pattern**: The unsupervised-run failsafe wraps `runCtx` with a new timeout and defers cancellation inside the `if` block. While this works in Go (the defer is registered conditionally and captures the variable), this pattern is less common and slightly harder to read than the alternative of declaring the variable outside and using a named cleanup function. Suggested fix: Extract to a small helper or use a more conventional pattern: ```… <sub>🪰 Gadfly · advisory</sub>
var cancelUnsupervised context.CancelFunc
runCtx, cancelUnsupervised = context.WithTimeout(runCtx, maxRuntime)
defer cancelUnsupervised()
}
// The finalize defer (top of Run) now has a run context to read the
// cancellation cause from (shutdown vs critic-kill vs deadline vs cancel). Set
// AFTER the unsupervised-failsafe re-wrap so it reads the context the loop runs on.
checkpointCause = func() error { return context.Cause(runCtx) }
// Step instrumentation: accumulate Result.Steps + fire inv.OnStep, feed the
// audit recorder, and keep the live iteration counter fresh. majordomo's
// step observer hands us each completed iteration; we zip the model's tool