feat(run): critic owns the deadline — MaxRuntime becomes the soft trigger #21
+13
-6
@@ -22,6 +22,14 @@ type criticBinding struct {
|
||||
h CriticHandle
|
||||
}
|
||||
|
||||
// criticOwnsDeadline reports whether a critic is configured AND this run enables
|
||||
// it — the single predicate that decides the two-tier-timeout path. Used by BOTH
|
||||
// Run (to choose the generous runaway ceiling over the literal MaxRuntime cap) and
|
||||
// startCritic (the arm/no-op gate), so the two can never drift.
|
||||
func (e *Executor) criticOwnsDeadline(ra RunnableAgent) bool {
|
||||
return e.cfg.Ports.Critic != nil && ra.Critic.Enabled
|
||||
}
|
||||
|
||||
// startCritic begins critic monitoring for this run when one is configured and
|
||||
// the agent enables it. It launches a goroutine that cancels runCtx (via
|
||||
// cancelCause) the moment the critic's hard deadline passes — the critic may
|
||||
@@ -35,18 +43,17 @@ type criticBinding struct {
|
||||
// softTrigger is the run's resolved MaxRuntime: for a critic-owned run MaxRuntime
|
||||
// is the soft wake (mort's two-tier semantics — the critic first reviews once the
|
||||
|
|
||||
// run exceeds its nominal budget, and its backstop = softTrigger × multiplier).
|
||||
// It falls back to the configured CriticSoftTimeout when the run set no MaxRuntime.
|
||||
// The caller (Run) always passes the resolved MaxRuntime, which withFallbacks
|
||||
// guarantees is > 0; the 90s floor below is purely a defensive guard for a
|
||||
// hypothetical caller that passes a non-positive value.
|
||||
func (e *Executor) startCritic(runCtx context.Context, cancelCause context.CancelCauseFunc, ra RunnableAgent, info RunInfo, softTrigger time.Duration) (*criticBinding, func()) {
|
||||
noop := func() {}
|
||||
if e.cfg.Ports.Critic == nil || !ra.Critic.Enabled {
|
||||
if !e.criticOwnsDeadline(ra) {
|
||||
return nil, noop
|
||||
}
|
||||
soft := softTrigger
|
||||
if soft <= 0 {
|
||||
soft = e.cfg.Defaults.CriticSoftTimeout
|
||||
}
|
||||
if soft <= 0 {
|
||||
soft = 90 * time.Second // defensive: withFallbacks normally guarantees >0
|
||||
soft = 90 * time.Second // defensive only; the sole caller passes MaxRuntime (>0)
|
||||
|
gitea-actions
commented
⚪ Unreachable 90s defensive fallback reintroduces a bare magic literal that was previously a named default (CriticSoftTimeout) maintainability · flagged by 1 model 🪰 Gadfly · advisory ⚪ **Unreachable 90s defensive fallback reintroduces a bare magic literal that was previously a named default (CriticSoftTimeout)**
_maintainability · flagged by 1 model_
<sub>🪰 Gadfly · advisory</sub>
|
||||
}
|
||||
h := e.cfg.Ports.Critic.Monitor(runCtx, info, soft)
|
||||
if h == nil {
|
||||
|
||||
@@ -106,7 +106,7 @@ func (c *capturingCritic) Monitor(_ context.Context, _ run.RunInfo, soft time.Du
|
||||
|
||||
// TestCriticSoftTriggerIsMaxRuntime: the soft trigger handed to the host critic is
|
||||
// the run's resolved MaxRuntime (mort's two-tier model — the critic first wakes once
|
||||
// the run exceeds its nominal budget), NOT the global Defaults.CriticSoftTimeout.
|
||||
// the run exceeds its nominal budget), not some global/default value.
|
||||
func TestCriticSoftTriggerIsMaxRuntime(t *testing.T) {
|
||||
fp := fake.New("fake")
|
||||
fp.Enqueue("m", fake.Reply("done"))
|
||||
@@ -116,7 +116,6 @@ func TestCriticSoftTriggerIsMaxRuntime(t *testing.T) {
|
||||
Registry: tool.NewRegistry(),
|
||||
Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) { return ctx, m, nil },
|
||||
Ports: run.Ports{Critic: cc},
|
||||
Defaults: run.Defaults{CriticSoftTimeout: 90 * time.Second}, // distinct from MaxRuntime below
|
||||
})
|
||||
const wantSoft = 7 * time.Minute
|
||||
ex.Run(context.Background(),
|
||||
@@ -126,6 +125,29 @@ func TestCriticSoftTriggerIsMaxRuntime(t *testing.T) {
|
||||
got := cc.soft
|
||||
cc.mu.Unlock()
|
||||
if got != wantSoft {
|
||||
t.Errorf("soft trigger = %v, want the agent's MaxRuntime %v (not Defaults.CriticSoftTimeout)", got, wantSoft)
|
||||
t.Errorf("soft trigger = %v, want the agent's MaxRuntime %v", got, wantSoft)
|
||||
}
|
||||
}
|
||||
|
||||
// TestCriticOwnsDeadline_NilHandleFallsBackToMaxRuntime: the agent enables the
|
||||
// critic but the host Monitor returns NO handle (nil) — there is no deadline-watch,
|
||||
// so the run is unsupervised. It must fall back to the nominal MaxRuntime hard cap
|
||||
// (the slow 200ms tool outlasts the 20ms MaxRuntime → the run errors), NOT run free
|
||||
// up to the generous CriticAbsoluteMax runaway ceiling.
|
||||
func TestCriticOwnsDeadline_NilHandleFallsBackToMaxRuntime(t *testing.T) {
|
||||
m := slowModel()
|
||||
cc := &capturingCritic{} // h is the nil interface → Monitor returns a nil handle
|
||||
ex := run.New(run.Config{
|
||||
Registry: tool.NewRegistry(),
|
||||
Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) { return ctx, m, nil },
|
||||
Ports: run.Ports{Critic: cc},
|
||||
Defaults: run.Defaults{CriticAbsoluteMax: time.Hour}, // generous ceiling; must NOT be what bounds the run
|
||||
})
|
||||
res := ex.Run(context.Background(),
|
||||
run.RunnableAgent{Name: "x", ModelTier: "m", MaxIterations: 5, MaxRuntime: 20 * time.Millisecond,
|
||||
Critic: run.CriticConfig{Enabled: true}},
|
||||
slowToolInvocation("r", 200*time.Millisecond), "go")
|
||||
if res.Err == nil {
|
||||
t.Fatalf("critic-enabled run with a nil Monitor handle must fall back to the MaxRuntime hard cap; got output=%q err=nil", res.Output)
|
||||
}
|
||||
}
|
||||
|
||||
+2
-2
@@ -61,8 +61,8 @@ func TestCriticRaisesStepCeiling(t *testing.T) {
|
||||
Registry: tool.NewRegistry(),
|
||||
Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) { return ctx, m, nil },
|
||||
Ports: run.Ports{Critic: &fakeCritic{h: h}},
|
||||
// large soft timeout so the deadline-watch never interferes in the test
|
||||
Defaults: run.Defaults{CriticSoftTimeout: time.Hour},
|
||||
// The fake handle's Deadline() is zero (no hard deadline), so the
|
||||
// deadline-watch never interferes regardless of the soft trigger.
|
||||
})
|
||||
res := ex.Run(context.Background(),
|
||||
run.RunnableAgent{Name: "x", ModelTier: "m", MaxIterations: 1, Critic: run.CriticConfig{Enabled: true}},
|
||||
|
||||
+41
-20
@@ -29,13 +29,17 @@ type Defaults struct {
|
||||
MaxConsecutiveToolErrors int // loop guard; default 3
|
||||
MaxSameToolCallRepeats int // retry-storm guard; default 3
|
||||
CompactionThresholdRatio float64 // fraction of model context to compact at; default 0.7
|
||||
CriticSoftTimeout time.Duration // idle window before the critic wakes; default 90s
|
||||
// CriticAbsoluteMax is the failsafe wall-clock ceiling for a critic-OWNED run
|
||||
// (Ports.Critic set AND the agent enables it). For such a run MaxRuntime is the
|
||||
// SOFT trigger, not a hard cap, and the critic's extendable backstop is the
|
||||
// normal deadline — so this ceiling only fires if the critic never acts (a
|
||||
// broken/nil host handle). Default 6h; never shorter than the run's MaxRuntime.
|
||||
// Non-critic runs ignore it (they keep the literal MaxRuntime kill).
|
||||
// CriticAbsoluteMax is the RUNAWAY ceiling for a critic-OWNED run (Ports.Critic
|
||||
// set AND the agent enables it). For such a run MaxRuntime is the SOFT trigger,
|
||||
// not a hard cap, and the critic's own extendable backstop is the normal
|
||||
// deadline. This ceiling exists ONLY to stop a critic that never advances its
|
||||
// deadline (a broken host handle) from running forever, so it is deliberately
|
||||
// set FAR beyond any realistic backstop (default 24h): the host clamps its own
|
||||
|
gitea-actions
commented
⚪ Comment references external system convar value maintainability · flagged by 1 model
🪰 Gadfly · advisory ⚪ **Comment references external system convar value**
_maintainability · flagged by 1 model_
- **`run/executor.go:37-39` — External system reference in comment**: The `CriticAbsoluteMax` field comment references `mort's agents.critic.absolute_max_seconds = 6h` — an external convar from a different codebase. While contextually helpful, embedding external system configuration details in kernel comments creates a maintenance coupling. Suggested fix: Rephrase to describe the relationship without hardcoding external values (e.g., "the host clamps its own backstop to a smaller absolute max").
<sub>🪰 Gadfly · advisory</sub>
|
||||
// backstop to a much smaller absolute max (e.g. mort's agents.critic.
|
||||
// absolute_max_seconds = 6h), so the ceiling never pre-empts a healthy
|
||||
// supervised run. Keep it well above the host's absolute max. Never shorter than
|
||||
// the run's MaxRuntime. Non-critic runs ignore it (they keep the literal
|
||||
// MaxRuntime kill).
|
||||
CriticAbsoluteMax time.Duration
|
||||
}
|
||||
|
||||
@@ -58,11 +62,8 @@ func (d Defaults) withFallbacks() Defaults {
|
||||
if d.CompactionThresholdRatio <= 0 {
|
||||
d.CompactionThresholdRatio = 0.7
|
||||
}
|
||||
if d.CriticSoftTimeout <= 0 {
|
||||
d.CriticSoftTimeout = 90 * time.Second
|
||||
}
|
||||
if d.CriticAbsoluteMax <= 0 {
|
||||
d.CriticAbsoluteMax = 6 * time.Hour
|
||||
d.CriticAbsoluteMax = 24 * time.Hour
|
||||
}
|
||||
return d
|
||||
}
|
||||
@@ -289,19 +290,26 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
|
||||
// MaxRuntime becomes the SOFT trigger (passed to startCritic), and the
|
||||
// critic's extendable backstop — watched in startCritic, which cancels via
|
||||
// cancelCause — is the real deadline. A slow-but-progressing run is given
|
||||
// room up to the backstop; only a stalled one is killed. We still wrap a
|
||||
// GENEROUS WithTimeout at CriticAbsoluteMax so a broken/nil critic handle
|
||||
// can't run unbounded; that ceiling never fires before the critic's backstop.
|
||||
// room up to that backstop; only a stalled one is killed. The base context
|
||||
// gets a WithTimeout at CriticAbsoluteMax (default 24h) purely as a RUNAWAY
|
||||
// guard for a critic that never advances its deadline: it is set FAR beyond
|
||||
// any realistic backstop (the host clamps its own backstop to a much smaller
|
||||
// absolute max, e.g. mort's 6h convar), so it does NOT pre-empt a healthy
|
||||
// supervised run. If the host critic fails to ARM (nil handle), the run is
|
||||
// unsupervised and we tighten the cap back down to MaxRuntime below.
|
||||
// A NESTED cause-carrying layer (cancelCause) lets a critic kill surface as a
|
||||
// distinct "killed": only an ErrCriticKill cause is consulted in statusFor; a
|
||||
// generic run error, a backstop expiry, or a caller cancel is classified by the
|
||||
// run error itself.
|
||||
criticOwnsDeadline := e.cfg.Ports.Critic != nil && ra.Critic.Enabled
|
||||
criticOwns := e.criticOwnsDeadline(ra)
|
||||
hardCap := maxRuntime
|
||||
if criticOwnsDeadline {
|
||||
if criticOwns {
|
||||
// Runaway guard only — the critic's own (extendable) deadline-watch is the
|
||||
// normal cap. Never shorter than the nominal budget, in case an operator
|
||||
// sets MaxRuntime above the runaway ceiling (a degenerate config).
|
||||
hardCap = e.cfg.Defaults.CriticAbsoluteMax
|
||||
if hardCap < maxRuntime {
|
||||
hardCap = maxRuntime // the failsafe ceiling is never shorter than the nominal budget
|
||||
hardCap = maxRuntime
|
||||
}
|
||||
}
|
||||
timeoutCtx, cancelTimeout := context.WithTimeout(context.WithoutCancel(ctx), hardCap)
|
||||
|
gitea-actions
commented
🟠 Nested 24h timeout wastes timer resources in unsupervised-run failsafe path performance · flagged by 1 model
🪰 Gadfly · advisory 🟠 **Nested 24h timeout wastes timer resources in unsupervised-run failsafe path**
_performance · flagged by 1 model_
- **run/executor.go:315-340** — *Inefficient nested timeout in unsupervised-run failsafe.* When `criticOwns && critic == nil` (critic configured/enabled but host returned nil handle), the code first creates `timeoutCtx` with the generous 24h `CriticAbsoluteMax` (line 315), then wraps it with a nested `WithTimeout(runCtx, maxRuntime)` (line 338). The outer 24h timer remains scheduled even though it will never fire (the inner `maxRuntime` timeout fires first). In a high-throughput system where cri…
<sub>🪰 Gadfly · advisory</sub>
|
||||
@@ -310,9 +318,6 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
|
||||
defer cancelCause(nil)
|
||||
runCtx, mergeCancel := MergeCancellation(runCtx, ctx)
|
||||
defer mergeCancel()
|
||||
// The finalize defer (top of Run) now has a run context to read the
|
||||
// cancellation cause from (shutdown vs critic-kill vs deadline vs cancel).
|
||||
checkpointCause = func() error { return context.Cause(runCtx) }
|
||||
|
||||
// Critic (optional): monitors the run for a stall, can nudge/extend/kill via
|
||||
// its host Escalator. When it owns the deadline, MaxRuntime is its soft trigger
|
||||
@@ -322,6 +327,22 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
|
||||
critic, stopCritic := e.startCritic(runCtx, cancelCause, ra, info, maxRuntime)
|
||||
defer stopCritic()
|
||||
|
||||
// Unsupervised-run failsafe: the agent enabled the critic (so the base context
|
||||
// got the generous runaway ceiling instead of MaxRuntime), but the host Monitor
|
||||
// returned no handle — there is no deadline-watch. Without this the run would be
|
||||
// bounded only by the 24h ceiling. Tighten it back to the nominal MaxRuntime so
|
||||
// an unsupervised run can't hold its slot far past budget. mort's adapter always
|
||||
// arms when the flag is set, so this is pure defence in depth.
|
||||
if criticOwns && critic == nil {
|
||||
|
gitea-actions
commented
🟡 Unusual defer-in-conditional pattern in unsupervised failsafe maintainability · flagged by 1 model
🪰 Gadfly · advisory 🟡 **Unusual defer-in-conditional pattern in unsupervised failsafe**
_maintainability · flagged by 1 model_
- **`run/executor.go:336-340` — Unusual defer-in-conditional pattern**: The unsupervised-run failsafe wraps `runCtx` with a new timeout and defers cancellation inside the `if` block. While this works in Go (the defer is registered conditionally and captures the variable), this pattern is less common and slightly harder to read than the alternative of declaring the variable outside and using a named cleanup function. Suggested fix: Extract to a small helper or use a more conventional pattern: ```…
<sub>🪰 Gadfly · advisory</sub>
|
||||
var cancelUnsupervised context.CancelFunc
|
||||
runCtx, cancelUnsupervised = context.WithTimeout(runCtx, maxRuntime)
|
||||
defer cancelUnsupervised()
|
||||
}
|
||||
// The finalize defer (top of Run) now has a run context to read the
|
||||
// cancellation cause from (shutdown vs critic-kill vs deadline vs cancel). Set
|
||||
// AFTER the unsupervised-failsafe re-wrap so it reads the context the loop runs on.
|
||||
checkpointCause = func() error { return context.Cause(runCtx) }
|
||||
|
||||
// Step instrumentation: accumulate Result.Steps + fire inv.OnStep, feed the
|
||||
// audit recorder, and keep the live iteration counter fresh. majordomo's
|
||||
// step observer hands us each completed iteration; we zip the model's tool
|
||||
|
||||
Reference in New Issue
Block a user
🟡 softTrigger=MaxRuntime is overloaded as the default battery's idle-stall window (critic.go:262), delaying hang detection from ~90s to a full MaxRuntime; comment 'reviews once the run exceeds its nominal budget' misdescribes the idle-based default behavior
correctness · flagged by 1 model
🪰 Gadfly · advisory