feat(run): critic owns the deadline — MaxRuntime becomes the soft trigger #21
+19
-7
@@ -22,6 +22,14 @@ type criticBinding struct {
|
|||||||
h CriticHandle
|
h CriticHandle
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// criticOwnsDeadline reports whether a critic is configured AND this run enables
|
||||||
|
// it — the single predicate that decides the two-tier-timeout path. Used by BOTH
|
||||||
|
// Run (to choose the generous runaway ceiling over the literal MaxRuntime cap) and
|
||||||
|
// startCritic (the arm/no-op gate), so the two can never drift.
|
||||||
|
func (e *Executor) criticOwnsDeadline(ra RunnableAgent) bool {
|
||||||
|
return e.cfg.Ports.Critic != nil && ra.Critic.Enabled
|
||||||
|
}
|
||||||
|
|
||||||
// startCritic begins critic monitoring for this run when one is configured and
|
// startCritic begins critic monitoring for this run when one is configured and
|
||||||
// the agent enables it. It launches a goroutine that cancels runCtx (via
|
// the agent enables it. It launches a goroutine that cancels runCtx (via
|
||||||
// cancelCause) the moment the critic's hard deadline passes — the critic may
|
// cancelCause) the moment the critic's hard deadline passes — the critic may
|
||||||
@@ -31,16 +39,20 @@ type criticBinding struct {
|
|||||||
// "killed"); when the backstop simply expired, it is context.DeadlineExceeded (→
|
// "killed"); when the backstop simply expired, it is context.DeadlineExceeded (→
|
||||||
// "timeout"). Returns (nil, no-op stop) when there is no critic. The caller MUST
|
// "timeout"). Returns (nil, no-op stop) when there is no critic. The caller MUST
|
||||||
// defer the returned stop.
|
// defer the returned stop.
|
||||||
func (e *Executor) startCritic(runCtx context.Context, cancelCause context.CancelCauseFunc, ra RunnableAgent, info RunInfo) (*criticBinding, func()) {
|
//
|
||||||
|
// softTrigger is the run's resolved MaxRuntime: for a critic-owned run MaxRuntime
|
||||||
|
// is the soft wake (mort's two-tier semantics — the critic first reviews once the
|
||||||
|
|
|||||||
|
// run exceeds its nominal budget, and its backstop = softTrigger × multiplier).
|
||||||
|
// The caller (Run) always passes the resolved MaxRuntime, which withFallbacks
|
||||||
|
// guarantees is > 0, so no fallback is needed here. (A non-positive soft would make
|
||||||
|
// the host Monitor return no handle, and Run's unsupervised-run failsafe then bounds
|
||||||
|
// the run at MaxRuntime — so even that impossible case stays bounded.)
|
||||||
|
func (e *Executor) startCritic(runCtx context.Context, cancelCause context.CancelCauseFunc, ra RunnableAgent, info RunInfo, softTrigger time.Duration) (*criticBinding, func()) {
|
||||||
noop := func() {}
|
noop := func() {}
|
||||||
if e.cfg.Ports.Critic == nil || !ra.Critic.Enabled {
|
if !e.criticOwnsDeadline(ra) {
|
||||||
return nil, noop
|
return nil, noop
|
||||||
}
|
}
|
||||||
soft := e.cfg.Defaults.CriticSoftTimeout
|
h := e.cfg.Ports.Critic.Monitor(runCtx, info, softTrigger)
|
||||||
if soft <= 0 {
|
|
||||||
soft = 90 * time.Second // defensive: withFallbacks normally guarantees >0
|
|
||||||
}
|
|
||||||
h := e.cfg.Ports.Critic.Monitor(runCtx, info, soft)
|
|
||||||
if h == nil {
|
if h == nil {
|
||||||
|
gitea-actions
commented
⚪ Unreachable 90s defensive fallback reintroduces a bare magic literal that was previously a named default (CriticSoftTimeout) maintainability · flagged by 1 model 🪰 Gadfly · advisory ⚪ **Unreachable 90s defensive fallback reintroduces a bare magic literal that was previously a named default (CriticSoftTimeout)**
_maintainability · flagged by 1 model_
<sub>🪰 Gadfly · advisory</sub>
|
|||||||
return nil, noop
|
return nil, noop
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,153 @@
|
|||||||
|
package run_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"sync"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
||||||
|
"gitea.stevedudenhoeffer.com/steve/majordomo/provider/fake"
|
||||||
|
|
||||||
|
"gitea.stevedudenhoeffer.com/steve/executus/run"
|
||||||
|
"gitea.stevedudenhoeffer.com/steve/executus/tool"
|
||||||
|
)
|
||||||
|
|
||||||
|
// slowToolInvocation builds an Invocation whose session factory adds a "slow"
|
||||||
|
// tool that sleeps for d (respecting ctx). The model script calls it once, then
|
||||||
|
// answers — so the run's wall-clock is dominated by d, letting a test set a tiny
|
||||||
|
// MaxRuntime and observe whether MaxRuntime hard-cancels the run.
|
||||||
|
func slowToolInvocation(runID string, d time.Duration) tool.Invocation {
|
||||||
|
slow := llm.DefineTool("slow", "sleeps for a while",
|
||||||
|
func(ctx context.Context, _ struct{}) (any, error) {
|
||||||
|
select {
|
||||||
|
case <-time.After(d):
|
||||||
|
return "ok", nil
|
||||||
|
case <-ctx.Done():
|
||||||
|
return nil, ctx.Err()
|
||||||
|
}
|
||||||
|
})
|
||||||
|
return tool.Invocation{
|
||||||
|
RunID: runID,
|
||||||
|
SessionToolFactory: func(_ tool.AgentSession) tool.SessionTools {
|
||||||
|
return tool.SessionTools{Tools: []llm.Tool{slow}}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func slowModel() llm.Model {
|
||||||
|
fp := fake.New("fake")
|
||||||
|
fp.Enqueue("m",
|
||||||
|
fake.ReplyWith(llm.Response{ToolCalls: []llm.ToolCall{{ID: "c1", Name: "slow", Arguments: []byte(`{}`)}}}),
|
||||||
|
fake.Reply("done"),
|
||||||
|
)
|
||||||
|
m, _ := fp.Model("m")
|
||||||
|
return m
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestNoCritic_MaxRuntimeIsHardCap: the legacy contract is preserved — without a
|
||||||
|
// critic, MaxRuntime is a literal WithTimeout that kills a run whose work outlasts
|
||||||
|
// it. The slow tool (200ms) outlasts MaxRuntime (20ms), so runCtx cancels mid-tool
|
||||||
|
// and the run ends in error (timeout).
|
||||||
|
func TestNoCritic_MaxRuntimeIsHardCap(t *testing.T) {
|
||||||
|
m := slowModel()
|
||||||
|
ex := run.New(run.Config{
|
||||||
|
Registry: tool.NewRegistry(),
|
||||||
|
Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) { return ctx, m, nil },
|
||||||
|
})
|
||||||
|
res := ex.Run(context.Background(),
|
||||||
|
run.RunnableAgent{Name: "x", ModelTier: "m", MaxIterations: 5, MaxRuntime: 20 * time.Millisecond},
|
||||||
|
slowToolInvocation("r", 200*time.Millisecond), "go")
|
||||||
|
if res.Err == nil {
|
||||||
|
t.Fatalf("non-critic run should hard-timeout at MaxRuntime; got output=%q err=nil", res.Output)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestCriticOwnsDeadline_SurvivesPastMaxRuntime: the fix — when the critic owns the
|
||||||
|
// deadline (Ports.Critic set + Critic.Enabled), MaxRuntime becomes the SOFT trigger
|
||||||
|
// and is NOT a hard cap. The fake critic exposes no hard deadline (Deadline()==zero,
|
||||||
|
// no kill), so the only hard ceiling is CriticAbsoluteMax (10s here). The slow tool
|
||||||
|
// (200ms) outlasts the tiny MaxRuntime (20ms) but the run completes — proving the
|
||||||
|
// old agentexec two-tier semantics are restored.
|
||||||
|
func TestCriticOwnsDeadline_SurvivesPastMaxRuntime(t *testing.T) {
|
||||||
|
m := slowModel()
|
||||||
|
h := &fakeCriticHandle{} // Deadline()==zero → no hard deadline, no kill
|
||||||
|
ex := run.New(run.Config{
|
||||||
|
Registry: tool.NewRegistry(),
|
||||||
|
Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) { return ctx, m, nil },
|
||||||
|
Ports: run.Ports{Critic: &fakeCritic{h: h}},
|
||||||
|
Defaults: run.Defaults{CriticAbsoluteMax: 10 * time.Second},
|
||||||
|
})
|
||||||
|
res := ex.Run(context.Background(),
|
||||||
|
run.RunnableAgent{Name: "watched", ModelTier: "m", MaxIterations: 5, MaxRuntime: 20 * time.Millisecond,
|
||||||
|
Critic: run.CriticConfig{Enabled: true}},
|
||||||
|
slowToolInvocation("r", 200*time.Millisecond), "go")
|
||||||
|
if res.Err != nil {
|
||||||
|
t.Fatalf("critic-owned run must survive past MaxRuntime (soft trigger); got err=%v", res.Err)
|
||||||
|
}
|
||||||
|
if res.Output != "done" {
|
||||||
|
t.Errorf("output = %q, want %q", res.Output, "done")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// capturingCritic records the soft trigger the executor passes to Monitor.
|
||||||
|
type capturingCritic struct {
|
||||||
|
mu sync.Mutex
|
||||||
|
soft time.Duration
|
||||||
|
h run.CriticHandle
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *capturingCritic) Monitor(_ context.Context, _ run.RunInfo, soft time.Duration) run.CriticHandle {
|
||||||
|
c.mu.Lock()
|
||||||
|
c.soft = soft
|
||||||
|
c.mu.Unlock()
|
||||||
|
return c.h
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestCriticSoftTriggerIsMaxRuntime: the soft trigger handed to the host critic is
|
||||||
|
// the run's resolved MaxRuntime (mort's two-tier model — the critic first wakes once
|
||||||
|
// the run exceeds its nominal budget), not some global/default value.
|
||||||
|
func TestCriticSoftTriggerIsMaxRuntime(t *testing.T) {
|
||||||
|
fp := fake.New("fake")
|
||||||
|
fp.Enqueue("m", fake.Reply("done"))
|
||||||
|
m, _ := fp.Model("m")
|
||||||
|
cc := &capturingCritic{h: &fakeCriticHandle{}}
|
||||||
|
ex := run.New(run.Config{
|
||||||
|
Registry: tool.NewRegistry(),
|
||||||
|
Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) { return ctx, m, nil },
|
||||||
|
Ports: run.Ports{Critic: cc},
|
||||||
|
})
|
||||||
|
const wantSoft = 7 * time.Minute
|
||||||
|
ex.Run(context.Background(),
|
||||||
|
run.RunnableAgent{Name: "x", ModelTier: "m", MaxRuntime: wantSoft, Critic: run.CriticConfig{Enabled: true}},
|
||||||
|
tool.Invocation{RunID: "r"}, "go")
|
||||||
|
cc.mu.Lock()
|
||||||
|
got := cc.soft
|
||||||
|
cc.mu.Unlock()
|
||||||
|
if got != wantSoft {
|
||||||
|
t.Errorf("soft trigger = %v, want the agent's MaxRuntime %v", got, wantSoft)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestCriticOwnsDeadline_NilHandleFallsBackToMaxRuntime: the agent enables the
|
||||||
|
// critic but the host Monitor returns NO handle (nil) — there is no deadline-watch,
|
||||||
|
// so the run is unsupervised. It must fall back to the nominal MaxRuntime hard cap
|
||||||
|
// (the slow 200ms tool outlasts the 20ms MaxRuntime → the run errors), NOT run free
|
||||||
|
// up to the generous CriticAbsoluteMax runaway ceiling.
|
||||||
|
func TestCriticOwnsDeadline_NilHandleFallsBackToMaxRuntime(t *testing.T) {
|
||||||
|
m := slowModel()
|
||||||
|
cc := &capturingCritic{} // h is the nil interface → Monitor returns a nil handle
|
||||||
|
ex := run.New(run.Config{
|
||||||
|
Registry: tool.NewRegistry(),
|
||||||
|
Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) { return ctx, m, nil },
|
||||||
|
Ports: run.Ports{Critic: cc},
|
||||||
|
Defaults: run.Defaults{CriticAbsoluteMax: time.Hour}, // generous ceiling; must NOT be what bounds the run
|
||||||
|
})
|
||||||
|
res := ex.Run(context.Background(),
|
||||||
|
run.RunnableAgent{Name: "x", ModelTier: "m", MaxIterations: 5, MaxRuntime: 20 * time.Millisecond,
|
||||||
|
Critic: run.CriticConfig{Enabled: true}},
|
||||||
|
slowToolInvocation("r", 200*time.Millisecond), "go")
|
||||||
|
if res.Err == nil {
|
||||||
|
t.Fatalf("critic-enabled run with a nil Monitor handle must fall back to the MaxRuntime hard cap; got output=%q err=nil", res.Output)
|
||||||
|
}
|
||||||
|
}
|
||||||
+2
-2
@@ -61,8 +61,8 @@ func TestCriticRaisesStepCeiling(t *testing.T) {
|
|||||||
Registry: tool.NewRegistry(),
|
Registry: tool.NewRegistry(),
|
||||||
Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) { return ctx, m, nil },
|
Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) { return ctx, m, nil },
|
||||||
Ports: run.Ports{Critic: &fakeCritic{h: h}},
|
Ports: run.Ports{Critic: &fakeCritic{h: h}},
|
||||||
// large soft timeout so the deadline-watch never interferes in the test
|
// The fake handle's Deadline() is zero (no hard deadline), so the
|
||||||
Defaults: run.Defaults{CriticSoftTimeout: time.Hour},
|
// deadline-watch never interferes regardless of the soft trigger.
|
||||||
})
|
})
|
||||||
res := ex.Run(context.Background(),
|
res := ex.Run(context.Background(),
|
||||||
run.RunnableAgent{Name: "x", ModelTier: "m", MaxIterations: 1, Critic: run.CriticConfig{Enabled: true}},
|
run.RunnableAgent{Name: "x", ModelTier: "m", MaxIterations: 1, Critic: run.CriticConfig{Enabled: true}},
|
||||||
|
|||||||
+68
-21
@@ -29,7 +29,17 @@ type Defaults struct {
|
|||||||
MaxConsecutiveToolErrors int // loop guard; default 3
|
MaxConsecutiveToolErrors int // loop guard; default 3
|
||||||
MaxSameToolCallRepeats int // retry-storm guard; default 3
|
MaxSameToolCallRepeats int // retry-storm guard; default 3
|
||||||
CompactionThresholdRatio float64 // fraction of model context to compact at; default 0.7
|
CompactionThresholdRatio float64 // fraction of model context to compact at; default 0.7
|
||||||
CriticSoftTimeout time.Duration // idle window before the critic wakes; default 90s
|
// CriticAbsoluteMax is the RUNAWAY ceiling for a critic-OWNED run (Ports.Critic
|
||||||
|
// set AND the agent enables it). For such a run MaxRuntime is the SOFT trigger,
|
||||||
|
// not a hard cap, and the critic's own extendable backstop is the normal
|
||||||
|
// deadline. This ceiling exists ONLY to stop a critic that never advances its
|
||||||
|
// deadline (a broken host handle) from running forever, so it is deliberately
|
||||||
|
// set FAR beyond any realistic backstop (default 24h): the host clamps its own
|
||||||
|
gitea-actions
commented
⚪ Comment references external system convar value maintainability · flagged by 1 model
🪰 Gadfly · advisory ⚪ **Comment references external system convar value**
_maintainability · flagged by 1 model_
- **`run/executor.go:37-39` — External system reference in comment**: The `CriticAbsoluteMax` field comment references `mort's agents.critic.absolute_max_seconds = 6h` — an external convar from a different codebase. While contextually helpful, embedding external system configuration details in kernel comments creates a maintenance coupling. Suggested fix: Rephrase to describe the relationship without hardcoding external values (e.g., "the host clamps its own backstop to a smaller absolute max").
<sub>🪰 Gadfly · advisory</sub>
|
|||||||
|
// backstop to a much smaller absolute max (e.g. a 6h host convar), so the ceiling
|
||||||
|
// never pre-empts a healthy supervised run. Keep it well above the host's
|
||||||
|
// absolute max. Never shorter than the run's MaxRuntime. Non-critic runs ignore
|
||||||
|
// it (they keep the literal MaxRuntime kill).
|
||||||
|
CriticAbsoluteMax time.Duration
|
||||||
}
|
}
|
||||||
|
|
||||||
func (d Defaults) withFallbacks() Defaults {
|
func (d Defaults) withFallbacks() Defaults {
|
||||||
@@ -51,8 +61,8 @@ func (d Defaults) withFallbacks() Defaults {
|
|||||||
if d.CompactionThresholdRatio <= 0 {
|
if d.CompactionThresholdRatio <= 0 {
|
||||||
d.CompactionThresholdRatio = 0.7
|
d.CompactionThresholdRatio = 0.7
|
||||||
}
|
}
|
||||||
if d.CriticSoftTimeout <= 0 {
|
if d.CriticAbsoluteMax <= 0 {
|
||||||
d.CriticSoftTimeout = 90 * time.Second
|
d.CriticAbsoluteMax = 24 * time.Hour
|
||||||
}
|
}
|
||||||
return d
|
return d
|
||||||
}
|
}
|
||||||
@@ -265,33 +275,70 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
|
|||||||
postRun = st.PostRun
|
postRun = st.PostRun
|
||||||
}
|
}
|
||||||
|
|
||||||
// Run context: bound by MaxRuntime, detached from the caller's deadline so a
|
// Run context: detached from the caller's deadline so a lane/queue wait doesn't
|
||||||
// lane/queue wait doesn't eat the run budget (mort's V10 lesson). Caller
|
// eat the run budget (mort's V10 lesson). Caller cancellation still propagates
|
||||||
// cancellation still propagates via MergeCancellation. Created BEFORE the
|
// via MergeCancellation. Created BEFORE the step observer so the observer
|
||||||
// step observer so the observer forwards the merged run context (not a
|
// forwards the merged run context (not a possibly-cancelled caller ctx) to
|
||||||
// possibly-cancelled caller ctx) to OnStep consumers.
|
// OnStep consumers.
|
||||||
// MaxRuntime stays a WithTimeout so its DeadlineExceeded propagates through the
|
//
|
||||||
// child chain (→ "timeout"), preserving the run's-own-timeout vs caller-cancel
|
// Two-tier timeout: who owns the hard deadline depends on the critic.
|
||||||
// distinction. A NESTED cause-carrying layer lets a critic kill surface as a
|
// - NO critic (the default): MaxRuntime is a literal WithTimeout. Its
|
||||||
// distinct "killed" without disturbing that: only an ErrCriticKill cause is
|
// DeadlineExceeded propagates through the child chain (→ "timeout"),
|
||||||
// consulted in statusFor; a generic run error or a caller cancel is classified
|
// preserving the run's-own-timeout vs caller-cancel distinction.
|
||||||
// by the run error itself.
|
// - critic OWNS the deadline (Ports.Critic set + ra.Critic.Enabled):
|
||||||
timeoutCtx, cancelTimeout := context.WithTimeout(context.WithoutCancel(ctx), maxRuntime)
|
// MaxRuntime becomes the SOFT trigger (passed to startCritic), and the
|
||||||
|
// critic's extendable backstop — watched in startCritic, which cancels via
|
||||||
|
// cancelCause — is the real deadline. A slow-but-progressing run is given
|
||||||
|
// room up to that backstop; only a stalled one is killed. The base context
|
||||||
|
// gets a WithTimeout at CriticAbsoluteMax (default 24h) purely as a RUNAWAY
|
||||||
|
// guard for a critic that never advances its deadline: it is set FAR beyond
|
||||||
|
// any realistic backstop (the host clamps its own backstop to a much smaller
|
||||||
|
// absolute max, e.g. a 6h host convar), so it does NOT pre-empt a healthy
|
||||||
|
// supervised run. If the host critic fails to ARM (nil handle), the run is
|
||||||
|
// unsupervised and we tighten the cap back down to MaxRuntime below.
|
||||||
|
// A NESTED cause-carrying layer (cancelCause) lets a critic kill surface as a
|
||||||
|
// distinct "killed": only an ErrCriticKill cause is consulted in statusFor; a
|
||||||
|
// generic run error, a backstop expiry, or a caller cancel is classified by the
|
||||||
|
// run error itself.
|
||||||
|
criticOwns := e.criticOwnsDeadline(ra)
|
||||||
|
hardCap := maxRuntime
|
||||||
|
if criticOwns {
|
||||||
|
// Runaway guard only — the critic's own (extendable) deadline-watch is the
|
||||||
|
// normal cap. max() keeps it from being shorter than the nominal budget if an
|
||||||
|
// operator sets MaxRuntime above the runaway ceiling (a degenerate config).
|
||||||
|
hardCap = max(e.cfg.Defaults.CriticAbsoluteMax, maxRuntime)
|
||||||
|
}
|
||||||
|
timeoutCtx, cancelTimeout := context.WithTimeout(context.WithoutCancel(ctx), hardCap)
|
||||||
defer cancelTimeout()
|
defer cancelTimeout()
|
||||||
runCtx, cancelCause := context.WithCancelCause(timeoutCtx)
|
runCtx, cancelCause := context.WithCancelCause(timeoutCtx)
|
||||||
defer cancelCause(nil)
|
defer cancelCause(nil)
|
||||||
runCtx, mergeCancel := MergeCancellation(runCtx, ctx)
|
runCtx, mergeCancel := MergeCancellation(runCtx, ctx)
|
||||||
|
gitea-actions
commented
🟠 Nested 24h timeout wastes timer resources in unsupervised-run failsafe path performance · flagged by 1 model
🪰 Gadfly · advisory 🟠 **Nested 24h timeout wastes timer resources in unsupervised-run failsafe path**
_performance · flagged by 1 model_
- **run/executor.go:315-340** — *Inefficient nested timeout in unsupervised-run failsafe.* When `criticOwns && critic == nil` (critic configured/enabled but host returned nil handle), the code first creates `timeoutCtx` with the generous 24h `CriticAbsoluteMax` (line 315), then wraps it with a nested `WithTimeout(runCtx, maxRuntime)` (line 338). The outer 24h timer remains scheduled even though it will never fire (the inner `maxRuntime` timeout fires first). In a high-throughput system where cri…
<sub>🪰 Gadfly · advisory</sub>
|
|||||||
defer mergeCancel()
|
defer mergeCancel()
|
||||||
// The finalize defer (top of Run) now has a run context to read the
|
|
||||||
// cancellation cause from (shutdown vs critic-kill vs deadline vs cancel).
|
|
||||||
checkpointCause = func() error { return context.Cause(runCtx) }
|
|
||||||
|
|
||||||
// Critic (optional): monitors the run for a stall, can nudge/extend/kill via
|
// Critic (optional): monitors the run for a stall, can nudge/extend/kill via
|
||||||
// its host Escalator. Its hard deadline is bound to runCtx (cancel on pass).
|
// its host Escalator. When it owns the deadline, MaxRuntime is its soft trigger
|
||||||
// nil-safe: no-op when no critic is configured or the agent doesn't enable it.
|
// (so a slow-but-progressing run survives past it); its extendable backstop is
|
||||||
critic, stopCritic := e.startCritic(runCtx, cancelCause, ra, info)
|
// bound to runCtx (cancel on pass). nil-safe: no-op when no critic is configured
|
||||||
|
// or the agent doesn't enable it.
|
||||||
|
critic, stopCritic := e.startCritic(runCtx, cancelCause, ra, info, maxRuntime)
|
||||||
defer stopCritic()
|
defer stopCritic()
|
||||||
|
|
||||||
|
// Unsupervised-run failsafe: the agent enabled the critic (so the base context
|
||||||
|
// got the generous runaway ceiling instead of MaxRuntime), but the host Monitor
|
||||||
|
// returned no handle — there is no deadline-watch. Without this the run would be
|
||||||
|
// bounded only by the 24h ceiling. Tighten it back to the nominal MaxRuntime so
|
||||||
|
// an unsupervised run can't hold its slot far past budget. mort's adapter always
|
||||||
|
// arms when the flag is set, so this is pure defence in depth.
|
||||||
|
if criticOwns && critic == nil {
|
||||||
|
var cancelUnsupervised context.CancelFunc
|
||||||
|
runCtx, cancelUnsupervised = context.WithTimeout(runCtx, maxRuntime)
|
||||||
|
defer cancelUnsupervised()
|
||||||
|
}
|
||||||
|
gitea-actions
commented
🟡 Unusual defer-in-conditional pattern in unsupervised failsafe maintainability · flagged by 1 model
🪰 Gadfly · advisory 🟡 **Unusual defer-in-conditional pattern in unsupervised failsafe**
_maintainability · flagged by 1 model_
- **`run/executor.go:336-340` — Unusual defer-in-conditional pattern**: The unsupervised-run failsafe wraps `runCtx` with a new timeout and defers cancellation inside the `if` block. While this works in Go (the defer is registered conditionally and captures the variable), this pattern is less common and slightly harder to read than the alternative of declaring the variable outside and using a named cleanup function. Suggested fix: Extract to a small helper or use a more conventional pattern: ```…
<sub>🪰 Gadfly · advisory</sub>
|
|||||||
|
// The finalize defer (top of Run) now has a run context to read the
|
||||||
|
// cancellation cause from (shutdown vs critic-kill vs deadline vs cancel). Set
|
||||||
|
// AFTER the unsupervised-failsafe re-wrap so it reads the context the loop runs on.
|
||||||
|
checkpointCause = func() error { return context.Cause(runCtx) }
|
||||||
|
|
||||||
// Step instrumentation: accumulate Result.Steps + fire inv.OnStep, feed the
|
// Step instrumentation: accumulate Result.Steps + fire inv.OnStep, feed the
|
||||||
// audit recorder, and keep the live iteration counter fresh. majordomo's
|
// audit recorder, and keep the live iteration counter fresh. majordomo's
|
||||||
// step observer hands us each completed iteration; we zip the model's tool
|
// step observer hands us each completed iteration; we zip the model's tool
|
||||||
|
|||||||
Reference in New Issue
Block a user
🟡 softTrigger=MaxRuntime is overloaded as the default battery's idle-stall window (critic.go:262), delaying hang detection from ~90s to a full MaxRuntime; comment 'reviews once the run exceeds its nominal budget' misdescribes the idle-based default behavior
correctness · flagged by 1 model
🪰 Gadfly · advisory