fix(run): address gadfly review of the critic-deadline PR
executus CI / test (pull_request) Successful in 1m45s
executus CI / test (pull_request) Successful in 1m45s
All 11 findings were real (3 clusters, plus one DRY cleanup):

- Failsafe ceiling could pre-empt the critic's backstop (e9c9483f, 9109317b, d5a9bf0d, 76ad171e): CriticAbsoluteMax was 6h, but the host's backstop (MaxRuntime × multiplier, or its own absolute max) can reach 6h+, so the ceiling fired first and reintroduced a premature hard cap. Now CriticAbsoluteMax is a 24h RUNAWAY guard set far beyond any realistic backstop (the host clamps its own backstop to a much smaller absolute max, e.g. mort's 6h convar), so it never pre-empts a healthy supervised run. Comments corrected.
- nil Monitor handle lost the MaxRuntime cap (df016a6f, 9dd42827): a critic-enabled run whose host Monitor returned no handle had no deadline-watch and was bounded only by the generous ceiling. Added an unsupervised-run failsafe that re-wraps runCtx to the nominal MaxRuntime when the critic is enabled but didn't arm. New test TestCriticOwnsDeadline_NilHandleFallsBackToMaxRuntime.
- CriticSoftTimeout vestigial / dead fallback (f7764919, 9805bebe, 6864086f, b2b11721): the soft trigger is now always the resolved MaxRuntime (> 0), so the CriticSoftTimeout field + its startCritic fallback were unreachable. Removed the field entirely; the remaining 90s floor is documented as defensive-only.
- DRY (f30ce827): extracted e.criticOwnsDeadline(ra), now the single predicate used by both Run and startCritic so they can't drift.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01Jo75sqmeVPgFUWZQBn179X
This commit is contained in:
@@ -106,7 +106,7 @@ func (c *capturingCritic) Monitor(_ context.Context, _ run.RunInfo, soft time.Du
|
||||
|
||||
// TestCriticSoftTriggerIsMaxRuntime: the soft trigger handed to the host critic is
|
||||
// the run's resolved MaxRuntime (mort's two-tier model — the critic first wakes once
|
||||
// the run exceeds its nominal budget), NOT the global Defaults.CriticSoftTimeout.
|
||||
// the run exceeds its nominal budget), not some global/default value.
|
||||
func TestCriticSoftTriggerIsMaxRuntime(t *testing.T) {
|
||||
fp := fake.New("fake")
|
||||
fp.Enqueue("m", fake.Reply("done"))
|
||||
@@ -116,7 +116,6 @@ func TestCriticSoftTriggerIsMaxRuntime(t *testing.T) {
|
||||
Registry: tool.NewRegistry(),
|
||||
Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) { return ctx, m, nil },
|
||||
Ports: run.Ports{Critic: cc},
|
||||
Defaults: run.Defaults{CriticSoftTimeout: 90 * time.Second}, // distinct from MaxRuntime below
|
||||
})
|
||||
const wantSoft = 7 * time.Minute
|
||||
ex.Run(context.Background(),
|
||||
@@ -126,6 +125,29 @@ func TestCriticSoftTriggerIsMaxRuntime(t *testing.T) {
|
||||
got := cc.soft
|
||||
cc.mu.Unlock()
|
||||
if got != wantSoft {
|
||||
t.Errorf("soft trigger = %v, want the agent's MaxRuntime %v (not Defaults.CriticSoftTimeout)", got, wantSoft)
|
||||
t.Errorf("soft trigger = %v, want the agent's MaxRuntime %v", got, wantSoft)
|
||||
}
|
||||
}
|
||||
|
||||
// TestCriticOwnsDeadline_NilHandleFallsBackToMaxRuntime: the agent enables the
|
||||
// critic but the host Monitor returns NO handle (nil) — there is no deadline-watch,
|
||||
// so the run is unsupervised. It must fall back to the nominal MaxRuntime hard cap
|
||||
// (the slow 200ms tool outlasts the 20ms MaxRuntime → the run errors), NOT run free
|
||||
// up to the generous CriticAbsoluteMax runaway ceiling.
|
||||
func TestCriticOwnsDeadline_NilHandleFallsBackToMaxRuntime(t *testing.T) {
|
||||
m := slowModel()
|
||||
cc := &capturingCritic{} // h is the nil interface → Monitor returns a nil handle
|
||||
// The Models resolver ignores the requested tier and always hands back m.
ex := run.New(run.Config{
|
||||
Registry: tool.NewRegistry(),
|
||||
Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) { return ctx, m, nil },
|
||||
Ports: run.Ports{Critic: cc},
|
||||
Defaults: run.Defaults{CriticAbsoluteMax: time.Hour}, // generous ceiling; must NOT be what bounds the run
|
||||
})
|
||||
// MaxRuntime (20ms) is deliberately far shorter than the 200ms given to
// slowToolInvocation, and the 1h ceiling is far longer than either, so an
// error here is expected to come from the MaxRuntime fallback.
res := ex.Run(context.Background(),
|
||||
run.RunnableAgent{Name: "x", ModelTier: "m", MaxIterations: 5, MaxRuntime: 20 * time.Millisecond,
|
||||
Critic: run.CriticConfig{Enabled: true}},
|
||||
slowToolInvocation("r", 200*time.Millisecond), "go")
|
||||
// A nil error means the run finished without being cut short, i.e. the
// MaxRuntime fallback did not take effect.
if res.Err == nil {
|
||||
t.Fatalf("critic-enabled run with a nil Monitor handle must fall back to the MaxRuntime hard cap; got output=%q err=nil", res.Output)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user