run: critic parity — fuller RecordStep + cause-carrying Kill (distinct status)
executus CI / test (pull_request) Successful in 46s
Adversarial Review (Gadfly) / review (pull_request) Successful in 22m30s

Completes the run-critic seam so a host adapter (mort's agentcritic) has full
fidelity, closing the two limitations gadfly surfaced on mort #1334.

- RecordStep(iter int, resp *llm.Response): the completed step's model response
  is now passed to the critic (was index-only), so a host that records a trace
  (mort's ProgressRecorder) can show what the agent actually produced, not just
  an iteration count. The executor forwards s.Response; the battery ignores it
  (its Progress is count-based).
- CriticHandle.KillCause() error + ErrCriticKill: the executor now distinguishes
  an explicit critic KILL from a natural backstop expiry. runCtx uses a
  cause-carrying cancel (WithCancelCause + a MaxRuntime timer cancelling with
  DeadlineExceeded); the deadline-watch cancels with ErrCriticKill when
  KillCause()!=nil, else DeadlineExceeded. statusFor now reads context.Cause, so
  killed / timeout / cancelled are distinct statuses (critic-initiated cancels
  previously all surfaced as "cancelled"; the MaxRuntime timeout was already
  "timeout"). The battery sets killCause from Decision.KillReason on a Kill.

Tests: statusFor "killed" case (cause=ErrCriticKill, err=Canceled); fake handle
+ battery RecordStep/KillCause signatures. Core stays battery-free.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-27 16:35:13 -04:00
parent 1a1d5e417b
commit 390e6cf905
7 changed files with 98 additions and 36 deletions
+19 -10
View File
@@ -197,15 +197,20 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
// cancellation still propagates via MergeCancellation. Created BEFORE the
// step observer so the observer forwards the merged run context (not a
// possibly-cancelled caller ctx) to OnStep consumers.
runCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), maxRuntime)
defer cancel()
// Cause-carrying cancel so a critic kill, a backstop timeout, and a caller
// cancel land as distinct statuses. MaxRuntime is enforced by a timer that
// cancels with DeadlineExceeded (preserving the old WithTimeout → "timeout").
runCtx, cancelCause := context.WithCancelCause(context.WithoutCancel(ctx))
defer cancelCause(nil)
runTimer := time.AfterFunc(maxRuntime, func() { cancelCause(context.DeadlineExceeded) })
defer runTimer.Stop()
runCtx, mergeCancel := MergeCancellation(runCtx, ctx)
defer mergeCancel()
// Critic (optional): monitors the run for a stall, can nudge/extend/kill via
// its host Escalator. Its hard deadline is bound to runCtx (cancel on pass).
// nil-safe: no-op when no critic is configured or the agent doesn't enable it.
critic, stopCritic := e.startCritic(runCtx, cancel, ra, info)
critic, stopCritic := e.startCritic(runCtx, cancelCause, ra, info)
defer stopCritic()
// Step instrumentation: accumulate Result.Steps + fire inv.OnStep, feed the
@@ -222,7 +227,7 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
if rec != nil {
rec.OnStep(s.Index, s.Response)
}
critic.recordStep(s.Index) // keep the critic's activity clock fresh
critic.recordStep(s.Index, s.Response) // keep the critic's activity clock fresh + carry the step payload
var calls []llm.ToolCall
if s.Response != nil {
calls = s.Response.ToolCalls
@@ -273,7 +278,7 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
ag := agent.New(model, e.systemPrompt(ra), opts...)
runRes, runErr := ag.Run(runCtx, input, critic.steerOptions()...)
status := statusFor(runErr)
status := statusFor(runCtx, runErr)
if runRes != nil {
res.Output = runRes.Output
res.Usage = runRes.Usage
@@ -289,14 +294,18 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
return res
}
// statusFor maps a run error to a RunStats.Status, distinguishing a deadline
// (timeout) and a cancellation (cancelled — caller cancel or shutdown) from a
// generic error so audit consumers can tell them apart.
func statusFor(runErr error) string {
// statusFor maps a run error to a RunStats.Status, distinguishing a critic kill
// (killed), a deadline (timeout), and a cancellation (cancelled — caller cancel
// or shutdown) from a generic error so audit consumers can tell them apart. The
// run context's cancellation cause carries the distinction (ErrCriticKill /
// DeadlineExceeded), since ctx.Err() alone only reports Canceled.
func statusFor(runCtx context.Context, runErr error) string {
switch {
case runErr == nil:
return "ok"
case errors.Is(runErr, context.DeadlineExceeded):
case errors.Is(context.Cause(runCtx), ErrCriticKill):
return "killed"
case errors.Is(runErr, context.DeadlineExceeded) || errors.Is(context.Cause(runCtx), context.DeadlineExceeded):
return "timeout"
case errors.Is(runErr, context.Canceled):
return "cancelled"