run: critic parity — fuller RecordStep + cause-carrying Kill (distinct status)

Completes the run-critic seam so a host adapter (mort's agentcritic) has full fidelity, closing the two limitations gadfly surfaced on mort #1334.

- RecordStep(iter int, resp *llm.Response): the completed step's model response is now passed to the critic (previously only the index was), so a host that records a trace (mort's ProgressRecorder) can show what the agent actually produced, not just an iteration count. The executor forwards s.Response; the battery ignores it (its Progress is count-based).
- CriticHandle.KillCause() error + ErrCriticKill: the executor now distinguishes an explicit critic KILL from a natural backstop expiry. runCtx uses a cause-carrying cancel (WithCancelCause, plus a MaxRuntime timer that cancels with DeadlineExceeded); the deadline-watch cancels with ErrCriticKill when KillCause() != nil, else with DeadlineExceeded. statusFor reads context.Cause, so killed / timeout / cancelled are now distinct statuses (they were all "cancelled"). The battery sets killCause from Decision.KillReason on a Kill.

Tests: statusFor "killed" case (cause=ErrCriticKill, err=Canceled); fake handle + battery RecordStep/KillCause signatures. Core stays battery-free.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-27 16:35:13 -04:00
parent 1a1d5e417b
commit 390e6cf905
7 changed files with 98 additions and 36 deletions
@@ -197,15 +197,20 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
 	// cancellation still propagates via MergeCancellation. Created BEFORE the
 	// step observer so the observer forwards the merged run context (not a
 	// possibly-cancelled caller ctx) to OnStep consumers.
-	runCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), maxRuntime)
-	defer cancel()
+	// Cause-carrying cancel so a critic kill, a backstop timeout, and a caller
+	// cancel land as distinct statuses. MaxRuntime is enforced by a timer that
+	// cancels with DeadlineExceeded (preserving the old WithTimeout → "timeout").
+	runCtx, cancelCause := context.WithCancelCause(context.WithoutCancel(ctx))
+	defer cancelCause(nil)
+	runTimer := time.AfterFunc(maxRuntime, func() { cancelCause(context.DeadlineExceeded) })
+	defer runTimer.Stop()
 	runCtx, mergeCancel := MergeCancellation(runCtx, ctx)
 	defer mergeCancel()

 	// Critic (optional): monitors the run for a stall, can nudge/extend/kill via
 	// its host Escalator. Its hard deadline is bound to runCtx (cancel on pass).
 	// nil-safe: no-op when no critic is configured or the agent doesn't enable it.
-	critic, stopCritic := e.startCritic(runCtx, cancel, ra, info)
+	critic, stopCritic := e.startCritic(runCtx, cancelCause, ra, info)
 	defer stopCritic()

 	// Step instrumentation: accumulate Result.Steps + fire inv.OnStep, feed the
@@ -222,7 +227,7 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
 		if rec != nil {
 			rec.OnStep(s.Index, s.Response)
 		}
-		critic.recordStep(s.Index) // keep the critic's activity clock fresh
+		critic.recordStep(s.Index, s.Response) // keep the critic's activity clock fresh + carry the step payload
 		var calls []llm.ToolCall
 		if s.Response != nil {
 			calls = s.Response.ToolCalls
@@ -273,7 +278,7 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
 	ag := agent.New(model, e.systemPrompt(ra), opts...)
 	runRes, runErr := ag.Run(runCtx, input, critic.steerOptions()...)

-	status := statusFor(runErr)
+	status := statusFor(runCtx, runErr)
 	if runRes != nil {
 		res.Output = runRes.Output
 		res.Usage = runRes.Usage
@@ -289,14 +294,18 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
 	return res
 }

-// statusFor maps a run error to a RunStats.Status, distinguishing a deadline
-// (timeout) and a cancellation (cancelled — caller cancel or shutdown) from a
-// generic error so audit consumers can tell them apart.
-func statusFor(runErr error) string {
+// statusFor maps a run error to a RunStats.Status, distinguishing a critic kill
+// (killed), a deadline (timeout), and a cancellation (cancelled — caller cancel
+// or shutdown) from a generic error so audit consumers can tell them apart. The
+// run context's cancellation cause carries the distinction (ErrCriticKill /
+// DeadlineExceeded), since ctx.Err() alone only reports Canceled.
+func statusFor(runCtx context.Context, runErr error) string {
 	switch {
 	case runErr == nil:
 		return "ok"
-	case errors.Is(runErr, context.DeadlineExceeded):
+	case errors.Is(context.Cause(runCtx), ErrCriticKill):
+		return "killed"
+	case errors.Is(runErr, context.DeadlineExceeded) || errors.Is(context.Cause(runCtx), context.DeadlineExceeded):
 		return "timeout"
 	case errors.Is(runErr, context.Canceled):
 		return "cancelled"