From 390e6cf905de5f0649c5b95c671d04025a62f3ad Mon Sep 17 00:00:00 2001
From: Steve Dudenhoeffer <steve@stevedudenhoeffer.com>
Date: Sat, 27 Jun 2026 16:35:13 -0400
Subject: [PATCH] =?UTF-8?q?run:=20critic=20parity=20=E2=80=94=20fuller=20R?=
 =?UTF-8?q?ecordStep=20+=20cause-carrying=20Kill=20(distinct=20status)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Completes the run-critic seam so a host adapter (mort's agentcritic) has full
fidelity, closing the two limitations gadfly surfaced on mort #1334.

- RecordStep(iter int, resp *llm.Response): the completed step's model response
  is now passed to the critic (was index-only), so a host that records a trace
  (mort's ProgressRecorder) can show what the agent actually produced, not just
  an iteration count. The executor forwards s.Response; the battery ignores it
  (its Progress is count-based).
- CriticHandle.KillCause() error + ErrCriticKill: the executor now distinguishes
  an explicit critic KILL from a natural backstop expiry. runCtx uses a
  cause-carrying cancel (WithCancelCause + a MaxRuntime timer cancelling with
  DeadlineExceeded); the deadline-watch cancels with ErrCriticKill when
  KillCause()!=nil, else DeadlineExceeded. statusFor reads context.Cause →
  killed / timeout / cancelled are now distinct (were all "cancelled"). The
  battery sets killCause from Decision.KillReason on a Kill.

Tests: statusFor "killed" case (cause=ErrCriticKill, err=Canceled); fake handle
+ battery RecordStep/KillCause signatures. Core stays battery-free.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 critic/critic.go      | 23 +++++++++++++++++++----
 critic/critic_test.go |  2 +-
 run/critic.go         | 30 ++++++++++++++++++++----------
 run/critic_test.go    | 10 ++++++++--
 run/executor.go       | 29 +++++++++++++++++++----------
 run/executor_test.go  | 21 ++++++++++++++-------
 run/ports.go          | 19 +++++++++++++++++--
 7 files changed, 98 insertions(+), 36 deletions(-)

diff --git a/critic/critic.go b/critic/critic.go
index 0c948a5..acba9ad 100644
--- a/critic/critic.go
+++ b/critic/critic.go
@@ -18,6 +18,7 @@ package critic
 
 import (
 	"context"
+	"errors"
 	"log/slog"
 	"math"
 	"sync"
@@ -162,12 +163,15 @@ type handle struct {
 	iterations   int
 	maxSteps     int // current tool-dispatch ceiling (base MaxIterations, raised by RaiseStepsBy)
 	lastTool     string
-	killed       bool // sticky: once an Escalator kills, no later decision un-kills it
+	killed       bool  // sticky: once an Escalator kills, no later decision un-kills it
+	killCause    error // non-nil once killed; surfaced via KillCause for "killed" status
 	stopped      bool
 	stopCh       chan struct{}
 }
 
-func (h *handle) RecordStep(iter int) {
+func (h *handle) RecordStep(iter int, _ *llm.Response) {
+	// This battery's Progress tracks iteration count + activity, not per-step
+	// payload, so the response is unused here; a richer Escalator could record it.
 	h.mu.Lock()
 	h.iterations = iter
 	h.lastActivity = h.now()
@@ -204,6 +208,12 @@ func (h *handle) MaxSteps() int {
 	return h.maxSteps
 }
 
+func (h *handle) KillCause() error {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+	return h.killCause
+}
+
 func (h *handle) Stop() {
 	h.mu.Lock()
 	if !h.stopped {
@@ -266,8 +276,13 @@ func (h *handle) tick(ctx context.Context) {
 	}
 	if d.Kill {
 		h.killed = true
-		h.deadline = h.now() // immediate hard deadline → executor cancels
-		return               // ignore any Nudge/ExtendBy paired with a Kill
+		reason := d.KillReason
+		if reason == "" {
+			reason = "critic killed the run"
+		}
+		h.killCause = errors.New(reason) // surfaced via KillCause → "killed" status
+		h.deadline = h.now()             // immediate hard deadline → executor cancels
+		return                           // ignore any Nudge/ExtendBy paired with a Kill
 	}
 	if len(d.Nudge) > 0 {
 		h.steer = append(h.steer, d.Nudge...)
diff --git a/critic/critic_test.go b/critic/critic_test.go
index e9b4b2a..e2406d8 100644
--- a/critic/critic_test.go
+++ b/critic/critic_test.go
@@ -51,7 +51,7 @@ func TestMonitorEscalatesOncePerIdlePeriodAndExtends(t *testing.T) {
 		t.Error("deadline should have been extended past the original")
 	}
 	// A fresh step re-arms; another idle period escalates again.
-	h.RecordStep(1)
+	h.RecordStep(1, nil)
 	time.Sleep(60 * time.Millisecond)
 	mu.Lock()
 	c2 := calls
diff --git a/run/critic.go b/run/critic.go
index 420a476..a8798dc 100644
--- a/run/critic.go
+++ b/run/critic.go
@@ -2,9 +2,11 @@ package run
 
 import (
 	"context"
+	"fmt"
 	"time"
 
 	"gitea.stevedudenhoeffer.com/steve/majordomo/agent"
+	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
 )
 
 // criticDeadlineCheck is how often the deadline-watch goroutine polls the
@@ -21,12 +23,15 @@ type criticBinding struct {
 }
 
 // startCritic begins critic monitoring for this run when one is configured and
-// the agent enables it. It launches a goroutine that cancels runCtx (via cancel)
-// the moment the critic's hard deadline passes — the critic may extend that
-// deadline, so a healthy-but-slow run is given room while a hung one is killed.
-// Returns (nil, no-op stop) when there is no critic. The caller MUST defer the
-// returned stop.
-func (e *Executor) startCritic(runCtx context.Context, cancel context.CancelFunc, ra RunnableAgent, info RunInfo) (*criticBinding, func()) {
+// the agent enables it. It launches a goroutine that cancels runCtx (via
+// cancelCause) the moment the critic's hard deadline passes — the critic may
+// extend that deadline, so a healthy-but-slow run is given room while a hung one
+// is killed. When the deadline passes because the critic KILLED the run
+// (KillCause() != nil), the cancellation cause is ErrCriticKill (→ status
+// "killed"); when the backstop simply expired, it is context.DeadlineExceeded (→
+// "timeout"). Returns (nil, no-op stop) when there is no critic. The caller MUST
+// defer the returned stop.
+func (e *Executor) startCritic(runCtx context.Context, cancelCause context.CancelCauseFunc, ra RunnableAgent, info RunInfo) (*criticBinding, func()) {
 	noop := func() {}
 	if e.cfg.Ports.Critic == nil || !ra.Critic.Enabled {
 		return nil, noop
@@ -55,9 +60,14 @@ func (e *Executor) startCritic(runCtx context.Context, cancel context.CancelFunc
 				return
 			case <-t.C:
 				// A zero deadline = no hard cap (not yet set); otherwise cancel
-				// once we're at or past it.
+				// once we're at or past it, distinguishing an explicit kill from a
+				// natural backstop expiry so the run gets the right status.
 				if d := h.Deadline(); !d.IsZero() && !time.Now().Before(d) {
-					cancel()
+					if cause := h.KillCause(); cause != nil {
+						cancelCause(fmt.Errorf("%w: %s", ErrCriticKill, cause.Error()))
+					} else {
+						cancelCause(context.DeadlineExceeded)
+					}
 					return
 				}
 			}
@@ -69,9 +79,9 @@ func (e *Executor) startCritic(runCtx context.Context, cancel context.CancelFunc
 	}
 }
 
-func (b *criticBinding) recordStep(iter int) {
+func (b *criticBinding) recordStep(iter int, resp *llm.Response) {
 	if b != nil {
-		b.h.RecordStep(iter)
+		b.h.RecordStep(iter, resp)
 	}
 }
 
diff --git a/run/critic_test.go b/run/critic_test.go
index cd4793f..e617568 100644
--- a/run/critic_test.go
+++ b/run/critic_test.go
@@ -23,10 +23,16 @@ type fakeCriticHandle struct {
 	mu                  sync.Mutex
 	steps, tools, stops int
 	steered             int
-	maxSteps            int // 0 => defer to the run's base MaxIterations
+	maxSteps            int   // 0 => defer to the run's base MaxIterations
+	killCause           error // non-nil simulates a critic kill
 }
 
-func (h *fakeCriticHandle) RecordStep(int) { h.mu.Lock(); h.steps++; h.mu.Unlock() }
+func (h *fakeCriticHandle) RecordStep(int, *llm.Response) { h.mu.Lock(); h.steps++; h.mu.Unlock() }
+func (h *fakeCriticHandle) KillCause() error {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+	return h.killCause
+}
 func (h *fakeCriticHandle) RecordToolStart(string, string) {
 	h.mu.Lock()
 	h.tools++
diff --git a/run/executor.go b/run/executor.go
index acd24af..61400e0 100644
--- a/run/executor.go
+++ b/run/executor.go
@@ -197,15 +197,20 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
 	// cancellation still propagates via MergeCancellation. Created BEFORE the
 	// step observer so the observer forwards the merged run context (not a
 	// possibly-cancelled caller ctx) to OnStep consumers.
-	runCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), maxRuntime)
-	defer cancel()
+	// Cause-carrying cancel so a critic kill, a backstop timeout, and a caller
+	// cancel land as distinct statuses. MaxRuntime is enforced by a timer that
+	// cancels with DeadlineExceeded (preserving the old WithTimeout → "timeout").
+	runCtx, cancelCause := context.WithCancelCause(context.WithoutCancel(ctx))
+	defer cancelCause(nil)
+	runTimer := time.AfterFunc(maxRuntime, func() { cancelCause(context.DeadlineExceeded) })
+	defer runTimer.Stop()
 	runCtx, mergeCancel := MergeCancellation(runCtx, ctx)
 	defer mergeCancel()
 
 	// Critic (optional): monitors the run for a stall, can nudge/extend/kill via
 	// its host Escalator. Its hard deadline is bound to runCtx (cancel on pass).
 	// nil-safe: no-op when no critic is configured or the agent doesn't enable it.
-	critic, stopCritic := e.startCritic(runCtx, cancel, ra, info)
+	critic, stopCritic := e.startCritic(runCtx, cancelCause, ra, info)
 	defer stopCritic()
 
 	// Step instrumentation: accumulate Result.Steps + fire inv.OnStep, feed the
@@ -222,7 +227,7 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
 		if rec != nil {
 			rec.OnStep(s.Index, s.Response)
 		}
-		critic.recordStep(s.Index) // keep the critic's activity clock fresh
+		critic.recordStep(s.Index, s.Response) // keep the critic's activity clock fresh + carry the step payload
 		var calls []llm.ToolCall
 		if s.Response != nil {
 			calls = s.Response.ToolCalls
@@ -273,7 +278,7 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
 	ag := agent.New(model, e.systemPrompt(ra), opts...)
 	runRes, runErr := ag.Run(runCtx, input, critic.steerOptions()...)
 
-	status := statusFor(runErr)
+	status := statusFor(runCtx, runErr)
 	if runRes != nil {
 		res.Output = runRes.Output
 		res.Usage = runRes.Usage
@@ -289,14 +294,18 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
 	return res
 }
 
-// statusFor maps a run error to a RunStats.Status, distinguishing a deadline
-// (timeout) and a cancellation (cancelled — caller cancel or shutdown) from a
-// generic error so audit consumers can tell them apart.
-func statusFor(runErr error) string {
+// statusFor maps a run error to a RunStats.Status, distinguishing a critic kill
+// (killed), a deadline (timeout), and a cancellation (cancelled — caller cancel
+// or shutdown) from a generic error so audit consumers can tell them apart. The
+// run context's cancellation cause carries the distinction (ErrCriticKill /
+// DeadlineExceeded), since ctx.Err() alone only reports Canceled.
+func statusFor(runCtx context.Context, runErr error) string {
 	switch {
 	case runErr == nil:
 		return "ok"
-	case errors.Is(runErr, context.DeadlineExceeded):
+	case errors.Is(context.Cause(runCtx), ErrCriticKill):
+		return "killed"
+	case errors.Is(runErr, context.DeadlineExceeded) || errors.Is(context.Cause(runCtx), context.DeadlineExceeded):
 		return "timeout"
 	case errors.Is(runErr, context.Canceled):
 		return "cancelled"
diff --git a/run/executor_test.go b/run/executor_test.go
index 250932c..85e498b 100644
--- a/run/executor_test.go
+++ b/run/executor_test.go
@@ -148,20 +148,27 @@ func TestExecutorNilModelNoPanic(t *testing.T) {
 	}
 }
 
-// TestStatusFor maps run errors to RunStats.Status (gadfly F3).
+// TestStatusFor maps run errors + cancellation cause to RunStats.Status (gadfly F3).
 func TestStatusFor(t *testing.T) {
+	bg := context.Background()
+	// A context cancelled with the critic-kill cause: ctx.Err() is Canceled, but
+	// context.Cause carries ErrCriticKill → "killed".
+	killCtx, killCancel := context.WithCancelCause(context.Background())
+	killCancel(fmt.Errorf("%w: hung", ErrCriticKill))
 	cases := []struct {
+		ctx  context.Context
 		err  error
 		want string
 	}{
-		{nil, "ok"},
-		{context.DeadlineExceeded, "timeout"},
-		{context.Canceled, "cancelled"},
-		{fmt.Errorf("wrapped: %w", context.DeadlineExceeded), "timeout"},
-		{errors.New("boom"), "error"},
+		{bg, nil, "ok"},
+		{bg, context.DeadlineExceeded, "timeout"},
+		{bg, context.Canceled, "cancelled"},
+		{bg, fmt.Errorf("wrapped: %w", context.DeadlineExceeded), "timeout"},
+		{bg, errors.New("boom"), "error"},
+		{killCtx, context.Canceled, "killed"},
 	}
 	for _, c := range cases {
-		if got := statusFor(c.err); got != c.want {
+		if got := statusFor(c.ctx, c.err); got != c.want {
 			t.Errorf("statusFor(%v) = %q, want %q", c.err, got, c.want)
 		}
 	}
diff --git a/run/ports.go b/run/ports.go
index fe66c32..75d8322 100644
--- a/run/ports.go
+++ b/run/ports.go
@@ -2,6 +2,7 @@ package run
 
 import (
 	"context"
+	"errors"
 	"time"
 
 	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
@@ -9,6 +10,12 @@ import (
 	"gitea.stevedudenhoeffer.com/steve/executus/deliver"
 )
 
+// ErrCriticKill is the cancellation cause the executor stamps on a run the
+// critic kills, so a critic kill surfaces as a distinct "killed" status (vs a
+// backstop "timeout" or a caller "cancelled"). A host CriticHandle signals a
+// kill via KillCause(); the executor wraps that reason with this sentinel.
+var ErrCriticKill = errors.New("run: critic killed the run")
+
 // Ports are the host seams the run executor consumes. Every field is nil-safe:
 // a light host passes the zero Ports and gets a bounded, in-memory run with no
 // persistence, audit, budget, critic, delegation, or delivery — which is
@@ -123,8 +130,10 @@ type Critic interface {
 // methods (the critic battery's handle guards its state with a mutex).
 type CriticHandle interface {
 	// RecordStep / RecordToolStart keep the critic's activity clock fresh so a
-	// healthy-but-slow run is not mistaken for a hang.
-	RecordStep(iter int)
+	// healthy-but-slow run is not mistaken for a hang. RecordStep also carries the
+	// completed step's model response (nil-safe) so the critic's Trace can show
+	// what the agent actually produced, not just an iteration count.
+	RecordStep(iter int, resp *llm.Response)
 	RecordToolStart(name, args string)
 	// Steer returns any messages the critic wants injected into the loop (a
 	// nudge), drained before each step — matches majordomo agent.WithSteer.
@@ -137,6 +146,12 @@ type CriticHandle interface {
 	// healthy-but-long run's iteration budget mid-flight. Return <= 0 to defer to
 	// the run's base MaxIterations.
 	MaxSteps() int
+	// KillCause returns a non-nil reason iff the critic has decided to KILL this
+	// run (as opposed to letting the hard-deadline backstop expire). The executor
+	// reads it when the deadline passes: non-nil → cancel the run with
+	// ErrCriticKill (status "killed"); nil → the backstop expired naturally
+	// (status "timeout"). Hosts that never distinguish the two may return nil.
+	KillCause() error
 	// Stop ends monitoring when the run finishes.
 	Stop()
 }