From 390e6cf905de5f0649c5b95c671d04025a62f3ad Mon Sep 17 00:00:00 2001 From: Steve Dudenhoeffer Date: Sat, 27 Jun 2026 16:35:13 -0400 Subject: [PATCH] =?UTF-8?q?run:=20critic=20parity=20=E2=80=94=20fuller=20R?= =?UTF-8?q?ecordStep=20+=20cause-carrying=20Kill=20(distinct=20status)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Completes the run-critic seam so a host adapter (mort's agentcritic) has full fidelity, closing the two limitations gadfly surfaced on mort #1334. - RecordStep(iter int, resp *llm.Response): the completed step's model response is now passed to the critic (was index-only), so a host that records a trace (mort's ProgressRecorder) can show what the agent actually produced, not just an iteration count. The executor forwards s.Response; the battery ignores it (its Progress is count-based). - CriticHandle.KillCause() error + ErrCriticKill: the executor now distinguishes an explicit critic KILL from a natural backstop expiry. runCtx uses a cause-carrying cancel (WithCancelCause + a MaxRuntime timer cancelling with DeadlineExceeded); the deadline-watch cancels with ErrCriticKill when KillCause()!=nil, else DeadlineExceeded. statusFor reads context.Cause → killed / timeout / cancelled are now distinct (were all "cancelled"). The battery sets killCause from Decision.KillReason on a Kill. Tests: statusFor "killed" case (cause=ErrCriticKill, err=Canceled); fake handle + battery RecordStep/KillCause signatures. Core stays battery-free. Co-Authored-By: Claude Opus 4.8 (1M context) --- critic/critic.go | 23 +++++++++++++++++++---- critic/critic_test.go | 2 +- run/critic.go | 30 ++++++++++++++++++++---------- run/critic_test.go | 10 ++++++++-- run/executor.go | 29 +++++++++++++++++++---------- run/executor_test.go | 21 ++++++++++++++------- run/ports.go | 19 +++++++++++++++++-- 7 files changed, 98 insertions(+), 36 deletions(-) diff --git a/critic/critic.go b/critic/critic.go index 0c948a5..acba9ad 100644 --- a/critic/critic.go +++ b/critic/critic.go @@ -18,6 +18,7 @@ package critic import ( "context" + "errors" "log/slog" "math" "sync" @@ -162,12 +163,15 @@ type handle struct { iterations int maxSteps int // current tool-dispatch ceiling (base MaxIterations, raised by RaiseStepsBy) lastTool string - killed bool // sticky: once an Escalator kills, no later decision un-kills it + killed bool // sticky: once an Escalator kills, no later decision un-kills it + killCause error // non-nil once killed; surfaced via KillCause for "killed" status stopped bool stopCh chan struct{} } -func (h *handle) RecordStep(iter int) { +func (h *handle) RecordStep(iter int, _ *llm.Response) { + // This battery's Progress tracks iteration count + activity, not per-step + // payload, so the response is unused here; a richer Escalator could record it. h.mu.Lock() h.iterations = iter h.lastActivity = h.now() @@ -204,6 +208,12 @@ func (h *handle) MaxSteps() int { return h.maxSteps } +func (h *handle) KillCause() error { + h.mu.Lock() + defer h.mu.Unlock() + return h.killCause +} + func (h *handle) Stop() { h.mu.Lock() if !h.stopped { @@ -266,8 +276,13 @@ func (h *handle) tick(ctx context.Context) { } if d.Kill { h.killed = true - h.deadline = h.now() // immediate hard deadline → executor cancels - return // ignore any Nudge/ExtendBy paired with a Kill + reason := d.KillReason + if reason == "" { + reason = "critic killed the run" + } + h.killCause = errors.New(reason) // surfaced via KillCause → "killed" status + h.deadline = h.now() // immediate hard deadline → executor cancels + return // ignore any Nudge/ExtendBy paired with a Kill } if len(d.Nudge) > 0 { h.steer = append(h.steer, d.Nudge...) diff --git a/critic/critic_test.go b/critic/critic_test.go index e9b4b2a..e2406d8 100644 --- a/critic/critic_test.go +++ b/critic/critic_test.go @@ -51,7 +51,7 @@ func TestMonitorEscalatesOncePerIdlePeriodAndExtends(t *testing.T) { t.Error("deadline should have been extended past the original") } // A fresh step re-arms; another idle period escalates again. - h.RecordStep(1) + h.RecordStep(1, nil) time.Sleep(60 * time.Millisecond) mu.Lock() c2 := calls diff --git a/run/critic.go b/run/critic.go index 420a476..a8798dc 100644 --- a/run/critic.go +++ b/run/critic.go @@ -2,9 +2,11 @@ package run import ( "context" + "fmt" "time" "gitea.stevedudenhoeffer.com/steve/majordomo/agent" + "gitea.stevedudenhoeffer.com/steve/majordomo/llm" ) // criticDeadlineCheck is how often the deadline-watch goroutine polls the @@ -21,12 +23,15 @@ type criticBinding struct { } // startCritic begins critic monitoring for this run when one is configured and -// the agent enables it. It launches a goroutine that cancels runCtx (via cancel) -// the moment the critic's hard deadline passes — the critic may extend that -// deadline, so a healthy-but-slow run is given room while a hung one is killed. -// Returns (nil, no-op stop) when there is no critic. The caller MUST defer the -// returned stop. -func (e *Executor) startCritic(runCtx context.Context, cancel context.CancelFunc, ra RunnableAgent, info RunInfo) (*criticBinding, func()) { +// the agent enables it. It launches a goroutine that cancels runCtx (via +// cancelCause) the moment the critic's hard deadline passes — the critic may +// extend that deadline, so a healthy-but-slow run is given room while a hung one +// is killed. When the deadline passes because the critic KILLED the run +// (KillCause() != nil), the cancellation cause is ErrCriticKill (→ status +// "killed"); when the backstop simply expired, it is context.DeadlineExceeded (→ +// "timeout"). Returns (nil, no-op stop) when there is no critic. The caller MUST +// defer the returned stop. +func (e *Executor) startCritic(runCtx context.Context, cancelCause context.CancelCauseFunc, ra RunnableAgent, info RunInfo) (*criticBinding, func()) { noop := func() {} if e.cfg.Ports.Critic == nil || !ra.Critic.Enabled { return nil, noop @@ -55,9 +60,14 @@ func (e *Executor) startCritic(runCtx context.Context, cancel context.CancelFunc return case <-t.C: // A zero deadline = no hard cap (not yet set); otherwise cancel - // once we're at or past it. + // once we're at or past it, distinguishing an explicit kill from a + // natural backstop expiry so the run gets the right status. if d := h.Deadline(); !d.IsZero() && !time.Now().Before(d) { - cancel() + if cause := h.KillCause(); cause != nil { + cancelCause(fmt.Errorf("%w: %s", ErrCriticKill, cause.Error())) + } else { + cancelCause(context.DeadlineExceeded) + } return } } @@ -69,9 +79,9 @@ func (e *Executor) startCritic(runCtx context.Context, cancel context.CancelFunc } } -func (b *criticBinding) recordStep(iter int) { +func (b *criticBinding) recordStep(iter int, resp *llm.Response) { if b != nil { - b.h.RecordStep(iter) + b.h.RecordStep(iter, resp) } } diff --git a/run/critic_test.go b/run/critic_test.go index cd4793f..e617568 100644 --- a/run/critic_test.go +++ b/run/critic_test.go @@ -23,10 +23,16 @@ type fakeCriticHandle struct { mu sync.Mutex steps, tools, stops int steered int - maxSteps int // 0 => defer to the run's base MaxIterations + maxSteps int // 0 => defer to the run's base MaxIterations + killCause error // non-nil simulates a critic kill } -func (h *fakeCriticHandle) RecordStep(int) { h.mu.Lock(); h.steps++; h.mu.Unlock() } +func (h *fakeCriticHandle) RecordStep(int, *llm.Response) { h.mu.Lock(); h.steps++; h.mu.Unlock() } +func (h *fakeCriticHandle) KillCause() error { + h.mu.Lock() + defer h.mu.Unlock() + return h.killCause +} func (h *fakeCriticHandle) RecordToolStart(string, string) { h.mu.Lock() h.tools++ diff --git a/run/executor.go b/run/executor.go index acd24af..61400e0 100644 --- a/run/executor.go +++ b/run/executor.go @@ -197,15 +197,20 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio // cancellation still propagates via MergeCancellation. Created BEFORE the // step observer so the observer forwards the merged run context (not a // possibly-cancelled caller ctx) to OnStep consumers. - runCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), maxRuntime) - defer cancel() + // Cause-carrying cancel so a critic kill, a backstop timeout, and a caller + // cancel land as distinct statuses. MaxRuntime is enforced by a timer that + // cancels with DeadlineExceeded (preserving the old WithTimeout → "timeout"). + runCtx, cancelCause := context.WithCancelCause(context.WithoutCancel(ctx)) + defer cancelCause(nil) + runTimer := time.AfterFunc(maxRuntime, func() { cancelCause(context.DeadlineExceeded) }) + defer runTimer.Stop() runCtx, mergeCancel := MergeCancellation(runCtx, ctx) defer mergeCancel() // Critic (optional): monitors the run for a stall, can nudge/extend/kill via // its host Escalator. Its hard deadline is bound to runCtx (cancel on pass). // nil-safe: no-op when no critic is configured or the agent doesn't enable it. - critic, stopCritic := e.startCritic(runCtx, cancel, ra, info) + critic, stopCritic := e.startCritic(runCtx, cancelCause, ra, info) defer stopCritic() // Step instrumentation: accumulate Result.Steps + fire inv.OnStep, feed the @@ -222,7 +227,7 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio if rec != nil { rec.OnStep(s.Index, s.Response) } - critic.recordStep(s.Index) // keep the critic's activity clock fresh + critic.recordStep(s.Index, s.Response) // keep the critic's activity clock fresh + carry the step payload var calls []llm.ToolCall if s.Response != nil { calls = s.Response.ToolCalls @@ -273,7 +278,7 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio ag := agent.New(model, e.systemPrompt(ra), opts...) runRes, runErr := ag.Run(runCtx, input, critic.steerOptions()...) - status := statusFor(runErr) + status := statusFor(runCtx, runErr) if runRes != nil { res.Output = runRes.Output res.Usage = runRes.Usage @@ -289,14 +294,18 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio return res } -// statusFor maps a run error to a RunStats.Status, distinguishing a deadline -// (timeout) and a cancellation (cancelled — caller cancel or shutdown) from a -// generic error so audit consumers can tell them apart. -func statusFor(runErr error) string { +// statusFor maps a run error to a RunStats.Status, distinguishing a critic kill +// (killed), a deadline (timeout), and a cancellation (cancelled — caller cancel +// or shutdown) from a generic error so audit consumers can tell them apart. The +// run context's cancellation cause carries the distinction (ErrCriticKill / +// DeadlineExceeded), since ctx.Err() alone only reports Canceled. +func statusFor(runCtx context.Context, runErr error) string { switch { case runErr == nil: return "ok" - case errors.Is(runErr, context.DeadlineExceeded): + case errors.Is(context.Cause(runCtx), ErrCriticKill): + return "killed" + case errors.Is(runErr, context.DeadlineExceeded) || errors.Is(context.Cause(runCtx), context.DeadlineExceeded): return "timeout" case errors.Is(runErr, context.Canceled): return "cancelled" diff --git a/run/executor_test.go b/run/executor_test.go index 250932c..85e498b 100644 --- a/run/executor_test.go +++ b/run/executor_test.go @@ -148,20 +148,27 @@ func TestExecutorNilModelNoPanic(t *testing.T) { } } -// TestStatusFor maps run errors to RunStats.Status (gadfly F3). +// TestStatusFor maps run errors + cancellation cause to RunStats.Status (gadfly F3). func TestStatusFor(t *testing.T) { + bg := context.Background() + // A context cancelled with the critic-kill cause: ctx.Err() is Canceled, but + // context.Cause carries ErrCriticKill → "killed". + killCtx, killCancel := context.WithCancelCause(context.Background()) + killCancel(fmt.Errorf("%w: hung", ErrCriticKill)) cases := []struct { + ctx context.Context err error want string }{ - {nil, "ok"}, - {context.DeadlineExceeded, "timeout"}, - {context.Canceled, "cancelled"}, - {fmt.Errorf("wrapped: %w", context.DeadlineExceeded), "timeout"}, - {errors.New("boom"), "error"}, + {bg, nil, "ok"}, + {bg, context.DeadlineExceeded, "timeout"}, + {bg, context.Canceled, "cancelled"}, + {bg, fmt.Errorf("wrapped: %w", context.DeadlineExceeded), "timeout"}, + {bg, errors.New("boom"), "error"}, + {killCtx, context.Canceled, "killed"}, } for _, c := range cases { - if got := statusFor(c.err); got != c.want { + if got := statusFor(c.ctx, c.err); got != c.want { t.Errorf("statusFor(%v) = %q, want %q", c.err, got, c.want) } } diff --git a/run/ports.go b/run/ports.go index fe66c32..75d8322 100644 --- a/run/ports.go +++ b/run/ports.go @@ -2,6 +2,7 @@ package run import ( "context" + "errors" "time" "gitea.stevedudenhoeffer.com/steve/majordomo/llm" @@ -9,6 +10,12 @@ import ( "gitea.stevedudenhoeffer.com/steve/executus/deliver" ) +// ErrCriticKill is the cancellation cause the executor stamps on a run the +// critic kills, so a critic kill surfaces as a distinct "killed" status (vs a +// backstop "timeout" or a caller "cancelled"). A host CriticHandle signals a +// kill via KillCause(); the executor wraps that reason with this sentinel. +var ErrCriticKill = errors.New("run: critic killed the run") + // Ports are the host seams the run executor consumes. Every field is nil-safe: // a light host passes the zero Ports and gets a bounded, in-memory run with no // persistence, audit, budget, critic, delegation, or delivery — which is @@ -123,8 +130,10 @@ type Critic interface { // methods (the critic battery's handle guards its state with a mutex). type CriticHandle interface { // RecordStep / RecordToolStart keep the critic's activity clock fresh so a - // healthy-but-slow run is not mistaken for a hang. - RecordStep(iter int) + // healthy-but-slow run is not mistaken for a hang. RecordStep also carries the + // completed step's model response (nil-safe) so the critic's Trace can show + // what the agent actually produced, not just an iteration count. + RecordStep(iter int, resp *llm.Response) RecordToolStart(name, args string) // Steer returns any messages the critic wants injected into the loop (a // nudge), drained before each step — matches majordomo agent.WithSteer. @@ -137,6 +146,12 @@ type CriticHandle interface { // healthy-but-long run's iteration budget mid-flight. Return <= 0 to defer to // the run's base MaxIterations. MaxSteps() int + // KillCause returns a non-nil reason iff the critic has decided to KILL this + // run (as opposed to letting the hard-deadline backstop expire). The executor + // reads it when the deadline passes: non-nil → cancel the run with + // ErrCriticKill (status "killed"); nil → the backstop expired naturally + // (status "timeout"). Hosts that never distinguish the two may return nil. + KillCause() error // Stop ends monitoring when the run finishes. Stop() }