run: critic parity — fuller RecordStep + cause-carrying Kill (distinct status)
Completes the run-critic seam so a host adapter (mort's agentcritic) has full fidelity, closing the two limitations gadfly surfaced on mort #1334. - RecordStep(iter int, resp *llm.Response): the completed step's model response is now passed to the critic (was index-only), so a host that records a trace (mort's ProgressRecorder) can show what the agent actually produced, not just an iteration count. The executor forwards s.Response; the battery ignores it (its Progress is count-based). - CriticHandle.KillCause() error + ErrCriticKill: the executor now distinguishes an explicit critic KILL from a natural backstop expiry. runCtx uses a cause-carrying cancel (WithCancelCause + a MaxRuntime timer cancelling with DeadlineExceeded); the deadline-watch cancels with ErrCriticKill when KillCause()!=nil, else DeadlineExceeded. statusFor reads context.Cause → killed / timeout / cancelled are now distinct (were all "cancelled"). The battery sets killCause from Decision.KillReason on a Kill. Tests: statusFor "killed" case (cause=ErrCriticKill, err=Canceled); fake handle + battery RecordStep/KillCause signatures. Core stays battery-free. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
+16
-1
@@ -18,6 +18,7 @@ package critic
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"errors"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"math"
|
"math"
|
||||||
"sync"
|
"sync"
|
||||||
@@ -163,11 +164,14 @@ type handle struct {
|
|||||||
maxSteps int // current tool-dispatch ceiling (base MaxIterations, raised by RaiseStepsBy)
|
maxSteps int // current tool-dispatch ceiling (base MaxIterations, raised by RaiseStepsBy)
|
||||||
lastTool string
|
lastTool string
|
||||||
killed bool // sticky: once an Escalator kills, no later decision un-kills it
|
killed bool // sticky: once an Escalator kills, no later decision un-kills it
|
||||||
|
killCause error // non-nil once killed; surfaced via KillCause for "killed" status
|
||||||
stopped bool
|
stopped bool
|
||||||
stopCh chan struct{}
|
stopCh chan struct{}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h *handle) RecordStep(iter int) {
|
func (h *handle) RecordStep(iter int, _ *llm.Response) {
|
||||||
|
// This battery's Progress tracks iteration count + activity, not per-step
|
||||||
|
// payload, so the response is unused here; a richer Escalator could record it.
|
||||||
h.mu.Lock()
|
h.mu.Lock()
|
||||||
h.iterations = iter
|
h.iterations = iter
|
||||||
h.lastActivity = h.now()
|
h.lastActivity = h.now()
|
||||||
@@ -204,6 +208,12 @@ func (h *handle) MaxSteps() int {
|
|||||||
return h.maxSteps
|
return h.maxSteps
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (h *handle) KillCause() error {
|
||||||
|
h.mu.Lock()
|
||||||
|
defer h.mu.Unlock()
|
||||||
|
return h.killCause
|
||||||
|
}
|
||||||
|
|
||||||
func (h *handle) Stop() {
|
func (h *handle) Stop() {
|
||||||
h.mu.Lock()
|
h.mu.Lock()
|
||||||
if !h.stopped {
|
if !h.stopped {
|
||||||
@@ -266,6 +276,11 @@ func (h *handle) tick(ctx context.Context) {
|
|||||||
}
|
}
|
||||||
if d.Kill {
|
if d.Kill {
|
||||||
h.killed = true
|
h.killed = true
|
||||||
|
reason := d.KillReason
|
||||||
|
if reason == "" {
|
||||||
|
reason = "critic killed the run"
|
||||||
|
}
|
||||||
|
h.killCause = errors.New(reason) // surfaced via KillCause → "killed" status
|
||||||
h.deadline = h.now() // immediate hard deadline → executor cancels
|
h.deadline = h.now() // immediate hard deadline → executor cancels
|
||||||
return // ignore any Nudge/ExtendBy paired with a Kill
|
return // ignore any Nudge/ExtendBy paired with a Kill
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -51,7 +51,7 @@ func TestMonitorEscalatesOncePerIdlePeriodAndExtends(t *testing.T) {
|
|||||||
t.Error("deadline should have been extended past the original")
|
t.Error("deadline should have been extended past the original")
|
||||||
}
|
}
|
||||||
// A fresh step re-arms; another idle period escalates again.
|
// A fresh step re-arms; another idle period escalates again.
|
||||||
h.RecordStep(1)
|
h.RecordStep(1, nil)
|
||||||
time.Sleep(60 * time.Millisecond)
|
time.Sleep(60 * time.Millisecond)
|
||||||
mu.Lock()
|
mu.Lock()
|
||||||
c2 := calls
|
c2 := calls
|
||||||
|
|||||||
+20
-10
@@ -2,9 +2,11 @@ package run
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"fmt"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"gitea.stevedudenhoeffer.com/steve/majordomo/agent"
|
"gitea.stevedudenhoeffer.com/steve/majordomo/agent"
|
||||||
|
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
||||||
)
|
)
|
||||||
|
|
||||||
// criticDeadlineCheck is how often the deadline-watch goroutine polls the
|
// criticDeadlineCheck is how often the deadline-watch goroutine polls the
|
||||||
@@ -21,12 +23,15 @@ type criticBinding struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// startCritic begins critic monitoring for this run when one is configured and
|
// startCritic begins critic monitoring for this run when one is configured and
|
||||||
// the agent enables it. It launches a goroutine that cancels runCtx (via cancel)
|
// the agent enables it. It launches a goroutine that cancels runCtx (via
|
||||||
// the moment the critic's hard deadline passes — the critic may extend that
|
// cancelCause) the moment the critic's hard deadline passes — the critic may
|
||||||
// deadline, so a healthy-but-slow run is given room while a hung one is killed.
|
// extend that deadline, so a healthy-but-slow run is given room while a hung one
|
||||||
// Returns (nil, no-op stop) when there is no critic. The caller MUST defer the
|
// is killed. When the deadline passes because the critic KILLED the run
|
||||||
// returned stop.
|
// (KillCause() != nil), the cancellation cause is ErrCriticKill (→ status
|
||||||
func (e *Executor) startCritic(runCtx context.Context, cancel context.CancelFunc, ra RunnableAgent, info RunInfo) (*criticBinding, func()) {
|
// "killed"); when the backstop simply expired, it is context.DeadlineExceeded (→
|
||||||
|
// "timeout"). Returns (nil, no-op stop) when there is no critic. The caller MUST
|
||||||
|
// defer the returned stop.
|
||||||
|
func (e *Executor) startCritic(runCtx context.Context, cancelCause context.CancelCauseFunc, ra RunnableAgent, info RunInfo) (*criticBinding, func()) {
|
||||||
noop := func() {}
|
noop := func() {}
|
||||||
if e.cfg.Ports.Critic == nil || !ra.Critic.Enabled {
|
if e.cfg.Ports.Critic == nil || !ra.Critic.Enabled {
|
||||||
return nil, noop
|
return nil, noop
|
||||||
@@ -55,9 +60,14 @@ func (e *Executor) startCritic(runCtx context.Context, cancel context.CancelFunc
|
|||||||
return
|
return
|
||||||
case <-t.C:
|
case <-t.C:
|
||||||
// A zero deadline = no hard cap (not yet set); otherwise cancel
|
// A zero deadline = no hard cap (not yet set); otherwise cancel
|
||||||
// once we're at or past it.
|
// once we're at or past it, distinguishing an explicit kill from a
|
||||||
|
// natural backstop expiry so the run gets the right status.
|
||||||
if d := h.Deadline(); !d.IsZero() && !time.Now().Before(d) {
|
if d := h.Deadline(); !d.IsZero() && !time.Now().Before(d) {
|
||||||
cancel()
|
if cause := h.KillCause(); cause != nil {
|
||||||
|
cancelCause(fmt.Errorf("%w: %s", ErrCriticKill, cause.Error()))
|
||||||
|
} else {
|
||||||
|
cancelCause(context.DeadlineExceeded)
|
||||||
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -69,9 +79,9 @@ func (e *Executor) startCritic(runCtx context.Context, cancel context.CancelFunc
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (b *criticBinding) recordStep(iter int) {
|
func (b *criticBinding) recordStep(iter int, resp *llm.Response) {
|
||||||
if b != nil {
|
if b != nil {
|
||||||
b.h.RecordStep(iter)
|
b.h.RecordStep(iter, resp)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
+7
-1
@@ -24,9 +24,15 @@ type fakeCriticHandle struct {
|
|||||||
steps, tools, stops int
|
steps, tools, stops int
|
||||||
steered int
|
steered int
|
||||||
maxSteps int // 0 => defer to the run's base MaxIterations
|
maxSteps int // 0 => defer to the run's base MaxIterations
|
||||||
|
killCause error // non-nil simulates a critic kill
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h *fakeCriticHandle) RecordStep(int) { h.mu.Lock(); h.steps++; h.mu.Unlock() }
|
func (h *fakeCriticHandle) RecordStep(int, *llm.Response) { h.mu.Lock(); h.steps++; h.mu.Unlock() }
|
||||||
|
func (h *fakeCriticHandle) KillCause() error {
|
||||||
|
h.mu.Lock()
|
||||||
|
defer h.mu.Unlock()
|
||||||
|
return h.killCause
|
||||||
|
}
|
||||||
func (h *fakeCriticHandle) RecordToolStart(string, string) {
|
func (h *fakeCriticHandle) RecordToolStart(string, string) {
|
||||||
h.mu.Lock()
|
h.mu.Lock()
|
||||||
h.tools++
|
h.tools++
|
||||||
|
|||||||
+19
-10
@@ -197,15 +197,20 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
|
|||||||
// cancellation still propagates via MergeCancellation. Created BEFORE the
|
// cancellation still propagates via MergeCancellation. Created BEFORE the
|
||||||
// step observer so the observer forwards the merged run context (not a
|
// step observer so the observer forwards the merged run context (not a
|
||||||
// possibly-cancelled caller ctx) to OnStep consumers.
|
// possibly-cancelled caller ctx) to OnStep consumers.
|
||||||
runCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), maxRuntime)
|
// Cause-carrying cancel so a critic kill, a backstop timeout, and a caller
|
||||||
defer cancel()
|
// cancel land as distinct statuses. MaxRuntime is enforced by a timer that
|
||||||
|
// cancels with DeadlineExceeded (preserving the old WithTimeout → "timeout").
|
||||||
|
runCtx, cancelCause := context.WithCancelCause(context.WithoutCancel(ctx))
|
||||||
|
defer cancelCause(nil)
|
||||||
|
runTimer := time.AfterFunc(maxRuntime, func() { cancelCause(context.DeadlineExceeded) })
|
||||||
|
defer runTimer.Stop()
|
||||||
runCtx, mergeCancel := MergeCancellation(runCtx, ctx)
|
runCtx, mergeCancel := MergeCancellation(runCtx, ctx)
|
||||||
defer mergeCancel()
|
defer mergeCancel()
|
||||||
|
|
||||||
// Critic (optional): monitors the run for a stall, can nudge/extend/kill via
|
// Critic (optional): monitors the run for a stall, can nudge/extend/kill via
|
||||||
// its host Escalator. Its hard deadline is bound to runCtx (cancel on pass).
|
// its host Escalator. Its hard deadline is bound to runCtx (cancel on pass).
|
||||||
// nil-safe: no-op when no critic is configured or the agent doesn't enable it.
|
// nil-safe: no-op when no critic is configured or the agent doesn't enable it.
|
||||||
critic, stopCritic := e.startCritic(runCtx, cancel, ra, info)
|
critic, stopCritic := e.startCritic(runCtx, cancelCause, ra, info)
|
||||||
defer stopCritic()
|
defer stopCritic()
|
||||||
|
|
||||||
// Step instrumentation: accumulate Result.Steps + fire inv.OnStep, feed the
|
// Step instrumentation: accumulate Result.Steps + fire inv.OnStep, feed the
|
||||||
@@ -222,7 +227,7 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
|
|||||||
if rec != nil {
|
if rec != nil {
|
||||||
rec.OnStep(s.Index, s.Response)
|
rec.OnStep(s.Index, s.Response)
|
||||||
}
|
}
|
||||||
critic.recordStep(s.Index) // keep the critic's activity clock fresh
|
critic.recordStep(s.Index, s.Response) // keep the critic's activity clock fresh + carry the step payload
|
||||||
var calls []llm.ToolCall
|
var calls []llm.ToolCall
|
||||||
if s.Response != nil {
|
if s.Response != nil {
|
||||||
calls = s.Response.ToolCalls
|
calls = s.Response.ToolCalls
|
||||||
@@ -273,7 +278,7 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
|
|||||||
ag := agent.New(model, e.systemPrompt(ra), opts...)
|
ag := agent.New(model, e.systemPrompt(ra), opts...)
|
||||||
runRes, runErr := ag.Run(runCtx, input, critic.steerOptions()...)
|
runRes, runErr := ag.Run(runCtx, input, critic.steerOptions()...)
|
||||||
|
|
||||||
status := statusFor(runErr)
|
status := statusFor(runCtx, runErr)
|
||||||
if runRes != nil {
|
if runRes != nil {
|
||||||
res.Output = runRes.Output
|
res.Output = runRes.Output
|
||||||
res.Usage = runRes.Usage
|
res.Usage = runRes.Usage
|
||||||
@@ -289,14 +294,18 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
|
|||||||
return res
|
return res
|
||||||
}
|
}
|
||||||
|
|
||||||
// statusFor maps a run error to a RunStats.Status, distinguishing a deadline
|
// statusFor maps a run error to a RunStats.Status, distinguishing a critic kill
|
||||||
// (timeout) and a cancellation (cancelled — caller cancel or shutdown) from a
|
// (killed), a deadline (timeout), and a cancellation (cancelled — caller cancel
|
||||||
// generic error so audit consumers can tell them apart.
|
// or shutdown) from a generic error so audit consumers can tell them apart. The
|
||||||
func statusFor(runErr error) string {
|
// run context's cancellation cause carries the distinction (ErrCriticKill /
|
||||||
|
// DeadlineExceeded), since ctx.Err() alone only reports Canceled.
|
||||||
|
func statusFor(runCtx context.Context, runErr error) string {
|
||||||
switch {
|
switch {
|
||||||
case runErr == nil:
|
case runErr == nil:
|
||||||
return "ok"
|
return "ok"
|
||||||
case errors.Is(runErr, context.DeadlineExceeded):
|
case errors.Is(context.Cause(runCtx), ErrCriticKill):
|
||||||
|
return "killed"
|
||||||
|
case errors.Is(runErr, context.DeadlineExceeded) || errors.Is(context.Cause(runCtx), context.DeadlineExceeded):
|
||||||
return "timeout"
|
return "timeout"
|
||||||
case errors.Is(runErr, context.Canceled):
|
case errors.Is(runErr, context.Canceled):
|
||||||
return "cancelled"
|
return "cancelled"
|
||||||
|
|||||||
+14
-7
@@ -148,20 +148,27 @@ func TestExecutorNilModelNoPanic(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TestStatusFor maps run errors to RunStats.Status (gadfly F3).
|
// TestStatusFor maps run errors + cancellation cause to RunStats.Status (gadfly F3).
|
||||||
func TestStatusFor(t *testing.T) {
|
func TestStatusFor(t *testing.T) {
|
||||||
|
bg := context.Background()
|
||||||
|
// A context cancelled with the critic-kill cause: ctx.Err() is Canceled, but
|
||||||
|
// context.Cause carries ErrCriticKill → "killed".
|
||||||
|
killCtx, killCancel := context.WithCancelCause(context.Background())
|
||||||
|
killCancel(fmt.Errorf("%w: hung", ErrCriticKill))
|
||||||
cases := []struct {
|
cases := []struct {
|
||||||
|
ctx context.Context
|
||||||
err error
|
err error
|
||||||
want string
|
want string
|
||||||
}{
|
}{
|
||||||
{nil, "ok"},
|
{bg, nil, "ok"},
|
||||||
{context.DeadlineExceeded, "timeout"},
|
{bg, context.DeadlineExceeded, "timeout"},
|
||||||
{context.Canceled, "cancelled"},
|
{bg, context.Canceled, "cancelled"},
|
||||||
{fmt.Errorf("wrapped: %w", context.DeadlineExceeded), "timeout"},
|
{bg, fmt.Errorf("wrapped: %w", context.DeadlineExceeded), "timeout"},
|
||||||
{errors.New("boom"), "error"},
|
{bg, errors.New("boom"), "error"},
|
||||||
|
{killCtx, context.Canceled, "killed"},
|
||||||
}
|
}
|
||||||
for _, c := range cases {
|
for _, c := range cases {
|
||||||
if got := statusFor(c.err); got != c.want {
|
if got := statusFor(c.ctx, c.err); got != c.want {
|
||||||
t.Errorf("statusFor(%v) = %q, want %q", c.err, got, c.want)
|
t.Errorf("statusFor(%v) = %q, want %q", c.err, got, c.want)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
+17
-2
@@ -2,6 +2,7 @@ package run
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"errors"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
||||||
@@ -9,6 +10,12 @@ import (
|
|||||||
"gitea.stevedudenhoeffer.com/steve/executus/deliver"
|
"gitea.stevedudenhoeffer.com/steve/executus/deliver"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// ErrCriticKill is the cancellation cause the executor stamps on a run the
|
||||||
|
// critic kills, so a critic kill surfaces as a distinct "killed" status (vs a
|
||||||
|
// backstop "timeout" or a caller "cancelled"). A host CriticHandle signals a
|
||||||
|
// kill via KillCause(); the executor wraps that reason with this sentinel.
|
||||||
|
var ErrCriticKill = errors.New("run: critic killed the run")
|
||||||
|
|
||||||
// Ports are the host seams the run executor consumes. Every field is nil-safe:
|
// Ports are the host seams the run executor consumes. Every field is nil-safe:
|
||||||
// a light host passes the zero Ports and gets a bounded, in-memory run with no
|
// a light host passes the zero Ports and gets a bounded, in-memory run with no
|
||||||
// persistence, audit, budget, critic, delegation, or delivery — which is
|
// persistence, audit, budget, critic, delegation, or delivery — which is
|
||||||
@@ -123,8 +130,10 @@ type Critic interface {
|
|||||||
// methods (the critic battery's handle guards its state with a mutex).
|
// methods (the critic battery's handle guards its state with a mutex).
|
||||||
type CriticHandle interface {
|
type CriticHandle interface {
|
||||||
// RecordStep / RecordToolStart keep the critic's activity clock fresh so a
|
// RecordStep / RecordToolStart keep the critic's activity clock fresh so a
|
||||||
// healthy-but-slow run is not mistaken for a hang.
|
// healthy-but-slow run is not mistaken for a hang. RecordStep also carries the
|
||||||
RecordStep(iter int)
|
// completed step's model response (nil-safe) so the critic's Trace can show
|
||||||
|
// what the agent actually produced, not just an iteration count.
|
||||||
|
RecordStep(iter int, resp *llm.Response)
|
||||||
RecordToolStart(name, args string)
|
RecordToolStart(name, args string)
|
||||||
// Steer returns any messages the critic wants injected into the loop (a
|
// Steer returns any messages the critic wants injected into the loop (a
|
||||||
// nudge), drained before each step — matches majordomo agent.WithSteer.
|
// nudge), drained before each step — matches majordomo agent.WithSteer.
|
||||||
@@ -137,6 +146,12 @@ type CriticHandle interface {
|
|||||||
// healthy-but-long run's iteration budget mid-flight. Return <= 0 to defer to
|
// healthy-but-long run's iteration budget mid-flight. Return <= 0 to defer to
|
||||||
// the run's base MaxIterations.
|
// the run's base MaxIterations.
|
||||||
MaxSteps() int
|
MaxSteps() int
|
||||||
|
// KillCause returns a non-nil reason iff the critic has decided to KILL this
|
||||||
|
// run (as opposed to letting the hard-deadline backstop expire). The executor
|
||||||
|
// reads it when the deadline passes: non-nil → cancel the run with
|
||||||
|
// ErrCriticKill (status "killed"); nil → the backstop expired naturally
|
||||||
|
// (status "timeout"). Hosts that never distinguish the two may return nil.
|
||||||
|
KillCause() error
|
||||||
// Stop ends monitoring when the run finishes.
|
// Stop ends monitoring when the run finishes.
|
||||||
Stop()
|
Stop()
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user