run: critic can raise a run's step ceiling mid-flight (CriticHandle.MaxSteps)
Prerequisite for a full-fidelity mort agentcritic adapter (which adjusts a healthy-but-long run's iteration budget, not just its deadline). executus's CriticHandle was deadline+steer only; this adds the dynamic step ceiling above an unchanged majordomo (which already exposes WithMaxStepsFunc).

- run.RunInfo += MaxIterations (the run's base ceiling, so a critic can raise it relative to the baseline).
- run.CriticHandle += MaxSteps() int — polled by the executor each step via agent.WithMaxStepsFunc; <=0 defers to the base. The executor uses WithMaxStepsFunc(critic.MaxSteps) when a critic is active, else WithMaxSteps.
- critic battery: handle.maxSteps (initialised from RunInfo.MaxIterations) + MaxSteps(); Decision gains RaiseStepsBy so an Escalator can raise the ceiling alongside ExtendBy. ExtendOnce default is unchanged (time-only).

Test: a critic returning MaxSteps=5 lets a base-MaxIterations=1 run complete two tool-dispatch steps past the base ceiling.

Core stays battery-free (run doesn't import critic).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
+16
-2
@@ -10,8 +10,10 @@
|
|||||||
// Mort plugs its LLM critic-agent in as an Escalator; ExtendOnce is the
|
// Mort plugs its LLM critic-agent in as an Escalator; ExtendOnce is the
|
||||||
// zero-dependency default.
|
// zero-dependency default.
|
||||||
//
|
//
|
||||||
// NOTE: the executor's call into run.Ports.Critic is a P2 follow-up; this
|
// The executor wires run.Ports.Critic (C0b): it feeds the handle activity,
|
||||||
// battery provides the seam + impl ahead of that wiring.
|
// binds the run context to its extendable Deadline, drains its Steer, and polls
|
||||||
|
// MaxSteps each step so an Escalator can also raise a long run's step ceiling
|
||||||
|
// (Decision.RaiseStepsBy).
|
||||||
package critic
|
package critic
|
||||||
|
|
||||||
import (
|
import (
|
||||||
@@ -38,6 +40,7 @@ type Progress struct {
|
|||||||
type Decision struct {
|
type Decision struct {
|
||||||
Nudge []llm.Message // injected before the agent's next turn (a steer)
|
Nudge []llm.Message // injected before the agent's next turn (a steer)
|
||||||
ExtendBy time.Duration // push the hard deadline out by this much
|
ExtendBy time.Duration // push the hard deadline out by this much
|
||||||
|
RaiseStepsBy int // raise the run's tool-dispatch step ceiling by this
|
||||||
Kill bool // cancel the run now
|
Kill bool // cancel the run now
|
||||||
KillReason string
|
KillReason string
|
||||||
}
|
}
|
||||||
@@ -136,6 +139,7 @@ func (s *System) Monitor(ctx context.Context, info run.RunInfo, softTimeout time
|
|||||||
now: s.now,
|
now: s.now,
|
||||||
lastActivity: now,
|
lastActivity: now,
|
||||||
deadline: now.Add(time.Duration(float64(softTimeout) * s.backstopMul)),
|
deadline: now.Add(time.Duration(float64(softTimeout) * s.backstopMul)),
|
||||||
|
maxSteps: info.MaxIterations, // base ceiling; an Escalator may RaiseStepsBy
|
||||||
stopCh: make(chan struct{}),
|
stopCh: make(chan struct{}),
|
||||||
}
|
}
|
||||||
go h.watch(ctx, check)
|
go h.watch(ctx, check)
|
||||||
@@ -155,6 +159,7 @@ type handle struct {
|
|||||||
deadline time.Time
|
deadline time.Time
|
||||||
steer []llm.Message
|
steer []llm.Message
|
||||||
iterations int
|
iterations int
|
||||||
|
maxSteps int // current tool-dispatch ceiling (base MaxIterations, raised by RaiseStepsBy)
|
||||||
lastTool string
|
lastTool string
|
||||||
killed bool // sticky: once an Escalator kills, no later decision un-kills it
|
killed bool // sticky: once an Escalator kills, no later decision un-kills it
|
||||||
stopped bool
|
stopped bool
|
||||||
@@ -192,6 +197,12 @@ func (h *handle) Deadline() time.Time {
|
|||||||
return h.deadline
|
return h.deadline
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (h *handle) MaxSteps() int {
|
||||||
|
h.mu.Lock()
|
||||||
|
defer h.mu.Unlock()
|
||||||
|
return h.maxSteps
|
||||||
|
}
|
||||||
|
|
||||||
func (h *handle) Stop() {
|
func (h *handle) Stop() {
|
||||||
h.mu.Lock()
|
h.mu.Lock()
|
||||||
if !h.stopped {
|
if !h.stopped {
|
||||||
@@ -263,4 +274,7 @@ func (h *handle) tick(ctx context.Context) {
|
|||||||
if d.ExtendBy > 0 {
|
if d.ExtendBy > 0 {
|
||||||
h.deadline = h.deadline.Add(d.ExtendBy)
|
h.deadline = h.deadline.Add(d.ExtendBy)
|
||||||
}
|
}
|
||||||
|
if d.RaiseStepsBy > 0 {
|
||||||
|
h.maxSteps += d.RaiseStepsBy
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -88,6 +88,22 @@ func (b *criticBinding) recordToolStart(name, args string) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// maxStepsOption returns the agent step-ceiling Option. With no critic it's a
|
||||||
|
// fixed WithMaxSteps(base); with a critic it's a DYNAMIC WithMaxStepsFunc that
|
||||||
|
// polls the handle each step (so the critic can raise a long run's budget),
|
||||||
|
// falling back to base when the handle defers (MaxSteps() <= 0).
|
||||||
|
func (b *criticBinding) maxStepsOption(base int) agent.Option {
|
||||||
|
if b == nil {
|
||||||
|
return agent.WithMaxSteps(base)
|
||||||
|
}
|
||||||
|
return agent.WithMaxStepsFunc(func() int {
|
||||||
|
if n := b.h.MaxSteps(); n > 0 {
|
||||||
|
return n
|
||||||
|
}
|
||||||
|
return base
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
// steerOptions returns the agent RunOptions that drain the critic's steer
|
// steerOptions returns the agent RunOptions that drain the critic's steer
|
||||||
// messages into the loop. Empty when there is no critic.
|
// messages into the loop. Empty when there is no critic.
|
||||||
func (b *criticBinding) steerOptions() []agent.RunOption {
|
func (b *criticBinding) steerOptions() []agent.RunOption {
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ type fakeCriticHandle struct {
|
|||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
steps, tools, stops int
|
steps, tools, stops int
|
||||||
steered int
|
steered int
|
||||||
|
maxSteps int // 0 => defer to the run's base MaxIterations
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h *fakeCriticHandle) RecordStep(int) { h.mu.Lock(); h.steps++; h.mu.Unlock() }
|
func (h *fakeCriticHandle) RecordStep(int) { h.mu.Lock(); h.steps++; h.mu.Unlock() }
|
||||||
@@ -33,8 +34,41 @@ func (h *fakeCriticHandle) RecordToolStart(string, string) {
|
|||||||
}
|
}
|
||||||
func (h *fakeCriticHandle) Steer() []llm.Message { h.mu.Lock(); h.steered++; h.mu.Unlock(); return nil }
|
func (h *fakeCriticHandle) Steer() []llm.Message { h.mu.Lock(); h.steered++; h.mu.Unlock(); return nil }
|
||||||
func (h *fakeCriticHandle) Deadline() time.Time { return time.Time{} } // no hard deadline
|
func (h *fakeCriticHandle) Deadline() time.Time { return time.Time{} } // no hard deadline
|
||||||
|
// MaxSteps returns the configured ceiling; the zero value defers to the run's base MaxIterations.
func (h *fakeCriticHandle) MaxSteps() int { h.mu.Lock(); n := h.maxSteps; h.mu.Unlock(); return n }
|
||||||
func (h *fakeCriticHandle) Stop() { h.mu.Lock(); h.stops++; h.mu.Unlock() }
|
func (h *fakeCriticHandle) Stop() { h.mu.Lock(); h.stops++; h.mu.Unlock() }
|
||||||
|
|
||||||
|
// TestCriticRaisesStepCeiling: a critic returning a higher MaxSteps lets the agent
|
||||||
|
// run PAST its base MaxIterations (the dynamic step ceiling). With base=1 and no
|
||||||
|
// critic the run would hit ErrMaxSteps after the first tool-dispatch step; the
|
||||||
|
// critic raises it to 5 so the run completes.
|
||||||
|
func TestCriticRaisesStepCeiling(t *testing.T) {
|
||||||
|
h := &fakeCriticHandle{maxSteps: 5}
|
||||||
|
fp := fake.New("fake")
|
||||||
|
fp.Enqueue("m",
|
||||||
|
// two tool-call steps (unknown tool → tolerated error results), then answer
|
||||||
|
fake.ReplyWith(llm.Response{ToolCalls: []llm.ToolCall{{ID: "c1", Name: "noop", Arguments: []byte(`{}`)}}}),
|
||||||
|
fake.ReplyWith(llm.Response{ToolCalls: []llm.ToolCall{{ID: "c2", Name: "noop", Arguments: []byte(`{}`)}}}),
|
||||||
|
fake.Reply("done after 2 tool steps"),
|
||||||
|
)
|
||||||
|
m, _ := fp.Model("m")
|
||||||
|
ex := run.New(run.Config{
|
||||||
|
Registry: tool.NewRegistry(),
|
||||||
|
Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) { return ctx, m, nil },
|
||||||
|
Ports: run.Ports{Critic: &fakeCritic{h: h}},
|
||||||
|
// large soft timeout so the deadline-watch never interferes in the test
|
||||||
|
Defaults: run.Defaults{CriticSoftTimeout: time.Hour},
|
||||||
|
})
|
||||||
|
res := ex.Run(context.Background(),
|
||||||
|
run.RunnableAgent{Name: "x", ModelTier: "m", MaxIterations: 1, Critic: run.CriticConfig{Enabled: true}},
|
||||||
|
tool.Invocation{RunID: "r"}, "go")
|
||||||
|
if res.Err != nil {
|
||||||
|
t.Fatalf("critic raised the ceiling to 5, run should complete past base=1: %v", res.Err)
|
||||||
|
}
|
||||||
|
if res.Output != "done after 2 tool steps" {
|
||||||
|
t.Errorf("output = %q", res.Output)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// TestCriticWired: an agent with Critic.Enabled gets monitored — Monitor returns
|
// TestCriticWired: an agent with Critic.Enabled gets monitored — Monitor returns
|
||||||
// a handle the executor feeds (RecordStep), drains (Steer), and stops.
|
// a handle the executor feeds (RecordStep), drains (Steer), and stops.
|
||||||
func TestCriticWired(t *testing.T) {
|
func TestCriticWired(t *testing.T) {
|
||||||
|
|||||||
+5
-1
@@ -164,6 +164,7 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
|
|||||||
ParentRunID: inv.ParentRunID,
|
ParentRunID: inv.ParentRunID,
|
||||||
Inputs: inv.SkillInputs,
|
Inputs: inv.SkillInputs,
|
||||||
StartedAt: started,
|
StartedAt: started,
|
||||||
|
MaxIterations: maxIter,
|
||||||
}
|
}
|
||||||
var rec RunRecorder
|
var rec RunRecorder
|
||||||
var stateAcc *RunStateAccessor
|
var stateAcc *RunStateAccessor
|
||||||
@@ -243,7 +244,10 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
|
|||||||
|
|
||||||
opts := []agent.Option{
|
opts := []agent.Option{
|
||||||
agent.WithToolbox(toolbox),
|
agent.WithToolbox(toolbox),
|
||||||
agent.WithMaxSteps(maxIter),
|
// Step ceiling: a fixed WithMaxSteps(maxIter) normally, but when a critic is
|
||||||
|
// active it owns a DYNAMIC ceiling (WithMaxStepsFunc) so it can raise a
|
||||||
|
// healthy-but-long run's budget mid-flight. Falls back to maxIter.
|
||||||
|
critic.maxStepsOption(maxIter),
|
||||||
agent.WithToolErrorLimits(e.cfg.Defaults.MaxConsecutiveToolErrors, e.cfg.Defaults.MaxSameToolCallRepeats),
|
agent.WithToolErrorLimits(e.cfg.Defaults.MaxConsecutiveToolErrors, e.cfg.Defaults.MaxSameToolCallRepeats),
|
||||||
agent.WithStepObserver(stepObserver),
|
agent.WithStepObserver(stepObserver),
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -48,6 +48,9 @@ type RunInfo struct {
|
|||||||
ParentRunID string
|
ParentRunID string
|
||||||
Inputs map[string]any
|
Inputs map[string]any
|
||||||
StartedAt time.Time
|
StartedAt time.Time
|
||||||
|
// MaxIterations is the run's base tool-dispatch step ceiling, so a critic can
|
||||||
|
// raise it relative to the baseline (see CriticHandle.MaxSteps).
|
||||||
|
MaxIterations int
|
||||||
}
|
}
|
||||||
|
|
||||||
// RunStats is the terminal roll-up a recorder's Close writes. Mirrors mort's
|
// RunStats is the terminal roll-up a recorder's Close writes. Mirrors mort's
|
||||||
@@ -129,6 +132,11 @@ type CriticHandle interface {
|
|||||||
// Deadline returns the current hard-kill deadline (the critic may extend
|
// Deadline returns the current hard-kill deadline (the critic may extend
|
||||||
// it); the executor binds the run context to it. Zero = no hard deadline.
|
// it); the executor binds the run context to it. Zero = no hard deadline.
|
||||||
Deadline() time.Time
|
Deadline() time.Time
|
||||||
|
// MaxSteps returns the current tool-dispatch step ceiling, polled by the
|
||||||
|
// executor each step (via majordomo WithMaxStepsFunc) so a critic can raise a
|
||||||
|
// healthy-but-long run's iteration budget mid-flight. Return <= 0 to defer to
|
||||||
|
// the run's base MaxIterations.
|
||||||
|
MaxSteps() int
|
||||||
// Stop ends monitoring when the run finishes.
|
// Stop ends monitoring when the run finishes.
|
||||||
Stop()
|
Stop()
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user