run: critic can raise a run's step ceiling mid-flight (CriticHandle.MaxSteps)

Prerequisite for a full-fidelity mort agentcritic adapter (which adjusts a healthy-but-long run's iteration budget, not just its deadline). executus's CriticHandle was deadline+steer only; this adds the dynamic step ceiling on top of an unchanged majordomo (which already exposes WithMaxStepsFunc).

- run.RunInfo += MaxIterations (the run's base ceiling, so a critic can raise it relative to the baseline).
- run.CriticHandle += MaxSteps() int — polled by the executor each step via agent.WithMaxStepsFunc; <=0 defers to the base. The executor uses WithMaxStepsFunc(critic.MaxSteps) when a critic is active, else WithMaxSteps.
- critic battery: handle.maxSteps (initialised from RunInfo.MaxIterations) + MaxSteps(); Decision gains RaiseStepsBy so an Escalator can raise the ceiling alongside ExtendBy. ExtendOnce default is unchanged (time-only).

Test: a critic returning MaxSteps=5 lets a base-MaxIterations=1 run complete two tool-dispatch steps and a final answer, i.e. run past the base ceiling.

Core stays battery-free (run doesn't import critic).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-27 14:16:03 -04:00
parent a103cc5e9f
commit 4ba83ab905
5 changed files with 91 additions and 15 deletions
@@ -23,6 +23,7 @@ type fakeCriticHandle struct {
 	mu                  sync.Mutex
 	steps, tools, stops int
 	steered             int
+	maxSteps            int // 0 => defer to the run's base MaxIterations
 }

 func (h *fakeCriticHandle) RecordStep(int) { h.mu.Lock(); h.steps++; h.mu.Unlock() }
@@ -33,8 +34,41 @@ func (h *fakeCriticHandle) RecordToolStart(string, string) {
 }
 func (h *fakeCriticHandle) Steer() []llm.Message { h.mu.Lock(); h.steered++; h.mu.Unlock(); return nil }
 func (h *fakeCriticHandle) Deadline() time.Time  { return time.Time{} } // no hard deadline
+func (h *fakeCriticHandle) MaxSteps() int        { h.mu.Lock(); defer h.mu.Unlock(); return h.maxSteps }
 func (h *fakeCriticHandle) Stop()                { h.mu.Lock(); h.stops++; h.mu.Unlock() }

+// TestCriticRaisesStepCeiling checks the dynamic step ceiling: a critic handle
+// whose MaxSteps returns 5 should let a run with base MaxIterations=1 get through
+// two scripted tool-call replies and a final answer. Without the critic the run
+// is expected to stop with ErrMaxSteps after the first tool-dispatch step.
+func TestCriticRaisesStepCeiling(t *testing.T) {
+	h := &fakeCriticHandle{maxSteps: 5}
+	fp := fake.New("fake")
+	fp.Enqueue("m",
+		// two tool-call steps (unknown tool → tolerated error results), then answer
+		fake.ReplyWith(llm.Response{ToolCalls: []llm.ToolCall{{ID: "c1", Name: "noop", Arguments: []byte(`{}`)}}}),
+		fake.ReplyWith(llm.Response{ToolCalls: []llm.ToolCall{{ID: "c2", Name: "noop", Arguments: []byte(`{}`)}}}),
+		fake.Reply("done after 2 tool steps"),
+	)
+	m, _ := fp.Model("m") // NOTE(review): second return value (presumably an error) is discarded — confirm that is intended
+	ex := run.New(run.Config{
+		Registry: tool.NewRegistry(),
+		Models:   func(ctx context.Context, _ string) (context.Context, llm.Model, error) { return ctx, m, nil },
+		Ports:    run.Ports{Critic: &fakeCritic{h: h}},
+		// large soft timeout so the deadline-watch never interferes in the test
+		Defaults: run.Defaults{CriticSoftTimeout: time.Hour},
+	})
+	res := ex.Run(context.Background(),
+		run.RunnableAgent{Name: "x", ModelTier: "m", MaxIterations: 1, Critic: run.CriticConfig{Enabled: true}},
+		tool.Invocation{RunID: "r"}, "go")
+	if res.Err != nil {
+		t.Fatalf("critic raised the ceiling to 5, run should complete past base=1: %v", res.Err)
+	}
+	if res.Output != "done after 2 tool steps" {
+		t.Errorf("output = %q", res.Output)
+	}
+}
+
 // TestCriticWired: an agent with Critic.Enabled gets monitored — Monitor returns
 // a handle the executor feeds (RecordStep), drains (Steer), and stops.
 func TestCriticWired(t *testing.T) {