chore: go mod tidy (add missing go.sum entry; CI tidiness gate)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
ci(gadfly): drop the m1 reviewer (dead weight; keep m5)
2026-06-27 14:53:58 -04:00 · 2026-06-27 14:41:14 -04:00 · 2026-06-27 14:38:48 -04:00 · 2026-06-27 14:16:03 -04:00
7 changed files with 102 additions and 19 deletions
@@ -51,13 +51,12 @@ jobs:
          GITEA_TOKEN: ${{ secrets.GITEA_TOKEN }}
          OLLAMA_CLOUD_API_KEY: ${{ secrets.OLLAMA_CLOUD_API_KEY }}
          # Local Macs, reached through their foreman queues (native Ollama on the
-          # wire). GADFLY_ENDPOINT_M1 registers provider "m1", _M5 registers "m5",
+          # wire). GADFLY_ENDPOINT_M5 registers provider "m5",
          # each a foreman-preset Ollama client at the secret's URL, of the form:
          #   foreman|https://<foreman-host>|<token>
          # Needs an image with foreman provider-type support (this one). If a Mac
          # is offline that model's comment shows an error and the others still post.
          # (Gitea secrets aren't auto-exposed — map each explicitly.)
-          GADFLY_ENDPOINT_M1: ${{ secrets.GADFLY_ENDPOINT_M1 }}
          GADFLY_ENDPOINT_M5: ${{ secrets.GADFLY_ENDPOINT_M5 }}
          # Full fleet: 3 cloud + M1 Pro + M5 Max. The Macs are back so the
          # gadfly-reports scoreboard can quantify whether they earn their keep
@@ -66,8 +65,8 @@ jobs:
          # (ollama-cloud=1) with its 3 lenses concurrent (LENS ollama-cloud=3) so
          # its comment lands sooner; each Mac runs one model, lenses serial (its
          # foreman queue serializes anyway). All three provider lanes run parallel.
-          GADFLY_MODELS: "minimax-m3:cloud,glm-5.2:cloud,glm-5.1:cloud,kimi-k2.7-code:cloud,deepseek-v4-pro:cloud,nemotron-3-super:cloud,gpt-oss:120b-cloud,qwen3-coder:480b-cloud,gemma4:cloud,m1/qwen3:14b,m5/qwen3.6:35b-mlx"
-          GADFLY_PROVIDER_CONCURRENCY: "ollama-cloud=3,m1=1,m5=1"
+          GADFLY_MODELS: "minimax-m3:cloud,glm-5.2:cloud,glm-5.1:cloud,kimi-k2.7-code:cloud,deepseek-v4-pro:cloud,nemotron-3-super:cloud,gpt-oss:120b-cloud,qwen3-coder:480b-cloud,gemma4:cloud,m5/qwen3.6:35b-mlx"
+          GADFLY_PROVIDER_CONCURRENCY: "ollama-cloud=3,m5=1"
          GADFLY_PROVIDER_LENS_CONCURRENCY: "ollama-cloud=3"
          # Default => the 3-lens suite (security, correctness, error-handling).
          # Set the repo var GADFLY_SPECIALISTS to override (csv / "all" / "auto").
@@ -10,13 +10,16 @@
 // Mort plugs its LLM critic-agent in as an Escalator; ExtendOnce is the
 // zero-dependency default.
 //
-// NOTE: the executor's call into run.Ports.Critic is a P2 follow-up; this
-// battery provides the seam + impl ahead of that wiring.
+// The executor wires run.Ports.Critic (C0b): it feeds the handle activity,
+// binds the run context to its extendable Deadline, drains its Steer, and polls
+// MaxSteps each step so an Escalator can also raise a long run's step ceiling
+// (Decision.RaiseStepsBy).
 package critic

 import (
 	"context"
 	"log/slog"
+	"math"
 	"sync"
 	"time"

@@ -36,10 +39,11 @@ type Progress struct {
 // Decision is the Escalator's verdict for a stalled run. Zero value = do
 // nothing (let the hard backstop eventually kill a truly hung run).
 type Decision struct {
-	Nudge      []llm.Message // injected before the agent's next turn (a steer)
-	ExtendBy   time.Duration // push the hard deadline out by this much
-	Kill       bool          // cancel the run now
-	KillReason string
+	Nudge        []llm.Message // injected before the agent's next turn (a steer)
+	ExtendBy     time.Duration // push the hard deadline out by this much
+	RaiseStepsBy int           // raise the run's tool-dispatch step ceiling by this
+	Kill         bool          // cancel the run now
+	KillReason   string
 }

 // Escalator decides what to do when a run crosses its soft timeout. It is
@@ -136,6 +140,7 @@ func (s *System) Monitor(ctx context.Context, info run.RunInfo, softTimeout time
 		now:          s.now,
 		lastActivity: now,
 		deadline:     now.Add(time.Duration(float64(softTimeout) * s.backstopMul)),
+		maxSteps:     info.MaxIterations, // base ceiling; an Escalator may RaiseStepsBy
 		stopCh:       make(chan struct{}),
 	}
 	go h.watch(ctx, check)
@@ -155,6 +160,7 @@ type handle struct {
 	deadline     time.Time
 	steer        []llm.Message
 	iterations   int
+	maxSteps     int // current tool-dispatch ceiling (base MaxIterations, raised by RaiseStepsBy)
 	lastTool     string
 	killed       bool // sticky: once an Escalator kills, no later decision un-kills it
 	stopped      bool
@@ -192,6 +198,12 @@ func (h *handle) Deadline() time.Time {
 	return h.deadline
 }

+func (h *handle) MaxSteps() int {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+	return h.maxSteps
+}
+
 func (h *handle) Stop() {
 	h.mu.Lock()
 	if !h.stopped {
@@ -263,4 +275,13 @@ func (h *handle) tick(ctx context.Context) {
 	if d.ExtendBy > 0 {
 		h.deadline = h.deadline.Add(d.ExtendBy)
 	}
+	if d.RaiseStepsBy > 0 {
+		// Overflow-safe: a buggy Escalator returning a huge delta must not wrap
+		// maxSteps negative (which the executor would read as "defer to base").
+		if d.RaiseStepsBy > math.MaxInt-h.maxSteps {
+			h.maxSteps = math.MaxInt
+		} else {
+			h.maxSteps += d.RaiseStepsBy
+		}
+	}
 }
@@ -125,6 +125,7 @@ google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpAD
 google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
 google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg=
 google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
@@ -88,6 +88,22 @@ func (b *criticBinding) recordToolStart(name, args string) {
 	}
 }

+// maxStepsOption returns the agent step-ceiling Option. With no critic it's a
+// fixed WithMaxSteps(base); with a critic it's a DYNAMIC WithMaxStepsFunc that
+// polls the handle each step (so the critic can raise a long run's budget),
+// falling back to base when the handle defers (MaxSteps() <= 0).
+func (b *criticBinding) maxStepsOption(base int) agent.Option {
+	if b == nil {
+		return agent.WithMaxSteps(base)
+	}
+	return agent.WithMaxStepsFunc(func() int {
+		if n := b.h.MaxSteps(); n > 0 {
+			return n
+		}
+		return base
+	})
+}
+
 // steerOptions returns the agent RunOptions that drain the critic's steer
 // messages into the loop. Empty when there is no critic.
 func (b *criticBinding) steerOptions() []agent.RunOption {
@@ -23,6 +23,7 @@ type fakeCriticHandle struct {
 	mu                  sync.Mutex
 	steps, tools, stops int
 	steered             int
+	maxSteps            int // 0 => defer to the run's base MaxIterations
 }

 func (h *fakeCriticHandle) RecordStep(int) { h.mu.Lock(); h.steps++; h.mu.Unlock() }
@@ -33,8 +34,41 @@ func (h *fakeCriticHandle) RecordToolStart(string, string) {
 }
 func (h *fakeCriticHandle) Steer() []llm.Message { h.mu.Lock(); h.steered++; h.mu.Unlock(); return nil }
 func (h *fakeCriticHandle) Deadline() time.Time  { return time.Time{} } // no hard deadline
+func (h *fakeCriticHandle) MaxSteps() int        { h.mu.Lock(); defer h.mu.Unlock(); return h.maxSteps }
 func (h *fakeCriticHandle) Stop()                { h.mu.Lock(); h.stops++; h.mu.Unlock() }

+// TestCriticRaisesStepCeiling: a critic returning a higher MaxSteps lets the agent
+// run PAST its base MaxIterations (the dynamic step ceiling). With base=1 and no
+// critic the run would hit ErrMaxSteps after the first tool-dispatch step; the
+// critic raises it to 5 so the run completes.
+func TestCriticRaisesStepCeiling(t *testing.T) {
+	h := &fakeCriticHandle{maxSteps: 5}
+	fp := fake.New("fake")
+	fp.Enqueue("m",
+		// two tool-call steps (unknown tool → tolerated error results), then answer
+		fake.ReplyWith(llm.Response{ToolCalls: []llm.ToolCall{{ID: "c1", Name: "noop", Arguments: []byte(`{}`)}}}),
+		fake.ReplyWith(llm.Response{ToolCalls: []llm.ToolCall{{ID: "c2", Name: "noop", Arguments: []byte(`{}`)}}}),
+		fake.Reply("done after 2 tool steps"),
+	)
+	m, _ := fp.Model("m")
+	ex := run.New(run.Config{
+		Registry: tool.NewRegistry(),
+		Models:   func(ctx context.Context, _ string) (context.Context, llm.Model, error) { return ctx, m, nil },
+		Ports:    run.Ports{Critic: &fakeCritic{h: h}},
+		// large soft timeout so the deadline-watch never interferes in the test
+		Defaults: run.Defaults{CriticSoftTimeout: time.Hour},
+	})
+	res := ex.Run(context.Background(),
+		run.RunnableAgent{Name: "x", ModelTier: "m", MaxIterations: 1, Critic: run.CriticConfig{Enabled: true}},
+		tool.Invocation{RunID: "r"}, "go")
+	if res.Err != nil {
+		t.Fatalf("critic raised the ceiling to 5, run should complete past base=1: %v", res.Err)
+	}
+	if res.Output != "done after 2 tool steps" {
+		t.Errorf("output = %q", res.Output)
+	}
+}
+
 // TestCriticWired: an agent with Critic.Enabled gets monitored — Monitor returns
 // a handle the executor feeds (RecordStep), drains (Steer), and stops.
 func TestCriticWired(t *testing.T) {
@@ -156,14 +156,15 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
 	// Audit start (optional). The recorder satisfies RunTally; stamp it on the
 	// invocation so a self-status tool can read live progress.
 	info := RunInfo{
-		RunID:       inv.RunID,
-		SubjectID:   ra.ID,
-		Name:        ra.Name,
-		CallerID:    inv.CallerID,
-		ChannelID:   inv.ChannelID,
-		ParentRunID: inv.ParentRunID,
-		Inputs:      inv.SkillInputs,
-		StartedAt:   started,
+		RunID:         inv.RunID,
+		SubjectID:     ra.ID,
+		Name:          ra.Name,
+		CallerID:      inv.CallerID,
+		ChannelID:     inv.ChannelID,
+		ParentRunID:   inv.ParentRunID,
+		Inputs:        inv.SkillInputs,
+		StartedAt:     started,
+		MaxIterations: maxIter,
 	}
 	var rec RunRecorder
 	var stateAcc *RunStateAccessor
@@ -243,7 +244,10 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio

 	opts := []agent.Option{
 		agent.WithToolbox(toolbox),
-		agent.WithMaxSteps(maxIter),
+		// Step ceiling: a fixed WithMaxSteps(maxIter) normally, but when a critic is
+		// active it owns a DYNAMIC ceiling (WithMaxStepsFunc) so it can raise a
+		// healthy-but-long run's budget mid-flight. Falls back to maxIter.
+		critic.maxStepsOption(maxIter),
 		agent.WithToolErrorLimits(e.cfg.Defaults.MaxConsecutiveToolErrors, e.cfg.Defaults.MaxSameToolCallRepeats),
 		agent.WithStepObserver(stepObserver),
 	}
@@ -48,6 +48,9 @@ type RunInfo struct {
 	ParentRunID string
 	Inputs      map[string]any
 	StartedAt   time.Time
+	// MaxIterations is the run's base tool-dispatch step ceiling, so a critic can
+	// raise it relative to the baseline (see CriticHandle.MaxSteps).
+	MaxIterations int
 }

 // RunStats is the terminal roll-up a recorder's Close writes. Mirrors mort's
@@ -129,6 +132,11 @@ type CriticHandle interface {
 	// Deadline returns the current hard-kill deadline (the critic may extend
 	// it); the executor binds the run context to it. Zero = no hard deadline.
 	Deadline() time.Time
+	// MaxSteps returns the current tool-dispatch step ceiling, polled by the
+	// executor each step (via majordomo WithMaxStepsFunc) so a critic can raise a
+	// healthy-but-long run's iteration budget mid-flight. Return <= 0 to defer to
+	// the run's base MaxIterations.
+	MaxSteps() int
 	// Stop ends monitoring when the run finishes.
 	Stop()
 }
Author	SHA1	Message	Date
steve	1a1d5e417b	chore: go mod tidy (add missing go.sum entry; CI tidiness gate) executus CI / test (pull_request) Successful in 2m8s Details executus CI / test (push) Successful in 1m45s Details Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>	2026-06-27 14:53:58 -04:00
steve	f3bd43b726	ci(gadfly): drop the m1 reviewer (dead weight; keep m5) executus CI / test (pull_request) Failing after 1m1s Details m1/qwen3:14b proved consistently low-value + slowest in the pool over multiple PRs. Removed from GADFLY_MODELS + GADFLY_PROVIDER_CONCURRENCY + its endpoint so it never fires again. m5 retained. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>	2026-06-27 14:41:14 -04:00
steve	306d575c31	critic: overflow-guard maxSteps += RaiseStepsBy (gadfly 5-model convergence) executus CI / test (pull_request) Has been cancelled Details A buggy/hostile Escalator returning a huge RaiseStepsBy could wrap handle.maxSteps negative (which the executor reads as defer-to-base). Clamp at math.MaxInt. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>	2026-06-27 14:38:48 -04:00
steve	4ba83ab905	run: critic can raise a run's step ceiling mid-flight (CriticHandle.MaxSteps) executus CI / test (pull_request) Failing after 1m1s Details Adversarial Review (Gadfly) / review (pull_request) Successful in 21m8s Details Prerequisite for a full-fidelity mort agentcritic adapter (which adjusts a healthy-but-long run's iteration budget, not just its deadline). executus's CriticHandle was deadline+steer only; this adds the dynamic step ceiling above an unchanged majordomo (which already exposes WithMaxStepsFunc). - run.RunInfo += MaxIterations (the run's base ceiling, so a critic can raise it relative to the baseline). - run.CriticHandle += MaxSteps() int — polled by the executor each step via agent.WithMaxStepsFunc; <=0 defers to the base. The executor uses WithMaxStepsFunc(critic.MaxSteps) when a critic is active, else WithMaxSteps. - critic battery: handle.maxSteps (initialised from RunInfo.MaxIterations) + MaxSteps(); Decision gains RaiseStepsBy so an Escalator can raise the ceiling alongside ExtendBy. ExtendOnce default is unchanged (time-only). Test: a critic returning MaxSteps=5 lets a base-MaxIterations=1 run complete two tool-dispatch steps past the base ceiling. Core stays battery-free (run doesn't import critic). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>	2026-06-27 14:16:03 -04:00