C0b: wire Critic + Delivery into run.Executor

Continues finishing the executor's run.Ports wiring (after C0's Palette). Critic (run/critic.go): when Ports.Critic is set and the agent enables it, the executor calls Monitor at run start, feeds RecordStep/RecordToolStart from the step observer, drains the critic's Steer messages into the loop via agent.WithSteer, and binds the run's hard cancellation to the critic's (extendable) Deadline through a watch goroutine — a healthy-but-slow run gets room while a hung one is killed. Stop() on run end. Soft timeout from Defaults.CriticSoftTimeout (default 90s). nil-safe: no critic / not-enabled = no-op. Delivery (run/executor.go deliver): after the run, when Ports.Delivery is set and inv.DeliveryID is non-empty, the executor posts Result.Output (or DeliverError on failure) to a host-interpreted deliver.Target {inv.DeliveryKind, inv.DeliveryID}. Empty target = caller reads Result.Output itself (the synchronous default; the `.agent run` canary). Best-effort + detached. tool.Invocation gains DeliveryKind/DeliveryID (host-set egress target). Tests: critic monitored/fed/steered/stopped when enabled, untouched when not; delivery posts on a target, skips without one. Deferred: Checkpointer (needs a majordomo hook to snapshot the running message history). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-27 10:00:05 -04:00
parent 0c80679719
commit 43b2471737
6 changed files with 317 additions and 14 deletions
@@ -0,0 +1,88 @@
+package run_test
+
+import (
+	"context"
+	"sync"
+	"testing"
+	"time"
+
+	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
+	"gitea.stevedudenhoeffer.com/steve/majordomo/provider/fake"
+
+	"gitea.stevedudenhoeffer.com/steve/executus/run"
+	"gitea.stevedudenhoeffer.com/steve/executus/tool"
+)
+
+// fakeCritic is the test double plugged into run.Ports.Critic. It holds one
+// pre-built handle so a test can inspect the counters after the run.
+type fakeCritic struct {
+	h *fakeCriticHandle
+}
+
+// Monitor ignores all of its arguments and returns the injected handle.
+func (c *fakeCritic) Monitor(context.Context, run.RunInfo, time.Duration) run.CriticHandle {
+	return c.h
+}
+
+// fakeCriticHandle is a call-counting handle returned by fakeCritic.Monitor.
+// Every counter is read and written under mu.
+type fakeCriticHandle struct {
+	mu      sync.Mutex
+	steps   int // number of RecordStep calls
+	tools   int // number of RecordToolStart calls
+	stops   int // number of Stop calls
+	steered int // number of Steer calls
+}
+
+// RecordStep counts the call; its argument is ignored.
+func (h *fakeCriticHandle) RecordStep(int) {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+	h.steps++
+}
+
+// RecordToolStart counts the call; both arguments are ignored.
+func (h *fakeCriticHandle) RecordToolStart(string, string) {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+	h.tools++
+}
+
+// Steer counts the call and returns nil, i.e. it never supplies a message.
+func (h *fakeCriticHandle) Steer() []llm.Message {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+	h.steered++
+	return nil
+}
+
+// Deadline returns the zero time.Time: this fake sets no hard deadline.
+func (h *fakeCriticHandle) Deadline() time.Time {
+	var none time.Time
+	return none
+}
+
+// Stop counts the call.
+func (h *fakeCriticHandle) Stop() {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+	h.stops++
+}
+
+// TestCriticWired runs an agent with Critic.Enabled set and checks that the
+// executor drove the handle returned by Monitor: at least one RecordStep, at
+// least one Steer drain, and exactly one Stop.
+func TestCriticWired(t *testing.T) {
+	handle := &fakeCriticHandle{}
+
+	// A fake provider with a single canned reply queued for model "m".
+	provider := fake.New("fake")
+	provider.Enqueue("m", fake.Reply("done"))
+	model, _ := provider.Model("m")
+
+	// Every tier resolves to the same fake model; the context passes through.
+	resolve := func(ctx context.Context, _ string) (context.Context, llm.Model, error) {
+		return ctx, model, nil
+	}
+	ex := run.New(run.Config{
+		Registry: tool.NewRegistry(),
+		Models:   resolve,
+		Ports:    run.Ports{Critic: &fakeCritic{h: handle}},
+	})
+
+	agent := run.RunnableAgent{Name: "watched", ModelTier: "m", Critic: run.CriticConfig{Enabled: true}}
+	res := ex.Run(context.Background(), agent, tool.Invocation{RunID: "r"}, "go")
+	if res.Err != nil {
+		t.Fatalf("run error: %v", res.Err)
+	}
+
+	// Snapshot the counters under the lock, then assert on the copies.
+	handle.mu.Lock()
+	steps, steered, stops := handle.steps, handle.steered, handle.stops
+	handle.mu.Unlock()
+
+	if steps < 1 {
+		t.Errorf("critic should have seen >=1 step, got %d", steps)
+	}
+	if steered < 1 {
+		t.Errorf("critic Steer should be drained at least once, got %d", steered)
+	}
+	if stops != 1 {
+		t.Errorf("critic Stop should be called exactly once, got %d", stops)
+	}
+}
+
+// TestCriticDisabledNotMonitored: Critic.Enabled=false → the executor must
+// leave the critic alone. The fake cannot observe Monitor calls directly, so
+// the test asserts that the handle Monitor would have returned was never
+// touched: no steps, no tool starts, no Steer drains, no Stop.
+func TestCriticDisabledNotMonitored(t *testing.T) {
+	h := &fakeCriticHandle{}
+	fp := fake.New("fake")
+	fp.Enqueue("m", fake.Reply("done"))
+	m, _ := fp.Model("m")
+	ex := run.New(run.Config{
+		Registry: tool.NewRegistry(),
+		Models:   func(ctx context.Context, _ string) (context.Context, llm.Model, error) { return ctx, m, nil },
+		Ports:    run.Ports{Critic: &fakeCritic{h: h}},
+	})
+	res := ex.Run(context.Background(),
+		run.RunnableAgent{Name: "x", ModelTier: "m"}, // Critic.Enabled=false
+		tool.Invocation{RunID: "r"}, "go")
+	// A run that failed before doing any work would also leave the handle
+	// untouched, so require success before trusting the zero counters.
+	if res.Err != nil {
+		t.Fatalf("run error: %v", res.Err)
+	}
+	h.mu.Lock()
+	defer h.mu.Unlock()
+	// Check every counter the fake keeps, not only steps and stops.
+	if h.steps != 0 || h.tools != 0 || h.steered != 0 || h.stops != 0 {
+		t.Errorf("disabled critic should not be monitored: steps=%d tools=%d steered=%d stops=%d",
+			h.steps, h.tools, h.steered, h.stops)
+	}
+}