Files
executus/run/critic_test.go
T
steve 43b2471737
executus CI / test (pull_request) Failing after 1m0s
Adversarial Review (Gadfly) / review (pull_request) Successful in 5m9s
C0b: wire Critic + Delivery into run.Executor
Continues finishing the executor's run.Ports wiring (after C0's Palette).

Critic (run/critic.go): when Ports.Critic is set and the agent enables it, the
executor calls Monitor at run start, feeds RecordStep/RecordToolStart from the
step observer, drains the critic's Steer messages into the loop via
agent.WithSteer, and binds the run's hard cancellation to the critic's
(extendable) Deadline through a watch goroutine — a healthy-but-slow run gets
room while a hung one is killed. Stop() on run end. Soft timeout from
Defaults.CriticSoftTimeout (default 90s). nil-safe: no critic / not-enabled =
no-op.

Delivery (run/executor.go deliver): after the run, when Ports.Delivery is set
and inv.DeliveryID is non-empty, the executor posts Result.Output (or
DeliverError on failure) to a host-interpreted deliver.Target
{inv.DeliveryKind, inv.DeliveryID}. Empty target = caller reads Result.Output
itself (the synchronous default; the `.agent run` canary). Best-effort +
detached.

tool.Invocation gains DeliveryKind/DeliveryID (host-set egress target).

Tests: critic monitored/fed/steered/stopped when enabled, untouched when not;
delivery posts on a target, skips without one. Deferred: Checkpointer (needs a
majordomo hook to snapshot the running message history).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-27 10:00:05 -04:00

89 lines
2.8 KiB
Go

package run_test
import (
"context"
"sync"
"testing"
"time"
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
"gitea.stevedudenhoeffer.com/steve/majordomo/provider/fake"
"gitea.stevedudenhoeffer.com/steve/executus/run"
"gitea.stevedudenhoeffer.com/steve/executus/tool"
)
type fakeCritic struct{ h *fakeCriticHandle }
func (c *fakeCritic) Monitor(_ context.Context, _ run.RunInfo, _ time.Duration) run.CriticHandle {
return c.h
}
type fakeCriticHandle struct {
mu sync.Mutex
steps, tools, stops int
steered int
}
func (h *fakeCriticHandle) RecordStep(int) { h.mu.Lock(); h.steps++; h.mu.Unlock() }
func (h *fakeCriticHandle) RecordToolStart(string, string) {
h.mu.Lock()
h.tools++
h.mu.Unlock()
}
func (h *fakeCriticHandle) Steer() []llm.Message { h.mu.Lock(); h.steered++; h.mu.Unlock(); return nil }
func (h *fakeCriticHandle) Deadline() time.Time { return time.Time{} } // no hard deadline
func (h *fakeCriticHandle) Stop() { h.mu.Lock(); h.stops++; h.mu.Unlock() }
// TestCriticWired: an agent with Critic.Enabled gets monitored — Monitor returns
// a handle the executor feeds (RecordStep), drains (Steer), and stops.
func TestCriticWired(t *testing.T) {
h := &fakeCriticHandle{}
fp := fake.New("fake")
fp.Enqueue("m", fake.Reply("done"))
m, _ := fp.Model("m")
ex := run.New(run.Config{
Registry: tool.NewRegistry(),
Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) { return ctx, m, nil },
Ports: run.Ports{Critic: &fakeCritic{h: h}},
})
res := ex.Run(context.Background(),
run.RunnableAgent{Name: "watched", ModelTier: "m", Critic: run.CriticConfig{Enabled: true}},
tool.Invocation{RunID: "r"}, "go")
if res.Err != nil {
t.Fatalf("run error: %v", res.Err)
}
h.mu.Lock()
defer h.mu.Unlock()
if h.steps < 1 {
t.Errorf("critic should have seen >=1 step, got %d", h.steps)
}
if h.steered < 1 {
t.Errorf("critic Steer should be drained at least once, got %d", h.steered)
}
if h.stops != 1 {
t.Errorf("critic Stop should be called exactly once, got %d", h.stops)
}
}
// TestCriticDisabledNotMonitored: Critic.Enabled=false → Monitor never called.
func TestCriticDisabledNotMonitored(t *testing.T) {
h := &fakeCriticHandle{}
fp := fake.New("fake")
fp.Enqueue("m", fake.Reply("done"))
m, _ := fp.Model("m")
ex := run.New(run.Config{
Registry: tool.NewRegistry(),
Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) { return ctx, m, nil },
Ports: run.Ports{Critic: &fakeCritic{h: h}},
})
ex.Run(context.Background(),
run.RunnableAgent{Name: "x", ModelTier: "m"}, // Critic.Enabled=false
tool.Invocation{RunID: "r"}, "go")
h.mu.Lock()
defer h.mu.Unlock()
if h.stops != 0 || h.steps != 0 {
t.Errorf("disabled critic should not be monitored: steps=%d stops=%d", h.steps, h.stops)
}
}