C0b: wire Critic + Delivery into run.Executor
Continues finishing the executor's run.Ports wiring (after C0's Palette).
Critic (run/critic.go): when Ports.Critic is set and the agent enables it, the
executor calls Monitor at run start, feeds RecordStep/RecordToolStart from the
step observer, drains the critic's Steer messages into the loop via
agent.WithSteer, and binds the run's hard cancellation to the critic's
(extendable) Deadline through a watch goroutine — a healthy-but-slow run gets
room while a hung one is killed. Stop() on run end. Soft timeout from
Defaults.CriticSoftTimeout (default 90s). nil-safe: no critic / not-enabled =
no-op.
Delivery (run/executor.go deliver): after the run, when Ports.Delivery is set
and inv.DeliveryID is non-empty, the executor posts Result.Output (or
DeliverError on failure) to a host-interpreted deliver.Target
{inv.DeliveryKind, inv.DeliveryID}. Empty target = caller reads Result.Output
itself (the synchronous default; the `.agent run` canary). Best-effort +
detached.
tool.Invocation gains DeliveryKind/DeliveryID (host-set egress target).
Tests: critic monitored/fed/steered/stopped when enabled, untouched when not;
delivery posts on a target, skips without one. Deferred: Checkpointer (needs a
majordomo hook to snapshot the running message history).
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,88 @@
|
||||
package run_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
||||
"gitea.stevedudenhoeffer.com/steve/majordomo/provider/fake"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/executus/run"
|
||||
"gitea.stevedudenhoeffer.com/steve/executus/tool"
|
||||
)
|
||||
|
||||
// fakeCritic is a test double for the executor's critic port: every Monitor
// call hands back the same preconfigured handle.
type fakeCritic struct {
	h *fakeCriticHandle // handle returned by Monitor
}
|
||||
|
||||
func (c *fakeCritic) Monitor(_ context.Context, _ run.RunInfo, _ time.Duration) run.CriticHandle {
|
||||
return c.h
|
||||
}
|
||||
|
||||
// fakeCriticHandle is a critic handle that only counts how often each of its
// methods is invoked, so tests can assert on the executor's wiring.
type fakeCriticHandle struct {
	mu sync.Mutex // guards every counter below

	steps   int // number of RecordStep calls
	tools   int // number of RecordToolStart calls
	stops   int // number of Stop calls
	steered int // number of Steer calls
}
|
||||
|
||||
// RecordStep counts one step notification; the step index is ignored.
func (h *fakeCriticHandle) RecordStep(int) {
	h.mu.Lock()
	defer h.mu.Unlock()
	h.steps++
}
|
||||
func (h *fakeCriticHandle) RecordToolStart(string, string) {
|
||||
h.mu.Lock()
|
||||
h.tools++
|
||||
h.mu.Unlock()
|
||||
}
|
||||
// Steer counts the drain attempt and never has any messages to inject, so it
// always returns nil.
func (h *fakeCriticHandle) Steer() []llm.Message {
	h.mu.Lock()
	defer h.mu.Unlock()
	h.steered++
	return nil
}
|
||||
// Deadline always reports the zero time.Time, which this fake uses to mean
// "no hard deadline".
func (h *fakeCriticHandle) Deadline() time.Time {
	var none time.Time // zero value: no hard deadline
	return none
}
|
||||
// Stop counts one stop request.
func (h *fakeCriticHandle) Stop() {
	h.mu.Lock()
	defer h.mu.Unlock()
	h.stops++
}
|
||||
|
||||
// TestCriticWired: an agent with Critic.Enabled gets monitored — Monitor returns
|
||||
// a handle the executor feeds (RecordStep), drains (Steer), and stops.
|
||||
func TestCriticWired(t *testing.T) {
|
||||
h := &fakeCriticHandle{}
|
||||
fp := fake.New("fake")
|
||||
fp.Enqueue("m", fake.Reply("done"))
|
||||
m, _ := fp.Model("m")
|
||||
ex := run.New(run.Config{
|
||||
Registry: tool.NewRegistry(),
|
||||
Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) { return ctx, m, nil },
|
||||
Ports: run.Ports{Critic: &fakeCritic{h: h}},
|
||||
})
|
||||
res := ex.Run(context.Background(),
|
||||
run.RunnableAgent{Name: "watched", ModelTier: "m", Critic: run.CriticConfig{Enabled: true}},
|
||||
tool.Invocation{RunID: "r"}, "go")
|
||||
if res.Err != nil {
|
||||
t.Fatalf("run error: %v", res.Err)
|
||||
}
|
||||
h.mu.Lock()
|
||||
defer h.mu.Unlock()
|
||||
if h.steps < 1 {
|
||||
t.Errorf("critic should have seen >=1 step, got %d", h.steps)
|
||||
}
|
||||
if h.steered < 1 {
|
||||
t.Errorf("critic Steer should be drained at least once, got %d", h.steered)
|
||||
}
|
||||
if h.stops != 1 {
|
||||
t.Errorf("critic Stop should be called exactly once, got %d", h.stops)
|
||||
}
|
||||
}
|
||||
|
||||
// TestCriticDisabledNotMonitored: Critic.Enabled=false → Monitor never called.
|
||||
func TestCriticDisabledNotMonitored(t *testing.T) {
|
||||
h := &fakeCriticHandle{}
|
||||
fp := fake.New("fake")
|
||||
fp.Enqueue("m", fake.Reply("done"))
|
||||
m, _ := fp.Model("m")
|
||||
ex := run.New(run.Config{
|
||||
Registry: tool.NewRegistry(),
|
||||
Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) { return ctx, m, nil },
|
||||
Ports: run.Ports{Critic: &fakeCritic{h: h}},
|
||||
})
|
||||
ex.Run(context.Background(),
|
||||
run.RunnableAgent{Name: "x", ModelTier: "m"}, // Critic.Enabled=false
|
||||
tool.Invocation{RunID: "r"}, "go")
|
||||
h.mu.Lock()
|
||||
defer h.mu.Unlock()
|
||||
if h.stops != 0 || h.steps != 0 {
|
||||
t.Errorf("disabled critic should not be monitored: steps=%d stops=%d", h.steps, h.stops)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user