Files
executus/run/executor_test.go
T
steve e76eed0011
executus CI / test (pull_request) Successful in 1m0s
Adversarial Review (Gadfly) / review (pull_request) Successful in 28m58s
P2: run.Executor — executus is runnable
The capstone of the run kernel: run.Executor.Run(ctx, RunnableAgent, inv)
ties model resolution + the tool registry + majordomo's agent loop +
context compaction + run-bounding + step/audit instrumentation into one
path, with every host concern behind the nil-safe run.Ports.

- run/executor.go: New(Config{Registry, Models, Defaults, Ports, Compactor,
  ContextTokens, SystemHeader}) + Run -> Result{RunID, Output, Steps, Usage,
  Err}. Budget gate (pre-run), model resolve, Audit StartRun/recorder
  (satisfies RunTally, stamped on inv.RunState), toolbox build, step observer
  (zips tool calls/results -> emitter + recorder.OnStep/OnTool), V10
  detached-MaxRuntime context with caller-cancel merged back, compaction wired
  from ContextTokens×ratio, audit Close + Budget Commit on a detached cleanup
  ctx. Zero Ports = a bounded in-memory run (gadfly's case).
- run/executor_test.go: hermetic end-to-end run against majordomo's fake
  provider (hello-world), Budget-rejection (no model call), Audit-port wiring
  (StartRun + Close with terminal status/output). All green under -race.
- examples/minimal upgraded to the real "hello, agentic world" (~15 lines:
  Configure tiers -> run.New -> Run -> print). README/CLAUDE.md updated.

Remaining P2 follow-ups (incremental): wire Critic/Checkpointer/PaletteSource/
Delivery into the loop, multi-phase Pipelines, and the no-tools direct path.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-26 20:45:10 -04:00

133 lines
4.1 KiB
Go

package run
import (
"context"
"errors"
"testing"
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
"gitea.stevedudenhoeffer.com/steve/majordomo/provider/fake"
"gitea.stevedudenhoeffer.com/steve/executus/tool"
)
// fakeModels builds a ModelResolver for tests. A fake provider is scripted
// with a single plain-text reply (no tool calls) for "test-model", and the
// returned resolver hands back that model with the caller's context unchanged,
// ignoring the string argument it is given. Failure to obtain the model aborts
// the calling test.
func fakeModels(t *testing.T, reply string) ModelResolver {
	t.Helper()

	provider := fake.New("fake")
	provider.Enqueue("test-model", fake.Reply(reply))

	model, err := provider.Model("test-model")
	if err != nil {
		t.Fatalf("fake model: %v", err)
	}

	resolve := func(ctx context.Context, _ string) (context.Context, llm.Model, error) {
		return ctx, model, nil
	}
	return resolve
}
// TestExecutorRunHelloWorld is the milestone: executus runs an agent end-to-end
// against the fake provider and returns its output. Proves the kernel is
// runnable with the zero Ports (no persistence/audit/budget/critic).
func TestExecutorRunHelloWorld(t *testing.T) {
ex := New(Config{
Registry: tool.NewRegistry(),
Models: fakeModels(t, "hello from executus"),
})
res := ex.Run(context.Background(),
RunnableAgent{Name: "greeter", SystemPrompt: "be brief", ModelTier: "test-model"},
tool.Invocation{RunID: "run-1", CallerID: "caller-1"},
"say hi")
if res.Err != nil {
t.Fatalf("run error: %v", res.Err)
}
if res.Output != "hello from executus" {
t.Fatalf("output = %q, want %q", res.Output, "hello from executus")
}
if res.RunID != "run-1" {
t.Errorf("RunID = %q, want run-1", res.RunID)
}
}
// TestExecutorBudgetRejection verifies the budget gate: when the Budget port's
// Check returns an error, Run must surface that error and must never invoke
// the model resolver.
func TestExecutorBudgetRejection(t *testing.T) {
	errDenied := errors.New("over budget")

	// The resolver only records that it was reached; it hands back a nil
	// model because the test expects it never to be used.
	resolverHit := false
	resolver := func(ctx context.Context, _ string) (context.Context, llm.Model, error) {
		resolverHit = true
		return ctx, nil, nil
	}
	denyAll := budgetFunc{check: func(string) error { return errDenied }}

	executor := New(Config{
		Registry: tool.NewRegistry(),
		Models:   resolver,
		Ports:    Ports{Budget: denyAll},
	})
	result := executor.Run(
		context.Background(),
		RunnableAgent{ModelTier: "test-model"},
		tool.Invocation{RunID: "r", CallerID: "broke"},
		"hi",
	)

	if !errors.Is(result.Err, errDenied) {
		t.Fatalf("err = %v, want budget denial", result.Err)
	}
	if resolverHit {
		t.Error("model must not be resolved/called when budget denies")
	}
}
// TestExecutorAuditWiring verifies the Audit port is exercised by a run: the
// recorder returned from StartRun must have Close called on it, and the stats
// passed to Close must carry status "ok" and the model's output.
func TestExecutorAuditWiring(t *testing.T) {
	recorder := &captureRecorder{}
	audit := auditFunc{start: func(RunInfo) RunRecorder { return recorder }}

	executor := New(Config{
		Registry: tool.NewRegistry(),
		Models:   fakeModels(t, "done"),
		Ports:    Ports{Audit: audit},
	})
	result := executor.Run(
		context.Background(),
		RunnableAgent{ModelTier: "test-model"},
		tool.Invocation{RunID: "r2", CallerID: "c"},
		"go",
	)

	if err := result.Err; err != nil {
		t.Fatalf("run error: %v", err)
	}
	if !recorder.closed {
		t.Fatal("recorder.Close was not called")
	}
	if got := recorder.stats.Status; got != "ok" {
		t.Errorf("close status = %q, want ok", got)
	}
	if got := recorder.stats.Output; got != "done" {
		t.Errorf("close output = %q, want done", got)
	}
}
// --- test doubles ---
// budgetFunc is a test double used as the Budget port. Its admission decision
// is delegated entirely to the check callback, which must be non-nil before
// Check is called.
type budgetFunc struct {
	check func(callerID string) error
}

// Check passes callerID to the configured callback and returns whatever the
// callback returns. The context argument is ignored.
func (bf budgetFunc) Check(_ context.Context, callerID string) error {
	verdict := bf.check(callerID)
	return verdict
}

// Commit is intentionally a no-op; this double records nothing.
func (budgetFunc) Commit(context.Context, string, float64) {}
// auditFunc is a test double used as the Audit port. StartRun is delegated to
// the start callback, which must be non-nil before StartRun is called.
type auditFunc struct {
	start func(RunInfo) RunRecorder
}

// StartRun hands info to the configured callback and returns the recorder the
// callback produces. The context argument is ignored.
func (af auditFunc) StartRun(_ context.Context, info RunInfo) RunRecorder {
	recorder := af.start(info)
	return recorder
}
// captureRecorder is a recorder test double that remembers whether and how it
// was closed and counts the step/tool callbacks it receives. Everything else
// is a no-op.
type captureRecorder struct {
	closed bool     // true once Close has been called
	stats  RunStats // the stats passed to the most recent Close
	steps  int      // number of OnStep calls received
	tools  int      // number of OnTool calls received
}

// TokenStats always reports zero for every counter.
func (cr *captureRecorder) TokenStats() (in, out, thinking int64) {
	return 0, 0, 0
}

// ToolCallsCount returns how many times OnTool has been called.
func (cr *captureRecorder) ToolCallsCount() int {
	return cr.tools
}

// OnStep counts the step; its arguments are ignored.
func (cr *captureRecorder) OnStep(int, *llm.Response) {
	cr.steps++
}

// OnTool counts the tool call; its arguments are ignored.
func (cr *captureRecorder) OnTool(llm.ToolCall, string) {
	cr.tools++
}

// LogEvent discards the event.
func (cr *captureRecorder) LogEvent(string, map[string]any) {}

// LogError discards the message.
func (cr *captureRecorder) LogError(string) {}

// Close marks the recorder closed and keeps the supplied stats for later
// inspection. The context argument is ignored.
func (cr *captureRecorder) Close(_ context.Context, s RunStats) {
	cr.closed = true
	cr.stats = s
}