be4bbbcad5
The WithCancelCause+timer rewrite made MaxRuntime surface as Canceled (not DeadlineExceeded), so statusFor's context.Cause(DeadlineExceeded) check could relabel (a) a genuine run error as 'timeout' and (b) a caller cancel/deadline as 'timeout' (was 'cancelled'). Convergent gadfly finding (glm-5.2 + cluster). Fix: keep MaxRuntime as WithTimeout (its DeadlineExceeded propagates → 'timeout', preserving own-timeout vs caller-cancel), add a NESTED WithCancelCause layer only for the kill. statusFor consults context.Cause ONLY for ErrCriticKill; everything else is classified by the run error itself. Tests: generic-error-not-relabeled + caller-cancel-stays-cancelled. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
182 lines
6.0 KiB
Go
182 lines
6.0 KiB
Go
package run
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"testing"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
|
"gitea.stevedudenhoeffer.com/steve/majordomo/provider/fake"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/executus/tool"
|
|
)
|
|
|
|
// fakeModels returns a ModelResolver backed by a fake provider scripted to
|
|
// reply with the given text (no tool calls — the loop terminates immediately).
|
|
func fakeModels(t *testing.T, reply string) ModelResolver {
|
|
t.Helper()
|
|
fp := fake.New("fake")
|
|
fp.Enqueue("test-model", fake.Reply(reply))
|
|
m, err := fp.Model("test-model")
|
|
if err != nil {
|
|
t.Fatalf("fake model: %v", err)
|
|
}
|
|
return func(ctx context.Context, _ string) (context.Context, llm.Model, error) {
|
|
return ctx, m, nil
|
|
}
|
|
}
|
|
|
|
// TestExecutorRunHelloWorld is the milestone: executus runs an agent end-to-end
|
|
// against the fake provider and returns its output. Proves the kernel is
|
|
// runnable with the zero Ports (no persistence/audit/budget/critic).
|
|
func TestExecutorRunHelloWorld(t *testing.T) {
|
|
ex := New(Config{
|
|
Registry: tool.NewRegistry(),
|
|
Models: fakeModels(t, "hello from executus"),
|
|
})
|
|
|
|
res := ex.Run(context.Background(),
|
|
RunnableAgent{Name: "greeter", SystemPrompt: "be brief", ModelTier: "test-model"},
|
|
tool.Invocation{RunID: "run-1", CallerID: "caller-1"},
|
|
"say hi")
|
|
|
|
if res.Err != nil {
|
|
t.Fatalf("run error: %v", res.Err)
|
|
}
|
|
if res.Output != "hello from executus" {
|
|
t.Fatalf("output = %q, want %q", res.Output, "hello from executus")
|
|
}
|
|
if res.RunID != "run-1" {
|
|
t.Errorf("RunID = %q, want run-1", res.RunID)
|
|
}
|
|
}
|
|
|
|
// TestExecutorBudgetRejection: a Budget that denies makes no model call.
|
|
func TestExecutorBudgetRejection(t *testing.T) {
|
|
denied := errors.New("over budget")
|
|
var modelCalled bool
|
|
models := func(ctx context.Context, _ string) (context.Context, llm.Model, error) {
|
|
modelCalled = true
|
|
return ctx, nil, nil
|
|
}
|
|
ex := New(Config{
|
|
Registry: tool.NewRegistry(),
|
|
Models: models,
|
|
Ports: Ports{Budget: budgetFunc{check: func(string) error { return denied }}},
|
|
})
|
|
|
|
res := ex.Run(context.Background(),
|
|
RunnableAgent{ModelTier: "test-model"},
|
|
tool.Invocation{RunID: "r", CallerID: "broke"}, "hi")
|
|
|
|
if !errors.Is(res.Err, denied) {
|
|
t.Fatalf("err = %v, want budget denial", res.Err)
|
|
}
|
|
if modelCalled {
|
|
t.Error("model must not be resolved/called when budget denies")
|
|
}
|
|
}
|
|
|
|
// TestExecutorAuditWiring: the Audit port receives StartRun + Close with the
|
|
// terminal status/output.
|
|
func TestExecutorAuditWiring(t *testing.T) {
|
|
rec := &captureRecorder{}
|
|
ex := New(Config{
|
|
Registry: tool.NewRegistry(),
|
|
Models: fakeModels(t, "done"),
|
|
Ports: Ports{Audit: auditFunc{start: func(RunInfo) RunRecorder { return rec }}},
|
|
})
|
|
|
|
res := ex.Run(context.Background(),
|
|
RunnableAgent{ModelTier: "test-model"},
|
|
tool.Invocation{RunID: "r2", CallerID: "c"}, "go")
|
|
|
|
if res.Err != nil {
|
|
t.Fatalf("run error: %v", res.Err)
|
|
}
|
|
if !rec.closed {
|
|
t.Fatal("recorder.Close was not called")
|
|
}
|
|
if rec.stats.Status != "ok" {
|
|
t.Errorf("close status = %q, want ok", rec.stats.Status)
|
|
}
|
|
if rec.stats.Output != "done" {
|
|
t.Errorf("close output = %q, want done", rec.stats.Output)
|
|
}
|
|
}
|
|
|
|
// --- test doubles ---
|
|
|
|
// budgetFunc is a test double for the Budget port: Check is delegated to the
// supplied function and Commit does nothing.
type budgetFunc struct {
	check func(callerID string) error
}

// Check forwards callerID to the wrapped function and returns its verdict; the
// context is ignored.
func (b budgetFunc) Check(_ context.Context, callerID string) error {
	verdict := b.check(callerID)
	return verdict
}

// Commit is a no-op; the double does not track spend.
func (b budgetFunc) Commit(context.Context, string, float64) {
}
|
|
|
|
type auditFunc struct{ start func(RunInfo) RunRecorder }
|
|
|
|
func (a auditFunc) StartRun(_ context.Context, info RunInfo) RunRecorder { return a.start(info) }
|
|
|
|
type captureRecorder struct {
|
|
closed bool
|
|
stats RunStats
|
|
steps int
|
|
tools int
|
|
}
|
|
|
|
func (r *captureRecorder) TokenStats() (in, out, thinking int64) { return 0, 0, 0 }
|
|
func (r *captureRecorder) ToolCallsCount() int { return r.tools }
|
|
func (r *captureRecorder) OnStep(int, *llm.Response) { r.steps++ }
|
|
func (r *captureRecorder) OnTool(llm.ToolCall, string) { r.tools++ }
|
|
func (r *captureRecorder) LogEvent(string, map[string]any) {}
|
|
func (r *captureRecorder) LogError(string) {}
|
|
func (r *captureRecorder) Close(_ context.Context, s RunStats) { r.closed = true; r.stats = s }
|
|
|
|
// TestExecutorNilModelNoPanic: a resolver returning (ctx, nil, nil) yields a
|
|
// clean error, not a nil-pointer panic (gadfly F1, high severity).
|
|
func TestExecutorNilModelNoPanic(t *testing.T) {
|
|
ex := New(Config{
|
|
Registry: tool.NewRegistry(),
|
|
Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) {
|
|
return ctx, nil, nil // nil model, nil error
|
|
},
|
|
})
|
|
res := ex.Run(context.Background(),
|
|
RunnableAgent{ModelTier: "x"}, tool.Invocation{RunID: "r"}, "hi")
|
|
if res.Err == nil {
|
|
t.Fatal("expected an error for a nil model, got nil (would have panicked in the loop)")
|
|
}
|
|
}
|
|
|
|
// TestStatusFor maps run errors + cancellation cause to RunStats.Status (gadfly F3).
|
|
func TestStatusFor(t *testing.T) {
|
|
bg := context.Background()
|
|
// A context cancelled with the critic-kill cause: ctx.Err() is Canceled, but
|
|
// context.Cause carries ErrCriticKill → "killed".
|
|
killCtx, killCancel := context.WithCancelCause(context.Background())
|
|
killCancel(fmt.Errorf("%w: hung", ErrCriticKill))
|
|
// A context cancelled with a non-kill cause must NOT relabel a genuine run
|
|
// error: a real error stays "error" even though the ctx was later cancelled.
|
|
cancelledCtx, cc := context.WithCancelCause(context.Background())
|
|
cc(context.DeadlineExceeded)
|
|
cases := []struct {
|
|
ctx context.Context
|
|
err error
|
|
want string
|
|
}{
|
|
{bg, nil, "ok"},
|
|
{bg, context.DeadlineExceeded, "timeout"},
|
|
{bg, context.Canceled, "cancelled"},
|
|
{bg, fmt.Errorf("wrapped: %w", context.DeadlineExceeded), "timeout"},
|
|
{bg, errors.New("boom"), "error"},
|
|
{killCtx, context.Canceled, "killed"},
|
|
{cancelledCtx, errors.New("boom"), "error"}, // generic error not relabeled by cause
|
|
{cancelledCtx, context.Canceled, "cancelled"}, // caller cancel stays cancelled, not timeout
|
|
}
|
|
for _, c := range cases {
|
|
if got := statusFor(c.ctx, c.err); got != c.want {
|
|
t.Errorf("statusFor(%v) = %q, want %q", c.err, got, c.want)
|
|
}
|
|
}
|
|
}
|