// executus/run/executor_test.go

package run

import (
	"context"
	"errors"
	"fmt"
	"testing"

	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
	"gitea.stevedudenhoeffer.com/steve/majordomo/provider/fake"

	"gitea.stevedudenhoeffer.com/steve/executus/tool"
)

// fakeModels builds a ModelResolver for tests. It creates a fake provider,
// queues one scripted text reply for "test-model" (no tool calls, so the agent
// loop is expected to finish after that reply), and returns a resolver that
// hands back that model together with the caller's context, whatever tier name
// is requested. The calling test is failed immediately if the fake provider
// cannot supply the model.
func fakeModels(t *testing.T, reply string) ModelResolver {
	t.Helper()

	const modelName = "test-model"

	provider := fake.New("fake")
	provider.Enqueue(modelName, fake.Reply(reply))

	model, err := provider.Model(modelName)
	if err != nil {
		t.Fatalf("fake model: %v", err)
	}

	// The tier argument is deliberately ignored: every lookup resolves to the
	// single scripted model.
	return func(ctx context.Context, _ string) (context.Context, llm.Model, error) {
		return ctx, model, nil
	}
}

// TestExecutorRunHelloWorld is the milestone: executus runs an agent end-to-end
// against the fake provider and returns its output. Proves the kernel is
// runnable with the zero Ports (no persistence/audit/budget/critic).
func TestExecutorRunHelloWorld(t *testing.T) {
	ex := New(Config{
		Registry: tool.NewRegistry(),
		Models:   fakeModels(t, "hello from executus"),
	})

	res := ex.Run(context.Background(),
		RunnableAgent{Name: "greeter", SystemPrompt: "be brief", ModelTier: "test-model"},
		tool.Invocation{RunID: "run-1", CallerID: "caller-1"},
		"say hi")

	if res.Err != nil {
		t.Fatalf("run error: %v", res.Err)
	}
	if res.Output != "hello from executus" {
		t.Fatalf("output = %q, want %q", res.Output, "hello from executus")
	}
	if res.RunID != "run-1" {
		t.Errorf("RunID = %q, want run-1", res.RunID)
	}
}

// TestExecutorBudgetRejection: when the Budget port refuses the caller, the
// run must fail with that refusal and the model resolver must never be
// consulted.
func TestExecutorBudgetRejection(t *testing.T) {
	errDenied := errors.New("over budget")

	// The resolver only records that it was reached; it never supplies a model.
	resolverHit := false
	resolver := func(ctx context.Context, _ string) (context.Context, llm.Model, error) {
		resolverHit = true
		return ctx, nil, nil
	}

	// A budget that rejects every caller.
	denyAll := budgetFunc{check: func(string) error { return errDenied }}

	ex := New(Config{
		Registry: tool.NewRegistry(),
		Models:   resolver,
		Ports:    Ports{Budget: denyAll},
	})

	agent := RunnableAgent{ModelTier: "test-model"}
	inv := tool.Invocation{RunID: "r", CallerID: "broke"}
	res := ex.Run(context.Background(), agent, inv, "hi")

	if got := res.Err; !errors.Is(got, errDenied) {
		t.Fatalf("err = %v, want budget denial", got)
	}
	if resolverHit {
		t.Error("model must not be resolved/called when budget denies")
	}
}

// TestExecutorAuditWiring: the recorder handed out by the Audit port's
// StartRun must be closed at the end of a successful run, and the stats passed
// to Close must carry the terminal status ("ok") and the run's output.
func TestExecutorAuditWiring(t *testing.T) {
	rec := new(captureRecorder)
	audit := auditFunc{
		// Every run shares the single capturing recorder.
		start: func(RunInfo) RunRecorder { return rec },
	}

	ex := New(Config{
		Registry: tool.NewRegistry(),
		Models:   fakeModels(t, "done"),
		Ports:    Ports{Audit: audit},
	})

	agent := RunnableAgent{ModelTier: "test-model"}
	inv := tool.Invocation{RunID: "r2", CallerID: "c"}
	res := ex.Run(context.Background(), agent, inv, "go")

	if err := res.Err; err != nil {
		t.Fatalf("run error: %v", err)
	}
	if !rec.closed {
		t.Fatal("recorder.Close was not called")
	}
	if got := rec.stats.Status; got != "ok" {
		t.Errorf("close status = %q, want ok", got)
	}
	if got := rec.stats.Output; got != "done" {
		t.Errorf("close output = %q, want done", got)
	}
}

// --- test doubles ---

// budgetFunc is a budget test double: the admission decision is delegated to
// the check callback, which must be set before Check is called.
type budgetFunc struct {
	check func(callerID string) error
}

// Check forwards callerID to the configured callback and returns whatever it
// returns. The context is ignored.
func (b budgetFunc) Check(_ context.Context, callerID string) error {
	verdict := b.check(callerID)
	return verdict
}

// Commit discards its arguments; this double keeps no accounting.
func (b budgetFunc) Commit(_ context.Context, _ string, _ float64) {
	// Intentionally empty.
}

// auditFunc is an audit test double: StartRun is served by the start callback,
// which must be set before StartRun is called.
type auditFunc struct {
	start func(RunInfo) RunRecorder
}

// StartRun passes info to the configured callback and returns the recorder it
// produces. The context is ignored.
func (a auditFunc) StartRun(_ context.Context, info RunInfo) RunRecorder {
	recorder := a.start(info)
	return recorder
}

// captureRecorder is a recorder test double that remembers how it was used so
// tests can assert on it afterwards.
type captureRecorder struct {
	closed bool     // set once Close has been called
	stats  RunStats // the stats most recently passed to Close
	steps  int      // number of OnStep calls
	tools  int      // number of OnTool calls
}

// TokenStats always reports zero for every counter.
func (r *captureRecorder) TokenStats() (in, out, thinking int64) {
	return 0, 0, 0
}

// ToolCallsCount reports how many times OnTool has been invoked.
func (r *captureRecorder) ToolCallsCount() int {
	return r.tools
}

// OnStep counts the step; its arguments are ignored.
func (r *captureRecorder) OnStep(_ int, _ *llm.Response) {
	r.steps++
}

// OnTool counts the tool call; its arguments are ignored.
func (r *captureRecorder) OnTool(_ llm.ToolCall, _ string) {
	r.tools++
}

// LogEvent discards the event.
func (r *captureRecorder) LogEvent(_ string, _ map[string]any) {
	// Intentionally empty.
}

// LogError discards the message.
func (r *captureRecorder) LogError(_ string) {
	// Intentionally empty.
}

// Close stores the final stats and marks the recorder as closed. The context
// is ignored.
func (r *captureRecorder) Close(_ context.Context, s RunStats) {
	r.stats = s
	r.closed = true
}

// TestExecutorNilModelNoPanic: a resolver returning (ctx, nil, nil) yields a
// clean error, not a nil-pointer panic (gadfly F1, high severity).
func TestExecutorNilModelNoPanic(t *testing.T) {
	ex := New(Config{
		Registry: tool.NewRegistry(),
		Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) {
			return ctx, nil, nil // nil model, nil error
		},
	})
	res := ex.Run(context.Background(),
		RunnableAgent{ModelTier: "x"}, tool.Invocation{RunID: "r"}, "hi")
	if res.Err == nil {
		t.Fatal("expected an error for a nil model, got nil (would have panicked in the loop)")
	}
}

// TestStatusFor checks how statusFor maps a run error, together with the
// context's cancellation cause, to the RunStats.Status string (gadfly F3).
//
// Each row runs as a named subtest. Two rows share the same error
// (context.Canceled) and differ only in their context, so a failure message
// that printed just the error could not tell them apart.
func TestStatusFor(t *testing.T) {
	bg := context.Background()

	// A context cancelled with the critic-kill cause: ctx.Err() is Canceled, but
	// context.Cause carries ErrCriticKill → "killed".
	killCtx, killCancel := context.WithCancelCause(context.Background())
	killCancel(fmt.Errorf("%w: hung", ErrCriticKill))

	cases := []struct {
		name string
		ctx  context.Context
		err  error
		want string
	}{
		{"nil error", bg, nil, "ok"},
		{"deadline exceeded", bg, context.DeadlineExceeded, "timeout"},
		{"cancelled", bg, context.Canceled, "cancelled"},
		{"wrapped deadline exceeded", bg, fmt.Errorf("wrapped: %w", context.DeadlineExceeded), "timeout"},
		{"generic error", bg, errors.New("boom"), "error"},
		{"cancelled with critic-kill cause", killCtx, context.Canceled, "killed"},
	}
	for _, c := range cases {
		// Subtests run synchronously (no t.Parallel), so capturing the loop
		// variable is safe on every Go version.
		t.Run(c.name, func(t *testing.T) {
			if got := statusFor(c.ctx, c.err); got != c.want {
				t.Errorf("statusFor(%v) = %q, want %q", c.err, got, c.want)
			}
		})
	}
}