executus/tools/integration_test.go

package tools_test

import (
	"context"
	"encoding/json"
	"testing"

	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
	"gitea.stevedudenhoeffer.com/steve/majordomo/provider/fake"

	"gitea.stevedudenhoeffer.com/steve/executus/run"
	"gitea.stevedudenhoeffer.com/steve/executus/tool"
	"gitea.stevedudenhoeffer.com/steve/executus/tools"
)

// TestExecutorRunsToolUsingAgent drives one complete agent run against a
// scripted fake model. The tools are registered into a fresh registry, the
// fake model is queued to first emit a `think` tool call and then a plain
// text reply, and the executor is run with an agent that lists `think` among
// its low-level tools. The test passes when the run reports no error, the
// output equals the final scripted reply, and Result.Steps contains a step
// titled `think`.
func TestExecutorRunsToolUsingAgent(t *testing.T) {
	const (
		modelName = "test-model"
		finalText = "all done"
		toolName  = "think"
	)

	// Registry populated by the tools package.
	registry := tool.NewRegistry()
	err := tools.Register(registry)
	if err != nil {
		t.Fatalf("register tools: %v", err)
	}

	// Scripted turn 1: the model asks for the `think` tool.
	thinkCall := fake.ReplyWith(llm.Response{
		ToolCalls: []llm.ToolCall{{
			ID:        "call-1",
			Name:      toolName,
			Arguments: json.RawMessage(`{"thought":"plan: answer briefly"}`),
		}},
	})
	// Scripted turn 2: the model answers in plain text.
	finalReply := fake.Reply(finalText)

	provider := fake.New("fake")
	provider.Enqueue(modelName, thinkCall, finalReply)

	model, err := provider.Model(modelName)
	if err != nil {
		t.Fatalf("fake model: %v", err)
	}

	// Every requested tier resolves to the same scripted model.
	resolveModel := func(ctx context.Context, _ string) (context.Context, llm.Model, error) {
		return ctx, model, nil
	}
	executor := run.New(run.Config{
		Registry: registry,
		Models:   resolveModel,
	})

	agent := run.RunnableAgent{
		Name:          "thinker",
		ModelTier:     modelName,
		LowLevelTools: []string{toolName},
	}
	invocation := tool.Invocation{RunID: "run-tool-1", CallerID: "c"}

	result := executor.Run(context.Background(), agent, invocation, "do the thing")

	if result.Err != nil {
		t.Fatalf("run error: %v", result.Err)
	}
	if result.Output != finalText {
		t.Fatalf("output = %q, want %q", result.Output, finalText)
	}

	// Look for the recorded step that corresponds to the think call.
	thinkRecorded := false
	for _, step := range result.Steps {
		if step.Title != toolName {
			continue
		}
		thinkRecorded = true
		break
	}
	if !thinkRecorded {
		t.Errorf("expected a `think` step in Result.Steps, got %d steps: %+v", len(result.Steps), result.Steps)
	}
}