From df95425bb5e9a50c0cb4028943e6a01510de3804 Mon Sep 17 00:00:00 2001
From: Steve Dudenhoeffer <steve@stevedudenhoeffer.com>
Date: Fri, 26 Jun 2026 20:54:28 -0400
Subject: [PATCH] P3 (kickoff): generic tools/ library + end-to-end
 tool-using-agent test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Stand up executus/tools — the generic, host-agnostic tool library — and prove
the full pattern end to end:

- tools/tools.go: Register(reg) adds the always-available zero-dependency tools
  (currently `think`). A light host calls it and is immediately useful; backed
  tools (web/store/meta groups) will register via grouped registrars with
  nil-safe Deps as they land.
- tools/think.go: the `think` tool moved from mort (its only non-stdlib import
  is executus/tool).
- tools/integration_test.go: end-to-end proof that the executor runs an agent
  which CALLS a registered tool — the fake model emits a `think` tool call, the
  executor dispatches it through the registry, the model finalises, and the step
  instrumentation captures the `think` step. Exercises the full tool-dispatch
  loop through run.Executor.

Stacked on phase-2-run-kernel (P3 needs run.Executor). Remaining P3: the
meta/web/net/store/compose groups + their Deps + default backends (splitting
mort's default.go grab-bag).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CLAUDE.md                 |  6 +++-
 tools/integration_test.go | 73 +++++++++++++++++++++++++++++++++++++++
 tools/think.go            | 72 ++++++++++++++++++++++++++++++++++++++
 tools/tools.go            | 30 ++++++++++++++++
 4 files changed, 180 insertions(+), 1 deletion(-)
 create mode 100644 tools/integration_test.go
 create mode 100644 tools/think.go
 create mode 100644 tools/tools.go

diff --git a/CLAUDE.md b/CLAUDE.md
index 86a7be9..5a7be76 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -58,7 +58,11 @@ CORE (majordomo + stdlib):
              structured output — no separate structured/ pkg)
   llmmeta/  shared meta-LLM helper over model/            [P1 ✓]
   compact/  context compactor (WithCompactor hook)       [P2 ✓]
-  tools/{web,net,store,compose,meta,comms}  generic tools [P3]
+  tools/    generic tool library + Register entrypoint;  [P3 wip]
+            think moved; end-to-end "agent calls a tool"
+            test green. Remaining: meta/web/net/store/
+            compose groups + their nil-safe Deps + default
+            backends (the default.go grab-bag split)      [P3]
 
 BATTERIES (opt-in siblings, each nil-safe + a default):
   persona/   Agent noun + AgentStore seam + yml loader   [P4]
diff --git a/tools/integration_test.go b/tools/integration_test.go
new file mode 100644
index 0000000..d9cf39e
--- /dev/null
+++ b/tools/integration_test.go
@@ -0,0 +1,73 @@
+package tools_test
+
+import (
+	"context"
+	"encoding/json"
+	"testing"
+
+	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
+	"gitea.stevedudenhoeffer.com/steve/majordomo/provider/fake"
+
+	"gitea.stevedudenhoeffer.com/steve/executus/run"
+	"gitea.stevedudenhoeffer.com/steve/executus/tool"
+	"gitea.stevedudenhoeffer.com/steve/executus/tools"
+)
+
+// TestExecutorRunsToolUsingAgent drives one complete tool-dispatch round trip
+// through run.Executor. The scripted fake model first requests the registered
+// `think` tool and then replies "all done"; the test checks that the run
+// finishes without error, returns that reply, and reports a step titled
+// "think" in Result.Steps.
+func TestExecutorRunsToolUsingAgent(t *testing.T) {
+	const modelName = "test-model"
+
+	registry := tool.NewRegistry()
+	if err := tools.Register(registry); err != nil {
+		t.Fatalf("register tools: %v", err)
+	}
+
+	// Script the model: turn one asks for `think`, turn two gives the answer.
+	thinkCall := llm.ToolCall{
+		ID:        "call-1",
+		Name:      "think",
+		Arguments: json.RawMessage(`{"thought":"plan: answer briefly"}`),
+	}
+	provider := fake.New("fake")
+	provider.Enqueue(modelName,
+		fake.ReplyWith(llm.Response{ToolCalls: []llm.ToolCall{thinkCall}}),
+		fake.Reply("all done"),
+	)
+	model, err := provider.Model(modelName)
+	if err != nil {
+		t.Fatalf("fake model: %v", err)
+	}
+
+	// The resolver ignores its string argument and always returns the fake model.
+	resolve := func(ctx context.Context, _ string) (context.Context, llm.Model, error) {
+		return ctx, model, nil
+	}
+	executor := run.New(run.Config{Registry: registry, Models: resolve})
+
+	agent := run.RunnableAgent{Name: "thinker", ModelTier: modelName, LowLevelTools: []string{"think"}}
+	inv := tool.Invocation{RunID: "run-tool-1", CallerID: "c"}
+	result := executor.Run(context.Background(), agent, inv, "do the thing")
+
+	if result.Err != nil {
+		t.Fatalf("run error: %v", result.Err)
+	}
+	if result.Output != "all done" {
+		t.Fatalf("output = %q, want %q", result.Output, "all done")
+	}
+
+	// Step instrumentation must have recorded the think call.
+	sawThink := false
+	for _, step := range result.Steps {
+		if step.Title == "think" {
+			sawThink = true
+			break
+		}
+	}
+	if !sawThink {
+		t.Errorf("expected a `think` step in Result.Steps, got %d steps: %+v", len(result.Steps), result.Steps)
+	}
+}
diff --git a/tools/think.go b/tools/think.go
new file mode 100644
index 0000000..d0dd2c0
--- /dev/null
+++ b/tools/think.go
@@ -0,0 +1,72 @@
+// think — the v11 think tool. (The package documentation lives in tools.go;
+// this comment is kept apart from the package clause so it is not read as one.)
+//
+// Pure prompt-engineering tool: the agent's "thought" is recorded to
+// skill_run_logs (via the audit hook the gated wrapper applies
+// transparently) but produces no side effect. The literature on agent
+// design notes that giving an agent an explicit `think` tool keeps it on
+// plan better than giving it nothing — without one, agents tend to either
+// skip planning OR babble into the final output. With one, planning lands
+// in tool calls and the final output stays clean.
+//
+// V11 deliberately rejects empty thoughts. An agent that learns "calling
+// think with empty args is free" will spam it; a rejection forces the call
+// to actually carry reasoning.
+
+package tools
+
+import (
+	"context"
+	"fmt"
+	"strings"
+
+	"gitea.stevedudenhoeffer.com/steve/executus/tool"
+)
+
+// thinkParams is the argument payload of the think tool: one free-form thought.
+type thinkParams struct {
+	Thought string `json:"thought" description:"Your reasoning. May be a plan, a working hypothesis, an analysis of a tool result, or anything else you'd note in a private scratchpad. Empty input is rejected — make this load-bearing."`
+}
+
+// thinkResponse mirrors the two JSON shapes the think handler emits as string
+// literals ({"ok":true} and {"ok":false,"error":...}); nothing in this file
+// marshals it. It is intentionally minimal: the agent needs no richer output.
+type thinkResponse struct {
+	OK    bool   `json:"ok"`
+	Error string `json:"error,omitempty"`
+}
+
+// NewThink builds the `think` tool as a gated tool with no dependencies.
+// Its handler only checks that the thought is non-blank and replies with one
+// of two fixed JSON strings; it never returns a Go error.
+func NewThink() tool.Tool {
+	perm := tool.Permission{
+		AuthoringRequirement: tool.RequirementAnyone,
+		OperatesOn:           tool.ScopeGlobal,
+		SafeForShare:         true,
+		Categories:           []string{"utility"},
+	}
+	// A blank thought is answered with ok:false inside the JSON envelope and a
+	// nil error, so the agent loop gets a recoverable signal instead of a
+	// failure. Any other thought is acknowledged with a flat ok:true.
+	handle := func(_ context.Context, _ tool.Invocation, args thinkParams) (string, error) {
+		if strings.TrimSpace(args.Thought) != "" {
+			return `{"ok":true}`, nil
+		}
+		return `{"ok":false,"error":"empty_thought"}`, nil
+	}
+	return tool.NewGatedTool[thinkParams](
+		"think",
+		"Record a thought / plan / working hypothesis. The thought is logged to the run trace but does NOT affect any external state. Use to slow down before a tricky tool call, sketch a multi-step plan, or summarise findings before continuing. Empty thoughts are rejected.",
+		perm,
+		handle,
+	)
+}
+
+// Note: the handler returns hand-rolled JSON literals instead of marshalling
+// a thinkResponse, which keeps think the cheapest possible tool — no
+// json.Marshal call and no encoding work per invocation. The two output
+// shapes are static. If a field is ever added to thinkResponse, switch to
+// json.Marshal so the literals and the struct cannot drift apart. The blank
+// fmt.Errorf reference below only keeps the otherwise-unused "fmt" import compiling.
+var _ = thinkResponse{} // referenced so unused-code linters don't flag the struct
+var _ = fmt.Errorf
diff --git a/tools/tools.go b/tools/tools.go
new file mode 100644
index 0000000..3c0045a
--- /dev/null
+++ b/tools/tools.go
@@ -0,0 +1,30 @@
+// Package tools is executus's library of generic, host-agnostic agent tools.
+//
+// A host registers the tools it wants against a tool.Registry, then runs an
+// agent whose RunnableAgent.LowLevelTools name them. Tools split two ways:
+//
+//   - Always-available, zero-dependency tools (think, ...) need no host backend
+//     and register via Register. A light host (gadfly) can call Register and be
+//     immediately useful.
+//   - Backed tools (web search, file/kv storage, summarize, ...) take a nil-safe
+//     Deps describing their host backend; they register via grouped registrars
+//     (RegisterWeb, RegisterStore, ...) as those land.
+//
+// Every tool ships with the same three-stage permission model as mort's, and a
+// host adds its own domain tools against the SAME registry.
+package tools
+
+import "gitea.stevedudenhoeffer.com/steve/executus/tool"
+
+// Register installs every always-available, dependency-free generic tool into
+// reg; today that set is just think. Registration stops at the first failure
+// and that error is returned unchanged; nil means every tool was added.
+func Register(reg tool.Registry) error {
+	builtins := []tool.Tool{NewThink()}
+	for _, builtin := range builtins {
+		if err := reg.Register(builtin); err != nil {
+			return err
+		}
+	}
+	return nil
+}