P3 (kickoff): generic tools/ library + end-to-end tool-using-agent test
Stand up executus/tools — the generic, host-agnostic tool library — and prove the full pattern end to end:

- tools/tools.go: Register(reg) adds the always-available zero-dependency tools (currently `think`). A light host calls it and is immediately useful; backed tools (web/store/meta groups) will register via grouped registrars with nil-safe Deps as they land.
- tools/think.go: the `think` tool moved from mort (imports only executus/tool).
- tools/integration_test.go: end-to-end proof that the executor runs an agent which CALLS a registered tool — the fake model emits a `think` tool call, the executor dispatches it through the registry, the model finalises, and the step instrumentation captures the `think` step. Exercises the full tool-dispatch loop through run.Executor.

Stacked on phase-2-run-kernel (P3 needs run.Executor).

Remaining P3: the meta/web/net/store/compose groups + their Deps + default backends (splitting mort's default.go grab-bag).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -58,7 +58,11 @@ CORE (majordomo + stdlib):
|
|||||||
structured output — no separate structured/ pkg)
|
structured output — no separate structured/ pkg)
|
||||||
llmmeta/ shared meta-LLM helper over model/ [P1 ✓]
|
llmmeta/ shared meta-LLM helper over model/ [P1 ✓]
|
||||||
compact/ context compactor (WithCompactor hook) [P2 ✓]
|
compact/ context compactor (WithCompactor hook) [P2 ✓]
|
||||||
tools/{web,net,store,compose,meta,comms} generic tools [P3]
|
tools/ generic tool library + Register entrypoint; [P3 wip]
|
||||||
|
think moved; end-to-end "agent calls a tool"
|
||||||
|
test green. Remaining: meta/web/net/store/
|
||||||
|
compose groups + their nil-safe Deps + default
|
||||||
|
backends (the default.go grab-bag split) [P3]
|
||||||
|
|
||||||
BATTERIES (opt-in siblings, each nil-safe + a default):
|
BATTERIES (opt-in siblings, each nil-safe + a default):
|
||||||
persona/ Agent noun + AgentStore seam + yml loader [P4]
|
persona/ Agent noun + AgentStore seam + yml loader [P4]
|
||||||
|
|||||||
@@ -0,0 +1,73 @@
|
|||||||
|
package tools_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
||||||
|
"gitea.stevedudenhoeffer.com/steve/majordomo/provider/fake"
|
||||||
|
|
||||||
|
"gitea.stevedudenhoeffer.com/steve/executus/run"
|
||||||
|
"gitea.stevedudenhoeffer.com/steve/executus/tool"
|
||||||
|
"gitea.stevedudenhoeffer.com/steve/executus/tools"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestExecutorRunsToolUsingAgent is the end-to-end proof that a host can
|
||||||
|
// register a generic tool and the executor runs an agent that CALLS it: the
|
||||||
|
// fake model emits a `think` tool call, the executor dispatches it through the
|
||||||
|
// registered tool, then the model finalises. Exercises the full tool-dispatch
|
||||||
|
// loop + step instrumentation.
|
||||||
|
func TestExecutorRunsToolUsingAgent(t *testing.T) {
|
||||||
|
reg := tool.NewRegistry()
|
||||||
|
if err := tools.Register(reg); err != nil {
|
||||||
|
t.Fatalf("register tools: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
fp := fake.New("fake")
|
||||||
|
fp.Enqueue("test-model",
|
||||||
|
// Step 1: the model decides to call `think`.
|
||||||
|
fake.ReplyWith(llm.Response{
|
||||||
|
ToolCalls: []llm.ToolCall{{
|
||||||
|
ID: "call-1",
|
||||||
|
Name: "think",
|
||||||
|
Arguments: json.RawMessage(`{"thought":"plan: answer briefly"}`),
|
||||||
|
}},
|
||||||
|
}),
|
||||||
|
// Step 2: with the tool result in hand, the model finalises.
|
||||||
|
fake.Reply("all done"),
|
||||||
|
)
|
||||||
|
m, err := fp.Model("test-model")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("fake model: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
ex := run.New(run.Config{
|
||||||
|
Registry: reg,
|
||||||
|
Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) {
|
||||||
|
return ctx, m, nil
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
res := ex.Run(context.Background(),
|
||||||
|
run.RunnableAgent{Name: "thinker", ModelTier: "test-model", LowLevelTools: []string{"think"}},
|
||||||
|
tool.Invocation{RunID: "run-tool-1", CallerID: "c"},
|
||||||
|
"do the thing")
|
||||||
|
|
||||||
|
if res.Err != nil {
|
||||||
|
t.Fatalf("run error: %v", res.Err)
|
||||||
|
}
|
||||||
|
if res.Output != "all done" {
|
||||||
|
t.Fatalf("output = %q, want %q", res.Output, "all done")
|
||||||
|
}
|
||||||
|
// The step instrumentation should have captured the think call.
|
||||||
|
var sawThink bool
|
||||||
|
for _, s := range res.Steps {
|
||||||
|
if s.Title == "think" {
|
||||||
|
sawThink = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !sawThink {
|
||||||
|
t.Errorf("expected a `think` step in Result.Steps, got %d steps: %+v", len(res.Steps), res.Steps)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,72 @@
|
|||||||
|
// Package tools — v11 think.
|
||||||
|
//
|
||||||
|
// Pure prompt-engineering tool: the agent's "thought" is recorded
|
||||||
|
// to skill_run_logs (via the audit hook the gated wrapper applies
|
||||||
|
// transparently) but produces no side effect. The literature on
|
||||||
|
// agent design notes that giving an agent an explicit `think` tool
|
||||||
|
// keeps it on plan better than giving it nothing — without one,
|
||||||
|
// agents tend to either skip planning OR babble into the final
|
||||||
|
// output. With one, planning lands in tool calls and the final
|
||||||
|
// output stays clean.
|
||||||
|
//
|
||||||
|
// V11 deliberately rejects empty thoughts. An agent that learns
|
||||||
|
// "calling think with empty args is free" will spam it; a
|
||||||
|
// rejection forces the call to actually carry reasoning.
|
||||||
|
package tools
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"gitea.stevedudenhoeffer.com/steve/executus/tool"
|
||||||
|
)
|
||||||
|
|
||||||
|
// thinkParams is the argument payload of the `think` tool: a single free-form
// thought string, decoded from the "thought" JSON key. The description tag
// holds the guidance text for that field, including the notice that empty
// input is rejected.
type thinkParams struct {
	Thought string `json:"thought" description:"Your reasoning. May be a plan, a working hypothesis, an analysis of a tool result, or anything else you'd note in a private scratchpad. Empty input is rejected — make this load-bearing."`
}
|
||||||
|
|
||||||
|
// thinkResponse is the deliberately small reply envelope for `think`. OK
// reports whether the thought was accepted; Error names the rejection reason
// and is dropped from the JSON encoding when empty. The agent has no use for
// richer machine-readable output here: the value of the call is the audit
// trail plus the implicit "you have planned, now continue" nudge it gives the
// agent loop.
type thinkResponse struct {
	OK    bool   `json:"ok"`
	Error string `json:"error,omitempty"`
}
|
||||||
|
|
||||||
|
// NewThink constructs the v11 think tool. No deps — the audit
|
||||||
|
// hook wrapper handles persistence transparently.
|
||||||
|
func NewThink() tool.Tool {
|
||||||
|
return tool.NewGatedTool[thinkParams](
|
||||||
|
"think",
|
||||||
|
"Record a thought / plan / working hypothesis. The thought is logged to the run trace but does NOT affect any external state. Use to slow down before a tricky tool call, sketch a multi-step plan, or summarise findings before continuing. Empty thoughts are rejected.",
|
||||||
|
tool.Permission{
|
||||||
|
AuthoringRequirement: tool.RequirementAnyone,
|
||||||
|
OperatesOn: tool.ScopeGlobal,
|
||||||
|
SafeForShare: true,
|
||||||
|
Categories: []string{"utility"},
|
||||||
|
},
|
||||||
|
func(_ context.Context, _ tool.Invocation, p thinkParams) (string, error) {
|
||||||
|
if strings.TrimSpace(p.Thought) == "" {
|
||||||
|
// Returns ok:false in a structured envelope rather
|
||||||
|
// than an error so the agent loop continues with a
|
||||||
|
// recoverable signal.
|
||||||
|
return `{"ok":false,"error":"empty_thought"}`, nil
|
||||||
|
}
|
||||||
|
// Successful think emits a flat JSON. The audit hook
|
||||||
|
// (auto-injected by NewGatedTool) writes the args + result
|
||||||
|
// pair so the trace UI shows the thought verbatim.
|
||||||
|
return `{"ok":true}`, nil
|
||||||
|
},
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Note: returning a hand-rolled JSON literal instead of a marshaller
|
||||||
|
// keeps think the cheapest possible tool — no heap allocation, no
|
||||||
|
// json.Marshal call, no goroutine-local buffer churn. The two output
|
||||||
|
// shapes are static. If a future field is added to thinkResponse,
|
||||||
|
// switch back to json.Marshal — but until then, the literal is the
|
||||||
|
// idiom that matches the tool's "do nothing" intent.
|
||||||
|
var _ = thinkResponse{} // declared so vet doesn't flag the unused struct
|
||||||
|
var _ = fmt.Errorf
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
// Package tools is executus's library of generic, host-agnostic agent tools.
|
||||||
|
//
|
||||||
|
// A host registers the tools it wants against a tool.Registry, then runs an
|
||||||
|
// agent whose RunnableAgent.LowLevelTools name them. Tools split two ways:
|
||||||
|
//
|
||||||
|
// - Always-available, zero-dependency tools (think, ...) need no host backend
|
||||||
|
// and register via Register. A light host (gadfly) can call Register and be
|
||||||
|
// immediately useful.
|
||||||
|
// - Backed tools (web search, file/kv storage, summarize, ...) take a nil-safe
|
||||||
|
// Deps describing their host backend; they register via grouped registrars
|
||||||
|
// (RegisterWeb, RegisterStore, ...) as those land.
|
||||||
|
//
|
||||||
|
// Every tool ships with the same three-stage permission model as mort's, and a
|
||||||
|
// host adds its own domain tools against the SAME registry.
|
||||||
|
package tools
|
||||||
|
|
||||||
|
import "gitea.stevedudenhoeffer.com/steve/executus/tool"
|
||||||
|
|
||||||
|
// Register adds the always-available, zero-dependency generic tools to reg
|
||||||
|
// (currently: think). Returns the first registration error, if any.
|
||||||
|
func Register(reg tool.Registry) error {
|
||||||
|
for _, t := range []tool.Tool{
|
||||||
|
NewThink(),
|
||||||
|
} {
|
||||||
|
if err := reg.Register(t); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user