dc28b63ad8
executus CI / test (push) Successful in 36s
The tool registry core (registry, permission model, Invocation, gated-tool wrapper, ssrf guard, hmac, encryption, argcoerce, helpers, rootrun, session_tools, webhook_rate_limit) had zero mort coupling — it imports only majordomo/llm + x/crypto/hkdf — so it moves verbatim with a package rename (skilltools -> tool). All same-package tests came along and pass; the SSRF, gated-wrapper, encryption and output-pattern invariants are re-anchored here. majordomo re-enters the module graph (now pinned to the latest, incl. the front-loaded-output fix). model/ + llmmeta + structured follow next. Docs: CLAUDE.md now requires README/examples to stay in sync with changes in the same commit; CI skips docs/example-only pushes via paths-ignore. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
402 lines
13 KiB
Go
402 lines
13 KiB
Go
package tool
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"strings"
|
|
"sync"
|
|
"testing"
|
|
|
|
llm "gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
|
)
|
|
|
|
// gatedTestParams is the typed argument payload shared by the
// gated_tool tests in this file. It is modeled on a production tool's
// parameters: a pair of strings supplied by the LLM.
type gatedTestParams struct {
	// Question is always emitted in the JSON form.
	Question string `json:"question" description:"The question to answer."`
	// Detail is dropped from the JSON form when empty (omitempty).
	Detail string `json:"detail,omitempty" description:"Optional detail level."`
}
|
|
|
|
// recordingAudit captures every AuditCall the wrapper emits so tests
|
|
// can assert exactly what the wrapper logged. Concurrent-safe in case a
|
|
// future test parallelises across goroutines.
|
|
type recordingAudit struct {
|
|
mu sync.Mutex
|
|
calls []AuditCall
|
|
}
|
|
|
|
func (r *recordingAudit) hook() AuditHook {
|
|
return func(call AuditCall) {
|
|
r.mu.Lock()
|
|
defer r.mu.Unlock()
|
|
r.calls = append(r.calls, call)
|
|
}
|
|
}
|
|
|
|
func (r *recordingAudit) snapshot() []AuditCall {
|
|
r.mu.Lock()
|
|
defer r.mu.Unlock()
|
|
out := make([]AuditCall, len(r.calls))
|
|
copy(out, r.calls)
|
|
return out
|
|
}
|
|
|
|
// buildAndExecute is the test-only convenience for going from a
|
|
// constructed Tool to an llm.Tool result. Mirrors how the production
|
|
// registry's Build call wires inv.gate / inv.audit.
|
|
func buildAndExecute(t *testing.T, tool Tool, inv Invocation, vis Visibility, audit AuditHook, args string) (string, error) {
|
|
t.Helper()
|
|
r := NewRegistry()
|
|
if err := r.Register(tool); err != nil {
|
|
t.Fatalf("register: %v", err)
|
|
}
|
|
box, err := r.Build([]string{tool.Name()}, inv, vis, audit)
|
|
if err != nil {
|
|
t.Fatalf("build: %v", err)
|
|
}
|
|
return execBox(box, toolCall{Name: tool.Name(), Arguments: args})
|
|
}
|
|
|
|
// TestNewGatedTool_GateRejection verifies that the wrapper auto-injects
|
|
// CheckGate: if the invocation's SkillName doesn't match the tool's
|
|
// SkillNameGate, fn never runs and the audit row is emitted with the
|
|
// gate error. This is the core contract that v1 hotfix #4 had to
|
|
// retrofit by hand.
|
|
func TestNewGatedTool_GateRejection(t *testing.T) {
|
|
called := false
|
|
tool := NewGatedTool[gatedTestParams](
|
|
"gated_test_tool",
|
|
"A test tool gated to my-skill.",
|
|
Permission{
|
|
AuthoringRequirement: RequirementAnyone,
|
|
OperatesOn: ScopeGlobal,
|
|
SafeForShare: true,
|
|
SkillNameGate: "my-skill",
|
|
},
|
|
func(ctx context.Context, inv Invocation, args gatedTestParams) (string, error) {
|
|
called = true
|
|
return "should not be reached", nil
|
|
},
|
|
)
|
|
|
|
rec := &recordingAudit{}
|
|
out, err := buildAndExecute(t, tool,
|
|
Invocation{SkillName: "other-skill"},
|
|
VisibilityPrivate, rec.hook(),
|
|
`{"question":"hi"}`)
|
|
|
|
if err == nil {
|
|
t.Fatalf("expected gate-rejection error, got out=%q err=nil", out)
|
|
}
|
|
if !strings.Contains(err.Error(), "restricted to") {
|
|
t.Fatalf("expected error containing 'restricted to', got %v", err)
|
|
}
|
|
if called {
|
|
t.Errorf("fn was called despite gate rejection — wrapper failed to inject CheckGate")
|
|
}
|
|
|
|
calls := rec.snapshot()
|
|
if len(calls) != 1 {
|
|
t.Fatalf("expected exactly 1 audit call, got %d: %+v", len(calls), calls)
|
|
}
|
|
if calls[0].Err == nil {
|
|
t.Errorf("audit call.Err was nil; expected the gate error")
|
|
}
|
|
if calls[0].Args != "{}" {
|
|
t.Errorf("audit call.Args=%q, want \"{}\" (no args parsed pre-gate)", calls[0].Args)
|
|
}
|
|
}
|
|
|
|
// TestNewGatedTool_HappyPath verifies the wrapper passes args to fn,
|
|
// returns fn's result, and emits a successful audit row with the
|
|
// re-marshaled args.
|
|
func TestNewGatedTool_HappyPath(t *testing.T) {
|
|
var seen gatedTestParams
|
|
var seenInv Invocation
|
|
|
|
tool := NewGatedTool[gatedTestParams](
|
|
"gated_happy_tool",
|
|
"A test tool with no gate.",
|
|
Permission{
|
|
AuthoringRequirement: RequirementAnyone,
|
|
OperatesOn: ScopeGlobal,
|
|
SafeForShare: true,
|
|
},
|
|
func(ctx context.Context, inv Invocation, args gatedTestParams) (string, error) {
|
|
seen = args
|
|
seenInv = inv
|
|
return "answered: " + args.Question, nil
|
|
},
|
|
)
|
|
|
|
rec := &recordingAudit{}
|
|
out, err := buildAndExecute(t, tool,
|
|
Invocation{SkillName: "any-skill", CallerID: "user-7"},
|
|
VisibilityPrivate, rec.hook(),
|
|
`{"question":"what is the time?","detail":"verbose"}`)
|
|
|
|
if err != nil {
|
|
t.Fatalf("execute: %v", err)
|
|
}
|
|
if out != "answered: what is the time?" {
|
|
t.Errorf("unexpected output: %q", out)
|
|
}
|
|
if seen.Question != "what is the time?" || seen.Detail != "verbose" {
|
|
t.Errorf("fn received %+v, want question/detail populated", seen)
|
|
}
|
|
if seenInv.CallerID != "user-7" {
|
|
t.Errorf("fn saw CallerID=%q, want user-7", seenInv.CallerID)
|
|
}
|
|
|
|
calls := rec.snapshot()
|
|
if len(calls) != 1 {
|
|
t.Fatalf("expected exactly 1 audit call, got %d", len(calls))
|
|
}
|
|
if calls[0].Err != nil {
|
|
t.Errorf("audit call.Err=%v, want nil", calls[0].Err)
|
|
}
|
|
if calls[0].Result != "answered: what is the time?" {
|
|
t.Errorf("audit call.Result=%q, want match output", calls[0].Result)
|
|
}
|
|
// The wrapper re-marshals the args — verify the JSON is well-formed
|
|
// and contains the expected fields.
|
|
var argsBack gatedTestParams
|
|
if err := json.Unmarshal([]byte(calls[0].Args), &argsBack); err != nil {
|
|
t.Fatalf("audit args not valid JSON: %q (%v)", calls[0].Args, err)
|
|
}
|
|
if argsBack.Question != "what is the time?" || argsBack.Detail != "verbose" {
|
|
t.Errorf("audit args round-trip mismatch: %+v", argsBack)
|
|
}
|
|
}
|
|
|
|
// TestNewGatedTool_FnError verifies the wrapper surfaces fn's error
|
|
// AND captures the partial result + error in the audit row.
|
|
func TestNewGatedTool_FnError(t *testing.T) {
|
|
tool := NewGatedTool[gatedTestParams](
|
|
"gated_fn_err_tool",
|
|
"A test tool whose handler always errors.",
|
|
Permission{
|
|
AuthoringRequirement: RequirementAnyone,
|
|
OperatesOn: ScopeGlobal,
|
|
SafeForShare: true,
|
|
},
|
|
func(ctx context.Context, inv Invocation, args gatedTestParams) (string, error) {
|
|
return "partial output", errors.New("boom")
|
|
},
|
|
)
|
|
|
|
rec := &recordingAudit{}
|
|
out, err := buildAndExecute(t, tool,
|
|
Invocation{SkillName: "any-skill"},
|
|
VisibilityPrivate, rec.hook(),
|
|
`{"question":"x"}`)
|
|
|
|
// llm.Define's Execute returns ("", err) when the handler returns a
|
|
// non-nil error — out is dropped on the LLM side. But the wrapper's
|
|
// audit row should still capture both partial result + error.
|
|
if err == nil || !strings.Contains(err.Error(), "boom") {
|
|
t.Fatalf("expected boom error, got out=%q err=%v", out, err)
|
|
}
|
|
|
|
calls := rec.snapshot()
|
|
if len(calls) != 1 {
|
|
t.Fatalf("expected exactly 1 audit call, got %d", len(calls))
|
|
}
|
|
if calls[0].Err == nil || !strings.Contains(calls[0].Err.Error(), "boom") {
|
|
t.Errorf("audit call.Err=%v, want boom", calls[0].Err)
|
|
}
|
|
if calls[0].Result != "partial output" {
|
|
t.Errorf("audit call.Result=%q, want 'partial output' (partial captured)", calls[0].Result)
|
|
}
|
|
}
|
|
|
|
// TestNewGatedTool_ArgsParseHandledByLLM_NoAuditEmitted documents the
|
|
// behaviour at the wrapper boundary: when the LLM sends malformed JSON
|
|
// args, llm.Define's Execute fails BEFORE the wrapper's inner closure
|
|
// runs. The wrapper does NOT emit an audit row in that case — it never
|
|
// got the chance. This is intentional: arg-parse failure is a
|
|
// tool-call wiring problem, not a tool-handler problem; the audit log
|
|
// reflects what the handler did, and on parse failure no handler ran.
|
|
//
|
|
// The test exists so future readers see this invariant documented in
|
|
// code and don't re-introduce a "log everything" path that breaks the
|
|
// wrapper's contract with the audit storage layer.
|
|
func TestNewGatedTool_ArgsParseHandledByLLM_NoAuditEmitted(t *testing.T) {
|
|
tool := NewGatedTool[gatedTestParams](
|
|
"gated_parse_err_tool",
|
|
"A test tool that should never receive bad JSON.",
|
|
Permission{
|
|
AuthoringRequirement: RequirementAnyone,
|
|
OperatesOn: ScopeGlobal,
|
|
SafeForShare: true,
|
|
},
|
|
func(ctx context.Context, inv Invocation, args gatedTestParams) (string, error) {
|
|
t.Fatalf("fn ran despite malformed JSON — should never happen")
|
|
return "", nil
|
|
},
|
|
)
|
|
|
|
rec := &recordingAudit{}
|
|
_, err := buildAndExecute(t, tool,
|
|
Invocation{SkillName: "any-skill"},
|
|
VisibilityPrivate, rec.hook(),
|
|
`{"question":not-quoted}`) // intentionally malformed
|
|
|
|
if err == nil {
|
|
t.Fatalf("expected JSON parse error, got nil")
|
|
}
|
|
if calls := rec.snapshot(); len(calls) != 0 {
|
|
t.Errorf("audit emitted %d calls on parse error; expected 0 (parse-fail is pre-handler)", len(calls))
|
|
}
|
|
}
|
|
|
|
// TestIsGatedTool_DetectsWrapped confirms that NewGatedTool's return
|
|
// value satisfies the gatedToolMarker interface so the meta-test can
|
|
// distinguish wrapped from unwrapped tools.
|
|
func TestIsGatedTool_DetectsWrapped(t *testing.T) {
|
|
tool := NewGatedTool[gatedTestParams](
|
|
"gated_marker_tool", "marker test",
|
|
Permission{AuthoringRequirement: RequirementAnyone},
|
|
func(ctx context.Context, inv Invocation, args gatedTestParams) (string, error) {
|
|
return "", nil
|
|
},
|
|
)
|
|
if !IsGatedTool(tool) {
|
|
t.Fatalf("IsGatedTool returned false for a NewGatedTool result")
|
|
}
|
|
}
|
|
|
|
// TestIsGatedTool_DetectsNonWrapped is the negative half of the
|
|
// detection test: a hand-rolled Tool that does NOT go through
|
|
// NewGatedTool must fail IsGatedTool. This guards the meta-test
|
|
// against trivially passing for everything.
|
|
func TestIsGatedTool_DetectsNonWrapped(t *testing.T) {
|
|
stub := manualToolStub{}
|
|
if IsGatedTool(stub) {
|
|
t.Fatalf("IsGatedTool returned true for a non-wrapped Tool — detection broken")
|
|
}
|
|
}
|
|
|
|
// manualToolStub satisfies skilltools.Tool by hand without going
|
|
// through NewGatedTool. Used only to prove IsGatedTool rejects
|
|
// non-wrapped implementations.
|
|
type manualToolStub struct{}
|
|
|
|
func (manualToolStub) Name() string { return "manual_stub" }
|
|
func (manualToolStub) Description() string { return "manual stub" }
|
|
func (manualToolStub) Permission() Permission { return Permission{} }
|
|
func (manualToolStub) BuildLLM(Invocation) llm.Tool {
|
|
type p struct{}
|
|
return llm.DefineTool("manual_stub", "manual stub",
|
|
func(ctx context.Context, _ p) (any, error) { return "", nil })
|
|
}
|
|
|
|
// TestNewGatedToolWithAudit_RedactsAuditResult covers the variant used
|
|
// by paste_create: the LLM receives a sensitive string (e.g. URL with
|
|
// fragment-encoded key) but the audit row records only a redacted
|
|
// summary. Confirms LLMResult ↔ AuditResult separation works.
|
|
func TestNewGatedToolWithAudit_RedactsAuditResult(t *testing.T) {
|
|
tool := NewGatedToolWithAudit[gatedTestParams](
|
|
"audited_tool",
|
|
"A tool whose audit result is redacted from its LLM result.",
|
|
Permission{AuthoringRequirement: RequirementAnyone, SafeForShare: true},
|
|
func(ctx context.Context, inv Invocation, args gatedTestParams) (AuditedResult, error) {
|
|
return AuditedResult{
|
|
LLMResult: "secret-fragment-12345",
|
|
AuditArgs: "redacted",
|
|
AuditResult: "[redacted]",
|
|
}, nil
|
|
},
|
|
)
|
|
if !IsGatedTool(tool) {
|
|
t.Fatalf("audited variant must satisfy IsGatedTool")
|
|
}
|
|
|
|
rec := &recordingAudit{}
|
|
out, err := buildAndExecute(t, tool,
|
|
Invocation{SkillName: "any"},
|
|
VisibilityPrivate, rec.hook(),
|
|
`{"question":"x"}`)
|
|
if err != nil {
|
|
t.Fatalf("execute: %v", err)
|
|
}
|
|
if out != "secret-fragment-12345" {
|
|
t.Errorf("LLM saw %q, want secret-fragment-12345", out)
|
|
}
|
|
calls := rec.snapshot()
|
|
if len(calls) != 1 {
|
|
t.Fatalf("expected 1 audit call, got %d", len(calls))
|
|
}
|
|
if calls[0].Args != "redacted" {
|
|
t.Errorf("audit args=%q, want redacted", calls[0].Args)
|
|
}
|
|
if calls[0].Result != "[redacted]" {
|
|
t.Errorf("audit result=%q, want [redacted]", calls[0].Result)
|
|
}
|
|
if strings.Contains(calls[0].Result, "secret-fragment-12345") {
|
|
t.Fatalf("audit leaked LLM result into Result field: %q", calls[0].Result)
|
|
}
|
|
}
|
|
|
|
// TestNewGatedToolWithAudit_GateRejection mirrors the gate-rejection
|
|
// test for the default wrapper to anchor the same contract for the
|
|
// audited variant.
|
|
func TestNewGatedToolWithAudit_GateRejection(t *testing.T) {
|
|
tool := NewGatedToolWithAudit[gatedTestParams](
|
|
"audited_gated_tool", "gated tool",
|
|
Permission{
|
|
AuthoringRequirement: RequirementAnyone,
|
|
SkillNameGate: "my-skill",
|
|
},
|
|
func(ctx context.Context, inv Invocation, args gatedTestParams) (AuditedResult, error) {
|
|
t.Fatalf("fn should not run on gate rejection")
|
|
return AuditedResult{}, nil
|
|
},
|
|
)
|
|
rec := &recordingAudit{}
|
|
_, err := buildAndExecute(t, tool,
|
|
Invocation{SkillName: "other"},
|
|
VisibilityPrivate, rec.hook(),
|
|
`{}`)
|
|
if err == nil || !strings.Contains(err.Error(), "restricted to") {
|
|
t.Fatalf("expected gate rejection, got %v", err)
|
|
}
|
|
calls := rec.snapshot()
|
|
if len(calls) != 1 || calls[0].Err == nil {
|
|
t.Fatalf("expected gate-rejection audit row, got %+v", calls)
|
|
}
|
|
}
|
|
|
|
// TestNewGatedToolWithAudit_FallbackArgs verifies that an empty
|
|
// AuditArgs falls back to the JSON-marshaled typed args (matching the
|
|
// default wrapper's behaviour).
|
|
func TestNewGatedToolWithAudit_FallbackArgs(t *testing.T) {
|
|
tool := NewGatedToolWithAudit[gatedTestParams](
|
|
"audited_fallback_tool", "fallback args test",
|
|
Permission{AuthoringRequirement: RequirementAnyone},
|
|
func(ctx context.Context, inv Invocation, args gatedTestParams) (AuditedResult, error) {
|
|
return AuditedResult{
|
|
LLMResult: "ok",
|
|
AuditResult: "ok",
|
|
// AuditArgs intentionally empty
|
|
}, nil
|
|
},
|
|
)
|
|
rec := &recordingAudit{}
|
|
_, err := buildAndExecute(t, tool,
|
|
Invocation{SkillName: "x"},
|
|
VisibilityPrivate, rec.hook(),
|
|
`{"question":"hi"}`)
|
|
if err != nil {
|
|
t.Fatalf("execute: %v", err)
|
|
}
|
|
calls := rec.snapshot()
|
|
if len(calls) != 1 {
|
|
t.Fatalf("expected 1 audit call, got %d", len(calls))
|
|
}
|
|
if !strings.Contains(calls[0].Args, "hi") {
|
|
t.Errorf("expected fallback to JSON args containing 'hi', got %q", calls[0].Args)
|
|
}
|
|
}
|