package tool import ( "context" "encoding/json" "errors" "strings" "sync" "testing" llm "gitea.stevedudenhoeffer.com/steve/majordomo/llm" ) // gatedTestParams is a typed param struct used by the gated_tool tests. // Mirrors a real production tool: a couple of strings the LLM supplies. type gatedTestParams struct { Question string `json:"question" description:"The question to answer."` Detail string `json:"detail,omitempty" description:"Optional detail level."` } // recordingAudit captures every AuditCall the wrapper emits so tests // can assert exactly what the wrapper logged. Concurrent-safe in case a // future test parallelises across goroutines. type recordingAudit struct { mu sync.Mutex calls []AuditCall } func (r *recordingAudit) hook() AuditHook { return func(call AuditCall) { r.mu.Lock() defer r.mu.Unlock() r.calls = append(r.calls, call) } } func (r *recordingAudit) snapshot() []AuditCall { r.mu.Lock() defer r.mu.Unlock() out := make([]AuditCall, len(r.calls)) copy(out, r.calls) return out } // buildAndExecute is the test-only convenience for going from a // constructed Tool to an llm.Tool result. Mirrors how the production // registry's Build call wires inv.gate / inv.audit. func buildAndExecute(t *testing.T, tool Tool, inv Invocation, vis Visibility, audit AuditHook, args string) (string, error) { t.Helper() r := NewRegistry() if err := r.Register(tool); err != nil { t.Fatalf("register: %v", err) } box, err := r.Build([]string{tool.Name()}, inv, vis, audit) if err != nil { t.Fatalf("build: %v", err) } return execBox(box, toolCall{Name: tool.Name(), Arguments: args}) } // TestNewGatedTool_GateRejection verifies that the wrapper auto-injects // CheckGate: if the invocation's SkillName doesn't match the tool's // SkillNameGate, fn never runs and the audit row is emitted with the // gate error. This is the core contract that v1 hotfix #4 had to // retrofit by hand. func TestNewGatedTool_GateRejection(t *testing.T) { called := false tool := NewGatedTool[gatedTestParams]( "gated_test_tool", "A test tool gated to my-skill.", Permission{ AuthoringRequirement: RequirementAnyone, OperatesOn: ScopeGlobal, SafeForShare: true, SkillNameGate: "my-skill", }, func(ctx context.Context, inv Invocation, args gatedTestParams) (string, error) { called = true return "should not be reached", nil }, ) rec := &recordingAudit{} out, err := buildAndExecute(t, tool, Invocation{SkillName: "other-skill"}, VisibilityPrivate, rec.hook(), `{"question":"hi"}`) if err == nil { t.Fatalf("expected gate-rejection error, got out=%q err=nil", out) } if !strings.Contains(err.Error(), "restricted to") { t.Fatalf("expected error containing 'restricted to', got %v", err) } if called { t.Errorf("fn was called despite gate rejection — wrapper failed to inject CheckGate") } calls := rec.snapshot() if len(calls) != 1 { t.Fatalf("expected exactly 1 audit call, got %d: %+v", len(calls), calls) } if calls[0].Err == nil { t.Errorf("audit call.Err was nil; expected the gate error") } if calls[0].Args != "{}" { t.Errorf("audit call.Args=%q, want \"{}\" (no args parsed pre-gate)", calls[0].Args) } } // TestNewGatedTool_HappyPath verifies the wrapper passes args to fn, // returns fn's result, and emits a successful audit row with the // re-marshaled args. func TestNewGatedTool_HappyPath(t *testing.T) { var seen gatedTestParams var seenInv Invocation tool := NewGatedTool[gatedTestParams]( "gated_happy_tool", "A test tool with no gate.", Permission{ AuthoringRequirement: RequirementAnyone, OperatesOn: ScopeGlobal, SafeForShare: true, }, func(ctx context.Context, inv Invocation, args gatedTestParams) (string, error) { seen = args seenInv = inv return "answered: " + args.Question, nil }, ) rec := &recordingAudit{} out, err := buildAndExecute(t, tool, Invocation{SkillName: "any-skill", CallerID: "user-7"}, VisibilityPrivate, rec.hook(), `{"question":"what is the time?","detail":"verbose"}`) if err != nil { t.Fatalf("execute: %v", err) } if out != "answered: what is the time?" { t.Errorf("unexpected output: %q", out) } if seen.Question != "what is the time?" || seen.Detail != "verbose" { t.Errorf("fn received %+v, want question/detail populated", seen) } if seenInv.CallerID != "user-7" { t.Errorf("fn saw CallerID=%q, want user-7", seenInv.CallerID) } calls := rec.snapshot() if len(calls) != 1 { t.Fatalf("expected exactly 1 audit call, got %d", len(calls)) } if calls[0].Err != nil { t.Errorf("audit call.Err=%v, want nil", calls[0].Err) } if calls[0].Result != "answered: what is the time?" { t.Errorf("audit call.Result=%q, want match output", calls[0].Result) } // The wrapper re-marshals the args — verify the JSON is well-formed // and contains the expected fields. var argsBack gatedTestParams if err := json.Unmarshal([]byte(calls[0].Args), &argsBack); err != nil { t.Fatalf("audit args not valid JSON: %q (%v)", calls[0].Args, err) } if argsBack.Question != "what is the time?" || argsBack.Detail != "verbose" { t.Errorf("audit args round-trip mismatch: %+v", argsBack) } } // TestNewGatedTool_FnError verifies the wrapper surfaces fn's error // AND captures the partial result + error in the audit row. func TestNewGatedTool_FnError(t *testing.T) { tool := NewGatedTool[gatedTestParams]( "gated_fn_err_tool", "A test tool whose handler always errors.", Permission{ AuthoringRequirement: RequirementAnyone, OperatesOn: ScopeGlobal, SafeForShare: true, }, func(ctx context.Context, inv Invocation, args gatedTestParams) (string, error) { return "partial output", errors.New("boom") }, ) rec := &recordingAudit{} out, err := buildAndExecute(t, tool, Invocation{SkillName: "any-skill"}, VisibilityPrivate, rec.hook(), `{"question":"x"}`) // llm.Define's Execute returns ("", err) when the handler returns a // non-nil error — out is dropped on the LLM side. But the wrapper's // audit row should still capture both partial result + error. if err == nil || !strings.Contains(err.Error(), "boom") { t.Fatalf("expected boom error, got out=%q err=%v", out, err) } calls := rec.snapshot() if len(calls) != 1 { t.Fatalf("expected exactly 1 audit call, got %d", len(calls)) } if calls[0].Err == nil || !strings.Contains(calls[0].Err.Error(), "boom") { t.Errorf("audit call.Err=%v, want boom", calls[0].Err) } if calls[0].Result != "partial output" { t.Errorf("audit call.Result=%q, want 'partial output' (partial captured)", calls[0].Result) } } // TestNewGatedTool_ArgsParseHandledByLLM_NoAuditEmitted documents the // behaviour at the wrapper boundary: when the LLM sends malformed JSON // args, llm.Define's Execute fails BEFORE the wrapper's inner closure // runs. The wrapper does NOT emit an audit row in that case — it never // got the chance. This is intentional: arg-parse failure is a // tool-call wiring problem, not a tool-handler problem; the audit log // reflects what the handler did, and on parse failure no handler ran. // // The test exists so future readers see this invariant documented in // code and don't re-introduce a "log everything" path that breaks the // wrapper's contract with the audit storage layer. func TestNewGatedTool_ArgsParseHandledByLLM_NoAuditEmitted(t *testing.T) { tool := NewGatedTool[gatedTestParams]( "gated_parse_err_tool", "A test tool that should never receive bad JSON.", Permission{ AuthoringRequirement: RequirementAnyone, OperatesOn: ScopeGlobal, SafeForShare: true, }, func(ctx context.Context, inv Invocation, args gatedTestParams) (string, error) { t.Fatalf("fn ran despite malformed JSON — should never happen") return "", nil }, ) rec := &recordingAudit{} _, err := buildAndExecute(t, tool, Invocation{SkillName: "any-skill"}, VisibilityPrivate, rec.hook(), `{"question":not-quoted}`) // intentionally malformed if err == nil { t.Fatalf("expected JSON parse error, got nil") } if calls := rec.snapshot(); len(calls) != 0 { t.Errorf("audit emitted %d calls on parse error; expected 0 (parse-fail is pre-handler)", len(calls)) } } // TestIsGatedTool_DetectsWrapped confirms that NewGatedTool's return // value satisfies the gatedToolMarker interface so the meta-test can // distinguish wrapped from unwrapped tools. func TestIsGatedTool_DetectsWrapped(t *testing.T) { tool := NewGatedTool[gatedTestParams]( "gated_marker_tool", "marker test", Permission{AuthoringRequirement: RequirementAnyone}, func(ctx context.Context, inv Invocation, args gatedTestParams) (string, error) { return "", nil }, ) if !IsGatedTool(tool) { t.Fatalf("IsGatedTool returned false for a NewGatedTool result") } } // TestIsGatedTool_DetectsNonWrapped is the negative half of the // detection test: a hand-rolled Tool that does NOT go through // NewGatedTool must fail IsGatedTool. This guards the meta-test // against trivially passing for everything. func TestIsGatedTool_DetectsNonWrapped(t *testing.T) { stub := manualToolStub{} if IsGatedTool(stub) { t.Fatalf("IsGatedTool returned true for a non-wrapped Tool — detection broken") } } // manualToolStub satisfies skilltools.Tool by hand without going // through NewGatedTool. Used only to prove IsGatedTool rejects // non-wrapped implementations. type manualToolStub struct{} func (manualToolStub) Name() string { return "manual_stub" } func (manualToolStub) Description() string { return "manual stub" } func (manualToolStub) Permission() Permission { return Permission{} } func (manualToolStub) BuildLLM(Invocation) llm.Tool { type p struct{} return llm.DefineTool("manual_stub", "manual stub", func(ctx context.Context, _ p) (any, error) { return "", nil }) } // TestNewGatedToolWithAudit_RedactsAuditResult covers the variant used // by paste_create: the LLM receives a sensitive string (e.g. URL with // fragment-encoded key) but the audit row records only a redacted // summary. Confirms LLMResult ↔ AuditResult separation works. func TestNewGatedToolWithAudit_RedactsAuditResult(t *testing.T) { tool := NewGatedToolWithAudit[gatedTestParams]( "audited_tool", "A tool whose audit result is redacted from its LLM result.", Permission{AuthoringRequirement: RequirementAnyone, SafeForShare: true}, func(ctx context.Context, inv Invocation, args gatedTestParams) (AuditedResult, error) { return AuditedResult{ LLMResult: "secret-fragment-12345", AuditArgs: "redacted", AuditResult: "[redacted]", }, nil }, ) if !IsGatedTool(tool) { t.Fatalf("audited variant must satisfy IsGatedTool") } rec := &recordingAudit{} out, err := buildAndExecute(t, tool, Invocation{SkillName: "any"}, VisibilityPrivate, rec.hook(), `{"question":"x"}`) if err != nil { t.Fatalf("execute: %v", err) } if out != "secret-fragment-12345" { t.Errorf("LLM saw %q, want secret-fragment-12345", out) } calls := rec.snapshot() if len(calls) != 1 { t.Fatalf("expected 1 audit call, got %d", len(calls)) } if calls[0].Args != "redacted" { t.Errorf("audit args=%q, want redacted", calls[0].Args) } if calls[0].Result != "[redacted]" { t.Errorf("audit result=%q, want [redacted]", calls[0].Result) } if strings.Contains(calls[0].Result, "secret-fragment-12345") { t.Fatalf("audit leaked LLM result into Result field: %q", calls[0].Result) } } // TestNewGatedToolWithAudit_GateRejection mirrors the gate-rejection // test for the default wrapper to anchor the same contract for the // audited variant. func TestNewGatedToolWithAudit_GateRejection(t *testing.T) { tool := NewGatedToolWithAudit[gatedTestParams]( "audited_gated_tool", "gated tool", Permission{ AuthoringRequirement: RequirementAnyone, SkillNameGate: "my-skill", }, func(ctx context.Context, inv Invocation, args gatedTestParams) (AuditedResult, error) { t.Fatalf("fn should not run on gate rejection") return AuditedResult{}, nil }, ) rec := &recordingAudit{} _, err := buildAndExecute(t, tool, Invocation{SkillName: "other"}, VisibilityPrivate, rec.hook(), `{}`) if err == nil || !strings.Contains(err.Error(), "restricted to") { t.Fatalf("expected gate rejection, got %v", err) } calls := rec.snapshot() if len(calls) != 1 || calls[0].Err == nil { t.Fatalf("expected gate-rejection audit row, got %+v", calls) } } // TestNewGatedToolWithAudit_FallbackArgs verifies that an empty // AuditArgs falls back to the JSON-marshaled typed args (matching the // default wrapper's behaviour). func TestNewGatedToolWithAudit_FallbackArgs(t *testing.T) { tool := NewGatedToolWithAudit[gatedTestParams]( "audited_fallback_tool", "fallback args test", Permission{AuthoringRequirement: RequirementAnyone}, func(ctx context.Context, inv Invocation, args gatedTestParams) (AuditedResult, error) { return AuditedResult{ LLMResult: "ok", AuditResult: "ok", // AuditArgs intentionally empty }, nil }, ) rec := &recordingAudit{} _, err := buildAndExecute(t, tool, Invocation{SkillName: "x"}, VisibilityPrivate, rec.hook(), `{"question":"hi"}`) if err != nil { t.Fatalf("execute: %v", err) } calls := rec.snapshot() if len(calls) != 1 { t.Fatalf("expected 1 audit call, got %d", len(calls)) } if !strings.Contains(calls[0].Args, "hi") { t.Errorf("expected fallback to JSON args containing 'hi', got %q", calls[0].Args) } }