// executus/tool/gated_tool_test.go

package tool

import (
	"context"
	"encoding/json"
	"errors"
	"strings"
	"sync"
	"testing"

	llm "gitea.stevedudenhoeffer.com/steve/majordomo/llm"
)

// gatedTestParams is a typed param struct used by the gated_tool tests.
// Mirrors a real production tool: a couple of strings the LLM supplies.
//
// The tests feed JSON argument strings such as `{"question":"hi"}` into
// the tool and decode audit-row args back into this type, so the json
// tags below define the wire names on both sides.
type gatedTestParams struct {
	// Question is encoded under the JSON key "question".
	// NOTE(review): the description tag is presumably read by the tool
	// schema generator — confirm against the wrapper implementation.
	Question string `json:"question" description:"The question to answer."`
	// Detail is encoded under the JSON key "detail" and is dropped from
	// the marshaled output when empty (omitempty).
	Detail   string `json:"detail,omitempty" description:"Optional detail level."`
}

// recordingAudit captures every AuditCall the wrapper emits so tests
// can assert exactly what the wrapper logged. Concurrent-safe in case a
// future test parallelises across goroutines.
//
// Use hook() to obtain the AuditHook to hand to the registry and
// snapshot() to read back what was recorded.
type recordingAudit struct {
	// mu guards calls; both hook's callback and snapshot take it.
	mu    sync.Mutex
	// calls holds the recorded AuditCall values in arrival order.
	calls []AuditCall
}

// hook returns an AuditHook bound to r. Each AuditCall passed to the
// returned hook is appended to r.calls while r.mu is held.
func (r *recordingAudit) hook() AuditHook {
	record := func(entry AuditCall) {
		r.mu.Lock()
		r.calls = append(r.calls, entry)
		r.mu.Unlock()
	}
	return record
}

// snapshot returns a copy of the AuditCalls recorded so far, taken while
// holding r.mu. The returned slice does not share its backing array
// with r.calls, so later recordings never alter it.
func (r *recordingAudit) snapshot() []AuditCall {
	r.mu.Lock()
	defer r.mu.Unlock()
	// Always non-nil with len == cap == len(r.calls).
	return append(make([]AuditCall, 0, len(r.calls)), r.calls...)
}

// buildAndExecute is the shared test driver: it registers tool in a
// fresh registry, builds a box containing only that tool for the given
// invocation, visibility and audit hook, and executes a single call
// carrying args as the raw argument string. The output and error of
// that call are returned untouched; a registration or build failure
// aborts the calling test.
func buildAndExecute(t *testing.T, tool Tool, inv Invocation, vis Visibility, audit AuditHook, args string) (string, error) {
	t.Helper()

	registry := NewRegistry()
	if regErr := registry.Register(tool); regErr != nil {
		t.Fatalf("register: %v", regErr)
	}

	wanted := []string{tool.Name()}
	toolbox, buildErr := registry.Build(wanted, inv, vis, audit)
	if buildErr != nil {
		t.Fatalf("build: %v", buildErr)
	}

	call := toolCall{Name: tool.Name(), Arguments: args}
	return execBox(toolbox, call)
}

// TestNewGatedTool_GateRejection verifies that the wrapper auto-injects
// CheckGate: if the invocation's SkillName doesn't match the tool's
// SkillNameGate, fn never runs and the audit row is emitted with the
// gate error. This is the core contract that v1 hotfix #4 had to
// retrofit by hand.
func TestNewGatedTool_GateRejection(t *testing.T) {
	called := false
	tool := NewGatedTool[gatedTestParams](
		"gated_test_tool",
		"A test tool gated to my-skill.",
		Permission{
			AuthoringRequirement: RequirementAnyone,
			OperatesOn:           ScopeGlobal,
			SafeForShare:         true,
			SkillNameGate:        "my-skill",
		},
		func(ctx context.Context, inv Invocation, args gatedTestParams) (string, error) {
			called = true
			return "should not be reached", nil
		},
	)

	rec := &recordingAudit{}
	out, err := buildAndExecute(t, tool,
		Invocation{SkillName: "other-skill"},
		VisibilityPrivate, rec.hook(),
		`{"question":"hi"}`)

	if err == nil {
		t.Fatalf("expected gate-rejection error, got out=%q err=nil", out)
	}
	if !strings.Contains(err.Error(), "restricted to") {
		t.Fatalf("expected error containing 'restricted to', got %v", err)
	}
	if called {
		t.Errorf("fn was called despite gate rejection — wrapper failed to inject CheckGate")
	}

	calls := rec.snapshot()
	if len(calls) != 1 {
		t.Fatalf("expected exactly 1 audit call, got %d: %+v", len(calls), calls)
	}
	if calls[0].Err == nil {
		t.Errorf("audit call.Err was nil; expected the gate error")
	}
	if calls[0].Args != "{}" {
		t.Errorf("audit call.Args=%q, want \"{}\" (no args parsed pre-gate)", calls[0].Args)
	}
}

// TestNewGatedTool_HappyPath covers the ungated success path: the
// handler must receive the decoded args and the caller's Invocation,
// its return value must come back as the call output, and a single
// audit row must be recorded with a nil Err, the same result, and args
// that decode back to the values the caller sent.
func TestNewGatedTool_HappyPath(t *testing.T) {
	const wantOut = "answered: what is the time?"
	wantArgs := gatedTestParams{Question: "what is the time?", Detail: "verbose"}

	var (
		gotArgs gatedTestParams
		gotInv  Invocation
	)
	handler := func(ctx context.Context, inv Invocation, args gatedTestParams) (string, error) {
		gotArgs, gotInv = args, inv
		return "answered: " + args.Question, nil
	}
	perm := Permission{
		AuthoringRequirement: RequirementAnyone,
		OperatesOn:           ScopeGlobal,
		SafeForShare:         true,
	}
	ungated := NewGatedTool[gatedTestParams]("gated_happy_tool", "A test tool with no gate.", perm, handler)

	audit := &recordingAudit{}
	caller := Invocation{SkillName: "any-skill", CallerID: "user-7"}
	out, err := buildAndExecute(t, ungated, caller, VisibilityPrivate, audit.hook(),
		`{"question":"what is the time?","detail":"verbose"}`)

	if err != nil {
		t.Fatalf("execute: %v", err)
	}
	if out != wantOut {
		t.Errorf("unexpected output: %q", out)
	}
	if gotArgs != wantArgs {
		t.Errorf("fn received %+v, want question/detail populated", gotArgs)
	}
	if id := gotInv.CallerID; id != "user-7" {
		t.Errorf("fn saw CallerID=%q, want user-7", id)
	}

	rows := audit.snapshot()
	if got := len(rows); got != 1 {
		t.Fatalf("expected exactly 1 audit call, got %d", got)
	}
	row := rows[0]
	if row.Err != nil {
		t.Errorf("audit call.Err=%v, want nil", row.Err)
	}
	if row.Result != wantOut {
		t.Errorf("audit call.Result=%q, want match output", row.Result)
	}

	// The audit row stores args as a string; decode it back to confirm
	// it is well-formed JSON that carries both fields.
	var decoded gatedTestParams
	if jsonErr := json.Unmarshal([]byte(row.Args), &decoded); jsonErr != nil {
		t.Fatalf("audit args not valid JSON: %q (%v)", row.Args, jsonErr)
	}
	if decoded != wantArgs {
		t.Errorf("audit args round-trip mismatch: %+v", decoded)
	}
}

// TestNewGatedTool_FnError verifies the wrapper surfaces fn's error
// AND captures the partial result + error in the audit row.
func TestNewGatedTool_FnError(t *testing.T) {
	tool := NewGatedTool[gatedTestParams](
		"gated_fn_err_tool",
		"A test tool whose handler always errors.",
		Permission{
			AuthoringRequirement: RequirementAnyone,
			OperatesOn:           ScopeGlobal,
			SafeForShare:         true,
		},
		func(ctx context.Context, inv Invocation, args gatedTestParams) (string, error) {
			return "partial output", errors.New("boom")
		},
	)

	rec := &recordingAudit{}
	out, err := buildAndExecute(t, tool,
		Invocation{SkillName: "any-skill"},
		VisibilityPrivate, rec.hook(),
		`{"question":"x"}`)

	// llm.Define's Execute returns ("", err) when the handler returns a
	// non-nil error — out is dropped on the LLM side. But the wrapper's
	// audit row should still capture both partial result + error.
	if err == nil || !strings.Contains(err.Error(), "boom") {
		t.Fatalf("expected boom error, got out=%q err=%v", out, err)
	}

	calls := rec.snapshot()
	if len(calls) != 1 {
		t.Fatalf("expected exactly 1 audit call, got %d", len(calls))
	}
	if calls[0].Err == nil || !strings.Contains(calls[0].Err.Error(), "boom") {
		t.Errorf("audit call.Err=%v, want boom", calls[0].Err)
	}
	if calls[0].Result != "partial output" {
		t.Errorf("audit call.Result=%q, want 'partial output' (partial captured)", calls[0].Result)
	}
}

// TestNewGatedTool_ArgsParseHandledByLLM_NoAuditEmitted documents the
// behaviour at the wrapper boundary: when the LLM sends malformed JSON
// args, llm.Define's Execute fails BEFORE the wrapper's inner closure
// runs. The wrapper does NOT emit an audit row in that case — it never
// got the chance. This is intentional: arg-parse failure is a
// tool-call wiring problem, not a tool-handler problem; the audit log
// reflects what the handler did, and on parse failure no handler ran.
//
// The test exists so future readers see this invariant documented in
// code and don't re-introduce a "log everything" path that breaks the
// wrapper's contract with the audit storage layer.
func TestNewGatedTool_ArgsParseHandledByLLM_NoAuditEmitted(t *testing.T) {
	tool := NewGatedTool[gatedTestParams](
		"gated_parse_err_tool",
		"A test tool that should never receive bad JSON.",
		Permission{
			AuthoringRequirement: RequirementAnyone,
			OperatesOn:           ScopeGlobal,
			SafeForShare:         true,
		},
		func(ctx context.Context, inv Invocation, args gatedTestParams) (string, error) {
			// Errorf rather than Fatalf: FailNow is only valid on the
			// goroutine running the test function, and this test does
			// not control which goroutine the tool machinery uses to
			// invoke the handler. Errorf is safe from any goroutine.
			t.Errorf("fn ran despite malformed JSON — should never happen")
			return "", nil
		},
	)

	rec := &recordingAudit{}
	_, err := buildAndExecute(t, tool,
		Invocation{SkillName: "any-skill"},
		VisibilityPrivate, rec.hook(),
		`{"question":not-quoted}`) // intentionally malformed

	if err == nil {
		t.Fatalf("expected JSON parse error, got nil")
	}
	if calls := rec.snapshot(); len(calls) != 0 {
		t.Errorf("audit emitted %d calls on parse error; expected 0 (parse-fail is pre-handler)", len(calls))
	}
}

// TestIsGatedTool_DetectsWrapped confirms that NewGatedTool's return
// value satisfies the gatedToolMarker interface so the meta-test can
// distinguish wrapped from unwrapped tools.
func TestIsGatedTool_DetectsWrapped(t *testing.T) {
	tool := NewGatedTool[gatedTestParams](
		"gated_marker_tool", "marker test",
		Permission{AuthoringRequirement: RequirementAnyone},
		func(ctx context.Context, inv Invocation, args gatedTestParams) (string, error) {
			return "", nil
		},
	)
	if !IsGatedTool(tool) {
		t.Fatalf("IsGatedTool returned false for a NewGatedTool result")
	}
}

// TestIsGatedTool_DetectsNonWrapped is the negative half of the
// detection test: a hand-rolled Tool that does NOT go through
// NewGatedTool must fail IsGatedTool. This guards the meta-test
// against trivially passing for everything.
func TestIsGatedTool_DetectsNonWrapped(t *testing.T) {
	stub := manualToolStub{}
	if IsGatedTool(stub) {
		t.Fatalf("IsGatedTool returned true for a non-wrapped Tool — detection broken")
	}
}

// manualToolStub is a hand-written Tool that deliberately bypasses
// NewGatedTool. Its only purpose is to give IsGatedTool a non-wrapped
// implementation to reject.
type manualToolStub struct{}

// Name returns the fixed tool name "manual_stub".
func (manualToolStub) Name() string {
	return "manual_stub"
}

// Description returns the fixed description "manual stub".
func (manualToolStub) Description() string {
	return "manual stub"
}

// Permission returns the zero-value Permission.
func (manualToolStub) Permission() Permission {
	return Permission{}
}

// BuildLLM ignores the Invocation and returns an llm.Tool whose handler
// takes an empty parameter struct and always yields ("", nil).
func (manualToolStub) BuildLLM(Invocation) llm.Tool {
	type p struct{}
	noop := func(ctx context.Context, _ p) (any, error) {
		return "", nil
	}
	return llm.DefineTool("manual_stub", "manual stub", noop)
}

// TestNewGatedToolWithAudit_RedactsAuditResult covers the variant used
// by paste_create: the LLM receives a sensitive string (e.g. URL with
// fragment-encoded key) but the audit row records only a redacted
// summary. Confirms LLMResult ↔ AuditResult separation works.
func TestNewGatedToolWithAudit_RedactsAuditResult(t *testing.T) {
	tool := NewGatedToolWithAudit[gatedTestParams](
		"audited_tool",
		"A tool whose audit result is redacted from its LLM result.",
		Permission{AuthoringRequirement: RequirementAnyone, SafeForShare: true},
		func(ctx context.Context, inv Invocation, args gatedTestParams) (AuditedResult, error) {
			return AuditedResult{
				LLMResult:   "secret-fragment-12345",
				AuditArgs:   "redacted",
				AuditResult: "[redacted]",
			}, nil
		},
	)
	if !IsGatedTool(tool) {
		t.Fatalf("audited variant must satisfy IsGatedTool")
	}

	rec := &recordingAudit{}
	out, err := buildAndExecute(t, tool,
		Invocation{SkillName: "any"},
		VisibilityPrivate, rec.hook(),
		`{"question":"x"}`)
	if err != nil {
		t.Fatalf("execute: %v", err)
	}
	if out != "secret-fragment-12345" {
		t.Errorf("LLM saw %q, want secret-fragment-12345", out)
	}
	calls := rec.snapshot()
	if len(calls) != 1 {
		t.Fatalf("expected 1 audit call, got %d", len(calls))
	}
	if calls[0].Args != "redacted" {
		t.Errorf("audit args=%q, want redacted", calls[0].Args)
	}
	if calls[0].Result != "[redacted]" {
		t.Errorf("audit result=%q, want [redacted]", calls[0].Result)
	}
	if strings.Contains(calls[0].Result, "secret-fragment-12345") {
		t.Fatalf("audit leaked LLM result into Result field: %q", calls[0].Result)
	}
}

// TestNewGatedToolWithAudit_GateRejection mirrors the gate-rejection
// test for the default wrapper to anchor the same contract for the
// audited variant: a tool gated to "my-skill" invoked under SkillName
// "other" must fail with an error mentioning "restricted to", must not
// run its handler, and must emit exactly one audit row with a non-nil
// Err.
func TestNewGatedToolWithAudit_GateRejection(t *testing.T) {
	tool := NewGatedToolWithAudit[gatedTestParams](
		"audited_gated_tool", "gated tool",
		Permission{
			AuthoringRequirement: RequirementAnyone,
			SkillNameGate:        "my-skill",
		},
		func(ctx context.Context, inv Invocation, args gatedTestParams) (AuditedResult, error) {
			// Errorf rather than Fatalf: FailNow is only valid on the
			// goroutine running the test function, and this test does
			// not control which goroutine the tool machinery uses to
			// invoke the handler. Errorf is safe from any goroutine.
			t.Errorf("fn should not run on gate rejection")
			return AuditedResult{}, nil
		},
	)
	rec := &recordingAudit{}
	_, err := buildAndExecute(t, tool,
		Invocation{SkillName: "other"},
		VisibilityPrivate, rec.hook(),
		`{}`)
	if err == nil || !strings.Contains(err.Error(), "restricted to") {
		t.Fatalf("expected gate rejection, got %v", err)
	}
	calls := rec.snapshot()
	if len(calls) != 1 || calls[0].Err == nil {
		t.Fatalf("expected gate-rejection audit row, got %+v", calls)
	}
}

// TestNewGatedToolWithAudit_FallbackArgs verifies that an empty
// AuditArgs falls back to the JSON-marshaled typed args (matching the
// default wrapper's behaviour). As in TestNewGatedTool_HappyPath, the
// recorded args are decoded back into gatedTestParams instead of being
// substring-matched, so a non-JSON fallback that merely happens to
// contain "hi" cannot satisfy the test.
func TestNewGatedToolWithAudit_FallbackArgs(t *testing.T) {
	tool := NewGatedToolWithAudit[gatedTestParams](
		"audited_fallback_tool", "fallback args test",
		Permission{AuthoringRequirement: RequirementAnyone},
		func(ctx context.Context, inv Invocation, args gatedTestParams) (AuditedResult, error) {
			return AuditedResult{
				LLMResult:   "ok",
				AuditResult: "ok",
				// AuditArgs intentionally empty
			}, nil
		},
	)
	rec := &recordingAudit{}
	_, err := buildAndExecute(t, tool,
		Invocation{SkillName: "x"},
		VisibilityPrivate, rec.hook(),
		`{"question":"hi"}`)
	if err != nil {
		t.Fatalf("execute: %v", err)
	}
	calls := rec.snapshot()
	if len(calls) != 1 {
		t.Fatalf("expected 1 audit call, got %d", len(calls))
	}

	// Round-trip the recorded args: they must be well-formed JSON whose
	// question field carries the value the caller supplied.
	var argsBack gatedTestParams
	if err := json.Unmarshal([]byte(calls[0].Args), &argsBack); err != nil {
		t.Fatalf("fallback audit args not valid JSON: %q (%v)", calls[0].Args, err)
	}
	if argsBack.Question != "hi" {
		t.Errorf("expected fallback to JSON args containing 'hi', got %q", calls[0].Args)
	}
}