package tool import ( "context" "encoding/json" "fmt" llm "gitea.stevedudenhoeffer.com/steve/majordomo/llm" ) // gatedToolMarker is the unexported interface implemented by every Tool // constructed via NewGatedTool. The IsGatedTool helper performs a type // assertion against this marker so the meta-test in default_test.go // (and the wizardtools meta-test) can enforce that every registered // production tool uses the wrapper. // // Why an unexported method (vs a public marker): the goal is to make it // IMPOSSIBLE for an external caller to lie about being gated. Only the // implementation in this file can satisfy the interface, so the // type-assertion in IsGatedTool is a real proof of provenance, not a // pinky-swear from a struct that opts in. type gatedToolMarker interface { isGatedTool() } // gatedTool is the concrete Tool returned by NewGatedTool. It carries // the per-tool metadata (Name/Description/Permission) and the typed // handler closure; BuildLLM wraps the handler with CheckGate + // EmitAudit so tool authors literally cannot forget either call. // // Why generic on Args (vs accepting any-shaped JSON): each tool's // handler is typed against its own param struct. defineTypedTool // derives a JSON schema for the LLM from Args (llm.SchemaFor) and // parses the args before invoking the handler. We re-marshal args to // JSON once for the audit row so the captured shape matches exactly // what the handler ran with (post-coercion). type gatedTool[Args any] struct { name string description string permission Permission fn func(ctx context.Context, inv Invocation, args Args) (string, error) } // isGatedTool implements gatedToolMarker for the meta-test. func (g *gatedTool[Args]) isGatedTool() {} // defineTypedTool builds the majordomo llm.Tool for a typed handler: // schema derived from Args, arguments decoded leniently (string→number/ // boolean coercion preserved from the legacy gollm era — see argcoerce.go) // before the handler runs. 
An unparseable arguments object returns the // decode error WITHOUT running fn, framing arg-parse-error as a // tool-call wiring failure rather than a tool-handler failure. // // Why not majordomo's llm.DefineTool: its decode is strict by design; // mort's tool catalog keeps the lenient dialect for parity with years // of model traffic that emits "3" where the schema says integer. func defineTypedTool[Args any](name, description string, fn func(ctx context.Context, args Args) (string, error)) llm.Tool { schema, err := llm.SchemaFor[Args]() if err != nil { panic(fmt.Sprintf("skilltools: defineTypedTool(%q): %v", name, err)) } return llm.Tool{ Name: name, Description: description, Parameters: schema, Handler: func(ctx context.Context, raw json.RawMessage) (any, error) { var args Args if err := unmarshalArgsLenient(raw, &args); err != nil { return nil, fmt.Errorf("invalid arguments for %s: %w", name, err) } return fn(ctx, args) }, } } // NewGatedTool wraps a typed handler so it automatically: // 1. Calls CheckGate(inv) before the handler runs. On gate rejection // emits EmitAudit(inv, "{}", "", err) and returns the gate error. // 2. Calls fn(ctx, inv, args) once gate passes. // 3. Re-marshals args to JSON for the audit row (so the captured args // reflect any coercion performed during deserialisation), then // emits EmitAudit(inv, argsJSON, result, err) once the handler // returns. // // Production tools SHOULD use NewGatedTool unless they have a strong // reason to handle gating manually. The wrapper exists because the // previous per-tool pattern repeated four lines of boilerplate // (CheckGate at the top, EmitAudit on every return path), and that // boilerplate is easy to forget — wizard tools in v1 hotfix #4 had to // be retrofitted because the author overlooked CheckGate. Centralising // the calls makes them impossible to skip and the meta-test in // tools/default_test.go enforces the discipline. 
// // The typed define layer handles JSON parsing and arg coercion before // fn runs; if the args JSON is unparseable, the decode error is // returned directly (the wrapper's audit emission does NOT fire on // parse error — arg-parse-error is a tool-call wiring failure rather // than a tool-handler failure). // // Test: pkg/skilltools/gated_tool_test.go covers gate rejection, // happy path, fn-returned error, and the IsGatedTool assertion. The // meta-test in pkg/skilltools/tools/default_test.go walks the registry // and asserts every production tool implements gatedToolMarker. func NewGatedTool[Args any]( name, description string, permission Permission, fn func(ctx context.Context, inv Invocation, args Args) (string, error), ) Tool { return &gatedTool[Args]{ name: name, description: description, permission: permission, fn: fn, } } // Name returns the tool's registry key. func (g *gatedTool[Args]) Name() string { return g.name } // Description is shown to the LLM. func (g *gatedTool[Args]) Description() string { return g.description } // Permission classifies the tool for save-time / share-time gating. func (g *gatedTool[Args]) Permission() Permission { return g.permission } // BuildLLM produces the per-invocation llm.Tool. The returned tool's // handler: // - Runs CheckGate(inv) FIRST (before any handler logic). On gate // rejection emits the audit row and returns the gate error. // - Calls the user-supplied fn with the typed args. fn never sees a // gate-rejected invocation. // - Re-marshals args to JSON and emits the audit row exactly once, // regardless of fn's return value (success or error). // // Why re-marshal vs using the raw LLM JSON: the lenient decode performs // numeric/boolean coercion (e.g. "3" → 3) before invoking the handler; // the audit row should reflect what fn actually received, not the // pre-coercion text the LLM emitted. 
func (g *gatedTool[Args]) BuildLLM(inv Invocation) llm.Tool { return defineTypedTool[Args]( g.name, g.description, func(ctx context.Context, args Args) (string, error) { if err := CheckGate(inv); err != nil { EmitAudit(inv, "{}", "", err) return "", err } argsJSON, mErr := json.Marshal(args) if mErr != nil { // Vanishingly rare for the typed param structs in use; // fall back to "{}" so the audit row never carries a // half-formed args field. argsJSON = []byte("{}") } result, err := g.fn(ctx, inv, args) EmitAudit(inv, string(argsJSON), result, err) return result, err }, ) } // IsGatedTool reports whether t was constructed via NewGatedTool / // NewGatedToolWithAudit. Used by the meta-test in // tools/default_test.go to enforce that every registered production // tool uses the wrapper. The check is a type assertion against the // unexported gatedToolMarker interface, so only the gatedTool variants // from this package can satisfy it — there is no way for an external // Tool to pretend to be gated. func IsGatedTool(t Tool) bool { _, ok := t.(gatedToolMarker) return ok } // AuditedResult is what a NewGatedToolWithAudit handler returns: // LLMResult is the string surfaced to the LLM (the tool-call result // the model sees in its conversation); AuditArgs and AuditResult are // what the wrapper logs to the audit row INSTEAD of the auto-derived // values. // // Why a separate variant: a small number of tools (paste_create being // the canonical example) need to return a sensitive value to the LLM // (a URL containing an encryption-key fragment) but MUST redact that // value from the audit row, since the audit row is rendered to admins // in the webui run-trace view. The default wrapper auto-logs args + // result, which would leak the key. NewGatedToolWithAudit lets the // handler explicitly separate the LLM-visible output from the // audit-visible output, while still benefitting from auto-injected // CheckGate. 
type AuditedResult struct { // LLMResult is the string returned to the LLM as the tool result. LLMResult string // AuditArgs is the args string written to the audit row. If empty, // the wrapper falls back to the JSON-marshaled typed args (same // behaviour as NewGatedTool). AuditArgs string // AuditResult is the result string written to the audit row. May // be empty (logged as "") to suppress sensitive fragments. AuditResult string } // gatedToolWithAudit is the variant of gatedTool whose handler returns // an AuditedResult so it can override what the audit row captures. type gatedToolWithAudit[Args any] struct { name string description string permission Permission fn func(ctx context.Context, inv Invocation, args Args) (AuditedResult, error) } // isGatedTool implements gatedToolMarker for the meta-test. func (g *gatedToolWithAudit[Args]) isGatedTool() {} func (g *gatedToolWithAudit[Args]) Name() string { return g.name } func (g *gatedToolWithAudit[Args]) Description() string { return g.description } func (g *gatedToolWithAudit[Args]) Permission() Permission { return g.permission } // NewGatedToolWithAudit is the redaction-aware variant of NewGatedTool. // Use it ONLY when the LLM-facing result must differ from the audit // row (e.g. the result contains an encryption key that the audit must // NOT capture). Most tools should use NewGatedTool. // // Behaviour matches NewGatedTool exactly except: // - The handler returns AuditedResult; the wrapper passes // AuditedResult.LLMResult to the LLM and writes // AuditedResult.AuditArgs / AuditedResult.AuditResult to the // audit row (falling back to the JSON-marshaled args if // AuditArgs is empty). // - Gate rejection still emits an audit row with empty Result and // args="{}" before returning the gate error. // // Test: covered alongside NewGatedTool in pkg/skilltools/ // gated_tool_test.go. 
func NewGatedToolWithAudit[Args any](
	name, description string,
	permission Permission,
	fn func(ctx context.Context, inv Invocation, args Args) (AuditedResult, error),
) Tool {
	// Populate the wrapper field by field; it is returned behind the
	// Tool interface so callers never see the concrete type.
	gated := new(gatedToolWithAudit[Args])
	gated.name = name
	gated.description = description
	gated.permission = permission
	gated.fn = fn
	return gated
}

// BuildLLM produces the per-invocation llm.Tool. The gate is injected
// the same way as in gatedTool[Args].BuildLLM; the difference is that
// the audit row is written from the handler-supplied AuditArgs /
// AuditResult, so a sensitive LLM-visible result string stays out of
// the audit log.
func (g *gatedToolWithAudit[Args]) BuildLLM(inv Invocation) llm.Tool {
	auditedHandler := func(ctx context.Context, args Args) (string, error) {
		// Gate first: a rejected invocation is audited with empty
		// args/result and never reaches the user handler.
		if gateErr := CheckGate(inv); gateErr != nil {
			EmitAudit(inv, "{}", "", gateErr)
			return "", gateErr
		}

		outcome, runErr := g.fn(ctx, inv, args)

		// Prefer the handler's own audit args. When it supplied none,
		// record the JSON form of the typed args, or "{}" if those
		// cannot be marshalled.
		loggedArgs := outcome.AuditArgs
		if loggedArgs == "" {
			loggedArgs = "{}"
			if encoded, encErr := json.Marshal(args); encErr == nil {
				loggedArgs = string(encoded)
			}
		}

		// Only AuditResult reaches the audit row; LLMResult goes to the
		// model alone.
		EmitAudit(inv, loggedArgs, outcome.AuditResult, runErr)
		return outcome.LLMResult, runErr
	}

	return defineTypedTool[Args](g.name, g.description, auditedHandler)
}