P1 (part 1): move skilltools core -> tool/ (clean, verbatim)
executus CI / test (push) Successful in 36s

The tool registry core (registry, permission model, Invocation, gated-tool
wrapper, ssrf guard, hmac, encryption, argcoerce, helpers, rootrun,
session_tools, webhook_rate_limit) had zero mort coupling — it imports only
majordomo/llm + x/crypto/hkdf — so it moves verbatim with a package rename
(skilltools -> tool). All same-package tests came along and pass; the SSRF,
gated-wrapper, encryption and output-pattern invariants are re-anchored here.

majordomo re-enters the module graph (now pinned to the latest, incl. the
front-loaded-output fix). model/ + llmmeta + structured follow next.

Docs: CLAUDE.md now requires README/examples to stay in sync with changes in
the same commit; CI skips docs/example-only pushes via paths-ignore.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-26 19:31:47 -04:00
parent d2c18ad5bb
commit dc28b63ad8
24 changed files with 3461 additions and 1 deletions
+161
View File
@@ -0,0 +1,161 @@
package tool
import (
"encoding/json"
"reflect"
"strconv"
"strings"
)
// unmarshalArgsLenient decodes the raw JSON arguments the model supplied
// into target (a non-nil pointer to the tool's typed Args struct),
// tolerating the classic LLM tool-call bug of emitting numbers and
// booleans as strings ("3" where the schema said integer, "true" where
// it said boolean).
//
// Why mort-side (vs relying on the library): legacy gollm's Define performed
// this coercion internally (tool_coerce.go), and several years of tool
// traffic depend on the tolerance. majordomo's DefineTool decodes
// strictly by design, so the gated wrappers re-create the leniency here
// — the strict path is tried first and coercion only runs on failure,
// which makes the happy path zero-cost.
//
// Returns nil when raw is empty (target is left untouched) or when either
// the strict or the coerced decode succeeds. On every failure the ORIGINAL
// strict error is returned, because it names the real problem. A nil or
// non-pointer target yields that strict error instead of a reflect panic.
func unmarshalArgsLenient(raw json.RawMessage, target any) error {
	if len(raw) == 0 {
		return nil
	}
	strictErr := json.Unmarshal(raw, target)
	if strictErr == nil {
		return nil
	}
	// json.Unmarshal has already rejected a nil / non-pointer target with a
	// proper error; calling Elem on such a type would panic, so stop here.
	rt := reflect.TypeOf(target)
	if rt == nil || rt.Kind() != reflect.Pointer {
		return strictErr
	}
	coerced, err := coerceArgsToType(raw, rt.Elem())
	if err != nil {
		// Malformed JSON: surface the original strict error, which
		// names the real problem.
		return strictErr
	}
	if err := json.Unmarshal(coerced, target); err != nil {
		return strictErr
	}
	return nil
}

// coerceArgsToType reparses argsJSON with leniency: where the target
// struct expects a numeric or boolean field but the JSON value is a
// string, it converts the string to the target kind. Recurses into
// nested structs, slices, maps, and pointer fields. Returns a freshly
// marshaled JSON byte slice intended to unmarshal strictly into target.
//
// Numbers are decoded as json.Number rather than float64 so that integers
// beyond 2^53 (which float64 cannot represent exactly) survive the
// round trip byte-for-byte instead of being silently rounded.
func coerceArgsToType(argsJSON []byte, target reflect.Type) ([]byte, error) {
	// Decoder.Decode stops after the first JSON value, so validate the whole
	// input up front to keep rejecting trailing data the way json.Unmarshal
	// does. The throwaway Unmarshal only exists to produce the syntax error.
	if !json.Valid(argsJSON) {
		var probe any
		return nil, json.Unmarshal(argsJSON, &probe)
	}
	dec := json.NewDecoder(strings.NewReader(string(argsJSON)))
	dec.UseNumber()
	var raw any
	if err := dec.Decode(&raw); err != nil {
		return nil, err
	}
	return json.Marshal(coerceValue(raw, target))
}

// coerceValue walks the decoded JSON value v alongside the Go type t and
// returns v with string scalars converted to the kind t expects. Values it
// cannot (or need not) convert are returned unchanged so that the final
// strict decode reports the failure. Maps and slices are rewritten in place.
//
// NOTE(review): object keys are matched exactly against the json tag name,
// and embedded (anonymous) struct fields are looked up under their own
// name, so case-variant keys and promoted fields are not coerced.
func coerceValue(v any, t reflect.Type) any {
	if t == nil {
		return v
	}
	for t.Kind() == reflect.Pointer {
		t = t.Elem()
	}
	switch t.Kind() {
	case reflect.Struct:
		obj, ok := v.(map[string]any)
		if !ok {
			return v
		}
		for i := 0; i < t.NumField(); i++ {
			f := t.Field(i)
			if !f.IsExported() {
				continue
			}
			// Only the exact tag "-" excludes a field; "-," names a
			// field whose JSON key is literally "-".
			if f.Tag.Get("json") == "-" {
				continue
			}
			name := jsonFieldName(f)
			if val, present := obj[name]; present {
				obj[name] = coerceValue(val, f.Type)
			}
		}
		return obj
	case reflect.Slice, reflect.Array:
		arr, ok := v.([]any)
		if !ok {
			return v
		}
		elemType := t.Elem()
		for i := range arr {
			arr[i] = coerceValue(arr[i], elemType)
		}
		return arr
	case reflect.Map:
		obj, ok := v.(map[string]any)
		if !ok {
			return v
		}
		valType := t.Elem()
		for k := range obj {
			obj[k] = coerceValue(obj[k], valType)
		}
		return obj
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
		switch x := v.(type) {
		case json.Number:
			// Already an integer literal: keep the exact digits.
			if _, err := x.Int64(); err == nil {
				return x
			}
			// Integral float spellings (3.0, 1e3) were accepted by the
			// former float64 round trip; keep accepting them.
			if f, err := x.Float64(); err == nil {
				if n, ok := coerceFloatToInt64(f); ok && float64(n) == f {
					return n
				}
			}
		case string:
			s := strings.TrimPrefix(strings.TrimSpace(x), "+")
			if n, err := strconv.ParseInt(s, 10, 64); err == nil {
				return n
			}
			// Legacy tolerance: "3.7" truncates to 3. NaN, ±Inf and
			// out-of-range values are left as strings so the decode fails.
			if f, err := strconv.ParseFloat(s, 64); err == nil {
				if n, ok := coerceFloatToInt64(f); ok {
					return n
				}
			}
		}
	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
		switch x := v.(type) {
		case json.Number:
			if _, err := strconv.ParseUint(x.String(), 10, 64); err == nil {
				return x
			}
			if f, err := x.Float64(); err == nil {
				if n, ok := coerceFloatToUint64(f); ok && float64(n) == f {
					return n
				}
			}
		case string:
			s := strings.TrimPrefix(strings.TrimSpace(x), "+")
			if n, err := strconv.ParseUint(s, 10, 64); err == nil {
				return n
			}
			if f, err := strconv.ParseFloat(s, 64); err == nil {
				if n, ok := coerceFloatToUint64(f); ok {
					return n
				}
			}
		}
	case reflect.Float32, reflect.Float64:
		if s, ok := v.(string); ok {
			if f, err := strconv.ParseFloat(strings.TrimSpace(s), 64); err == nil {
				return f
			}
		}
	case reflect.Bool:
		if s, ok := v.(string); ok {
			if b, err := strconv.ParseBool(strings.TrimSpace(s)); err == nil {
				return b
			}
		}
	}
	return v
}

// coerceFloatToInt64 truncates f toward zero when the result fits in an
// int64. It reports false for NaN, ±Inf and out-of-range values, for which
// Go's float-to-integer conversion is implementation-dependent.
func coerceFloatToInt64(f float64) (int64, bool) {
	const (
		lowest  = -9223372036854775808.0 // -2^63, inclusive
		ceiling = 9223372036854775808.0  // 2^63, exclusive
	)
	// Every comparison with NaN is false, so NaN is rejected here too.
	if f >= lowest && f < ceiling {
		return int64(f), true
	}
	return 0, false
}

// coerceFloatToUint64 is the unsigned counterpart of coerceFloatToInt64:
// it accepts only 0 <= f < 2^64.
func coerceFloatToUint64(f float64) (uint64, bool) {
	const ceiling = 18446744073709551616.0 // 2^64, exclusive
	if f >= 0 && f < ceiling {
		return uint64(f), true
	}
	return 0, false
}

// jsonFieldName returns the effective JSON key for a struct field:
// the json tag's name part when present, the Go field name otherwise.
// A tag whose name part is "-" yields "-"; callers that need to tell
// the excluded form (`json:"-"`) from a literal "-" key (`json:"-,"`)
// must inspect the tag themselves.
func jsonFieldName(f reflect.StructField) string {
	tag, ok := f.Tag.Lookup("json")
	if !ok {
		return f.Name
	}
	name, _, _ := strings.Cut(tag, ",")
	if name == "" {
		return f.Name
	}
	return name
}
+83
View File
@@ -0,0 +1,83 @@
package tool
import (
"context"
"strings"
"testing"
)
// coerceParams mirrors the field kinds legacy gollm's coercion supported.
// Each field gives the coercion tests in this file a different target
// kind to decode into.
type coerceParams struct {
	// Count is a plain signed-integer target.
	Count int `json:"count"`
	// Ratio is a floating-point target.
	Ratio float64 `json:"ratio"`
	// Flag is a boolean target.
	Flag bool `json:"flag"`
	// Limit is a pointer-to-integer target.
	Limit *int `json:"limit"`
	// Tags is a slice-of-integers target.
	Tags []int `json:"tags"`
	// Nested is a nested-struct target.
	Nested coerceIn `json:"nested"`
	// Verbose is a string field; the test asserts it passes through unchanged.
	Verbose string `json:"verbose"`
}
// coerceIn is the nested struct embedded in coerceParams.Nested; its single
// unsigned field gives the tests an unsigned-integer coercion target.
type coerceIn struct {
	Depth uint `json:"depth"`
}
// TestGatedTool_LenientArgCoercion pins the legacy gollm-era tolerance the
// conversion preserved: when the model sends numeric and boolean fields as
// strings ("3", "true"), they still land in the typed Args — through
// pointers, slices, and nested structs alike. Models emit this shape
// constantly; losing the tolerance would break live tool traffic.
func TestGatedTool_LenientArgCoercion(t *testing.T) {
	const payload = `{"count":"3","ratio":" 2.5 ","flag":"true","limit":"7","tags":["1","2"],"nested":{"depth":"4"},"verbose":"yes"}`

	var captured coerceParams
	record := func(ctx context.Context, inv Invocation, args coerceParams) (string, error) {
		captured = args
		return "ok", nil
	}
	perm := Permission{AuthoringRequirement: RequirementAnyone, SafeForShare: true}
	gated := NewGatedTool[coerceParams]("coerce_tool", "coercion test", perm, record)

	out, err := buildAndExecute(t, gated, Invocation{SkillName: "x"}, VisibilityPrivate, nil, payload)
	if err != nil || out != "ok" {
		t.Fatalf("execute: out=%q err=%v", out, err)
	}

	// Checks run top to bottom; the first failing case aborts the test.
	switch {
	case captured.Count != 3 || captured.Ratio != 2.5 || !captured.Flag:
		t.Fatalf("scalar coercion failed: %+v", captured)
	case captured.Limit == nil || *captured.Limit != 7:
		t.Fatalf("pointer coercion failed: %+v", captured.Limit)
	case len(captured.Tags) != 2 || captured.Tags[0] != 1 || captured.Tags[1] != 2:
		t.Fatalf("slice coercion failed: %+v", captured.Tags)
	case captured.Nested.Depth != 4:
		t.Fatalf("nested coercion failed: %+v", captured.Nested)
	case captured.Verbose != "yes":
		t.Fatalf("string field mangled: %q", captured.Verbose)
	}
}
// TestGatedTool_StrictPathUnaffected confirms well-typed args take the
// zero-cost strict path and uncoercible strings still fail loudly.
func TestGatedTool_StrictPathUnaffected(t *testing.T) {
tool := NewGatedTool[coerceParams](
"coerce_strict_tool", "coercion test",
Permission{AuthoringRequirement: RequirementAnyone, SafeForShare: true},
func(ctx context.Context, inv Invocation, args coerceParams) (string, error) {
return "ok", nil
},
)
if out, err := buildAndExecute(t, tool, Invocation{SkillName: "x"}, VisibilityPrivate, nil,
`{"count":3,"ratio":2.5,"flag":true}`); err != nil || out != "ok" {
t.Fatalf("strict path: out=%q err=%v", out, err)
}
_, err := buildAndExecute(t, tool, Invocation{SkillName: "x"}, VisibilityPrivate, nil,
`{"count":"not-a-number"}`)
if err == nil || !strings.Contains(err.Error(), "invalid arguments") {
t.Fatalf("expected invalid-arguments error for uncoercible string, got %v", err)
}
}
+56
View File
@@ -0,0 +1,56 @@
package tool
import "fmt"
// CheckAuthoring reports whether the saving user may author a skill that
// uses the given tool list. Called from the save path
// (skills.System.SaveUserSkill); the builtin loader bypasses this check.
//
// Why: the AuthoringRequirement gate is the primary admin trust boundary
// for tools that can read sensitive data (db_select, repo_*) or perform
// privileged Discord queries. Failing closed at save time avoids a skill
// that saves successfully and is then rejected at execute time.
//
// What: tools are examined in order. The first name missing from reg
// yields an "unknown tool" error; the first tool whose permission is
// RequirementAdmin while isAdmin is false yields the spec's exact
// rejection message. Returns nil when every tool clears.
//
// Test: see checks_test.go.
func CheckAuthoring(reg Registry, tools []string, isAdmin bool) error {
	for i := range tools {
		toolName := tools[i]
		entry, found := reg.Get(toolName)
		if !found {
			return fmt.Errorf("unknown tool %q", toolName)
		}
		adminOnly := entry.Permission().AuthoringRequirement == RequirementAdmin
		if adminOnly && !isAdmin {
			return fmt.Errorf("The tool `%s` requires admin authoring. Ask an admin to create or publish a skill that uses this tool.", toolName)
		}
	}
	return nil
}
// CheckShareSafety reports whether every listed tool may appear in a
// shared skill. Called when a skill's visibility is being set to shared
// or public.
//
// Why: tools that operate on caller-private data (mortbux_get_balance,
// chatbot_get_memories) leak when invoked by non-owners through a
// shared/public skill — the executor would compute "the caller is
// whoever ran the skill", whose data would then surface to the skill
// authoring user.
//
// What: tools are examined in order. The first name missing from reg
// yields an "unknown tool" error; the first tool whose permission has
// SafeForShare == false yields the spec's exact rejection message.
// Returns nil when every tool clears.
//
// Test: see checks_test.go.
func CheckShareSafety(reg Registry, tools []string) error {
	for i := range tools {
		toolName := tools[i]
		entry, found := reg.Get(toolName)
		if !found {
			return fmt.Errorf("unknown tool %q", toolName)
		}
		if shareable := entry.Permission().SafeForShare; !shareable {
			return fmt.Errorf("The tool `%s` cannot appear in a shared skill because it operates on the caller's own data.", toolName)
		}
	}
	return nil
}
+56
View File
@@ -0,0 +1,56 @@
package tool
import (
"strings"
"testing"
)
// TestCheckAuthoring_AllowsAnyone: a non-admin may author a skill using a
// tool whose requirement is RequirementAnyone.
func TestCheckAuthoring_AllowsAnyone(t *testing.T) {
	reg := NewRegistry()
	_ = reg.Register(&fakeTool{
		name: "calc",
		perm: Permission{AuthoringRequirement: RequirementAnyone},
	})

	err := CheckAuthoring(reg, []string{"calc"}, false)
	if err != nil {
		t.Fatalf("expected anyone to pass, got %v", err)
	}
}
// TestCheckAuthoring_BlocksNonAdminFromAdminTool: a non-admin is rejected
// with the admin-authoring message for a RequirementAdmin tool.
func TestCheckAuthoring_BlocksNonAdminFromAdminTool(t *testing.T) {
	reg := NewRegistry()
	_ = reg.Register(&fakeTool{
		name: "db_select",
		perm: Permission{AuthoringRequirement: RequirementAdmin},
	})

	err := CheckAuthoring(reg, []string{"db_select"}, false)
	if err != nil && strings.Contains(err.Error(), "requires admin authoring") {
		return
	}
	t.Fatalf("expected admin-required error, got %v", err)
}
// TestCheckAuthoring_AllowsAdminWithAdminTool: an admin clears the gate for
// a RequirementAdmin tool.
func TestCheckAuthoring_AllowsAdminWithAdminTool(t *testing.T) {
	reg := NewRegistry()
	_ = reg.Register(&fakeTool{
		name: "db_select",
		perm: Permission{AuthoringRequirement: RequirementAdmin},
	})

	err := CheckAuthoring(reg, []string{"db_select"}, true)
	if err != nil {
		t.Fatalf("expected admin to pass, got %v", err)
	}
}
// TestCheckAuthoring_UnknownTool: a tool name absent from the registry is
// rejected with an "unknown tool" error, even for an admin.
func TestCheckAuthoring_UnknownTool(t *testing.T) {
	emptyReg := NewRegistry()

	err := CheckAuthoring(emptyReg, []string{"missing"}, true)
	if err != nil && strings.Contains(err.Error(), "unknown tool") {
		return
	}
	t.Fatalf("expected unknown-tool error, got %v", err)
}
// TestCheckShareSafety_Pass: a tool marked SafeForShare clears the check.
func TestCheckShareSafety_Pass(t *testing.T) {
	reg := NewRegistry()
	_ = reg.Register(&fakeTool{
		name: "search",
		perm: Permission{SafeForShare: true},
	})

	err := CheckShareSafety(reg, []string{"search"})
	if err != nil {
		t.Fatalf("expected safe tool to pass, got %v", err)
	}
}
// TestCheckShareSafety_BlocksUnsafe: a tool with SafeForShare == false is
// rejected with the caller's-own-data message.
func TestCheckShareSafety_BlocksUnsafe(t *testing.T) {
	reg := NewRegistry()
	_ = reg.Register(&fakeTool{
		name: "balance",
		perm: Permission{SafeForShare: false},
	})

	err := CheckShareSafety(reg, []string{"balance"})
	if err != nil && strings.Contains(err.Error(), "operates on the caller's own data") {
		return
	}
	t.Fatalf("expected share-safety error, got %v", err)
}
+242
View File
@@ -0,0 +1,242 @@
// Package tool — encryption.go: per-skill envelope encryption for
// KV values and file blobs. AES-256-GCM with a per-skill key derived
// from a single master key (env var SKILLS_ENCRYPTION_MASTER_KEY) via
// HKDF using the skill ID as the salt.
//
// !!!!! CRITICAL OPERATIONAL WARNING !!!!!
//
// SKILLS_ENCRYPTION_MASTER_KEY MUST BE BACKED UP SEPARATELY FROM THE
// DATABASE. Losing the master key = losing every byte of encrypted
// KV value and every encrypted file blob, with no recovery path. The
// key is the ONLY thing that can decrypt rows whose
// encryption_key_version > 0.
//
// Operational rules:
// - Store the master key in a secrets manager (Vault, 1Password,
// KMS export) — NEVER in the same backup as the database dump.
// - Rotating the master key without a versioned re-encrypt
// migration WILL render existing encrypted rows unreadable. The
// encryption_key_version column was added so a future rotation
// migration can re-encrypt under a new (master, version)
// pair; do not bump the version without that migration.
// - When the env var is empty, encryption is OFF for the whole
// instance. Skills with encryption_enabled=true still write
// plaintext (with a logged WARNING). This is intentional — the
// alternative is to refuse to start, which would break
// deployment for everyone the moment the secret leaks during
// rotation. Loud logging + the boot-time warning in mort.go is
// the correct trade-off.
//
// Why HKDF-derived per-skill keys (vs one global key): a future
// "wipe this skill's data" admin action can be made auditable by
// recording the skill_id in the operation log without exposing the
// master key. Per-skill keys also cap blast radius if one key
// somehow leaks via a side channel — only that one skill's data is
// compromised, not the whole platform.
//
// Why AES-256-GCM: authenticated encryption catches tampered
// ciphertext at decrypt time. The GCM nonce is 12 random bytes per
// row; the auth tag is 16 bytes. Both are stored inline with the
// ciphertext so the storage layer's value/content column holds the
// full envelope (no separate nonce column).
//
// Wire format of an encrypted blob:
//
// +-- 1 byte: format version (0x01)
// +-- 12 bytes: GCM nonce
// +-- N bytes: ciphertext + 16-byte GCM tag
//
// The format-version byte lets a future change to nonce length or
// auth tag handling be detected loudly rather than corrupting reads.
// Encrypt always writes 0x01; Decrypt rejects any other version with
// ErrEncryptionUnknownVersion.
package tool
import (
"crypto/aes"
"crypto/cipher"
"crypto/rand"
"crypto/sha256"
"errors"
"fmt"
"io"
"os"
"golang.org/x/crypto/hkdf"
)
// EncryptionMasterKeyEnv is the environment variable that holds the
// master key material for skill envelope encryption. MasterKeyFromEnv
// reads it and hashes any non-empty value down to 32 bytes with SHA-256,
// so the raw value may be of any length.
//
// !!!!! LOSING THIS KEY = LOSING ALL ENCRYPTED DATA !!!!!
//
// Back it up separately from database backups. Never commit it.
// Empty value = encryption OFF (with WARNING logged at boot).
const EncryptionMasterKeyEnv = "SKILLS_ENCRYPTION_MASTER_KEY"

// CurrentKeyVersion is the version stamped on every newly-encrypted
// row. Version 0 is reserved for plaintext (legacy / encryption-off).
// Version 1 is "AES-256-GCM with HKDF(master, skill_id) per-skill key,
// envelope format 0x01". Bumping this requires a migration that
// re-encrypts existing rows under the new (master, version) pair.
//
// Encrypt and Decrypt in this file do not read this constant; stamping
// and checking the column is left to the caller.
const CurrentKeyVersion = 1

// envelopeFormatV1 is the first byte of every Encrypt output. Decrypt
// rejects any other value with ErrEncryptionUnknownVersion. It is the
// in-band envelope format marker, distinct from CurrentKeyVersion.
const envelopeFormatV1 = byte(0x01)

// gcmNonceSize is fixed at 12 bytes for AES-GCM (NIST SP 800-38D
// recommended). Encrypt writes exactly this many nonce bytes after the
// format byte, and Decrypt slices the same number back out.
const gcmNonceSize = 12
// Encryption sentinel errors. Callers compare with errors.Is so storage
// adapters can branch on "tampered" vs "unknown version" vs "no master
// key".
var (
	// ErrEncryptionDisabled is returned when an encryption operation
	// is attempted but SKILLS_ENCRYPTION_MASTER_KEY is empty. Storage
	// adapters interpret this as "fall through to plaintext" — they
	// MUST log loudly when this branch is taken.
	//
	// In this file it is returned by DeriveSkillKey when the master
	// slice is empty.
	ErrEncryptionDisabled = errors.New("skilltools: encryption disabled (master key empty)")

	// ErrEncryptionUnknownVersion is returned by Decrypt when the
	// envelope's format-version byte is not envelopeFormatV1. A read
	// that hits this error is corruption — surface to the operator,
	// do NOT silently fall back to plaintext.
	ErrEncryptionUnknownVersion = errors.New("skilltools: encryption envelope has unknown format version")

	// ErrEncryptionTampered is returned by Decrypt when the GCM auth
	// tag check fails. The ciphertext or nonce was modified after
	// encryption, or the wrong key was used. Surface as "data
	// corruption" — the row is unreadable.
	//
	// Decrypt maps every failure of the GCM open step to this error,
	// including an envelope too short to hold a tag.
	ErrEncryptionTampered = errors.New("skilltools: encryption auth tag mismatch (data corruption or wrong key)")

	// ErrEncryptionShortInput is returned by Decrypt when the input
	// is too short to contain even the version byte + nonce
	// (fewer than 1+gcmNonceSize bytes). Bug or malformed write.
	ErrEncryptionShortInput = errors.New("skilltools: encryption input too short")
)
// MasterKeyFromEnv reads SKILLS_ENCRYPTION_MASTER_KEY and returns the
// length-normalized master key (raw, NOT HKDF-derived).
//
// Why hash to 32 bytes instead of demanding 32 raw bytes: operators
// commonly paste a generated random hex/base64 string of varying
// length. SHA-256 accepts any non-empty input and yields a fixed-length
// key, which is then fed into HKDF for per-skill derivation. The hash
// step only normalizes length; HKDF still does the per-skill
// diversification.
//
// Returns (nil, false) when the env var is empty or unset; otherwise the
// 32-byte SHA-256 digest of the variable's value and true.
func MasterKeyFromEnv() (key []byte, present bool) {
	if secret := os.Getenv(EncryptionMasterKeyEnv); secret != "" {
		digest := sha256.Sum256([]byte(secret))
		return digest[:], true
	}
	return nil, false
}
// DeriveSkillKey derives the per-skill 32-byte AES-256 key for the given
// (master, skillID) pair using HKDF-SHA256.
//
// Why skillID as HKDF salt: every skill gets its own subkey, so the
// master key is still required to decrypt any one skill, while a leaked
// skill_id (which is normal — IDs appear in logs) does NOT help an
// attacker. The HKDF info parameter is a fixed label so that other uses
// of the same master+skillID pair (e.g. a future per-skill HMAC key) can
// be derived under a different label without colliding.
//
// master should be the 32-byte output of MasterKeyFromEnv (or an
// equivalent length-normalized input).
//
// Returns ErrEncryptionDisabled when master is empty, a plain error when
// skillID is empty, and a wrapped error if the HKDF reader fails.
func DeriveSkillKey(master []byte, skillID string) ([]byte, error) {
	switch {
	case len(master) == 0:
		return nil, ErrEncryptionDisabled
	case skillID == "":
		return nil, errors.New("skilltools: DeriveSkillKey requires non-empty skillID")
	}

	salt, info := []byte(skillID), []byte("mort/skills/v1/aead")
	kdf := hkdf.New(sha256.New, master, salt, info)

	derived := make([]byte, 32)
	_, err := io.ReadFull(kdf, derived)
	if err != nil {
		return nil, fmt.Errorf("skilltools: HKDF derive: %w", err)
	}
	return derived, nil
}
// Encrypt seals plaintext under skillKey with AES-256-GCM and returns the
// wire envelope: version byte || nonce || ciphertext || tag.
//
// Caller is responsible for stamping the encryption_key_version column
// to CurrentKeyVersion AFTER a successful Encrypt — Encrypt itself
// only produces bytes; persisting them is the storage layer's job.
//
// Why a fresh random nonce per call (vs deterministic): nonce reuse
// under GCM is catastrophic (allows recovering the keystream); fresh
// 96-bit random nonces have a negligible collision probability under
// any realistic write rate.
//
// Returns an error when skillKey is not exactly 32 bytes, when the cipher
// cannot be constructed, or when the random source fails.
func Encrypt(skillKey, plaintext []byte) ([]byte, error) {
	if n := len(skillKey); n != 32 {
		return nil, fmt.Errorf("skilltools: Encrypt requires 32-byte key, got %d", n)
	}

	blk, err := aes.NewCipher(skillKey)
	if err != nil {
		return nil, fmt.Errorf("skilltools: aes.NewCipher: %w", err)
	}
	aead, err := cipher.NewGCM(blk)
	if err != nil {
		return nil, fmt.Errorf("skilltools: cipher.NewGCM: %w", err)
	}

	iv := make([]byte, gcmNonceSize)
	if _, err = io.ReadFull(rand.Reader, iv); err != nil {
		return nil, fmt.Errorf("skilltools: rand.Read: %w", err)
	}

	// Size the buffer once for header + ciphertext + tag so Seal appends
	// without reallocating.
	total := 1 + gcmNonceSize + len(plaintext) + aead.Overhead()
	envelope := make([]byte, 0, total)
	envelope = append(envelope, envelopeFormatV1)
	envelope = append(envelope, iv...)
	return aead.Seal(envelope, iv, plaintext, nil), nil
}
// Decrypt opens an envelope produced by Encrypt under the same skillKey
// and returns the plaintext, or one of the sentinel errors.
//
// Caller MUST inspect the storage row's encryption_key_version BEFORE
// calling Decrypt. Version 0 means plaintext — Decrypt SHOULD NOT be
// called for version-0 rows (callers branch on the column value).
// This function does NOT inspect any version column; it only looks at
// the in-band envelope-format byte.
//
// Checks run in this order: key length (plain error), envelope length
// (ErrEncryptionShortInput), format byte (ErrEncryptionUnknownVersion),
// then the GCM open itself (ErrEncryptionTampered).
func Decrypt(skillKey, envelope []byte) ([]byte, error) {
	if n := len(skillKey); n != 32 {
		return nil, fmt.Errorf("skilltools: Decrypt requires 32-byte key, got %d", n)
	}

	const headerLen = 1 + gcmNonceSize
	switch {
	case len(envelope) < headerLen:
		return nil, ErrEncryptionShortInput
	case envelope[0] != envelopeFormatV1:
		return nil, ErrEncryptionUnknownVersion
	}
	iv, sealed := envelope[1:headerLen], envelope[headerLen:]

	blk, err := aes.NewCipher(skillKey)
	if err != nil {
		return nil, fmt.Errorf("skilltools: aes.NewCipher: %w", err)
	}
	aead, err := cipher.NewGCM(blk)
	if err != nil {
		return nil, fmt.Errorf("skilltools: cipher.NewGCM: %w", err)
	}

	opened, err := aead.Open(nil, iv, sealed, nil)
	if err != nil {
		// Every failure of the open step is reported as
		// ErrEncryptionTampered: the stdlib error is generic, and a
		// wrong key or modified bytes is the most likely cause, so
		// callers can surface "data corruption" specifically.
		return nil, ErrEncryptionTampered
	}
	return opened, nil
}
+205
View File
@@ -0,0 +1,205 @@
package tool
import (
"bytes"
"crypto/sha256"
"errors"
"strings"
"testing"
)
// Why: round-trip is the bedrock — without it, every other test is
// meaningless. What: encrypt then decrypt; assert plaintext returns.
// Test: write a non-trivial plaintext and confirm exact byte equality.
func TestEncryption_RoundTrip(t *testing.T) {
t.Parallel()
master := masterTestKey()
key, err := DeriveSkillKey(master, "skill-abc")
if err != nil {
t.Fatalf("DeriveSkillKey: %v", err)
}
plaintext := []byte(`{"hello":"world","n":42,"arr":[1,2,3]}`)
envelope, err := Encrypt(key, plaintext)
if err != nil {
t.Fatalf("Encrypt: %v", err)
}
if envelope[0] != envelopeFormatV1 {
t.Fatalf("envelope[0] = %d, want %d", envelope[0], envelopeFormatV1)
}
got, err := Decrypt(key, envelope)
if err != nil {
t.Fatalf("Decrypt: %v", err)
}
if !bytes.Equal(got, plaintext) {
t.Fatalf("round-trip mismatch:\n got: %q\nwant: %q", got, plaintext)
}
}
// Why: GCM is authenticated encryption — flipping any bit MUST be
// detected. What: tamper with the ciphertext; assert ErrEncryptionTampered.
// Test: flip one byte of the ciphertext suffix, decrypt, expect tamper error.
func TestEncryption_TamperDetected(t *testing.T) {
t.Parallel()
master := masterTestKey()
key, err := DeriveSkillKey(master, "skill-tamper")
if err != nil {
t.Fatalf("DeriveSkillKey: %v", err)
}
envelope, err := Encrypt(key, []byte("sensitive data"))
if err != nil {
t.Fatalf("Encrypt: %v", err)
}
// Flip a byte in the ciphertext (after version + nonce).
envelope[1+gcmNonceSize] ^= 0x01
_, err = Decrypt(key, envelope)
if !errors.Is(err, ErrEncryptionTampered) {
t.Fatalf("Decrypt after tamper = %v, want ErrEncryptionTampered", err)
}
}
// Why: nonce reuse under GCM is catastrophic — verify the impl uses
// fresh randomness on every call. What: encrypt the same plaintext twice;
// the envelopes must differ.
func TestEncryption_FreshNoncePerCall(t *testing.T) {
t.Parallel()
master := masterTestKey()
key, err := DeriveSkillKey(master, "skill-nonce")
if err != nil {
t.Fatalf("DeriveSkillKey: %v", err)
}
plaintext := []byte("fixed payload")
a, err := Encrypt(key, plaintext)
if err != nil {
t.Fatalf("Encrypt #1: %v", err)
}
b, err := Encrypt(key, plaintext)
if err != nil {
t.Fatalf("Encrypt #2: %v", err)
}
if bytes.Equal(a, b) {
t.Fatalf("two encryptions of the same plaintext produced identical envelopes (nonce not random)")
}
// Both must still decrypt to the same plaintext.
for i, env := range [][]byte{a, b} {
got, err := Decrypt(key, env)
if err != nil {
t.Fatalf("Decrypt #%d: %v", i, err)
}
if !bytes.Equal(got, plaintext) {
t.Fatalf("Decrypt #%d mismatch", i)
}
}
}
// Why: per-skill key derivation MUST give different skills different
// keys so a leaked skill key doesn't cross-decrypt. What: derive keys
// for skill A and skill B; encrypt under A; decrypt under B; expect
// tamper error.
//
// Derivation errors are checked explicitly: if they were discarded, a
// failed derivation would leave both keys nil and be misreported as
// "derived keys are identical".
func TestEncryption_PerSkillIsolation(t *testing.T) {
	t.Parallel()
	master := masterTestKey()
	keyA, err := DeriveSkillKey(master, "skill-a")
	if err != nil {
		t.Fatalf("DeriveSkillKey(skill-a): %v", err)
	}
	keyB, err := DeriveSkillKey(master, "skill-b")
	if err != nil {
		t.Fatalf("DeriveSkillKey(skill-b): %v", err)
	}
	if bytes.Equal(keyA, keyB) {
		t.Fatalf("derived keys for distinct skills are identical (HKDF salt not effective)")
	}
	envelope, err := Encrypt(keyA, []byte("only skill A may read"))
	if err != nil {
		t.Fatalf("Encrypt: %v", err)
	}
	_, err = Decrypt(keyB, envelope)
	if !errors.Is(err, ErrEncryptionTampered) {
		t.Fatalf("Decrypt under wrong skill key = %v, want ErrEncryptionTampered", err)
	}
}
// Why: a future format change must fail loudly instead of silently
// corrupting reads. What: build an envelope by hand whose version byte is
// 0xFF and expect ErrEncryptionUnknownVersion.
func TestEncryption_UnknownVersionRejected(t *testing.T) {
	t.Parallel()

	zeroKey := make([]byte, 32)
	forged := make([]byte, 1+gcmNonceSize+16)
	forged[0] = 0xFF

	if _, err := Decrypt(zeroKey, forged); !errors.Is(err, ErrEncryptionUnknownVersion) {
		t.Fatalf("Decrypt with bad version = %v, want ErrEncryptionUnknownVersion", err)
	}
}
// Why: short inputs must not panic. What: hand Decrypt a 5-byte envelope
// and expect ErrEncryptionShortInput.
func TestEncryption_ShortInputRejected(t *testing.T) {
	t.Parallel()

	zeroKey := make([]byte, 32)
	tooShort := []byte{1, 2, 3, 4, 5}

	if _, err := Decrypt(zeroKey, tooShort); !errors.Is(err, ErrEncryptionShortInput) {
		t.Fatalf("Decrypt with short input = %v, want ErrEncryptionShortInput", err)
	}
}
// Why: an empty master key means encryption is disabled. What:
// DeriveSkillKey called with a nil master must return
// ErrEncryptionDisabled.
func TestEncryption_EmptyMasterDisabled(t *testing.T) {
	t.Parallel()

	if _, err := DeriveSkillKey(nil, "skill-x"); !errors.Is(err, ErrEncryptionDisabled) {
		t.Fatalf("DeriveSkillKey(nil) = %v, want ErrEncryptionDisabled", err)
	}
}
// Why: operators commonly paste random hex/base64 of varying length, so
// MasterKeyFromEnv should normalize to 32 bytes via SHA-256. What: set
// the env var and expect the returned bytes to equal SHA-256 of the
// input.
func TestEncryption_MasterKeyFromEnvNormalizesLength(t *testing.T) {
	// not parallel — we mutate process env
	const raw = "this-is-a-fake-master-key-for-testing-only-totally-not-secure"
	t.Setenv(EncryptionMasterKeyEnv, raw)

	masterKey, ok := MasterKeyFromEnv()
	switch {
	case !ok:
		t.Fatalf("MasterKeyFromEnv reported absent for non-empty env var")
	case len(masterKey) != 32:
		t.Fatalf("len(masterKey) = %d, want 32", len(masterKey))
	}

	expected := sha256.Sum256([]byte(raw))
	if !bytes.Equal(masterKey, expected[:]) {
		t.Fatalf("masterKey does not match SHA-256 of env var")
	}
}
// Why: an empty env var switches encryption off instance-wide. What:
// MasterKeyFromEnv must return nil bytes together with present=false.
func TestEncryption_MasterKeyFromEnvEmpty(t *testing.T) {
	// Mutates process env via t.Setenv, so this test is not parallel.
	t.Setenv(EncryptionMasterKeyEnv, "")

	masterKey, ok := MasterKeyFromEnv()
	switch {
	case ok:
		t.Fatalf("MasterKeyFromEnv reported present for empty env var")
	case masterKey != nil:
		t.Fatalf("MasterKeyFromEnv returned non-nil bytes for empty env var")
	}
}
// Why: defence in depth — a key that is not 32 bytes should produce an
// error, never a panic. What: both Encrypt and Decrypt are called with a
// 16-byte key; Encrypt's error must also mention the required key size.
func TestEncryption_BadKeySize(t *testing.T) {
	t.Parallel()

	shortKey := make([]byte, 16)

	_, encErr := Encrypt(shortKey, []byte("x"))
	switch {
	case encErr == nil:
		t.Fatalf("Encrypt with 16-byte key did not error")
	case !strings.Contains(encErr.Error(), "32-byte key"):
		t.Fatalf("error did not mention key size: %v", encErr)
	}

	if _, decErr := Decrypt(shortKey, make([]byte, 32)); decErr == nil {
		t.Fatalf("Decrypt with 16-byte key did not error")
	}
}
// masterTestKey returns a fixed 32-byte master key (the SHA-256 digest of
// a constant string) so that test runs are reproducible.
func masterTestKey() []byte {
	hasher := sha256.New()
	hasher.Write([]byte("test-master-do-not-use-in-prod"))
	return hasher.Sum(nil)
}
+67
View File
@@ -0,0 +1,67 @@
package tool
import (
"context"
"encoding/json"
"errors"
llm "gitea.stevedudenhoeffer.com/steve/majordomo/llm"
)
// toolCall mirrors the legacy gollm-era test shape (string arguments) so the
// pre-conversion test call sites keep their literal syntax. majordomo's
// llm.ToolCall carries json.RawMessage arguments; execBox adapts.
type toolCall struct {
	// Name is copied into llm.ToolCall.Name by execBox.
	Name string
	// Arguments is the JSON arguments text; execBox converts it to
	// json.RawMessage without validating it.
	Arguments string
}
// execBox runs a single call through a toolbox and reshapes majordomo's
// ToolResult into the (result, error) pair these tests assert against.
// A result flagged IsError comes back as a Go error whose text is the
// result content (which is how the agent-facing error text read in the
// legacy gollm era); any other result returns its content with a nil
// error. The call always uses context.Background() and the ID "test-call".
func execBox(box *llm.Toolbox, call toolCall) (string, error) {
	request := llm.ToolCall{
		ID:        "test-call",
		Name:      call.Name,
		Arguments: json.RawMessage(call.Arguments),
	}
	result := box.Execute(context.Background(), request)
	if !result.IsError {
		return result.Content, nil
	}
	return "", errors.New(result.Content)
}
// execTool invokes a single built llm.Tool's handler and serializes the
// result the way llm.ExecuteTool does. It stands in for the legacy gollm
// Tool.Execute(ctx, argsJSON) method the original tests called.
//
// Why the handler directly (vs llm.ExecuteTool): ExecuteTool flattens
// handler errors into IsError result text, but several tests assert
// error IDENTITY (errors.Is against sentinel errors the handlers
// wrap). Calling the handler preserves the error value, matching the
// legacy gollm Execute contract these tests were written against.
//
// Empty args are sent as "{}". A nil handler result becomes "null",
// strings and json.RawMessage are returned verbatim, and anything else is
// JSON-encoded.
func execTool(ctx context.Context, t llm.Tool, args string) (string, error) {
	payload := json.RawMessage(args)
	if args == "" {
		payload = json.RawMessage("{}")
	}

	result, err := t.Handler(ctx, payload)
	if err != nil {
		return "", err
	}

	if result == nil {
		return "null", nil
	}
	if text, isString := result.(string); isString {
		return text, nil
	}
	if rawJSON, isRaw := result.(json.RawMessage); isRaw {
		return string(rawJSON), nil
	}

	encoded, err := json.Marshal(result)
	if err != nil {
		return "", err
	}
	return string(encoded), nil
}
+272
View File
@@ -0,0 +1,272 @@
package tool
import (
"context"
"encoding/json"
"fmt"
llm "gitea.stevedudenhoeffer.com/steve/majordomo/llm"
)
// gatedToolMarker is the unexported interface implemented by every Tool
// constructed via NewGatedTool. The IsGatedTool helper performs a type
// assertion against this marker so the meta-test in default_test.go
// (and the wizardtools meta-test) can enforce that every registered
// production tool uses the wrapper.
//
// Why an unexported method (vs a public marker): the goal is to make it
// IMPOSSIBLE for an external caller to lie about being gated. Only the
// implementation in this file can satisfy the interface, so the
// type-assertion in IsGatedTool is a real proof of provenance, not a
// pinky-swear from a struct that opts in.
type gatedToolMarker interface {
	// isGatedTool carries no behaviour; it exists purely so that only
	// types declared in this package can satisfy the interface.
	isGatedTool()
}
// gatedTool is the concrete Tool returned by NewGatedTool. It carries
// the per-tool metadata (Name/Description/Permission) and the typed
// handler closure; BuildLLM wraps the handler with CheckGate +
// EmitAudit so tool authors literally cannot forget either call.
//
// Why generic on Args (vs accepting any-shaped JSON): each tool's
// handler is typed against its own param struct. defineTypedTool
// derives a JSON schema for the LLM from Args (llm.SchemaFor) and
// parses the args before invoking the handler. We re-marshal args to
// JSON once for the audit row so the captured shape matches exactly
// what the handler ran with (post-coercion).
type gatedTool[Args any] struct {
	// name and description are the tool's identifying metadata.
	name        string
	description string
	// permission is the tool's Permission value.
	permission Permission
	// fn is the typed handler: it receives the context, the Invocation,
	// and the already-decoded Args, and returns the result text or an error.
	fn func(ctx context.Context, inv Invocation, args Args) (string, error)
}
// isGatedTool implements gatedToolMarker for the meta-test. The body is
// intentionally empty: the method exists only so *gatedTool satisfies the
// unexported marker interface.
func (g *gatedTool[Args]) isGatedTool() {}
// defineTypedTool assembles the majordomo llm.Tool for a typed handler.
// The parameter schema is derived from Args, and incoming arguments are
// decoded leniently (string→number/boolean coercion preserved from the
// legacy gollm era — see argcoerce.go) before fn runs. When the
// arguments cannot be decoded, the wrapped decode error is returned
// WITHOUT running fn, which frames an arg-parse error as a tool-call
// wiring failure rather than a tool-handler failure.
//
// Why not majordomo's llm.DefineTool: its decode is strict by design;
// mort's tool catalog keeps the lenient dialect for parity with years
// of model traffic that emits "3" where the schema says integer.
//
// Panics if a schema cannot be derived from Args.
func defineTypedTool[Args any](name, description string, fn func(ctx context.Context, args Args) (string, error)) llm.Tool {
	params, schemaErr := llm.SchemaFor[Args]()
	if schemaErr != nil {
		panic(fmt.Sprintf("skilltools: defineTypedTool(%q): %v", name, schemaErr))
	}

	handler := func(ctx context.Context, raw json.RawMessage) (any, error) {
		var decoded Args
		decodeErr := unmarshalArgsLenient(raw, &decoded)
		if decodeErr != nil {
			return nil, fmt.Errorf("invalid arguments for %s: %w", name, decodeErr)
		}
		return fn(ctx, decoded)
	}

	return llm.Tool{
		Name:        name,
		Description: description,
		Parameters:  params,
		Handler:     handler,
	}
}
// NewGatedTool packages a typed handler as a Tool whose built llm.Tool
// automatically:
//  1. Runs CheckGate(inv) before the handler. On rejection it emits
//     EmitAudit(inv, "{}", "", err) and returns the gate error.
//  2. Invokes fn(ctx, inv, args) once the gate has passed.
//  3. Marshals the decoded args back to JSON for the audit row (so the
//     recorded args reflect any coercion applied while decoding) and
//     emits EmitAudit(inv, argsJSON, result, err) with fn's outcome.
//
// Production tools SHOULD be built with NewGatedTool unless there is a
// strong reason to gate by hand. The previous per-tool pattern repeated
// the same boilerplate (CheckGate at the top, EmitAudit on every return
// path) and it was easy to forget — wizard tools in v1 hotfix #4 had to
// be retrofitted because CheckGate was overlooked. Centralising the
// calls here makes them impossible to skip, and the registry meta-test
// enforces the discipline through IsGatedTool.
//
// JSON parsing and argument coercion happen in the typed define layer
// before fn runs. When the arguments cannot be parsed the decode error
// is returned directly and NO audit row is emitted: a parse error is a
// tool-call wiring failure rather than a tool-handler failure.
//
// The gated-tool tests cover gate rejection, the happy path, an error
// returned by fn, and the IsGatedTool assertion.
func NewGatedTool[Args any](
	name, description string,
	permission Permission,
	fn func(ctx context.Context, inv Invocation, args Args) (string, error),
) Tool {
	wrapped := new(gatedTool[Args])
	wrapped.name = name
	wrapped.description = description
	wrapped.permission = permission
	wrapped.fn = fn
	return wrapped
}
// Name reports the key under which the tool is registered.
func (g *gatedTool[Args]) Name() string {
	return g.name
}

// Description reports the text presented to the LLM for this tool.
func (g *gatedTool[Args]) Description() string {
	return g.description
}

// Permission reports the tool's classification used for save-time /
// share-time gating.
func (g *gatedTool[Args]) Permission() Permission {
	return g.permission
}
// BuildLLM returns the llm.Tool bound to one invocation. Its handler:
//   - Runs CheckGate(inv) FIRST, before any handler logic. On rejection
//     the audit row is emitted with args "{}" and the gate error is
//     returned; fn never sees a gate-rejected invocation.
//   - Marshals the typed args to JSON, then calls the user-supplied fn.
//   - Emits the audit row exactly once after fn returns, whether fn
//     succeeded or failed.
//
// Why marshal the typed args instead of logging the raw LLM JSON: the
// lenient decode coerces values (e.g. "3" → 3) before the handler runs,
// and the audit row should show what fn received, not the pre-coercion
// text the model emitted.
func (g *gatedTool[Args]) BuildLLM(inv Invocation) llm.Tool {
	run := func(ctx context.Context, args Args) (string, error) {
		if gateErr := CheckGate(inv); gateErr != nil {
			EmitAudit(inv, "{}", "", gateErr)
			return "", gateErr
		}

		// Default to "{}" so a marshal failure (vanishingly rare for the
		// typed param structs in use) never leaves a half-formed args
		// field in the audit row.
		auditArgs := "{}"
		if encoded, encErr := json.Marshal(args); encErr == nil {
			auditArgs = string(encoded)
		}

		out, runErr := g.fn(ctx, inv, args)
		EmitAudit(inv, auditArgs, out, runErr)
		return out, runErr
	}
	return defineTypedTool[Args](g.name, g.description, run)
}
// IsGatedTool tells whether t came from NewGatedTool or
// NewGatedToolWithAudit. The registry meta-test uses it to enforce that
// every registered production tool goes through the wrapper. The check
// is a type match against the unexported gatedToolMarker interface, so
// only the gated variants defined in this package can satisfy it — an
// external Tool has no way to pretend to be gated.
func IsGatedTool(t Tool) bool {
	switch t.(type) {
	case gatedToolMarker:
		return true
	default:
		return false
	}
}
// AuditedResult is what a NewGatedToolWithAudit handler returns. It
// separates the string surfaced to the LLM (LLMResult — the tool-call
// result the model sees in its conversation) from the values the
// wrapper writes to the audit row (AuditArgs and AuditResult), which
// are logged INSTEAD of the auto-derived values.
//
// Why a separate variant: a small number of tools (paste_create being
// the canonical example) need to return a sensitive value to the LLM
// (a URL containing an encryption-key fragment) but MUST redact that
// value from the audit row, since the audit row is rendered to admins
// in the webui run-trace view. The default wrapper auto-logs args +
// result, which would leak the key. NewGatedToolWithAudit lets the
// handler explicitly separate the LLM-visible output from the
// audit-visible output, while still benefitting from auto-injected
// CheckGate.
type AuditedResult struct {
	// LLMResult is the string returned to the LLM as the tool result.
	// The wrapper never copies it into the audit row.
	LLMResult string
	// AuditArgs is the args string written to the audit row. If empty,
	// the wrapper falls back to the JSON-marshaled typed args (same
	// behaviour as NewGatedTool), or "{}" if that marshal fails.
	AuditArgs string
	// AuditResult is the result string written to the audit row. May
	// be empty (logged as "") to suppress sensitive fragments.
	AuditResult string
}
// gatedToolWithAudit mirrors gatedTool, except that its handler returns
// an AuditedResult and can therefore override what the audit row
// records.
type gatedToolWithAudit[Args any] struct {
	name, description string
	permission        Permission
	fn                func(ctx context.Context, inv Invocation, args Args) (AuditedResult, error)
}
// isGatedTool is the no-op marker method that makes *gatedToolWithAudit
// satisfy gatedToolMarker; IsGatedTool relies on it.
func (g *gatedToolWithAudit[Args]) isGatedTool() {
}

// Name reports the key under which the tool is registered.
func (g *gatedToolWithAudit[Args]) Name() string {
	return g.name
}

// Description reports the text presented to the LLM for this tool.
func (g *gatedToolWithAudit[Args]) Description() string {
	return g.description
}

// Permission reports the tool's permission classification.
func (g *gatedToolWithAudit[Args]) Permission() Permission {
	return g.permission
}
// NewGatedToolWithAudit is the redaction-aware sibling of NewGatedTool.
// Reach for it ONLY when the LLM-facing result must differ from the
// audit row (e.g. the result carries an encryption key the audit must
// NOT capture). Most tools should use NewGatedTool.
//
// Behaviour is the same as NewGatedTool, with two differences:
//   - The handler returns an AuditedResult. The wrapper hands
//     AuditedResult.LLMResult to the LLM and writes
//     AuditedResult.AuditArgs / AuditedResult.AuditResult to the audit
//     row, falling back to the JSON-marshaled args when AuditArgs is
//     empty.
//   - Gate rejection still emits an audit row with an empty result and
//     args="{}" before the gate error is returned.
//
// Covered alongside NewGatedTool in the gated-tool tests.
func NewGatedToolWithAudit[Args any](
	name, description string,
	permission Permission,
	fn func(ctx context.Context, inv Invocation, args Args) (AuditedResult, error),
) Tool {
	wrapped := new(gatedToolWithAudit[Args])
	wrapped.name = name
	wrapped.description = description
	wrapped.permission = permission
	wrapped.fn = fn
	return wrapped
}
// BuildLLM returns the llm.Tool bound to one invocation. The gate is
// injected exactly as in gatedTool[Args].BuildLLM. The audit row,
// however, is filled from the handler-supplied AuditArgs / AuditResult,
// so a sensitive LLM-visible result string never reaches the audit log.
func (g *gatedToolWithAudit[Args]) BuildLLM(inv Invocation) llm.Tool {
	run := func(ctx context.Context, args Args) (string, error) {
		if gateErr := CheckGate(inv); gateErr != nil {
			EmitAudit(inv, "{}", "", gateErr)
			return "", gateErr
		}

		outcome, runErr := g.fn(ctx, inv, args)

		// An empty AuditArgs means "no override": log the typed args as
		// JSON, or "{}" when they cannot be marshaled.
		loggedArgs := outcome.AuditArgs
		if loggedArgs == "" {
			loggedArgs = "{}"
			if encoded, encErr := json.Marshal(args); encErr == nil {
				loggedArgs = string(encoded)
			}
		}

		EmitAudit(inv, loggedArgs, outcome.AuditResult, runErr)
		return outcome.LLMResult, runErr
	}
	return defineTypedTool[Args](g.name, g.description, run)
}
+401
View File
@@ -0,0 +1,401 @@
package tool
import (
"context"
"encoding/json"
"errors"
"strings"
"sync"
"testing"
llm "gitea.stevedudenhoeffer.com/steve/majordomo/llm"
)
// gatedTestParams is the typed param struct shared by the gated_tool
// tests. It mirrors a real production tool: a couple of strings the LLM
// supplies. The tests decode it from the arguments JSON and read it back
// from the audit row's args.
type gatedTestParams struct {
	// Question is always present in the JSON encoding.
	Question string `json:"question" description:"The question to answer."`
	// Detail is dropped from the JSON encoding when empty (omitempty).
	Detail string `json:"detail,omitempty" description:"Optional detail level."`
}
// recordingAudit collects every AuditCall the wrapper emits so a test
// can assert exactly what was logged. The mutex keeps it safe should a
// future test fan out across goroutines.
type recordingAudit struct {
	mu    sync.Mutex
	calls []AuditCall
}

// hook returns an AuditHook that appends each received call to the
// recorder under the lock.
func (r *recordingAudit) hook() AuditHook {
	record := func(call AuditCall) {
		r.mu.Lock()
		defer r.mu.Unlock()
		r.calls = append(r.calls, call)
	}
	return record
}

// snapshot returns a copy of the calls recorded so far; the caller can
// inspect it without holding the lock.
func (r *recordingAudit) snapshot() []AuditCall {
	r.mu.Lock()
	defer r.mu.Unlock()

	recorded := make([]AuditCall, len(r.calls))
	copy(recorded, r.calls)
	return recorded
}
// buildAndExecute is a test-only shortcut from a constructed Tool to the
// outcome of one tool call: it registers the tool in a fresh registry,
// builds it for inv / vis / audit, and executes a call carrying args as
// the raw arguments. Registration or build failures abort the test.
// This mirrors how the production registry's Build call wires
// inv.gate / inv.audit.
func buildAndExecute(t *testing.T, tool Tool, inv Invocation, vis Visibility, audit AuditHook, args string) (string, error) {
	t.Helper()

	registry := NewRegistry()
	if regErr := registry.Register(tool); regErr != nil {
		t.Fatalf("register: %v", regErr)
	}

	names := []string{tool.Name()}
	built, buildErr := registry.Build(names, inv, vis, audit)
	if buildErr != nil {
		t.Fatalf("build: %v", buildErr)
	}

	call := toolCall{Name: tool.Name(), Arguments: args}
	return execBox(built, call)
}
// TestNewGatedTool_GateRejection verifies that the wrapper auto-injects
// CheckGate: if the invocation's SkillName doesn't match the tool's
// SkillNameGate, fn never runs and the audit row is emitted with the
// gate error. This is the core contract that v1 hotfix #4 had to
// retrofit by hand.
func TestNewGatedTool_GateRejection(t *testing.T) {
called := false
tool := NewGatedTool[gatedTestParams](
"gated_test_tool",
"A test tool gated to my-skill.",
Permission{
AuthoringRequirement: RequirementAnyone,
OperatesOn: ScopeGlobal,
SafeForShare: true,
SkillNameGate: "my-skill",
},
func(ctx context.Context, inv Invocation, args gatedTestParams) (string, error) {
called = true
return "should not be reached", nil
},
)
rec := &recordingAudit{}
out, err := buildAndExecute(t, tool,
Invocation{SkillName: "other-skill"},
VisibilityPrivate, rec.hook(),
`{"question":"hi"}`)
if err == nil {
t.Fatalf("expected gate-rejection error, got out=%q err=nil", out)
}
if !strings.Contains(err.Error(), "restricted to") {
t.Fatalf("expected error containing 'restricted to', got %v", err)
}
if called {
t.Errorf("fn was called despite gate rejection — wrapper failed to inject CheckGate")
}
calls := rec.snapshot()
if len(calls) != 1 {
t.Fatalf("expected exactly 1 audit call, got %d: %+v", len(calls), calls)
}
if calls[0].Err == nil {
t.Errorf("audit call.Err was nil; expected the gate error")
}
if calls[0].Args != "{}" {
t.Errorf("audit call.Args=%q, want \"{}\" (no args parsed pre-gate)", calls[0].Args)
}
}
// TestNewGatedTool_HappyPath checks the ungated success path: fn
// receives the decoded args and the invocation, its result is returned
// to the caller, and a single error-free audit row is emitted whose
// args are valid JSON that round-trips to the same values.
func TestNewGatedTool_HappyPath(t *testing.T) {
	const wantOut = "answered: what is the time?"

	var (
		gotArgs gatedTestParams
		gotInv  Invocation
	)
	gated := NewGatedTool[gatedTestParams](
		"gated_happy_tool",
		"A test tool with no gate.",
		Permission{
			AuthoringRequirement: RequirementAnyone,
			OperatesOn:           ScopeGlobal,
			SafeForShare:         true,
		},
		func(ctx context.Context, inv Invocation, args gatedTestParams) (string, error) {
			gotArgs, gotInv = args, inv
			return "answered: " + args.Question, nil
		},
	)

	sink := &recordingAudit{}
	out, execErr := buildAndExecute(t, gated,
		Invocation{SkillName: "any-skill", CallerID: "user-7"},
		VisibilityPrivate, sink.hook(),
		`{"question":"what is the time?","detail":"verbose"}`)
	if execErr != nil {
		t.Fatalf("execute: %v", execErr)
	}
	if out != wantOut {
		t.Errorf("unexpected output: %q", out)
	}
	if !(gotArgs.Question == "what is the time?" && gotArgs.Detail == "verbose") {
		t.Errorf("fn received %+v, want question/detail populated", gotArgs)
	}
	if gotInv.CallerID != "user-7" {
		t.Errorf("fn saw CallerID=%q, want user-7", gotInv.CallerID)
	}

	logged := sink.snapshot()
	if len(logged) != 1 {
		t.Fatalf("expected exactly 1 audit call, got %d", len(logged))
	}
	row := logged[0]
	if row.Err != nil {
		t.Errorf("audit call.Err=%v, want nil", row.Err)
	}
	if row.Result != wantOut {
		t.Errorf("audit call.Result=%q, want match output", row.Result)
	}

	// The wrapper logs re-marshaled args: they must parse as JSON and
	// carry the same field values the handler saw.
	var decoded gatedTestParams
	if decodeErr := json.Unmarshal([]byte(row.Args), &decoded); decodeErr != nil {
		t.Fatalf("audit args not valid JSON: %q (%v)", row.Args, decodeErr)
	}
	if !(decoded.Question == "what is the time?" && decoded.Detail == "verbose") {
		t.Errorf("audit args round-trip mismatch: %+v", decoded)
	}
}
// TestNewGatedTool_FnError checks the failure path: an error returned
// by fn reaches the caller, and the single audit row records BOTH the
// partial result string and the error.
func TestNewGatedTool_FnError(t *testing.T) {
	gated := NewGatedTool[gatedTestParams](
		"gated_fn_err_tool",
		"A test tool whose handler always errors.",
		Permission{
			AuthoringRequirement: RequirementAnyone,
			OperatesOn:           ScopeGlobal,
			SafeForShare:         true,
		},
		func(ctx context.Context, inv Invocation, args gatedTestParams) (string, error) {
			return "partial output", errors.New("boom")
		},
	)

	sink := &recordingAudit{}
	out, execErr := buildAndExecute(t, gated,
		Invocation{SkillName: "any-skill"},
		VisibilityPrivate, sink.hook(),
		`{"question":"x"}`)

	// Only the error is asserted on the caller side; per the original
	// note the LLM-side execute drops the string when the handler fails.
	// The audit row, however, must keep both partial result and error.
	if execErr == nil || !strings.Contains(execErr.Error(), "boom") {
		t.Fatalf("expected boom error, got out=%q err=%v", out, execErr)
	}

	logged := sink.snapshot()
	if len(logged) != 1 {
		t.Fatalf("expected exactly 1 audit call, got %d", len(logged))
	}
	row := logged[0]
	if row.Err == nil || !strings.Contains(row.Err.Error(), "boom") {
		t.Errorf("audit call.Err=%v, want boom", row.Err)
	}
	if row.Result != "partial output" {
		t.Errorf("audit call.Result=%q, want 'partial output' (partial captured)", row.Result)
	}
}
// TestNewGatedTool_ArgsParseHandledByLLM_NoAuditEmitted documents the
// behaviour at the wrapper boundary: when the LLM sends malformed JSON
// args, llm.Define's Execute fails BEFORE the wrapper's inner closure
// runs. The wrapper does NOT emit an audit row in that case — it never
// got the chance. This is intentional: arg-parse failure is a
// tool-call wiring problem, not a tool-handler problem; the audit log
// reflects what the handler did, and on parse failure no handler ran.
//
// The test exists so future readers see this invariant documented in
// code and don't re-introduce a "log everything" path that breaks the
// wrapper's contract with the audit storage layer.
func TestNewGatedTool_ArgsParseHandledByLLM_NoAuditEmitted(t *testing.T) {
tool := NewGatedTool[gatedTestParams](
"gated_parse_err_tool",
"A test tool that should never receive bad JSON.",
Permission{
AuthoringRequirement: RequirementAnyone,
OperatesOn: ScopeGlobal,
SafeForShare: true,
},
func(ctx context.Context, inv Invocation, args gatedTestParams) (string, error) {
t.Fatalf("fn ran despite malformed JSON — should never happen")
return "", nil
},
)
rec := &recordingAudit{}
_, err := buildAndExecute(t, tool,
Invocation{SkillName: "any-skill"},
VisibilityPrivate, rec.hook(),
`{"question":not-quoted}`) // intentionally malformed
if err == nil {
t.Fatalf("expected JSON parse error, got nil")
}
if calls := rec.snapshot(); len(calls) != 0 {
t.Errorf("audit emitted %d calls on parse error; expected 0 (parse-fail is pre-handler)", len(calls))
}
}
// TestIsGatedTool_DetectsWrapped confirms that NewGatedTool's return
// value satisfies the gatedToolMarker interface so the meta-test can
// distinguish wrapped from unwrapped tools.
func TestIsGatedTool_DetectsWrapped(t *testing.T) {
tool := NewGatedTool[gatedTestParams](
"gated_marker_tool", "marker test",
Permission{AuthoringRequirement: RequirementAnyone},
func(ctx context.Context, inv Invocation, args gatedTestParams) (string, error) {
return "", nil
},
)
if !IsGatedTool(tool) {
t.Fatalf("IsGatedTool returned false for a NewGatedTool result")
}
}
// TestIsGatedTool_DetectsNonWrapped is the negative half of the
// detection test: a hand-rolled Tool that does NOT go through
// NewGatedTool must fail IsGatedTool. This guards the meta-test
// against trivially passing for everything.
func TestIsGatedTool_DetectsNonWrapped(t *testing.T) {
stub := manualToolStub{}
if IsGatedTool(stub) {
t.Fatalf("IsGatedTool returned true for a non-wrapped Tool — detection broken")
}
}
// manualToolStub implements Tool by hand, without going through
// NewGatedTool. It exists only to prove that IsGatedTool rejects
// implementations that were not produced by the wrapper.
type manualToolStub struct{}

// Name returns the stub's fixed registry key.
func (manualToolStub) Name() string {
	return "manual_stub"
}

// Description returns the stub's fixed description.
func (manualToolStub) Description() string {
	return "manual stub"
}

// Permission returns the zero Permission.
func (manualToolStub) Permission() Permission {
	return Permission{}
}

// BuildLLM defines a trivial tool with an empty params struct whose
// handler returns an empty string; the invocation is ignored.
func (manualToolStub) BuildLLM(Invocation) llm.Tool {
	type p struct{}
	noop := func(ctx context.Context, _ p) (any, error) {
		return "", nil
	}
	return llm.DefineTool("manual_stub", "manual stub", noop)
}
// TestNewGatedToolWithAudit_RedactsAuditResult exercises the variant
// used by paste_create: the LLM gets a sensitive string (e.g. a URL
// with a fragment-encoded key) while the audit row records only the
// redacted summary supplied by the handler. It confirms LLMResult and
// AuditResult are kept apart.
func TestNewGatedToolWithAudit_RedactsAuditResult(t *testing.T) {
	const secret = "secret-fragment-12345"

	audited := NewGatedToolWithAudit[gatedTestParams](
		"audited_tool",
		"A tool whose audit result is redacted from its LLM result.",
		Permission{AuthoringRequirement: RequirementAnyone, SafeForShare: true},
		func(ctx context.Context, inv Invocation, args gatedTestParams) (AuditedResult, error) {
			redacted := AuditedResult{
				LLMResult:   "secret-fragment-12345",
				AuditArgs:   "redacted",
				AuditResult: "[redacted]",
			}
			return redacted, nil
		},
	)
	if !IsGatedTool(audited) {
		t.Fatalf("audited variant must satisfy IsGatedTool")
	}

	sink := &recordingAudit{}
	out, execErr := buildAndExecute(t, audited,
		Invocation{SkillName: "any"},
		VisibilityPrivate, sink.hook(),
		`{"question":"x"}`)
	if execErr != nil {
		t.Fatalf("execute: %v", execErr)
	}
	if out != secret {
		t.Errorf("LLM saw %q, want secret-fragment-12345", out)
	}

	logged := sink.snapshot()
	if len(logged) != 1 {
		t.Fatalf("expected 1 audit call, got %d", len(logged))
	}
	row := logged[0]
	if row.Args != "redacted" {
		t.Errorf("audit args=%q, want redacted", row.Args)
	}
	if row.Result != "[redacted]" {
		t.Errorf("audit result=%q, want [redacted]", row.Result)
	}
	if strings.Contains(row.Result, secret) {
		t.Fatalf("audit leaked LLM result into Result field: %q", row.Result)
	}
}
// TestNewGatedToolWithAudit_GateRejection mirrors the gate-rejection
// test for the default wrapper to anchor the same contract for the
// audited variant.
func TestNewGatedToolWithAudit_GateRejection(t *testing.T) {
tool := NewGatedToolWithAudit[gatedTestParams](
"audited_gated_tool", "gated tool",
Permission{
AuthoringRequirement: RequirementAnyone,
SkillNameGate: "my-skill",
},
func(ctx context.Context, inv Invocation, args gatedTestParams) (AuditedResult, error) {
t.Fatalf("fn should not run on gate rejection")
return AuditedResult{}, nil
},
)
rec := &recordingAudit{}
_, err := buildAndExecute(t, tool,
Invocation{SkillName: "other"},
VisibilityPrivate, rec.hook(),
`{}`)
if err == nil || !strings.Contains(err.Error(), "restricted to") {
t.Fatalf("expected gate rejection, got %v", err)
}
calls := rec.snapshot()
if len(calls) != 1 || calls[0].Err == nil {
t.Fatalf("expected gate-rejection audit row, got %+v", calls)
}
}
// TestNewGatedToolWithAudit_FallbackArgs checks that leaving AuditArgs
// empty makes the wrapper log the JSON-marshaled typed args instead,
// matching the default wrapper's behaviour.
func TestNewGatedToolWithAudit_FallbackArgs(t *testing.T) {
	audited := NewGatedToolWithAudit[gatedTestParams](
		"audited_fallback_tool", "fallback args test",
		Permission{AuthoringRequirement: RequirementAnyone},
		func(ctx context.Context, inv Invocation, args gatedTestParams) (AuditedResult, error) {
			// AuditArgs is deliberately left at its zero value.
			var res AuditedResult
			res.LLMResult = "ok"
			res.AuditResult = "ok"
			return res, nil
		},
	)

	sink := &recordingAudit{}
	_, execErr := buildAndExecute(t, audited,
		Invocation{SkillName: "x"},
		VisibilityPrivate, sink.hook(),
		`{"question":"hi"}`)
	if execErr != nil {
		t.Fatalf("execute: %v", execErr)
	}

	logged := sink.snapshot()
	if len(logged) != 1 {
		t.Fatalf("expected 1 audit call, got %d", len(logged))
	}
	if loggedArgs := logged[0].Args; !strings.Contains(loggedArgs, "hi") {
		t.Errorf("expected fallback to JSON args containing 'hi', got %q", loggedArgs)
	}
}
+44
View File
@@ -0,0 +1,44 @@
package tool
import "fmt"
// CheckGate reports whether the invocation may run the tool it was
// built for. Tools whose Permission carries a non-empty SkillNameGate
// should call it at the top of their handler.
//
// Why per-call (not per-build): the same Tool may be referenced by
// several skills, only one of which is gate-eligible. Build cannot know
// in advance which skill will call it — that is per-Invocation.
//
// It returns nil when the invocation has no gate (inv.gate is empty) or
// when inv.currentSkill equals the gate. Otherwise it returns an error
// naming the tool and the required skill, suitable for surfacing to the
// LLM as the tool's failure result.
func CheckGate(inv Invocation) error {
	if inv.gate != "" && inv.currentSkill != inv.gate {
		return fmt.Errorf("tool %q is restricted to the %q skill", inv.toolName, inv.gate)
	}
	return nil
}
// EmitAudit hands one tool call's outcome to the invocation's audit
// hook; it is a no-op when no hook is wired. Tools should call it once
// per Execute, after the underlying work has finished (whether or not
// it failed). The AuditCall is built from inv.toolName plus the args
// string, result string and error passed in, all recorded verbatim.
//
// Why the emission lives inside the tool: the tool is the one place
// that knows the args it actually ran with (after coercion / defaults),
// as opposed to the raw LLM JSON, which can drift.
func EmitAudit(inv Invocation, args, result string, err error) {
	if hook := inv.audit; hook != nil {
		hook(AuditCall{
			Tool:   inv.toolName,
			Args:   args,
			Result: result,
			Err:    err,
		})
	}
}
+121
View File
@@ -0,0 +1,121 @@
// Package tool — hmac.go: HMAC-SHA256 signature verification
// for the v7 inbound webhook subsystem.
//
// Why a small util in pkg/skilltools (vs inline in skillsui): the
// signature format is part of the skill platform's public contract —
// callers (GitHub, monitoring, Stripe, etc) compute it client-side,
// and other parts of mort may eventually verify the same shape (e.g.
// outbound retry verification). A shared util means we test the
// verifier once and the format stays consistent.
//
// Format:
//
// X-Mort-Signature: sha256=<hex(HMAC-SHA256(secret, body))>
// X-Mort-Timestamp: <unix-seconds>
//
// The timestamp is included so a stolen payload+signature pair can't
// be replayed indefinitely. Default skew window is 5 min via the
// caller-supplied maxSkew. The body is verified verbatim — callers
// must NOT canonicalise (the LLM-supplied JSON shape is usually
// unstable on round-trip).
package tool
import (
"crypto/hmac"
"crypto/sha256"
"encoding/hex"
"errors"
"strconv"
"strings"
"time"
)
// HMAC-related sentinel errors. Callers compare with errors.Is so
// handler code can surface the right HTTP status (401 vs 400).
var (
	// ErrHMACBadFormat is returned when the signature header is not
	// the expected "sha256=<hex>" form.
	ErrHMACBadFormat = errors.New("hmac: bad signature format")
	// ErrHMACBadSignature is returned when the computed HMAC does
	// not match the supplied signature (constant-time compare).
	ErrHMACBadSignature = errors.New("hmac: signature mismatch")
	// ErrHMACBadTimestamp is returned when a non-empty timestamp is
	// malformed or falls outside the maxSkew window. An empty
	// timestamp is NOT rejected by VerifyHMAC — the freshness check is
	// skipped — so callers that require the header must check for it
	// themselves.
	ErrHMACBadTimestamp = errors.New("hmac: bad or stale timestamp")
	// ErrHMACEmptySecret is returned when verification is requested
	// but the secret is empty — a programmer error (caller should
	// have rejected the request earlier).
	ErrHMACEmptySecret = errors.New("hmac: empty secret")
)

// SignBody returns the canonical signature value for the given body
// + secret: "sha256=" followed by the hex-encoded HMAC-SHA256 of body
// keyed with secret. Used by the test-payload sender on the management
// page.
//
// Why exported: the management page's "send test payload" button needs
// to compute the signature server-side before POSTing; reusing the
// same function ensures the verifier and signer stay in lock-step.
func SignBody(body []byte, secret string) string {
	mac := hmac.New(sha256.New, []byte(secret))
	mac.Write(body)
	return "sha256=" + hex.EncodeToString(mac.Sum(nil))
}

// VerifyHMAC checks the signature + timestamp against the body using
// the supplied secret. Returns nil on success or one of the sentinels:
//   - ErrHMACEmptySecret when secret is empty;
//   - ErrHMACBadTimestamp when a non-empty timestamp does not parse as
//     a base-10 integer of unix seconds, or (with maxSkew > 0) lies
//     more than maxSkew away from the current time in either direction;
//   - ErrHMACBadFormat when signature is not "sha256=<hex>";
//   - ErrHMACBadSignature when the HMAC-SHA256 of body does not match.
//
// An empty timestamp skips the freshness check entirely, and a
// maxSkew <= 0 disables the window (the timestamp must still parse).
//
// Why hmac.Equal: a naive byte comparison returns as soon as it hits
// the first differing byte, so its timing can reveal how much of a
// forged signature is correct. hmac.Equal compares the MACs without
// leaking timing information.
//
// Why the timestamp is part of the verification (not the body): the
// signature does NOT cover the timestamp itself (callers may rotate
// timestamps without re-signing). The timestamp is a separate
// freshness check; if you wanted timestamp-bound replay protection
// you'd include it in the signed payload — but that complicates the
// signing API for callers and the per-skill rate limiter is the
// real defence against rapid replay.
func VerifyHMAC(body []byte, signature, timestamp, secret string, maxSkew time.Duration) error {
	if secret == "" {
		return ErrHMACEmptySecret
	}

	// Timestamp first — cheap reject before the HMAC compute.
	if timestamp != "" {
		ts, err := strconv.ParseInt(strings.TrimSpace(timestamp), 10, 64)
		if err != nil {
			return ErrHMACBadTimestamp
		}
		if maxSkew > 0 {
			now := time.Now().Unix()
			skew := int64(maxSkew / time.Second)
			// Compare against the window bounds instead of computing
			// |now-ts|: the subtraction wraps around for extreme
			// negative ts, and a wrapped difference of exactly MinInt64
			// stays negative under negation, which would slip past a
			// "> skew" test.
			if ts < now-skew || ts > now+skew {
				return ErrHMACBadTimestamp
			}
		}
	}

	// Signature format: "sha256=<hex>"
	const prefix = "sha256="
	if !strings.HasPrefix(signature, prefix) {
		return ErrHMACBadFormat
	}
	provided, err := hex.DecodeString(signature[len(prefix):])
	if err != nil {
		return ErrHMACBadFormat
	}

	mac := hmac.New(sha256.New, []byte(secret))
	mac.Write(body)
	expected := mac.Sum(nil)
	if !hmac.Equal(provided, expected) {
		return ErrHMACBadSignature
	}
	return nil
}
// abs returns the absolute value of x. Non-negative inputs come back
// unchanged; negative inputs are negated.
func abs(x int64) int64 {
	if x >= 0 {
		return x
	}
	return -x
}
+167
View File
@@ -0,0 +1,167 @@
package tool
import (
"reflect"
"strings"
"testing"
llm "gitea.stevedudenhoeffer.com/steve/majordomo/llm"
)
// TestOutputPatternMetaTest_NoRawByteArgs anchors the V10
// byte-vs-reference principle: a tool's typed shapes MUST NOT carry a
// raw []byte field without a documented cap. Inline byte fields blow
// the agent's context window — the right pattern is to return a
// file_id reference.
//
// What the principle is meant to catch:
//   - A future tool author returning {"data": []byte("...")} inline.
//   - `[]byte` or `Bytes` named fields with no annotation flagging
//     them as size-capped.
//
// What it does NOT catch (acceptable trade-off):
//   - Base64-encoded byte fields hidden as `string` (e.g. file_get's
//     content_base64). Existing code is grandfathered — the new
//     pattern is file_get_metadata + file_get_text + send_attachments.
//   - Outputs that are JSON-marshalled at the LLM boundary; the check
//     concerns the Go types, not the wire JSON.
//
// Currently this is a forward-looking contract. The loop below runs
// assertNoRawByteArgs over whatever NewRegistry().List() returns. No
// deps are available here to register production tools, so per the
// original note the live tools are checked where they are registered
// (the tools package's default_test.go). A tool author who wants to
// enforce the rule can run the same walk on their concrete Registry.
func TestOutputPatternMetaTest_NoRawByteArgs(t *testing.T) {
	registered := NewRegistry().List()
	for i := range registered {
		assertNoRawByteArgs(t, registered[i])
	}
}
// assertNoRawByteArgs is the per-tool hook for the "no raw []byte in
// public Args" rule. Only public fields matter for that rule: private
// fields cannot be set by the LLM.
//
// As written it only builds the tool's llm.Tool for a zero Invocation
// and discards it — no fields are inspected here. The built llm.Tool
// exposes just a JSON schema derived from a Go type, and the original
// note defers enforcement to the meta-tests that require
// IsGatedTool(tool): the gated wrappers are typed via generics, so new
// authors going through NewGatedTool[ArgsStruct] declare their Args
// with compile-time awareness.
func assertNoRawByteArgs(t *testing.T, tool Tool) {
	t.Helper()
	_ = tool.BuildLLM(Invocation{})
}
// TestBinaryContentTypeRecognition is a table test for the content-type
// classifier mirrored in this file (per the original note, the
// production counterpart backs http_get's V10 binary persistence path).
// It covers the content types that motivated the v10 change; adding a
// new MIME to the binary list means updating this table alongside the
// classifier so the meta-test stays load-bearing.
func TestBinaryContentTypeRecognition(t *testing.T) {
	type testCase struct {
		ct      string
		want    bool
		comment string
	}
	cases := []testCase{
		{"image/png", true, "image"},
		{"image/jpeg; charset=binary", true, "image with parameter"},
		{"audio/mpeg", true, "audio"},
		{"video/mp4", true, "video"},
		{"application/pdf", true, "pdf"},
		{"application/octet-stream", true, "octet-stream"},
		{"application/zip", true, "zip"},
		{"text/plain", false, "text"},
		{"text/html; charset=utf-8", false, "html"},
		{"application/json", false, "json"},
		{"application/xml", false, "xml"},
		{"", false, "empty"},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.comment, func(t *testing.T) {
			if got := isBinaryContentTypeForTest(tc.ct); got != tc.want {
				t.Fatalf("ct=%q got=%v want=%v", tc.ct, got, tc.want)
			}
		})
	}
}
// isBinaryContentTypeForTest reports whether a Content-Type value names
// a binary payload. The value is lower-cased and trimmed, and anything
// from the first ';' on (parameters such as charset) is ignored. Any
// image/, audio/ or video/ type counts as binary, as does a fixed list
// of application/ types (octet-stream, pdf, archives, office
// documents). Everything else — including the empty string — does not.
//
// Per the original note this duplicates the tools package's
// isBinaryContentType so the package-level meta-test need not import
// tools/; the two MUST stay in sync.
func isBinaryContentTypeForTest(ct string) bool {
	mediaType, _, _ := strings.Cut(strings.ToLower(strings.TrimSpace(ct)), ";")
	mediaType = strings.TrimSpace(mediaType)

	for _, family := range []string{"image/", "audio/", "video/"} {
		if strings.HasPrefix(mediaType, family) {
			return true
		}
	}

	switch mediaType {
	case "application/octet-stream",
		"application/pdf",
		"application/zip",
		"application/x-tar",
		"application/x-gzip",
		"application/x-bzip2",
		"application/x-7z-compressed",
		"application/msword",
		"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
		"application/vnd.ms-excel",
		"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
		"application/vnd.ms-powerpoint",
		"application/vnd.openxmlformats-officedocument.presentationml.presentation":
		return true
	default:
		return false
	}
}
// TestArgsStructsHaveNoRawBytes is the static-typed check the v10
// principle relies on: NewGatedTool[Args] is the public surface, and
// Args structs MUST NOT carry a `[]byte`. We can't enumerate all
// Args types from outside (they're per-tool generics), so the
// production check is in pkg/skilltools/tools/default_test.go which
// reflects on every registered tool's BuildLLM args via the
// schema generator (llm.SchemaFor).
//
// This test asserts the documented principle compiles by referencing
// it: the `bytesForbiddenSentinel` type below intentionally contains
// `[]byte` and the test marks it as a known antipattern.
func TestArgsStructsHaveNoRawBytes(t *testing.T) {
	sentinel := reflect.TypeOf(bytesForbiddenSentinel{})
	// The sentinel must stay a struct with exactly one slice-typed field.
	hasExpectedShape := sentinel.NumField() == 1 &&
		sentinel.Field(0).Type.Kind() == reflect.Slice
	if !hasExpectedShape {
		t.Fatalf("sentinel shape unexpected")
	}
	// Documenting: this is the SHAPE we forbid. Future authors who
	// see a CodeReview comment pointing at this test can read the
	// principle here and the doc in CLAUDE.md. (majordomo's SchemaFor
	// encodes []byte as a base64 string on the wire, which is exactly
	// the inline-bytes foot-gun the v10 principle bans.)
	// Both results are discarded on purpose: the call exists only to
	// keep the schema generator referenced against the sentinel.
	_, _ = llm.SchemaFor[bytesForbiddenSentinel]()
}
// bytesForbiddenSentinel is the antipattern shape for tool Args: a
// struct carrying raw bytes. It exists only for
// TestArgsStructsHaveNoRawBytes, which references it so a developer
// searching for "[]byte" in the codebase finds the explanation
// immediately. Do not use it as a template for real Args.
type bytesForbiddenSentinel struct {
	// Data is the forbidden field; its description tag spells out the
	// file_id-based alternatives.
	Data []byte `json:"data" description:"DO NOT USE: raw bytes in Args blow the LLM's context window. Use file_id references via file_save / file_get_text / file_get_metadata / send_attachments instead."`
}
+701
View File
@@ -0,0 +1,701 @@
// Package tool (formerly skilltools) is the tool registry for the agentic skills platform.
// Tools registered here can be referenced by name from a Skill's Tools
// list and are surfaced to the underlying majordomo agent loop via Build().
//
// Independent of pkg/logic/chatbot/tool_provider.go: the chatbot's
// ToolProvider supplies tools per-channel during a chatbot turn; skill
// tools are scoped to one skill execution. Bridging happens once, in
// pkg/logic/skills/chatbot_provider.go, which exposes whole agent skills
// as chatbot tools (not individual skill tools).
//
// Permission model is documented in
// docs/superpowers/specs/2026-05-02-agentic-skills-design.md, "Tool
// registry" section. Three orthogonal checks:
//
// 1. Save-time: AuthoringRequirement vs caller's admin status.
// 2. Share-time: SafeForShare for visibility != private.
// 3. Execute-time: SkillNameGate.
package tool
import (
"context"
"fmt"
"sync"
"time"
llm "gitea.stevedudenhoeffer.com/steve/majordomo/llm"
)
// Visibility is the spec's visibility enum mirrored here as a typed
// string. It's redeclared (vs imported from pkg/logic/skills) to break
// the import cycle that would otherwise form: skills → skilltools →
// skills. The string values match that package's Visibility values
// one-to-one so a caller can pass `string(VisibilityPublic)` and it
// just works.
type Visibility string

// Visibility values. The registry's Build applies its share-safety
// check to every value other than VisibilityPrivate.
const (
	VisibilityPrivate Visibility = "private"
	VisibilityShared  Visibility = "shared"
	VisibilityPublic  Visibility = "public"
)
// Tool is what a registry entry implements. Concrete tools wrap an
// underlying mort subsystem (e.g. wolfram, weather, paste) and produce
// an llm.Tool on demand for a given Invocation.
//
// Why an interface (vs majordomo's concrete llm.Tool): we need richer
// metadata (Permission, Categories, SkillNameGate) for the platform's
// gating logic before we hand the tool to majordomo. BuildLLM converts
// to llm.Tool for one execution, closing over the Invocation so the
// per-tool handler can read CallerID/ChannelID without further plumbing.
//
// Why BuildLLM-per-call (vs static llm.Tool): per-user tools must close
// over inv.CallerID — the LLM-supplied args are intentionally ignored
// for those. Constructing the llm.Tool inside BuildLLM lets each tool
// craft its own typed Define call while reading the invocation context.
//
// Test: each tool under pkg/skilltools/tools/ has its own *_test.go.
type Tool interface {
	// Name is the key the registry stores the tool under. The registry
	// rejects an empty name and a name that is already registered.
	Name() string

	// Description is the tool's description text.
	Description() string

	// Permission returns the tool's gating metadata. The registry's
	// Build reads SafeForShare and SkillNameGate from it.
	Permission() Permission

	// BuildLLM produces the llm.Tool for one invocation. The returned
	// tool's name MUST equal Name(); the registry's Build() relies on
	// this when wiring multiple tools into a Toolbox.
	BuildLLM(inv Invocation) llm.Tool
}
// Permission summarises the three lifecycle gates (save-time
// AuthoringRequirement, share-time SafeForShare, execute-time
// SkillNameGate) plus UI metadata.
type Permission struct {
	// AuthoringRequirement governs who may SAVE a skill that uses
	// this tool: anyone or admin-only.
	AuthoringRequirement Requirement

	// OperatesOn classifies whose data the tool reads: global
	// (channel-wide, public sources) or caller (the invoking user's
	// own data).
	OperatesOn Scope

	// SafeForShare reports whether the tool may appear in a shared or
	// public skill. Tools that operate on caller data are typically
	// not safe for share — the executing skill becomes a vector for
	// reading other users' data. The registry's Build re-checks this
	// at run time for non-private, non-trusted skills.
	SafeForShare bool

	// Categories are free-form labels used for UI grouping (read,
	// write, network, code, data, social). Code does NOT branch on
	// these strings.
	Categories []string

	// SkillNameGate, if non-empty, restricts execution to the named
	// skill. Used for wizard-only tools in v2; SkillNameGate=="" means
	// any skill may use the tool. Build copies this value onto the
	// per-tool Invocation so the handler's CheckGate call can read it.
	SkillNameGate string
}
// Requirement is who is allowed to author (save) a skill using this
// tool. It is the type of Permission.AuthoringRequirement.
type Requirement string

const (
	// RequirementAnyone: any user may save a skill using the tool.
	RequirementAnyone Requirement = "anyone"
	// RequirementAdmin: only admins may save a skill using the tool.
	RequirementAdmin Requirement = "admin"
)
// Scope classifies the data domain a tool acts on. It is the type of
// Permission.OperatesOn.
type Scope string

const (
	// ScopeGlobal: channel-wide / public sources.
	ScopeGlobal Scope = "global"
	// ScopeCaller: the invoking user's own data.
	ScopeCaller Scope = "caller"
)
// ContinuationContext describes a V10 reply continuation. When set on
// an Invocation (via Invocation.Continuation), the skill executor reuses
// the parent run's KV scope, renders a continuation prompt, and bumps
// ChainDepth for cap enforcement.
//
// The executor reads ParentRunID to set the new run's parent_run_id
// column (for call-tree reconstruction); ParentOutput to render the
// "previous output you sent" line in the agent prompt; ReplyText to
// render the "user replied with" line; ReplyMessageID for diagnostic
// logging; and ChainDepth to compare against
// skills.reply.max_chain_depth.
//
// Why ChainDepth (vs walking parent_run_id at execution time): a fresh
// query per turn would add a DB roundtrip on every reply hop. Carrying
// the count in the invocation is cheap and authoritative.
type ContinuationContext struct {
	// ParentRunID is the run that produced the message the user
	// replied to. The new run inherits its KV scope (run:<ParentRunID>).
	ParentRunID string

	// ParentOutput is the text the parent run delivered to Discord —
	// stored on the run row so it survives even if the parent's
	// run-scope KV has been auto-purged (24h after parent finished).
	ParentOutput string

	// ReplyText is what the user said when they replied (the new
	// turn's user input). May be empty if the reply was an
	// attachment-only message; the agent should treat empty input as
	// a "noop continuation" rather than fail.
	ReplyText string

	// ReplyMessageID is the Discord message ID of the user's reply.
	// Used for audit + log breadcrumbs; not currently consumed by the
	// agent prompt.
	ReplyMessageID string

	// ChainDepth is how many continuation hops have happened in the
	// chain rooted at the original invocation. The router should set
	// this to (parent's chain depth + 1). The executor rejects when
	// it exceeds skills.reply.max_chain_depth.
	ChainDepth int
}
// InputFile is a non-image file the user supplied with a run (audio,
// etc.). The executor stages it into the file store under run scope and
// surfaces its file_id to the agent.
type InputFile struct {
	// Name is a safe base name (no path separators) suitable for
	// /workspace/<name>.
	Name string

	// MimeType is the resolved content type.
	MimeType string

	// Data is the raw bytes of the file.
	Data []byte
}
// Invocation is the runtime context passed to Tool.BuildLLM. The executor
// builds it once per skill run and the same struct is closed over by
// every tool's handler, so each tool sees the caller / channel identity.
//
// The registry's Build hands each tool its own COPY with the unexported
// gate/audit fields filled in, so per-tool values never leak between
// tools built from the same Invocation.
type Invocation struct {
	// SkillID / SkillName identify the enclosing skill; RunID is this
	// run's id. CallerID / ChannelID / GuildID carry the caller and
	// channel identity. CallerID may be empty for system-invoked runs.
	SkillID   string
	SkillName string
	RunID     string
	CallerID  string
	ChannelID string
	GuildID   string

	// CallerIsAdmin is true when the caller is a mort admin (Member.Admin).
	// Populated by the executor at run dispatch via Bot.GetMember; defaults
	// to false on any lookup failure (member not found, DB error, empty
	// CallerID for system-invoked runs). Read by tools that gate behaviour
	// on admin status — currently `code_exec` for the v15 admin-only WAN
	// network mode.
	//
	// Why a precomputed bool on Invocation (vs an AdminChecker dep on
	// every tool): the admin lookup is read-once-per-run; every tool
	// would otherwise have to redo the work. The executor knows the
	// caller's admin status by the time it builds Invocation, so it
	// stamps the field once and every tool reads it for free.
	CallerIsAdmin bool

	// SkillInputs is the parsed input map for the enclosing skill —
	// available so a tool can reference values the user supplied at
	// invocation time. Tools may read this to specialise behaviour but
	// MUST NOT use it as a substitute for inv.CallerID-based isolation.
	SkillInputs map[string]any

	// ParentRunID is set when the skill was invoked via skill_invoke
	// from a parent skill run. Empty for top-level invocations
	// (Discord, chatbot, scheduler). Used by the loop guard in
	// skill_invoke and by the audit log for call-tree reconstruction.
	//
	// Why threaded through Invocation (vs context.Value): the loop
	// guard runs at tool-handler time, where the only context the
	// handler sees is inv. Stuffing it into context would force a
	// helper for unwrap on every read; an explicit field is easier to
	// audit and impossible to forget.
	ParentRunID string

	// RootRunID is the audit run id at the ROOT of the dispatch tree
	// this run belongs to — for a top-level run, its own RunID; for a
	// delegated run (skill_invoke / agent_invoke / agent_spawn /
	// palette wrappers), the outermost ancestor's. Stamped by both
	// executors from the dispatchguard ancestor chain right after
	// guard entry. Backs the shared `root_run:<id>` KV scope that lets
	// parallel sibling workers coordinate (see tools/scope_validate.go
	// + RootRunKVPartition).
	RootRunID string

	// ToolsSubset, when non-empty, narrows an AGENT run's low-level tools
	// to the named subset of the agent's configured LowLevelTools. Set by
	// agent_invoke's `tools_subset` arg for ephemeral fan-out — spawning a
	// focused worker from a template (e.g. a `coder` template with only
	// code_exec + read_page). Names outside the agent's tool menu are
	// rejected upstream (in the invoke adapter), so by the time the
	// executor reads this the intersection is safe. Empty = full palette.
	// Skill runs ignore this field.
	ToolsSubset []string

	// SystemPromptPrepend, when non-empty, is prepended to an AGENT's
	// system prompt for this invocation only — the fan-out "customized
	// system prompt" lever (agent_invoke's `prompt_prepend` arg). It
	// specializes a template persona to a task without mutating the
	// persisted agent row. Skill runs ignore this field.
	SystemPromptPrepend string

	// SuppressDelivery, when true, instructs the skill executor to
	// SKIP its OutputTarget Delivery (Deliver / DeliverError) entirely.
	// The run still produces an output string (returned from Run) and
	// still writes to the audit log — only the side-channel delivery
	// (Discord channel/DM/thread post) is suppressed.
	//
	// Why: when the chatbot exposure adapter invokes a skill, the skill's
	// output is already going to be consumed by the chatbot as a tool
	// result; ALSO posting it to Discord via OutputTarget produces double
	// output and (worse) primes the chatbot to call the tool again on
	// the next turn after seeing its own output as a "human message",
	// kicking off a tool-loop. The chatbot adapter sets this to true on
	// every invocation it constructs.
	SuppressDelivery bool

	// HandlerOwnsDelivery, when true, tells the executor that the caller
	// (typically a Discord command handler) will assemble the final
	// user-visible reply itself — folding any deferred attachments
	// (rows queued by send_attachments to skill_run_pending_attachments)
	// into the same message as the text output. The executor's
	// post-run AttachmentDrainer is skipped so the handler can drain +
	// classify + chain-overflow + post in one place.
	//
	// Why an explicit flag (vs reusing SuppressDelivery): SuppressDelivery
	// also short-circuits the OutputTarget Delivery layer (channel/dm/
	// thread post), which is the right shape for chatbot exposure but
	// the WRONG shape for `.agent run` — the handler still wants the
	// audit row to land and the executor's drainer to NOT post a
	// separate "here's an image" follow-up message after the handler's
	// own text reply. HandlerOwnsDelivery is the narrow "the caller is
	// taking over post-run delivery" signal that does NOT change any
	// other executor behaviour.
	//
	// SuppressDelivery and HandlerOwnsDelivery are independent. The
	// drainer is skipped when EITHER is set (the chatbot path doesn't
	// want stray posts either; agent-run sets HandlerOwnsDelivery
	// because it owns delivery; sub-agent dispatches set SuppressDelivery
	// because they surface output as a tool result).
	HandlerOwnsDelivery bool

	// Priority is the v9 per-invocation priority override for the lane
	// scheduler. When non-zero, the executor uses this value when
	// constructing the lane Job; zero falls back to the skill's
	// Skill.DefaultPriority. Owners are capped by convar
	// `skills.priority_max_per_user` (default 5); admins may exceed it.
	//
	// Why a non-pointer (vs *int): zero means "use the default", which
	// matches the convention everywhere else in this struct. Skills
	// that need an explicit zero priority can store
	// DefaultPriority=0 — the result is identical.
	Priority int

	// LaneWaitMaxSeconds is the v9 per-invocation lane backoff cap. When
	// >0, the executor calls SubmitWithMaxWait so the run is rejected
	// with ErrLaneBusy (surfaced as `lane_busy`) when the estimated
	// queue wait would exceed this many seconds. 0 (default) preserves
	// the legacy block-forever Submit semantics.
	LaneWaitMaxSeconds int

	// LaneOverride forces the run onto the named lane regardless of
	// Skill.ExecutionLane. Used by the v9 inbound webhook handler to
	// route webhook-triggered runs to the dedicated webhook-default
	// lane. Empty preserves the per-skill ExecutionLane.
	LaneOverride string

	// Continuation, when non-nil, signals that this Invocation is a
	// V10 reply continuation: a Discord user replied to a message the
	// originating skill posted, and mort is re-invoking the skill to
	// produce the next turn. The executor reads this field to:
	//
	//   - Reuse the parent run's `run:<parent_run_id>` KV scope (so any
	//     state the prior turn saved is still readable).
	//   - Render a continuation block at the top of the agent's user
	//     prompt that includes the parent output + reply text.
	//   - Enforce the per-deployment chain-depth cap
	//     (skills.reply.max_chain_depth, default 20).
	//   - Stamp parent_run_id on the new run for call-tree
	//     reconstruction in audit + UI.
	//
	// Why a pointer struct (vs flat fields): all five fields are
	// meaningful only together — splitting them would invite
	// half-populated states. nil = "this is a fresh invocation, not a
	// continuation".
	Continuation *ContinuationContext

	// SourceWebhookSecretMatched is set true by the inbound webhook
	// handler AFTER it has validated both the URL secret AND the HMAC
	// signature for the named skill. It signals to System.Run that the
	// caller is authenticated by a per-skill secret (not by Discord
	// identity), so the visibility / owner gate in CanInvoke should be
	// bypassed for THIS skill (matching SkillID). All other gates —
	// pinned_version, budget caps, lane caps — still apply.
	//
	// Hotfix-5 Bug 1: pre-fix the webhook handler built an Invocation
	// with CallerID=`<webhook>:<source-IP>` and dispatched through
	// System.Run. CanInvoke saw a non-owner non-admin caller against a
	// private skill and rejected with HTTP 500 ("caller is not
	// permitted to invoke skill"). The cure isn't to weaken
	// CanInvoke's general-purpose policy — it's to recognise that a
	// matched secret IS the auth gate for the named skill.
	//
	// Why per-Invocation (vs a separate gate path): the executor uses
	// Run as the single canonical dispatch point — adding a second
	// "authenticated dispatch" entry would split run-recording, lane
	// dispatch, and audit emission into two parallel implementations.
	SourceWebhookSecretMatched bool

	// OnEvent, when non-nil, is called by the executor at run
	// boundaries and by the agent loop on each tool dispatch. The
	// bot's command handler closes over the invoking message and
	// reacts an emoji from the skill's StateReactEmoji map. Nil-safe.
	//
	// Event names:
	//   "__start__" — right before agent.Run starts
	//   "__end__"   — on successful completion
	//   "__error__" — on terminal error
	//   <tool_name> — when a tool dispatches (any registered tool)
	//
	// The executor passes the resolved emoji as `emoji` so callers
	// don't have to look it up themselves; emoji=="" means "no react
	// for this event" and callers should skip the react entirely.
	//
	// Why a callback (vs a state-react map carried in the Invocation):
	// the lookup table lives on the Skill, not the Invocation, but the
	// caller-supplied side effect (a Discord react) lives on the bot
	// command surface. A callback bridges the two without forcing the
	// executor to import discord types and without forcing the bot
	// command surface to know about the Skill's emoji map shape.
	OnEvent func(ctx context.Context, event string, emoji string)

	// OnToolEvent, when non-nil, is called by the executor on each tool
	// dispatch with phase "start" (before the tool runs) then "end" or
	// "error" (after it completes, with the result text in detail). Distinct
	// from OnEvent (which is the emoji state-react hook): this carries the
	// tool name + args/result so an out-of-band caller — e.g. the mortise
	// chat API streaming SSE tool.start/tool.end frames — can surface live
	// tool-progress. Nil-safe; the callback MUST be fast and non-blocking
	// (it runs on the agent-loop goroutine).
	OnToolEvent func(ctx context.Context, toolName, phase, detail string)

	// OnStep, when non-nil, is called by the executor as the agent loop
	// makes progress — currently once per tool call: phase "start" before
	// the tool runs, phase "end" after it completes (StepEvent.Step.Status
	// is "complete" or "error"). Correlate the two by StepEvent.Step.ID.
	// "delta" is reserved for progressive detail and is unused today.
	//
	// Distinct from OnToolEvent (the raw tool-name/result hook): OnStep
	// carries a richer, presentation-ready Step (kind + human present-tense
	// summary) so an out-of-band consumer — e.g. the mortise chat API
	// streaming SSE step.start/step.end frames — can render structured
	// progress without re-deriving it. The executor ALSO accumulates the
	// same Steps onto its run Result, so persistence does not depend on
	// this callback being set. Nil-safe; the callback MUST be fast and
	// non-blocking (it runs on the agent-loop goroutine).
	OnStep func(ctx context.Context, ev StepEvent)

	// InvokingMessageID is the Discord message ID of the user's command
	// that triggered this run, when it was triggered by a Discord text
	// command. Used by delivery to thread the reply (Discord native
	// reply with the gray quote bar + jump link). Empty for chatbot
	// exposure, scheduled, or webhook invocations — delivery falls
	// back to a plain channel post for those.
	//
	// Why threaded through Invocation (vs a separate field on Skill or
	// a magic SkillInputs key): the message ID is per-invocation, not
	// per-skill, and the delivery layer is the natural reader. Direct
	// field on Invocation matches the existing ChannelID / GuildID
	// fields' shape.
	InvokingMessageID string

	// Images carries multi-modal image content for the initial user
	// message. When non-empty, the executor builds the initial user
	// message with llm.UserParts(text + image parts) instead of plain
	// llm.UserText. Populated by callers that extract images from Discord
	// attachments or URLs in prompt text (pkg/imageutil downloads the
	// bytes — majordomo image parts are bytes-only). Nil = text-only.
	Images []llm.ImagePart

	// InputFiles carries non-image attachments (audio, etc.) the user
	// supplied with the run. Unlike Images, these are NOT inlined into
	// the model's context — the LLM can't ingest raw mp3/wav/midi bytes.
	// Instead the executor stages each into the skill file store under
	// run scope and tells the agent the resulting file_ids (in the
	// prompt) so it can hand one to a worker tool (e.g. code_exec
	// files_in → /workspace/<name>) for processing. Nil = none.
	InputFiles []InputFile

	// ExtraTools are additional llm.Tool instances injected for this
	// run only. They are appended to the palette after registry-built
	// tools, skill-palette wrappers, and sub-agent wrappers. Use this
	// for session-specific tools that cannot be pre-registered in the
	// catalog (e.g., scaddy's write_scad which needs per-session
	// workspace + renderer state).
	//
	// Why on Invocation (vs a dedicated Run parameter): the Invocation
	// is the per-run context carrier in mort's execution path. Adding
	// a separate ExtraTools arg to Executor.Run would fork the
	// signature for one use case; a field on the existing carrier
	// keeps the surface stable.
	ExtraTools []llm.Tool

	// SessionToolFactory, if set, is called with the live AgentSession
	// after the executor constructs the agent but before it runs. It
	// returns a SessionTools struct carrying the tools to add, an
	// optional PostRun hook for post-processing (e.g., rendering final
	// artifacts from workspace state), and an optional Cleanup func for
	// resource teardown. Types are defined in session_tools.go.
	//
	// Why a factory (vs ExtraTools): ExtraTools are static — they
	// don't have access to the running agent. Tools that need to call
	// session.AttachImages (to show rendered previews to the model on
	// its next turn) require the live session handle that only exists
	// after construction. The factory receives that handle.
	SessionToolFactory SessionToolFactory

	// PostRunDelivery, if set, is called by the agent command handler
	// (`.agent run`) INSTEAD of the default text + paste-fallback reply
	// when the executor's result carries a PostRunResult. The callback
	// receives the Discord message to reply to, the agent's text output,
	// and the PostRunResult. It returns the message ID of the primary
	// reply (for origin recording) and any error.
	//
	// Why a callback on Invocation (vs a handler method on the agent):
	// delivery needs services (paste, filetransfer, Discord session)
	// that live outside the agents package. A callback lets the adapter
	// (e.g., scaddy) close over the services at factory-build time
	// without adding service dependencies to the agents.System struct.
	//
	// When nil, `handleRun` falls through to the standard text-based
	// reply path (formatRunReply + postRunReply). When set, the
	// callback owns the ENTIRE reply — `handleRun` does NOT post a
	// text reply alongside it.
	PostRunDelivery func(ctx context.Context, channelID, replyToMsgID string, output string, prr *PostRunResult) (primaryMsgID string, err error)

	// RunState, when set by the executor, lets a tool read the live
	// run's progress + budget snapshot (iteration vs cap, tool calls vs
	// cap, token counts, elapsed seconds — see the RunState struct).
	// Nil on paths that do not provide it (e.g. the no-tools direct
	// path, or executors that predate the hook). The skill_self_status
	// tool reads this.
	RunState RunStateAccessor

	// AttachImages, when set by the executor, queues a user-role message
	// (optional text + image parts) into the LIVE run so the model sees
	// the images on its next step — the same steer-mailbox mechanism the
	// SessionToolFactory's AgentSession exposes, but reachable from any
	// ordinary tool handler. A tool returns text; images cannot ride a
	// string result, so a tool that fetches images the model must SEE
	// (e.g. discord_list_recent_messages reading channel history) calls
	// this to feed the pixels in. Nil on paths that do not own a steer
	// mailbox (skillexec, the no-tools direct path); tools MUST nil-check
	// before calling and degrade to text-only when it is nil.
	AttachImages func(text string, images ...llm.ImagePart)

	// The fields below are populated by the registry's Build before
	// BuildLLM is called. Tools should call CheckGate(inv) at the top
	// of their handler and EmitAudit(inv, ...) when reporting tool
	// results. The fields are unexported in the public surface but
	// available to tools via the helpers in helpers.go.

	// gate is the tool's Permission().SkillNameGate.
	gate string
	// currentSkill is a copy of SkillName taken at Build time.
	currentSkill string
	// audit is the AuditHook handed to Build; may be nil.
	audit AuditHook
	// toolName is the registry name the tool was requested under.
	toolName string
}
// RunState is a live, read-only snapshot of the current run's progress
// and budget. Populated on demand by the executor's per-run accessor
// (see Invocation.RunState).
type RunState struct {
	// Iteration is the current iteration; MaxIterations is its cap.
	Iteration     int
	MaxIterations int

	// ToolCalls is the number of tool calls so far; MaxToolCalls is
	// its cap.
	ToolCalls    int
	MaxToolCalls int

	// Token counters for the run, split by input / output / thinking.
	InputTokens    int64
	OutputTokens   int64
	ThinkingTokens int64

	// ElapsedSeconds is the run's elapsed time in whole seconds.
	ElapsedSeconds int
}
// RunStateAccessor returns the live RunState for the enclosing run. The
// executor builds one per run and stamps it on Invocation.RunState
// before the toolbox is built; tools read it via inv.RunState. Nil on
// any path that does not provide it, so callers must nil-check the
// Invocation field before calling RunState().
type RunStateAccessor interface {
	// RunState returns the current snapshot by value.
	RunState() RunState
}
// Registry is the interface to the tool catalog: registration, lookup,
// and per-run toolbox construction. Concrete impl is the package-private
// *registry struct returned by NewRegistry.
type Registry interface {
	// Register adds a tool under its Name(). The concrete registry
	// returns an error for a nil tool, an empty name, or a name that
	// is already registered.
	Register(t Tool) error

	// Get looks a tool up by name; the bool reports whether it exists.
	Get(name string) (Tool, bool)

	// List returns all registered tools. The concrete registry iterates
	// a map, so the order is unspecified.
	List() []Tool

	// Build returns an llm.Toolbox with each named tool prepared for
	// execution against the given invocation. Save-time authoring
	// checks happen elsewhere (CheckAuthoring in checks.go) — Build
	// trusts that the skill was already saved past those gates and
	// only re-checks runtime invariants:
	//
	//  1. Share-safety drift: rejects an unsafe tool when visibility
	//     != private.
	//  2. SkillNameGate enforcement is delegated to the per-tool
	//     handler via CheckGate, which reads invocation context.
	//  3. Audit emission via EmitAudit (also per-tool).
	//
	// The optional `trusted` variadic argument lets the caller declare
	// the skill as trusted infrastructure (a builtin loaded from disk
	// by the project's own loader) so the share-safety drift check is
	// skipped. Builtins legitimately ship with public visibility AND
	// not-safe-for-share tools (e.g. skill-wizard's wizard_* tools),
	// and the loader bypasses save-time gates by design — applying the
	// share-safety check at invocation would be inconsistent with the
	// rest of the trusted-builtin contract. Pass true ONLY for builtins
	// (Skill.Source == SourceBuiltin / OwnerID == ""). Variadic so the
	// existing call sites (and tests) compile unchanged. Only the first
	// value is consulted.
	Build(names []string, inv Invocation, vis Visibility, audit AuditHook, trusted ...bool) (*llm.Toolbox, error)
}
// AuditHook is invoked synchronously around each tool call, receiving
// one AuditCall per report. Implementations typically forward to
// skillaudit.Writer. May be nil for tests.
type AuditHook func(call AuditCall)
// AuditCall describes one tool invocation. Result is set on success;
// Err is set on failure. Either may be present together (e.g. the tool
// returned partial output then errored).
type AuditCall struct {
	// Tool is the name of the tool that was called.
	Tool string
	// Args is the call's arguments rendered as a string — presumably
	// the raw JSON the model supplied; confirm against EmitAudit.
	Args string
	// Result is the tool's output text, if any.
	Result string
	// Err is the tool's error, if any.
	Err error
}
// Step is one unit of agent progress surfaced to a consumer of OnStep
// (and accumulated onto the executor's run Result). Today there is one
// Step per tool call; the shape is deliberately open so future kinds
// (a coalesced reasoning beat, a sub-agent delegation) slot in without a
// wire change.
//
// This is a plain DTO — no HTTP/Discord/JSON-tag coupling beyond the
// neutral snake_case tags a transport may reuse. The chat API converts
// it to its own persisted/wire type; Discord/cron consumers read the
// Result field directly.
type Step struct {
	// ID is stable per-step and unique within one run; it is the
	// correlation key between the "start" and "end" emissions.
	ID string `json:"id"`

	// Kind is an open vocabulary (search, read, code, image, file,
	// memory, delegate, tool, …); consumers map known values to an icon
	// and fall back for unknown ones. Never drop a step for an
	// unrecognised kind.
	Kind string `json:"kind"`

	// Title is a short machine-ish label (typically the raw tool name).
	// Omitted from JSON when empty.
	Title string `json:"title,omitempty"`

	// Summary is the human present-tense one-liner ("Searching the web
	// for …"); on end it may be replaced with a result phrase.
	Summary string `json:"summary"`

	// Status is "running" | "complete" | "error".
	Status string `json:"status"`

	// Detail is optional, user-safe, size-capped markdown. Never raw tool
	// output, credentials, or chain-of-thought. Omitted from JSON when
	// empty.
	Detail string `json:"detail,omitempty"`

	// StartedAt is when the step began.
	StartedAt time.Time `json:"started_at"`

	// EndedAt is set on the terminal "end" emission. It is a pointer so
	// that it is nil, and omitted from JSON, until then.
	EndedAt *time.Time `json:"ended_at,omitempty"`
}
// StepEvent is one live emission to OnStep. Phase is "start" or "end"
// ("delta" is reserved for progressive detail and unused today). Step
// carries the full current snapshot; Detail holds the delta text when
// Phase == "delta".
type StepEvent struct {
	// Phase is "start", "end", or the reserved "delta".
	Phase string
	// Step is the full current snapshot of the step.
	Step Step
	// Detail is the delta text; meaningful only when Phase == "delta".
	Detail string
}
// NewRegistry returns a Registry with no tools in it. Populate it with
// one Register call per tool; see pkg/skilltools/default_registry.go
// for the v1 set.
func NewRegistry() Registry {
	catalog := make(map[string]Tool)
	return &registry{tools: catalog}
}
// registry is the concrete Registry returned by NewRegistry: a map of
// tools keyed by name, guarded by a read/write mutex.
type registry struct {
	// mu guards tools. Register takes the write lock; Get and List take
	// the read lock.
	mu sync.RWMutex
	// tools maps Tool.Name() to the registered tool.
	tools map[string]Tool
}
// Register adds t to the catalog under t.Name(). It returns an error —
// and leaves the catalog unchanged — when t is nil, when its name is
// empty, or when a tool with the same name is already registered.
func (r *registry) Register(t Tool) error {
	if t == nil {
		return fmt.Errorf("skilltools: nil tool")
	}

	key := t.Name()
	if key == "" {
		return fmt.Errorf("skilltools: tool with empty name")
	}

	r.mu.Lock()
	defer r.mu.Unlock()

	// First registration wins; a later tool never replaces an earlier one.
	if _, taken := r.tools[key]; !taken {
		r.tools[key] = t
		return nil
	}
	return fmt.Errorf("skilltools: duplicate tool name %q", key)
}
// Get returns the tool registered under name. The bool is false (and the
// Tool nil) when no such tool exists.
func (r *registry) Get(name string) (Tool, bool) {
	r.mu.RLock()
	entry, found := r.tools[name]
	r.mu.RUnlock()
	return entry, found
}
// List returns a freshly allocated slice of every registered tool. The
// slice is never nil. Order follows map iteration and is therefore
// unspecified.
func (r *registry) List() []Tool {
	r.mu.RLock()
	defer r.mu.RUnlock()

	snapshot := make([]Tool, len(r.tools))
	idx := 0
	for _, entry := range r.tools {
		snapshot[idx] = entry
		idx++
	}
	return snapshot
}
// Build prepares an llm.Toolbox for one skill execution, containing the
// named tools in the order given.
//
// Why: each tool needs to know the caller / channel / skill name plus
// the audit hook. Stuffing them into Invocation lets each Tool.BuildLLM
// produce a closure that has everything it needs without further
// plumbing.
//
// Defence in depth: rejects an unsafe tool when visibility != private —
// the share-time check should already have prevented this; this catches
// drift (e.g. a tool's SafeForShare flag flipping after a skill saved).
//
// The trusted variadic flag lets a caller bypass the share-safety drift
// check for builtin (trusted-infrastructure) skills. The mortventure /
// skill-wizard builtins legitimately ship with public visibility AND
// not-safe-for-share tools — the loader bypasses save-time gates and
// the share-safety check at invocation would block them inconsistently.
// Pass true ONLY for builtins.
//
// Build fails on the first problem it meets: an unknown tool name, a
// share-safety violation, a tool whose built llm.Tool has an empty
// name, or an error from the toolbox's Add. On failure the returned
// toolbox is nil.
func (r *registry) Build(names []string, inv Invocation, vis Visibility, audit AuditHook, trusted ...bool) (*llm.Toolbox, error) {
	// Only the first variadic value counts; omitting it means untrusted.
	skipShareCheck := len(trusted) > 0 && trusted[0]
	sharedVisibility := vis != VisibilityPrivate

	box := llm.NewToolbox("skilltools")
	for _, name := range names {
		t, found := r.Get(name)
		if !found {
			return nil, fmt.Errorf("skilltools: unknown tool %q", name)
		}
		if !skipShareCheck && sharedVisibility && !t.Permission().SafeForShare {
			return nil, fmt.Errorf("skilltools: tool %q is not safe for share but skill visibility is %s", name, vis)
		}

		// Give the tool its own copy of the Invocation carrying the
		// gate/audit fields, so its handler can call CheckGate /
		// EmitAudit.
		scoped := inv
		scoped.gate = t.Permission().SkillNameGate
		scoped.currentSkill = inv.SkillName
		scoped.audit = audit
		scoped.toolName = name

		built := t.BuildLLM(scoped)
		if built.Name == "" {
			return nil, fmt.Errorf("skilltools: tool %q built llm.Tool with empty name", name)
		}
		if err := box.Add(built); err != nil {
			return nil, fmt.Errorf("skilltools: adding tool %q: %w", name, err)
		}
	}
	return box, nil
}
+184
View File
@@ -0,0 +1,184 @@
package tool
import (
"context"
"strings"
"testing"
llm "gitea.stevedudenhoeffer.com/steve/majordomo/llm"
)
// fakeTool is a bare-bones Tool implementation for driving the
// registry's gating logic in tests. Its handler runs CheckGate, records
// the Invocation it was built with, emits an audit record, and returns
// the canned text / error.
type fakeTool struct {
	name string
	desc string
	perm Permission

	// calledWith is set by the handler once the gate check has passed.
	calledWith *Invocation

	returnText  string
	returnError error
}

// Name returns the configured tool name.
func (f *fakeTool) Name() string {
	return f.name
}

// Description returns the configured description.
func (f *fakeTool) Description() string {
	return f.desc
}

// Permission returns the configured permission set.
func (f *fakeTool) Permission() Permission {
	return f.perm
}

// BuildLLM wraps the fake in an llm.Tool that takes no parameters.
func (f *fakeTool) BuildLLM(inv Invocation) llm.Tool {
	type emptyParams struct{}

	handler := func(_ context.Context, _ emptyParams) (any, error) {
		if gateErr := CheckGate(inv); gateErr != nil {
			EmitAudit(inv, "{}", "", gateErr)
			return "", gateErr
		}
		f.calledWith = &inv
		EmitAudit(inv, "{}", f.returnText, f.returnError)
		return f.returnText, f.returnError
	}
	return llm.DefineTool(f.name, f.desc, handler)
}
func TestRegister_DuplicateRejected(t *testing.T) {
r := NewRegistry()
a := &fakeTool{name: "x", perm: Permission{AuthoringRequirement: RequirementAnyone, SafeForShare: true}}
b := &fakeTool{name: "x", perm: Permission{AuthoringRequirement: RequirementAnyone, SafeForShare: true}}
if err := r.Register(a); err != nil {
t.Fatal(err)
}
err := r.Register(b)
if err == nil || !strings.Contains(err.Error(), "duplicate") {
t.Fatalf("expected duplicate-name error, got %v", err)
}
}
// TestRegister_RejectsEmpty checks that Register refuses both a tool
// with an empty name and a nil tool.
func TestRegister_RejectsEmpty(t *testing.T) {
	reg := NewRegistry()

	err := reg.Register(&fakeTool{name: ""})
	if err == nil {
		t.Fatal("expected empty-name rejection")
	}

	err = reg.Register(nil)
	if err == nil {
		t.Fatal("expected nil-tool rejection")
	}
}
// TestBuild_UnknownTool asks an empty registry to build a toolbox for a
// name that was never registered and expects an "unknown tool" error.
func TestBuild_UnknownTool(t *testing.T) {
	requested := []string{"nope"}

	_, buildErr := NewRegistry().Build(requested, Invocation{}, VisibilityPrivate, nil)
	if buildErr != nil && strings.Contains(buildErr.Error(), "unknown tool") {
		return
	}
	t.Fatalf("expected unknown-tool error, got %v", buildErr)
}
// TestBuild_SharedRejectsUnsafeTool expects Build to refuse a tool that
// is not SafeForShare when the skill visibility is VisibilityShared.
func TestBuild_SharedRejectsUnsafeTool(t *testing.T) {
	reg := NewRegistry()
	unsafeTool := &fakeTool{name: "balance", perm: Permission{SafeForShare: false}}
	_ = reg.Register(unsafeTool)

	_, buildErr := reg.Build([]string{"balance"}, Invocation{}, VisibilityShared, nil)
	if buildErr != nil && strings.Contains(buildErr.Error(), "not safe for share") {
		return
	}
	t.Fatalf("expected share-safety error, got %v", buildErr)
}
// TestBuild_TrustedBuiltinBypassesShareSafety covers the trusted-flag
// escape hatch. A builtin (skill-wizard, mortventure) legitimately ships
// with public visibility AND not-safe-for-share tools, so Build called
// with trusted=true must accept that combination.
//
// Why: before the flag existed, invoking skill-wizard (visibility=public,
// tools include wizard_* with SafeForShare=false) was rejected at runtime
// even though the loader had already bypassed the save-time gates. The
// flag brings the invocation-time gate in line with the loader's trust
// model.
func TestBuild_TrustedBuiltinBypassesShareSafety(t *testing.T) {
	reg := NewRegistry()
	wizard := &fakeTool{
		name:       "wizard_list",
		perm:       Permission{SafeForShare: false},
		returnText: "ok",
	}
	_ = reg.Register(wizard)

	inv := Invocation{SkillName: "skill-wizard"}
	toolbox, buildErr := reg.Build([]string{"wizard_list"}, inv, VisibilityPublic, nil, true)
	if buildErr != nil {
		t.Fatalf("trusted=true should bypass share-safety, got %v", buildErr)
	}
	if toolbox == nil {
		t.Fatal("trusted=true should produce a toolbox, got nil")
	}
}
// TestBuild_NonTrustedSharedStillRejects pins the bypass as strictly
// opt-in: the same shape (public visibility + unsafe tool) is rejected
// both when trusted is passed as false and when it is omitted.
func TestBuild_NonTrustedSharedStillRejects(t *testing.T) {
	reg := NewRegistry()
	_ = reg.Register(&fakeTool{name: "balance", perm: Permission{SafeForShare: false}})
	names := []string{"balance"}

	// Explicit trusted=false.
	_, explicitErr := reg.Build(names, Invocation{}, VisibilityPublic, nil, false)
	if explicitErr == nil || !strings.Contains(explicitErr.Error(), "not safe for share") {
		t.Fatalf("trusted=false (non-builtin) must still reject unsafe tool at public visibility, got %v", explicitErr)
	}

	// Variadic omitted: trusted defaults to false, so same rejection.
	_, omittedErr := reg.Build(names, Invocation{}, VisibilityPublic, nil)
	if omittedErr == nil || !strings.Contains(omittedErr.Error(), "not safe for share") {
		t.Fatalf("omitted variadic must default to trusted=false, got %v", omittedErr)
	}
}
// TestBuild_PublicAcceptsSafeTool builds a SafeForShare tool at public
// visibility and checks that executing it returns the canned text.
func TestBuild_PublicAcceptsSafeTool(t *testing.T) {
	reg := NewRegistry()
	safe := &fakeTool{name: "search", perm: Permission{SafeForShare: true}, returnText: "hits"}
	_ = reg.Register(safe)

	toolbox, buildErr := reg.Build([]string{"search"}, Invocation{SkillName: "echo"}, VisibilityPublic, nil)
	if buildErr != nil {
		t.Fatal(buildErr)
	}

	got, execErr := execBox(toolbox, toolCall{Name: "search", Arguments: "{}"})
	if execErr != nil || got != "hits" {
		t.Fatalf("unexpected: %q %v", got, execErr)
	}
}
func TestBuild_GateBlocksMismatchedSkill(t *testing.T) {
r := NewRegistry()
tt := &fakeTool{
name: "wizard_save",
perm: Permission{SafeForShare: true, SkillNameGate: "skill-wizard"},
returnText: "saved",
}
_ = r.Register(tt)
box, err := r.Build([]string{"wizard_save"}, Invocation{SkillName: "echo"}, VisibilityPrivate, nil)
if err != nil {
t.Fatalf("build: %v", err)
}
out, err := execBox(box, toolCall{Name: "wizard_save", Arguments: "{}"})
if err == nil || !strings.Contains(err.Error(), "restricted to") {
t.Fatalf("expected gate rejection, got out=%q err=%v", out, err)
}
}
// TestBuild_GateAllowsMatchingSkill builds a tool gated to
// "skill-wizard" for a skill of that same name and expects the call to
// go through and return the canned text.
//
// Setup errors are checked explicitly so a broken Register / Build shows
// up as a clear test failure instead of a nil toolbox being handed to
// execBox.
func TestBuild_GateAllowsMatchingSkill(t *testing.T) {
	r := NewRegistry()
	tt := &fakeTool{
		name:       "wizard_save",
		perm:       Permission{SafeForShare: true, SkillNameGate: "skill-wizard"},
		returnText: "saved",
	}
	if err := r.Register(tt); err != nil {
		t.Fatalf("register: %v", err)
	}

	box, err := r.Build([]string{"wizard_save"}, Invocation{SkillName: "skill-wizard"}, VisibilityPrivate, nil)
	if err != nil {
		t.Fatalf("build: %v", err)
	}

	out, err := execBox(box, toolCall{Name: "wizard_save", Arguments: "{}"})
	if err != nil || out != "saved" {
		t.Fatalf("unexpected: %q %v", out, err)
	}
}
// TestBuild_EmitsAudit verifies that the audit hook passed to Build
// receives exactly one record for a successful call, carrying the tool
// name, the result text and a nil error.
//
// Register, Build and execBox errors are all checked: a failure in any
// of them would otherwise surface only as a confusing "unexpected audit"
// message (or a nil toolbox reaching execBox).
func TestBuild_EmitsAudit(t *testing.T) {
	r := NewRegistry()
	tt := &fakeTool{name: "search", perm: Permission{SafeForShare: true}, returnText: "hits"}
	if err := r.Register(tt); err != nil {
		t.Fatalf("register: %v", err)
	}

	var calls []AuditCall
	hook := func(c AuditCall) { calls = append(calls, c) }

	box, err := r.Build([]string{"search"}, Invocation{SkillName: "echo"}, VisibilityPrivate, hook)
	if err != nil {
		t.Fatalf("build: %v", err)
	}
	if _, err := execBox(box, toolCall{Name: "search", Arguments: "{}"}); err != nil {
		t.Fatalf("exec: %v", err)
	}

	if len(calls) != 1 || calls[0].Tool != "search" || calls[0].Result != "hits" || calls[0].Err != nil {
		t.Fatalf("unexpected audit: %+v", calls)
	}
}
+18
View File
@@ -0,0 +1,18 @@
package tool
// RootRunKVPartition is the sentinel skill_id partition under which all
// `root_run:<id>` KV rows are stored.
//
// Why a sentinel: skill KV rows are keyed (skill_id, scope, key), so
// two sibling workers with different IDs (e.g. agent_spawn ephemeral
// workers under one fan-out) could never share state through a scope
// string alone — each would read/write its own partition. Routing every
// root_run scope into one shared partition makes the scope string the
// real boundary: it embeds the root run id, which the validator checks
// against Invocation.RootRunID, so per-tree isolation holds even though
// the partition is global.
//
// Declared in this root package (package tool, formerly skilltools)
// rather than in its tools/ subpackage because both the tool handlers
// (pkg/skilltools/tools) and the storage sweeper (pkg/logic/skills)
// need it without importing each other.
const RootRunKVPartition = "__root_run__"
+18
View File
@@ -0,0 +1,18 @@
package tool
import "testing"
// fakeAccessor is a test double whose RunState method returns a canned
// value.
type fakeAccessor struct {
	s RunState
}

// RunState returns the snapshot the fake was constructed with.
func (f fakeAccessor) RunState() RunState {
	return f.s
}
// TestInvocationRunState_NilSafe checks that a zero Invocation has a nil
// RunState accessor, and that an assigned accessor hands back the values
// it was built with.
func TestInvocationRunState_NilSafe(t *testing.T) {
	var inv Invocation
	if inv.RunState != nil {
		t.Fatal("RunState should default nil")
	}

	snapshot := RunState{Iteration: 3, MaxIterations: 10}
	inv.RunState = fakeAccessor{s: snapshot}

	got := inv.RunState.RunState()
	if got.Iteration != 3 || got.MaxIterations != 10 {
		t.Fatalf("unexpected RunState: %+v", got)
	}
}
+99
View File
@@ -0,0 +1,99 @@
package tool
import (
"context"
llm "gitea.stevedudenhoeffer.com/steve/majordomo/llm"
)
// AgentSession is the live-run handle a SessionToolFactory receives.
// It is implemented by the executors (agentexec / skillexec / scaddy's
// adapter) on top of majordomo's agent loop and exposes the one mid-run
// mutation session tools need: feeding content back into the running
// conversation.
//
// Why an interface (vs the concrete agent type): legacy agentkit handed the
// factory a *agentkit.Agent so tools could call agent.AttachImages.
// majordomo's *agent.Agent is deliberately immutable mid-run — message
// injection happens through the run-scoped steer mailbox
// (agent.WithSteer). A narrow interface lets each executor implement
// AttachImages over its own steer queue without this package importing
// the agent package, and keeps session tools testable with a two-line
// fake.
type AgentSession interface {
	// AttachImages queues a user-role message (text plus image parts)
	// for injection into the conversation before the agent's next
	// step. Used by tools that produce visual feedback the model must
	// see on its following turn (e.g. scaddy's rendered OpenSCAD
	// previews). Safe to call from inside a tool handler; the message
	// lands after the current step's tool results.
	AttachImages(text string, images ...llm.ImagePart)
}
// SessionToolFactory builds per-session tools that close over the live
// agent session. Called by the executor after the agent is constructed
// but before it runs. It receives the session handle and returns the
// SessionTools bundle for that run. See Invocation.SessionToolFactory
// for the rationale (static ExtraTools cannot reach the running agent).
type SessionToolFactory func(session AgentSession) SessionTools
// SessionTools carries per-session tools plus optional post-run and
// teardown hooks. It replaces legacy agentkit's SessionTools with the same
// three-field shape, re-based on majordomo types.
type SessionTools struct {
	// Tools to add to the agent's toolbox for this run only.
	Tools []llm.Tool
	// PostRun, if set, is called after the agent run completes
	// (successfully or not). It receives the full run transcript (the
	// agent Result's Messages — also populated on partial results from
	// agent.ErrMaxSteps / agent.ErrToolLoop), the agent's text output,
	// and the run error, so the hook can decide whether to attempt
	// artifact production on partial success (e.g. scaddy ships the
	// latest SCAD even when the step budget ran out). The returned
	// PostRunResult is attached to the executor's run result.
	//
	// PostRun has no error return: any failure inside the hook must be
	// handled by the hook itself. The executor logs a nil return but
	// never fails the run over it; the agent's output is the source of
	// truth.
	//
	// Why a transcript slice (vs the live agent): the consumers only
	// ever read the message history (thought-chain transcripts); the
	// majordomo agent exposes that on Result, not on the Agent.
	PostRun func(ctx context.Context, transcript []llm.Message, output string, runErr error) *PostRunResult
	// Cleanup, if set, is deferred by the executor immediately after
	// the factory returns. Called even if the run fails or PostRun
	// panics. Use for temp directory removal, closing file handles,
	// etc.
	Cleanup func()
}
// PostRunResult carries artifacts produced by the PostRun hook.
// Attached to the executor's run result so callers (Discord command
// handlers, HTTP API responses) can inspect and deliver the artifacts.
//
// Why a separate struct (vs returning artifacts inline): post-
// processing may produce multiple typed artifacts (PNGs, STLs, SCAD
// source) that the delivery layer classifies and routes differently.
// A flat []Artifact + arbitrary Metadata covers the known use cases
// without over-specifying the shape.
type PostRunResult struct {
	// Artifacts are the files produced during post-processing
	// (e.g., rendered PNGs, STL files, SCAD source).
	Artifacts []Artifact
	// Metadata is arbitrary key-value data the delivery layer can
	// use for formatting (e.g., iteration count, model name, notes).
	Metadata map[string]any
}
// Artifact is a named binary blob produced by post-run processing.
//
// Why: the delivery layer needs name + type + bytes to classify
// each artifact (PNG → embed image, STL → filetransfer upload,
// SCAD → paste upload). A struct with these three fields is the
// minimal viable description.
type Artifact struct {
	// Name is the artifact's file name, e.g., "model.stl", "preview_iso.png".
	Name string
	// MimeType is the artifact's media type, e.g., "model/stl", "image/png".
	MimeType string
	// Data is the artifact's content as bytes.
	Data []byte
}
+221
View File
@@ -0,0 +1,221 @@
// Package tool — SSRF protection layer for skill HTTP tools.
//
// Why a dedicated layer (vs reusing pkg/utils.ValidateExternalURL):
// the platform's HTTP tools enforce a per-deployment ALLOWLIST (not
// just a "no private IPs" denylist) — admins must explicitly opt-in
// to each domain a skill may call. Additionally, defeating DNS
// rebinding requires capturing the resolved IP at validation time
// and pinning the dialler so a hostile DNS resolver can't return a
// public IP during the check and a private one at dial time.
package tool
import (
"context"
"fmt"
"net"
"net/http"
"net/url"
"strings"
"time"
)
// AllowlistConfig governs which hosts a skill HTTP tool may contact.
//
// Why a config struct (vs raw []string): forward-compatibility — we
// expect to add per-tool overrides (e.g. "this skill may also reach
// internal.example.com") and an explicit `AllowLoopback` opt-in for
// development environments. Keeping the validation surface as a
// struct lets new fields land without breaking call sites.
type AllowlistConfig struct {
	// Domains is the list of allowed hostnames. Three entry forms are
	// recognised:
	//
	//   - an exact host name or IP literal ("example.com", "127.0.0.1");
	//   - a subdomain wildcard: "*.example.com" matches
	//     "foo.example.com" and "bar.baz.example.com" but NOT
	//     "example.com" itself (to allow both, list both entries);
	//   - a bare "*", which matches every host.
	//
	// Entries are whitespace-trimmed and compared case-insensitively;
	// blank entries are ignored. Trailing dots are NOT trimmed, so
	// "example.com" and "example.com." are treated as different hosts
	// by the matcher.
	Domains []string
}
// ResolveAndCheck vets urlStr against the allowlist and hands back the
// IP address the request should be sent to. Feed that IP to
// PinnedDialTransport so the connection lands on the address that was
// checked here (defeats DNS rebinding).
//
// Checks, in order:
//
//  1. the URL parses, and its scheme is http or https;
//  2. it has a non-empty host;
//  3. the host matches the allowlist (exact entry, "*.domain" wildcard
//     or bare "*");
//  4. the target IP — the host itself when it is an IP literal,
//     otherwise the FIRST address returned by DNS — is not loopback /
//     private / link-local / otherwise rejected by rejectPrivateIP.
//
// Step 4 is skipped when the host, exactly as written in the URL, is
// itself an exact allowlist entry (e.g. "localhost" or "127.0.0.1").
// That is the admin opt-in used for tests / debugging; a wildcard match
// never grants it.
//
// ctx bounds the DNS lookup. On rejection the returned IP is nil and the
// error explains which check failed.
func ResolveAndCheck(ctx context.Context, urlStr string, allow AllowlistConfig) (net.IP, error) {
	parsed, err := url.Parse(urlStr)
	if err != nil {
		return nil, fmt.Errorf("parse url: %w", err)
	}
	switch parsed.Scheme {
	case "http", "https":
		// supported
	default:
		return nil, fmt.Errorf("scheme %q not supported (need http or https)", parsed.Scheme)
	}

	host := parsed.Hostname()
	if host == "" {
		return nil, fmt.Errorf("url has no host")
	}
	if !matchesAllowlist(host, allow.Domains) {
		return nil, fmt.Errorf("host %q not in allowlist", host)
	}

	// An IP literal is its own target; anything else goes through DNS,
	// and only the first answer is used.
	target := net.ParseIP(host)
	if target == nil {
		answers, lookupErr := (&net.Resolver{}).LookupIPAddr(ctx, host)
		if lookupErr != nil {
			return nil, fmt.Errorf("resolve %q: %w", host, lookupErr)
		}
		if len(answers) == 0 {
			return nil, fmt.Errorf("resolve %q: no addresses", host)
		}
		target = answers[0].IP
	}

	// Exact allowlist entry = explicit admin opt-in, so the private-IP
	// gate is skipped. Wildcards are meant for public domain families
	// and never bypass it.
	if hostExplicitlyAllowed(host, allow.Domains) {
		return target, nil
	}
	if privErr := rejectPrivateIP(host, target); privErr != nil {
		return nil, privErr
	}
	return target, nil
}
// rejectPrivateIP returns an error when ip must not be contacted by a
// skill HTTP tool, and nil when it looks like an ordinary public
// address. host is only used to make the error message informative.
//
// Rejected:
//   - nil / malformed addresses (fail closed: every predicate below
//     reports false for them, which would otherwise read as "public");
//   - the cloud metadata endpoint 169.254.169.254;
//   - loopback, RFC 1918 / RFC 4193 private, link-local unicast and
//     unspecified addresses;
//   - multicast addresses;
//   - the IPv4 shared address space 100.64.0.0/10 (RFC 6598, CGNAT),
//     which is not publicly routable and hosts internal services such
//     as Alibaba Cloud's metadata endpoint 100.100.100.200;
//   - the IPv4 limited broadcast address 255.255.255.255.
//
// Why a helper: ResolveAndCheck needs the same checks for IP-literal
// hosts and for DNS-resolved hosts. An admin who really wants one of
// these targets can still opt in by listing the host as an exact
// allowlist entry, which makes ResolveAndCheck skip this function.
func rejectPrivateIP(host string, ip net.IP) error {
	if ip.To16() == nil {
		return fmt.Errorf("host %q has no valid IP address", host)
	}
	// Cloud metadata endpoint check FIRST — it's a link-local IP, so
	// the more-specific metadata error message would otherwise be
	// shadowed by the link-local rejection.
	if ip.Equal(net.IPv4(169, 254, 169, 254)) {
		return fmt.Errorf("host %q resolves to cloud metadata IP %v", host, ip)
	}
	if ip.IsLoopback() {
		return fmt.Errorf("host %q resolves to loopback %v", host, ip)
	}
	if ip.IsPrivate() {
		return fmt.Errorf("host %q resolves to private IP %v", host, ip)
	}
	if ip.IsLinkLocalUnicast() {
		return fmt.Errorf("host %q resolves to link-local %v", host, ip)
	}
	if ip.IsUnspecified() {
		return fmt.Errorf("host %q resolves to unspecified %v", host, ip)
	}
	if ip.IsMulticast() {
		return fmt.Errorf("host %q resolves to multicast %v", host, ip)
	}
	if v4 := ip.To4(); v4 != nil {
		// 100.64.0.0/10: first octet 100, top two bits of the second
		// octet are 01 (64..127).
		if v4[0] == 100 && v4[1]&0xc0 == 0x40 {
			return fmt.Errorf("host %q resolves to shared address space %v", host, ip)
		}
		if v4.Equal(net.IPv4bcast) {
			return fmt.Errorf("host %q resolves to broadcast %v", host, ip)
		}
	}
	return nil
}
// hostExplicitlyAllowed reports whether host appears in allow as an
// exact entry. Entries are whitespace-trimmed and the comparison ignores
// case; wildcard entries are compared as plain text and so never match
// a real host name.
//
// ResolveAndCheck uses this to skip the private-IP check when an admin
// has named a host outright (e.g. "127.0.0.1" or "localhost") to opt in.
func hostExplicitlyAllowed(host string, allow []string) bool {
	want := strings.ToLower(host)
	for i := range allow {
		entry := strings.TrimSpace(allow[i])
		if strings.ToLower(entry) == want {
			return true
		}
	}
	return false
}
// matchesAllowlist reports whether host is permitted by any entry in
// allow. Entries are whitespace-trimmed and compared case-insensitively;
// blank entries are skipped. An entry matches when it is:
//
//   - equal to host;
//   - a "*.example.com" wildcard and host ends in ".example.com" with at
//     least one character in front — so "foo.example.com" and
//     "a.b.example.com" match but bare "example.com" does not;
//   - the bare "*" wildcard, which admits every host.
//
// Bare "*" only opens the hostname gate. ResolveAndCheck still runs its
// private-IP check for such hosts, because a wildcard is never an exact
// entry. It is the v15.1 operator answer to "let the agent reach the
// public internet" without enumerating TLDs (*.com, *.org, *.io, ...),
// which never covered the long tail.
func matchesAllowlist(host string, allow []string) bool {
	needle := strings.ToLower(host)
	for _, raw := range allow {
		entry := strings.ToLower(strings.TrimSpace(raw))
		switch {
		case entry == "":
			continue
		case entry == "*", entry == needle:
			return true
		case strings.HasPrefix(entry, "*."):
			// Drop the star, keep the dot: ".example.com".
			tail := strings.TrimPrefix(entry, "*")
			if len(needle) > len(tail) && strings.HasSuffix(needle, tail) {
				return true
			}
		}
	}
	return false
}
// PinnedDialTransport builds an http.RoundTripper whose every outgoing
// connection is dialled to ip, whatever host the request names. Only the
// dial target is rewritten — the port is taken from the address the
// transport asks for, and the request itself is not touched, so Host
// routing and TLS SNI keep using the original host name.
//
// Why pin: between ResolveAndCheck and http.Client.Do a hostile DNS
// server could answer with a different address (DNS rebinding). Dialling
// the pre-validated IP guarantees the connection lands on the address
// that passed the privacy check.
//
// timeout is applied three times: as the dial timeout, the TLS handshake
// timeout and the response-header timeout.
func PinnedDialTransport(ip net.IP, timeout time.Duration) http.RoundTripper {
	pinned := &net.Dialer{Timeout: timeout}

	dialPinned := func(ctx context.Context, network, addr string) (net.Conn, error) {
		// addr arrives as "host:port"; keep the port, swap the host.
		_, port, splitErr := net.SplitHostPort(addr)
		if splitErr != nil {
			return nil, splitErr
		}
		target := net.JoinHostPort(ip.String(), port)
		return pinned.DialContext(ctx, network, target)
	}

	return &http.Transport{
		DialContext:           dialPinned,
		TLSHandshakeTimeout:   timeout,
		ResponseHeaderTimeout: timeout,
	}
}
+168
View File
@@ -0,0 +1,168 @@
package tool
import (
"context"
"net"
"strings"
"testing"
)
// TestResolveAndCheck_AllowlistedPublic is the happy path: an
// allowlisted public domain resolves and a non-nil IP comes back. The
// test skips itself when DNS is unavailable so the suite still passes
// in offline CI.
func TestResolveAndCheck_AllowlistedPublic(t *testing.T) {
	const host = "example.com"

	// Pre-flight: no DNS, no test.
	if _, lookupErr := net.LookupHost(host); lookupErr != nil {
		t.Skipf("no DNS in test environment: %v", lookupErr)
	}

	cfg := AllowlistConfig{Domains: []string{host}}
	resolved, err := ResolveAndCheck(context.Background(), "https://"+host+"/", cfg)
	switch {
	case err != nil:
		t.Fatalf("ResolveAndCheck failed: %v", err)
	case resolved == nil:
		t.Fatal("expected non-nil IP")
	}
}
// TestResolveAndCheck_NotAllowlisted checks that a host missing from the
// allowlist is turned away with a "not in allowlist" error (the
// allowlist check runs before any DNS resolution).
func TestResolveAndCheck_NotAllowlisted(t *testing.T) {
	cfg := AllowlistConfig{Domains: []string{"example.com"}}

	_, rejectErr := ResolveAndCheck(context.Background(), "https://evil.test/", cfg)
	if rejectErr == nil {
		t.Fatal("expected rejection for non-allowlisted host")
	}
	if msg := rejectErr.Error(); !strings.Contains(msg, "not in allowlist") {
		t.Errorf("expected allowlist error, got: %v", rejectErr)
	}
}
// TestResolveAndCheck_WildcardMatch confirms that "*.example.com"
// matches subdomains one and two levels deep.
func TestResolveAndCheck_WildcardMatch(t *testing.T) {
	wildcard := []string{"*.example.com"}
	for _, host := range []string{"foo.example.com", "a.b.example.com"} {
		if matchesAllowlist(host, wildcard) {
			continue
		}
		t.Errorf("expected *.example.com to match %s", host)
	}
}
// TestResolveAndCheck_WildcardDoesNotMatchBareDomain pins a deliberate
// design choice: "*.example.com" does NOT cover "example.com" itself.
// Admins who want both must list both entries.
func TestResolveAndCheck_WildcardDoesNotMatchBareDomain(t *testing.T) {
	matched := matchesAllowlist("example.com", []string{"*.example.com"})
	if !matched {
		return
	}
	t.Error("expected *.example.com NOT to match bare example.com")
}
// TestResolveAndCheck_LocalhostRejected verifies that "localhost" is
// rejected unless the admin lists it as an exact allowlist entry. Two
// distinct rejection paths are covered:
//
//  1. the host is simply absent from the allowlist;
//  2. the hostname gate is open (bare "*"), the name resolves, and the
//     loopback address is caught by the private-IP gate. A wildcard is
//     never an exact entry, so it must not grant the bypass.
//
// Skips when "localhost" cannot be resolved in the test environment.
func TestResolveAndCheck_LocalhostRejected(t *testing.T) {
	if _, err := net.LookupHost("localhost"); err != nil {
		t.Skipf("no DNS in test environment: %v", err)
	}
	ctx := context.Background()

	// Path 1: fails the allowlist match outright.
	strict := AllowlistConfig{Domains: []string{"example.com"}}
	if _, err := ResolveAndCheck(ctx, "http://localhost/", strict); err == nil {
		t.Fatal("expected rejection for localhost (not in allowlist)")
	}

	// Path 2: passes the hostname gate, must be stopped by the
	// loopback check on the resolved address.
	open := AllowlistConfig{Domains: []string{"*"}}
	_, err := ResolveAndCheck(ctx, "http://localhost/", open)
	if err == nil {
		t.Fatal("expected rejection for localhost resolving to loopback under a wildcard allowlist")
	}
	if !strings.Contains(err.Error(), "loopback") {
		t.Errorf("expected loopback error, got: %v", err)
	}
}
// TestResolveAndCheck_LocalhostAllowedExplicit exercises the admin
// opt-in escape hatch: when the host is itself an exact allowlist entry,
// the private-IP check is skipped. This is what test code relies on to
// drive httptest.NewServer URLs.
//
// The IP literal 127.0.0.1 is used instead of "localhost" so the test
// needs no DNS.
func TestResolveAndCheck_LocalhostAllowedExplicit(t *testing.T) {
	const loopback = "127.0.0.1"
	cfg := AllowlistConfig{Domains: []string{loopback}}

	got, err := ResolveAndCheck(context.Background(), "http://"+loopback+"/", cfg)
	if err != nil {
		t.Fatalf("expected 127.0.0.1 with explicit allowlist to succeed; got: %v", err)
	}
	if want := net.ParseIP(loopback); !got.Equal(want) {
		t.Errorf("expected ip=127.0.0.1, got %v", got)
	}
}
// TestResolveAndCheck_FileSchemeRejected makes sure file:// URLs are
// refused with a scheme error.
func TestResolveAndCheck_FileSchemeRejected(t *testing.T) {
	cfg := AllowlistConfig{Domains: []string{"anything"}}

	_, schemeErr := ResolveAndCheck(context.Background(), "file:///etc/passwd", cfg)
	if schemeErr == nil {
		t.Fatal("expected rejection for file:// scheme")
	}
	if msg := schemeErr.Error(); !strings.Contains(msg, "scheme") {
		t.Errorf("expected scheme error, got: %v", schemeErr)
	}
}
// TestResolveAndCheck_EmptyHostRejected makes sure a URL with no host
// component is refused.
func TestResolveAndCheck_EmptyHostRejected(t *testing.T) {
	cfg := AllowlistConfig{Domains: []string{"anything"}}

	_, hostErr := ResolveAndCheck(context.Background(), "http:///nohost", cfg)
	if hostErr != nil {
		return
	}
	t.Fatal("expected rejection for empty host")
}
// TestResolveAndCheck_PrivateIPLiteralRejected confirms that a private
// IP literal is stopped by the private-IP gate even when the hostname
// gate lets it through, and that only an exact allowlist entry lifts
// that gate.
//
// Three allowlists are tried against http://192.168.1.1/:
//
//   - bare "*": the host passes the allowlist, but a wildcard is not an
//     exact entry, so the private-IP check must reject it;
//   - a near-miss literal ("192.168.0.0"): rejected by the allowlist
//     itself;
//   - the exact literal: explicit admin opt-in, accepted.
func TestResolveAndCheck_PrivateIPLiteralRejected(t *testing.T) {
	const target = "http://192.168.1.1/"
	ctx := context.Background()

	// Wildcard opens the hostname gate; the private-IP gate is the last
	// line of defence and must hold.
	open := AllowlistConfig{Domains: []string{"*"}}
	_, err := ResolveAndCheck(ctx, target, open)
	if err == nil {
		t.Fatal("expected rejection for private IP literal not in allowlist")
	}
	if !strings.Contains(err.Error(), "private IP") {
		t.Errorf("expected private-IP error, got: %v", err)
	}

	// A different literal in the allowlist does not admit this one.
	sibling := AllowlistConfig{Domains: []string{"192.168.0.0"}}
	_, err = ResolveAndCheck(ctx, target, sibling)
	if err == nil {
		t.Fatal("expected rejection for private IP literal not in allowlist")
	}
	if !strings.Contains(err.Error(), "not in allowlist") {
		t.Errorf("expected allowlist error, got: %v", err)
	}

	// Sanity: explicit allowlist entry bypasses.
	exact := AllowlistConfig{Domains: []string{"192.168.1.1"}}
	_, err = ResolveAndCheck(ctx, target, exact)
	if err != nil {
		t.Errorf("expected explicit allowlist entry to bypass; got: %v", err)
	}
}
// TestResolveAndCheck_CloudMetadataRejected checks that the well-known
// cloud metadata address 169.254.169.254 is refused with the dedicated
// "metadata" error rather than the generic link-local one.
//
// Two entry points are covered:
//
//   - the public API with an IP-literal URL. A bare "*" allowlist lets
//     the host through the hostname gate without making it an exact
//     entry, so the rejection comes from the private-IP layer;
//   - rejectPrivateIP directly, standing in for a hostname that resolved
//     to the metadata address (pointing real DNS at it is not feasible
//     in a unit test).
func TestResolveAndCheck_CloudMetadataRejected(t *testing.T) {
	open := AllowlistConfig{Domains: []string{"*"}}
	_, err := ResolveAndCheck(context.Background(), "http://169.254.169.254/", open)
	if err == nil {
		t.Fatal("expected rejection for cloud metadata IP literal")
	}
	if !strings.Contains(err.Error(), "metadata") {
		t.Errorf("expected metadata error, got: %v", err)
	}

	if err := rejectPrivateIP("metadata.test", net.ParseIP("169.254.169.254")); err == nil {
		t.Fatal("expected rejection for cloud metadata IP")
	} else if !strings.Contains(err.Error(), "metadata") {
		t.Errorf("expected metadata error, got: %v", err)
	}
}
+145
View File
@@ -0,0 +1,145 @@
// Package tool — webhook_rate_limit.go: per-IP-per-skill
// sliding-window rate limiter for the v7 inbound webhook handler.
//
// Why an in-memory limiter (vs Redis or DB-backed): rate limiting is
// the cheap reject path BEFORE the HMAC compute and run-budget check,
// and an extra round-trip per inbound webhook would be wasted. The
// 6-person server's volume is well within a single-process limiter's
// scale; if mort is ever multi-process the limiter becomes
// approximate (still good enough to throttle abusive sources).
//
// Why per-IP-per-skill (vs per-IP global): one busy webhook (e.g.
// GitHub PR opened) shouldn't shadow another (Stripe charge). The
// composite key keeps a noisy source from pushing other skill's
// callers off the lane.
//
// Test: webhook_rate_limit_test.go covers admit + reject paths.
package tool
import (
"sync"
"time"
)
// WebhookRateLimiter counts calls per (skillID, sourceIP) pair over a
// sliding time window. All settings are fixed at construction; the
// methods are safe for concurrent use.
type WebhookRateLimiter struct {
	limit  int
	window time.Duration
	clock  func() time.Time

	mu      sync.Mutex
	buckets map[string]*rateBucket // key = skillID + "|" + sourceIP
}

// rateBucket holds the admitted-call timestamps of one key, oldest
// first. Expired entries are dropped on every Admit and Sweep, so the
// slice cannot grow without bound.
type rateBucket struct {
	hits []time.Time
}

// dropBefore discards the leading hits that are strictly older than
// cutoff. The survivors are shifted to the front so the backing array is
// reused.
func (b *rateBucket) dropBefore(cutoff time.Time) {
	stale := 0
	for stale < len(b.hits) && b.hits[stale].Before(cutoff) {
		stale++
	}
	if stale == 0 {
		return
	}
	kept := copy(b.hits, b.hits[stale:])
	b.hits = b.hits[:kept]
}

// NewWebhookRateLimiter constructs the limiter.
//
//   - limit:  max calls per (skill, ip) within window. A value <= 0 means
//     "unlimited": every call is admitted (useful for tests).
//   - window: sliding window length. A value <= 0 falls back to 1 minute.
//   - clock:  testable wall-clock. nil falls back to time.Now.
func NewWebhookRateLimiter(limit int, window time.Duration, clock func() time.Time) *WebhookRateLimiter {
	limiter := &WebhookRateLimiter{
		limit:   limit,
		window:  time.Minute,
		clock:   time.Now,
		buckets: map[string]*rateBucket{},
	}
	if window > 0 {
		limiter.window = window
	}
	if clock != nil {
		limiter.clock = clock
	}
	return limiter
}

// Admit decides whether one more call from sourceIP to skillID fits
// under the cap.
//
// It returns (true, 0) and records the hit when the call is admitted.
// When the cap is already reached it returns (false, retryAfter), where
// retryAfter is the time until the OLDEST hit still in the window
// expires; the rejected call is not recorded.
//
// Why hand back retry-after and not just a bool: HTTP 429 responses
// commonly carry a Retry-After header, and deriving it from the sliding
// window costs next to nothing.
func (l *WebhookRateLimiter) Admit(skillID, sourceIP string) (bool, time.Duration) {
	if l.limit <= 0 {
		return true, 0
	}

	now := l.clock()
	cutoff := now.Add(-l.window)
	key := skillID + "|" + sourceIP

	l.mu.Lock()
	defer l.mu.Unlock()

	bucket, known := l.buckets[key]
	if !known {
		bucket = &rateBucket{}
		l.buckets[key] = bucket
	}
	bucket.dropBefore(cutoff)

	if len(bucket.hits) < l.limit {
		bucket.hits = append(bucket.hits, now)
		return true, 0
	}

	wait := bucket.hits[0].Add(l.window).Sub(now)
	if wait < 0 {
		wait = 0
	}
	return false, wait
}

// Sweep drops expired hits from every bucket and deletes the buckets
// left empty. Intended to be called periodically (e.g. once per minute)
// to bound the growth of the buckets map.
//
// Why a separate pass instead of relying on Admit: Admit only prunes the
// bucket it is asked about. A hostile source rotating through many IP
// addresses, each hitting once, would otherwise leave millions of
// single-hit buckets behind.
func (l *WebhookRateLimiter) Sweep() {
	cutoff := l.clock().Add(-l.window)

	l.mu.Lock()
	defer l.mu.Unlock()

	for key, bucket := range l.buckets {
		bucket.dropBefore(cutoff)
		if len(bucket.hits) == 0 {
			delete(l.buckets, key)
		}
	}
}

// CountKeys returns the number of buckets currently held. Test helper.
func (l *WebhookRateLimiter) CountKeys() int {
	l.mu.Lock()
	n := len(l.buckets)
	l.mu.Unlock()
	return n
}