Files
steve dc28b63ad8
executus CI / test (push) Successful in 36s
P1 (part 1): move skilltools core -> tool/ (clean, verbatim)
The tool registry core (registry, permission model, Invocation, gated-tool
wrapper, ssrf guard, hmac, encryption, argcoerce, helpers, rootrun,
session_tools, webhook_rate_limit) had zero mort coupling — it imports only
majordomo/llm + x/crypto/hkdf — so it moves verbatim with a package rename
(skilltools -> tool). All same-package tests came along and pass; the SSRF,
gated-wrapper, encryption and output-pattern invariants are re-anchored here.

majordomo re-enters the module graph (now pinned to the latest, incl. the
front-loaded-output fix). model/ + llmmeta + structured follow next.

Docs: CLAUDE.md now requires README/examples to stay in sync with changes in
the same commit; CI skips docs/example-only pushes via paths-ignore.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-26 19:31:47 -04:00

243 lines
9.9 KiB
Go

// Package tool — encryption.go: per-skill envelope encryption for
// KV values and file blobs. AES-256-GCM with a per-skill key derived
// from a single master key (env var SKILLS_ENCRYPTION_MASTER_KEY) via
// HKDF using the skill ID as the salt.
//
// !!!!! CRITICAL OPERATIONAL WARNING !!!!!
//
// SKILLS_ENCRYPTION_MASTER_KEY MUST BE BACKED UP SEPARATELY FROM THE
// DATABASE. Losing the master key = losing every byte of encrypted
// KV value and every encrypted file blob, with no recovery path. The
// key is the ONLY thing that can decrypt rows whose
// encryption_key_version > 0.
//
// Operational rules:
// - Store the master key in a secrets manager (Vault, 1Password,
// KMS export) — NEVER in the same backup as the database dump.
// - Rotating the master key without a versioned re-encrypt
// migration WILL render existing encrypted rows unreadable. The
// encryption_key_version column was added so a future rotation
// migration can re-encrypt under a new (master, version)
// pair; do not bump the version without that migration.
// - When the env var is empty, encryption is OFF for the whole
// instance. Skills with encryption_enabled=true still write
// plaintext (with a logged WARNING). This is intentional — the
// alternative is to refuse to start, which would break
// deployment for everyone the moment the secret leaks during
// rotation. Loud logging + the boot-time warning in mort.go is
// the correct trade-off.
//
// Why HKDF-derived per-skill keys (vs one global key): a future
// "wipe this skill's data" admin action can be made auditable by
// recording the skill_id in the operation log without exposing the
// master key. Per-skill keys also cap blast radius if one key
// somehow leaks via a side channel — only that one skill's data is
// compromised, not the whole platform.
//
// Why AES-256-GCM: authenticated encryption catches tampered
// ciphertext at decrypt time. The GCM nonce is 12 random bytes per
// row; the auth tag is 16 bytes. Both are stored inline with the
// ciphertext so the storage layer's value/content column holds the
// full envelope (no separate nonce column).
//
// Wire format of an encrypted blob:
//
// +-- 1 byte: format version (0x01)
// +-- 12 bytes: GCM nonce
// +-- N bytes: ciphertext + 16-byte GCM tag
//
// The format-version byte lets a future change to nonce length or
// auth tag handling be detected loudly rather than corrupting reads.
// Encrypt always writes 0x01; Decrypt rejects any other version with
// ErrEncryptionUnknownVersion.
package tool
import (
"crypto/aes"
"crypto/cipher"
"crypto/rand"
"crypto/sha256"
"errors"
"fmt"
"io"
"os"
"golang.org/x/crypto/hkdf"
)
// Constants that pin down the skill envelope-encryption scheme.
const (
	// EncryptionMasterKeyEnv names the environment variable that carries
	// the master key for skill envelope encryption. The value is meant
	// to be 32 bytes or longer; it is hashed down to a fixed length
	// before use.
	//
	// !!!!! LOSING THIS KEY = LOSING ALL ENCRYPTED DATA !!!!!
	//
	// Keep its backup apart from database backups and never commit it.
	// An empty value means encryption is OFF (a WARNING is logged at
	// boot).
	EncryptionMasterKeyEnv = "SKILLS_ENCRYPTION_MASTER_KEY"

	// CurrentKeyVersion is the version recorded on every freshly
	// encrypted row. 0 is reserved for plaintext (legacy rows or
	// encryption switched off). 1 stands for "AES-256-GCM with an
	// HKDF(master, skill_id) per-skill key, envelope format 0x01".
	// Do not bump it without a migration that re-encrypts existing rows
	// under the new (master, version) pair.
	CurrentKeyVersion = 1

	// envelopeFormatV1 is the leading byte of every envelope Encrypt
	// emits. Decrypt answers any other leading byte with
	// ErrEncryptionUnknownVersion.
	envelopeFormatV1 = byte(0x01)

	// gcmNonceSize is the AES-GCM nonce length in bytes, fixed at the
	// 12 bytes recommended by NIST SP 800-38D.
	gcmNonceSize = 12
)
// Sentinel errors of the encryption helpers. Match them with errors.Is;
// storage adapters rely on them to tell "no master key" apart from a
// malformed, unknown-version or tampered envelope.
var (
	// ErrEncryptionDisabled reports that an encryption operation was
	// requested while SKILLS_ENCRYPTION_MASTER_KEY is empty. Storage
	// adapters treat it as "fall through to plaintext" and MUST log
	// loudly whenever they take that branch.
	ErrEncryptionDisabled = errors.New("skilltools: encryption disabled (master key empty)")

	// ErrEncryptionShortInput is what Decrypt returns for input that
	// cannot even hold the version byte plus the nonce. It points to a
	// bug or a malformed write.
	ErrEncryptionShortInput = errors.New("skilltools: encryption input too short")

	// ErrEncryptionUnknownVersion is what Decrypt returns when the
	// envelope's leading format-version byte differs from
	// envelopeFormatV1. Treat it as corruption and surface it to the
	// operator; do NOT quietly fall back to plaintext.
	ErrEncryptionUnknownVersion = errors.New("skilltools: encryption envelope has unknown format version")

	// ErrEncryptionTampered is what Decrypt returns when the GCM
	// authentication check fails, i.e. the ciphertext or nonce changed
	// after encryption (or the wrong key was used). Surface it as
	// "data corruption": the row cannot be read.
	ErrEncryptionTampered = errors.New("skilltools: encryption auth tag mismatch (data corruption or wrong key)")
)
// MasterKeyFromEnv reads SKILLS_ENCRYPTION_MASTER_KEY and returns the
// length-normalized master key: the SHA-256 digest (32 bytes) of the
// variable's value. The result is the master key itself, not a
// per-skill key — no HKDF derivation happens here.
//
// Why hash instead of insisting on exactly 32 raw bytes: operators
// tend to paste generated hex/base64 strings of assorted lengths.
// Hashing accepts any non-empty value and always yields a fixed-size
// key, which HKDF then diversifies per skill. The hash only normalizes
// the length.
//
// When the variable is empty or unset the result is (nil, false).
func MasterKeyFromEnv() (key []byte, present bool) {
	if secret := os.Getenv(EncryptionMasterKeyEnv); secret != "" {
		digest := sha256.Sum256([]byte(secret))
		return digest[:], true
	}
	return nil, false
}
// DeriveSkillKey computes the 32-byte AES-256 key of one skill from the
// (master, skillID) pair using HKDF-SHA256, with master as the secret,
// skillID as the salt and a fixed info label.
//
// Why the skill ID is the salt: every skill ends up with its own
// subkey, so decrypting any skill's data still requires the master
// key, while a leaked skill_id (routine — IDs show up in logs) gives
// an attacker nothing. The constant info label leaves room to derive
// other keys from the same master+skillID pair (say, a future
// per-skill HMAC key) under a different label without collisions.
//
// master is expected to be the 32-byte result of MasterKeyFromEnv or
// an equivalent length-normalized value; only emptiness is checked
// here, and an empty master yields ErrEncryptionDisabled. An empty
// skillID is rejected with an error; callers are responsible for
// passing a real ID.
func DeriveSkillKey(master []byte, skillID string) ([]byte, error) {
	switch {
	case len(master) == 0:
		return nil, ErrEncryptionDisabled
	case skillID == "":
		return nil, errors.New("skilltools: DeriveSkillKey requires non-empty skillID")
	}

	const (
		derivedKeyLen = 32
		infoLabel     = "mort/skills/v1/aead"
	)

	derived := make([]byte, derivedKeyLen)
	kdf := hkdf.New(sha256.New, master, []byte(skillID), []byte(infoLabel))
	_, err := io.ReadFull(kdf, derived)
	if err != nil {
		return nil, fmt.Errorf("skilltools: HKDF derive: %w", err)
	}
	return derived, nil
}
// Encrypt seals plaintext with AES-256-GCM under skillKey and returns
// the wire envelope: version byte || nonce || ciphertext || tag.
//
// skillKey must be exactly 32 bytes; any other length is an error.
//
// Encrypt only produces bytes. Persisting them, and stamping the
// encryption_key_version column with CurrentKeyVersion AFTER a
// successful call, is the storage layer's job.
//
// Why a fresh random nonce on every call rather than a deterministic
// one: reusing a nonce under GCM is catastrophic (it allows recovering
// the keystream), whereas random 96-bit nonces collide with negligible
// probability at any realistic write rate.
func Encrypt(skillKey, plaintext []byte) ([]byte, error) {
	if n := len(skillKey); n != 32 {
		return nil, fmt.Errorf("skilltools: Encrypt requires 32-byte key, got %d", n)
	}

	blockCipher, err := aes.NewCipher(skillKey)
	if err != nil {
		return nil, fmt.Errorf("skilltools: aes.NewCipher: %w", err)
	}
	aead, err := cipher.NewGCM(blockCipher)
	if err != nil {
		return nil, fmt.Errorf("skilltools: cipher.NewGCM: %w", err)
	}

	nonce := make([]byte, gcmNonceSize)
	if _, err := io.ReadFull(rand.Reader, nonce); err != nil {
		return nil, fmt.Errorf("skilltools: rand.Read: %w", err)
	}

	// Lay down the header (version byte + nonce) in a buffer whose
	// capacity already covers the ciphertext and the GCM tag, so Seal
	// appends in place without reallocating.
	headerLen := 1 + gcmNonceSize
	envelope := make([]byte, headerLen, headerLen+len(plaintext)+aead.Overhead())
	envelope[0] = envelopeFormatV1
	copy(envelope[1:], nonce)

	return aead.Seal(envelope, nonce, plaintext, nil), nil
}
// Decrypt opens an envelope that Encrypt produced under the same
// skillKey and returns the plaintext, or an error:
//
//   - a key that is not 32 bytes long yields a formatted error;
//   - input shorter than version byte + nonce yields
//     ErrEncryptionShortInput;
//   - a leading byte other than envelopeFormatV1 yields
//     ErrEncryptionUnknownVersion;
//   - any failure of the GCM open step yields ErrEncryptionTampered.
//
// Callers MUST look at the storage row's encryption_key_version BEFORE
// calling. Version 0 marks plaintext, and Decrypt SHOULD NOT be invoked
// for such rows (callers branch on the column value). Decrypt never
// sees that column; the only version it checks is the in-band
// envelope-format byte.
func Decrypt(skillKey, envelope []byte) ([]byte, error) {
	const headerLen = 1 + gcmNonceSize

	switch {
	case len(skillKey) != 32:
		return nil, fmt.Errorf("skilltools: Decrypt requires 32-byte key, got %d", len(skillKey))
	case len(envelope) < headerLen:
		return nil, ErrEncryptionShortInput
	case envelope[0] != envelopeFormatV1:
		return nil, ErrEncryptionUnknownVersion
	}

	blockCipher, err := aes.NewCipher(skillKey)
	if err != nil {
		return nil, fmt.Errorf("skilltools: aes.NewCipher: %w", err)
	}
	aead, err := cipher.NewGCM(blockCipher)
	if err != nil {
		return nil, fmt.Errorf("skilltools: cipher.NewGCM: %w", err)
	}

	// Everything after the header is ciphertext followed by the tag.
	nonce, sealed := envelope[1:headerLen], envelope[headerLen:]
	opened, err := aead.Open(nil, nonce, sealed, nil)
	if err != nil {
		// The underlying error is deliberately not inspected: every
		// failure of the open step is reported as
		// ErrEncryptionTampered, because a wrong key or altered bytes
		// is by far the most likely cause and callers want to surface
		// it as "data corruption".
		return nil, ErrEncryptionTampered
	}
	return opened, nil
}