dc28b63ad8
executus CI / test (push) Successful in 36s
The tool registry core (registry, permission model, Invocation, gated-tool wrapper, ssrf guard, hmac, encryption, argcoerce, helpers, rootrun, session_tools, webhook_rate_limit) had zero mort coupling — it imports only majordomo/llm + x/crypto/hkdf — so it moves verbatim with a package rename (skilltools -> tool). All same-package tests came along and pass; the SSRF, gated-wrapper, encryption and output-pattern invariants are re-anchored here. majordomo re-enters the module graph (now pinned to the latest, incl. the front-loaded-output fix). model/ + llmmeta + structured follow next. Docs: CLAUDE.md now requires README/examples to stay in sync with changes in the same commit; CI skips docs/example-only pushes via paths-ignore. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
243 lines
9.9 KiB
Go
243 lines
9.9 KiB
Go
// Package tool — encryption.go: per-skill envelope encryption for
|
|
// KV values and file blobs. AES-256-GCM with a per-skill key derived
|
|
// from a single master key (env var SKILLS_ENCRYPTION_MASTER_KEY) via
|
|
// HKDF using the skill ID as the salt.
|
|
//
|
|
// !!!!! CRITICAL OPERATIONAL WARNING !!!!!
|
|
//
|
|
// SKILLS_ENCRYPTION_MASTER_KEY MUST BE BACKED UP SEPARATELY FROM THE
|
|
// DATABASE. Losing the master key = losing every byte of encrypted
|
|
// KV value and every encrypted file blob, with no recovery path. The
|
|
// key is the ONLY thing that can decrypt rows whose
|
|
// encryption_key_version > 0.
|
|
//
|
|
// Operational rules:
|
|
// - Store the master key in a secrets manager (Vault, 1Password,
|
|
// KMS export) — NEVER in the same backup as the database dump.
|
|
// - Rotating the master key without a versioned re-encrypt
|
|
// migration WILL render existing encrypted rows unreadable. The
|
|
// encryption_key_version column was added so a future rotation
|
|
// migration can re-encrypt under a new (master, version)
|
|
// pair; do not bump the version without that migration.
|
|
// - When the env var is empty, encryption is OFF for the whole
|
|
// instance. Skills with encryption_enabled=true still write
|
|
// plaintext (with a logged WARNING). This is intentional — the
|
|
// alternative is to refuse to start, which would break
|
|
// deployment for everyone the moment the secret leaks during
|
|
// rotation. Loud logging + the boot-time warning in mort.go is
|
|
// the correct trade-off.
|
|
//
|
|
// Why HKDF-derived per-skill keys (vs one global key): a future
|
|
// "wipe this skill's data" admin action can be made auditable by
|
|
// recording the skill_id in the operation log without exposing the
|
|
// master key. Per-skill keys also cap blast radius if one key
|
|
// somehow leaks via a side channel — only that one skill's data is
|
|
// compromised, not the whole platform.
|
|
//
|
|
// Why AES-256-GCM: authenticated encryption catches tampered
|
|
// ciphertext at decrypt time. The GCM nonce is 12 random bytes per
|
|
// row; the auth tag is 16 bytes. Both are stored inline with the
|
|
// ciphertext so the storage layer's value/content column holds the
|
|
// full envelope (no separate nonce column).
|
|
//
|
|
// Wire format of an encrypted blob:
|
|
//
|
|
// +-- 1 byte: format version (0x01)
|
|
// +-- 12 bytes: GCM nonce
|
|
// +-- N bytes: ciphertext + 16-byte GCM tag
|
|
//
|
|
// The format-version byte lets a future change to nonce length or
|
|
// auth tag handling be detected loudly rather than corrupting reads.
|
|
// Encrypt always writes 0x01; Decrypt rejects any other version with
|
|
// ErrEncryptionUnknownVersion.
|
|
package tool
|
|
|
|
import (
|
|
"crypto/aes"
|
|
"crypto/cipher"
|
|
"crypto/rand"
|
|
"crypto/sha256"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
|
|
"golang.org/x/crypto/hkdf"
|
|
)
|
|
|
|
// EncryptionMasterKeyEnv is the environment variable that holds the
|
|
// 32-byte (or longer, hashed down) master key for skill envelope
|
|
// encryption.
|
|
//
|
|
// !!!!! LOSING THIS KEY = LOSING ALL ENCRYPTED DATA !!!!!
|
|
//
|
|
// Back it up separately from database backups. Never commit it.
|
|
// Empty value = encryption OFF (with WARNING logged at boot).
|
|
const EncryptionMasterKeyEnv = "SKILLS_ENCRYPTION_MASTER_KEY"
|
|
|
|
// CurrentKeyVersion is the version stamped on every newly-encrypted
|
|
// row. Version 0 is reserved for plaintext (legacy / encryption-off).
|
|
// Version 1 is "AES-256-GCM with HKDF(master, skill_id) per-skill key,
|
|
// envelope format 0x01". Bumping this requires a migration that
|
|
// re-encrypts existing rows under the new (master, version) pair.
|
|
const CurrentKeyVersion = 1
|
|
|
|
// envelopeFormatV1 is the first byte of every Encrypt output. Decrypt
|
|
// rejects any other value with ErrEncryptionUnknownVersion.
|
|
const envelopeFormatV1 = byte(0x01)
|
|
|
|
// gcmNonceSize is fixed at 12 bytes for AES-GCM (NIST SP 800-38D
|
|
// recommended).
|
|
const gcmNonceSize = 12
|
|
|
|
// Encryption sentinel errors. Callers compare with errors.Is so storage
|
|
// adapters can branch on "tampered" vs "unknown version" vs "no master
|
|
// key".
|
|
var (
|
|
// ErrEncryptionDisabled is returned when an encryption operation
|
|
// is attempted but SKILLS_ENCRYPTION_MASTER_KEY is empty. Storage
|
|
// adapters interpret this as "fall through to plaintext" — they
|
|
// MUST log loudly when this branch is taken.
|
|
ErrEncryptionDisabled = errors.New("skilltools: encryption disabled (master key empty)")
|
|
|
|
// ErrEncryptionUnknownVersion is returned by Decrypt when the
|
|
// envelope's format-version byte is not envelopeFormatV1. A read
|
|
// that hits this error is corruption — surface to the operator,
|
|
// do NOT silently fall back to plaintext.
|
|
ErrEncryptionUnknownVersion = errors.New("skilltools: encryption envelope has unknown format version")
|
|
|
|
// ErrEncryptionTampered is returned by Decrypt when the GCM auth
|
|
// tag check fails. The ciphertext or nonce was modified after
|
|
// encryption. Surface as "data corruption" — the row is unreadable.
|
|
ErrEncryptionTampered = errors.New("skilltools: encryption auth tag mismatch (data corruption or wrong key)")
|
|
|
|
// ErrEncryptionShortInput is returned by Decrypt when the input
|
|
// is too short to contain even the version byte + nonce. Bug or
|
|
// malformed write.
|
|
ErrEncryptionShortInput = errors.New("skilltools: encryption input too short")
|
|
)
|
|
|
|
// MasterKeyFromEnv returns the master key bytes (raw, NOT
|
|
// HKDF-derived) from the SKILLS_ENCRYPTION_MASTER_KEY env var.
|
|
//
|
|
// Why hash + truncate to 32 bytes vs require 32 raw bytes: operators
|
|
// commonly paste a generated random hex/base64 string of varying
|
|
// length. SHA-256-truncate accepts any non-empty input and produces
|
|
// a fixed-length key, which is then fed into HKDF for per-skill
|
|
// derivation. The hash step is purely "normalize length"; HKDF still
|
|
// does the per-skill diversification.
|
|
//
|
|
// Returns nil bytes (and false) if the env var is empty.
|
|
func MasterKeyFromEnv() (key []byte, present bool) {
|
|
raw := os.Getenv(EncryptionMasterKeyEnv)
|
|
if raw == "" {
|
|
return nil, false
|
|
}
|
|
sum := sha256.Sum256([]byte(raw))
|
|
return sum[:], true
|
|
}
|
|
|
|
// DeriveSkillKey returns the per-skill 32-byte AES-256 key for the
|
|
// given (master, skillID) pair via HKDF-SHA256.
|
|
//
|
|
// Why skillID as HKDF salt: each skill gets a distinct subkey so a
|
|
// single master breach is necessary to decrypt any one skill, but
|
|
// a skill_id leak (which is normal — IDs appear in logs) does NOT
|
|
// help an attacker. The HKDF info parameter is fixed to a constant
|
|
// label so different uses of the same master+skillID pair (e.g. a
|
|
// future per-skill HMAC key) can be derived with a different label
|
|
// without colliding.
|
|
//
|
|
// master must be the 32-byte output of MasterKeyFromEnv (or
|
|
// equivalent length-normalized input). skillID must be non-empty —
|
|
// caller is responsible.
|
|
func DeriveSkillKey(master []byte, skillID string) ([]byte, error) {
|
|
if len(master) == 0 {
|
|
return nil, ErrEncryptionDisabled
|
|
}
|
|
if skillID == "" {
|
|
return nil, errors.New("skilltools: DeriveSkillKey requires non-empty skillID")
|
|
}
|
|
r := hkdf.New(sha256.New, master, []byte(skillID), []byte("mort/skills/v1/aead"))
|
|
out := make([]byte, 32)
|
|
if _, err := io.ReadFull(r, out); err != nil {
|
|
return nil, fmt.Errorf("skilltools: HKDF derive: %w", err)
|
|
}
|
|
return out, nil
|
|
}
|
|
|
|
// Encrypt seals plaintext under skillKey using AES-256-GCM and returns
|
|
// the wire envelope (version byte || nonce || ciphertext || tag).
|
|
//
|
|
// Caller is responsible for stamping the encryption_key_version column
|
|
// to CurrentKeyVersion AFTER a successful Encrypt — Encrypt itself
|
|
// only produces bytes; persisting them is the storage layer's job.
|
|
//
|
|
// Why a fresh random nonce per call (vs deterministic): nonce reuse
|
|
// under GCM is catastrophic (allows recovering the keystream); fresh
|
|
// 96-bit random nonces have a negligible collision probability under
|
|
// any realistic write rate.
|
|
func Encrypt(skillKey, plaintext []byte) ([]byte, error) {
|
|
if len(skillKey) != 32 {
|
|
return nil, fmt.Errorf("skilltools: Encrypt requires 32-byte key, got %d", len(skillKey))
|
|
}
|
|
block, err := aes.NewCipher(skillKey)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("skilltools: aes.NewCipher: %w", err)
|
|
}
|
|
gcm, err := cipher.NewGCM(block)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("skilltools: cipher.NewGCM: %w", err)
|
|
}
|
|
nonce := make([]byte, gcmNonceSize)
|
|
if _, err := io.ReadFull(rand.Reader, nonce); err != nil {
|
|
return nil, fmt.Errorf("skilltools: rand.Read: %w", err)
|
|
}
|
|
// Pre-allocate the envelope: 1 (version) + 12 (nonce) + len(plaintext) + 16 (tag).
|
|
out := make([]byte, 0, 1+gcmNonceSize+len(plaintext)+gcm.Overhead())
|
|
out = append(out, envelopeFormatV1)
|
|
out = append(out, nonce...)
|
|
out = gcm.Seal(out, nonce, plaintext, nil)
|
|
return out, nil
|
|
}
|
|
|
|
// Decrypt opens an envelope produced by Encrypt under the same
|
|
// skillKey. Returns the plaintext or one of the sentinel errors.
|
|
//
|
|
// Caller MUST inspect the storage row's encryption_key_version BEFORE
|
|
// calling Decrypt. Version 0 means plaintext — Decrypt SHOULD NOT be
|
|
// called for version-0 rows (callers branch on the column value).
|
|
// This function does NOT inspect any version column; it only looks at
|
|
// the in-band envelope-format byte.
|
|
func Decrypt(skillKey, envelope []byte) ([]byte, error) {
|
|
if len(skillKey) != 32 {
|
|
return nil, fmt.Errorf("skilltools: Decrypt requires 32-byte key, got %d", len(skillKey))
|
|
}
|
|
if len(envelope) < 1+gcmNonceSize {
|
|
return nil, ErrEncryptionShortInput
|
|
}
|
|
if envelope[0] != envelopeFormatV1 {
|
|
return nil, ErrEncryptionUnknownVersion
|
|
}
|
|
nonce := envelope[1 : 1+gcmNonceSize]
|
|
ciphertext := envelope[1+gcmNonceSize:]
|
|
block, err := aes.NewCipher(skillKey)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("skilltools: aes.NewCipher: %w", err)
|
|
}
|
|
gcm, err := cipher.NewGCM(block)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("skilltools: cipher.NewGCM: %w", err)
|
|
}
|
|
plaintext, err := gcm.Open(nil, nonce, ciphertext, nil)
|
|
if err != nil {
|
|
// Distinguish auth-tag mismatch from other crypto errors so
|
|
// callers can surface "data corruption" specifically. The
|
|
// stdlib wraps the failure as a generic error; we map any
|
|
// failure here to ErrEncryptionTampered (the most likely
|
|
// cause is wrong key / tampered bytes).
|
|
return nil, ErrEncryptionTampered
|
|
}
|
|
return plaintext, nil
|
|
}
|