fix: address verified gadfly P4/#4 findings (audit/budget/persona)
executus CI / test (push) Failing after 1m4s

Security (all 3 models — HIGH): audit OnTool persisted raw tool args + results
verbatim for the very tools the OnStep narration-redaction flags as secret
(mcp_call/email_send/http_*) — the args/results are what CARRY the secret, so
they landed in skill_run_logs unredacted. Factored the predicate into
isSecretTool() (single source of truth) and OnTool now emits
args_redacted/result_redacted (+ lengths) for secret tools. Test asserts no
secret reaches the log. (persona) webhook_ip_allowlist entries are now
CIDR/IP-validated at load (malformed dropped + warned) instead of accepted raw.

Contract correctness (glm-5.2 + deepseek) — audit Memory now honors its
documented Storage contract: ListChildrenByParent/ListFinishedRunsBefore return
oldest-first; WalkParentChain returns root-first and honors MaxParentChainDepth;
ListRunsFiltered clamps limit (<=0 or >500 -> 50); ListFinishedRunsBefore with
limit<=0 returns none; an explicit RunFilter.Status (incl. "dry_run") matches
regardless of IncludeDryRun; LastRunBySkills counts only status=="ok" unless
includeFailed. (PurgeOlderThan's FinishedAt key is the SAFE behavior — in-flight
runs retained — so the doc was aligned to it, not the impl.)

Error-handling: appendLog now uses a bounded context (auditAppendTimeout=3s) so
a hung backend can't block the run goroutine on the hot path; Sink.StartRun
logs its (still best-effort) failure instead of swallowing it; budget Memory.Get
uses RLock (RWMutex); budget package doc fixed (was skillexec's); Check uses the
budgetWindow constant, not a duplicated literal.

Triaged false-positive: NewNoOpBudget returning BudgetTracker is assignable to
run.Budget (identical method sets) — no change needed.

Core go.sum still free of host/DB deps.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-26 23:44:34 -04:00
parent 2260480c81
commit d82cef46b4
8 changed files with 197 additions and 24 deletions
+34 -9
View File
@@ -99,6 +99,19 @@ func (m *Memory) newestFirst(keep func(SkillRun) bool) []SkillRun {
return out return out
} }
// oldestFirst returns the retained runs in insertion (oldest-first) order,
// optionally filtered. Caller holds at least RLock.
func (m *Memory) oldestFirst(keep func(SkillRun) bool) []SkillRun {
out := make([]SkillRun, 0, len(m.order))
for _, id := range m.order {
r := m.runs[id]
if keep == nil || keep(r) {
out = append(out, r)
}
}
return out
}
func page(rs []SkillRun, offset, limit int) []SkillRun { func page(rs []SkillRun, offset, limit int) []SkillRun {
if offset < 0 { if offset < 0 {
offset = 0 offset = 0
@@ -142,10 +155,12 @@ func (m *Memory) ListRunsByCaller(_ context.Context, callerID string, limit int)
} }
func (m *Memory) matchesFilter(r SkillRun, f RunFilter) bool { func (m *Memory) matchesFilter(r SkillRun, f RunFilter) bool {
if !f.IncludeDryRun && r.Status == "dry_run" { if f.Status != "" {
return false if r.Status != f.Status {
} return false
if f.Status != "" && r.Status != f.Status { }
// An explicit Status (even "dry_run") matches regardless of IncludeDryRun.
} else if !f.IncludeDryRun && r.Status == "dry_run" {
return false return false
} }
if f.SkillID != "" && r.SkillID != f.SkillID { if f.SkillID != "" && r.SkillID != f.SkillID {
@@ -170,6 +185,9 @@ func (m *Memory) matchesFilter(r SkillRun, f RunFilter) bool {
} }
func (m *Memory) ListRunsFiltered(_ context.Context, f RunFilter, offset, limit int) ([]SkillRun, error) { func (m *Memory) ListRunsFiltered(_ context.Context, f RunFilter, offset, limit int) ([]SkillRun, error) {
if limit <= 0 || limit > 500 {
limit = 50 // bound admin scans, per the Storage contract
}
m.mu.RLock() m.mu.RLock()
defer m.mu.RUnlock() defer m.mu.RUnlock()
return page(m.newestFirst(func(r SkillRun) bool { return m.matchesFilter(r, f) }), offset, limit), nil return page(m.newestFirst(func(r SkillRun) bool { return m.matchesFilter(r, f) }), offset, limit), nil
@@ -203,7 +221,7 @@ func (m *Memory) PurgeOlderThan(_ context.Context, t time.Time) (int64, error) {
func (m *Memory) ListChildrenByParent(_ context.Context, parentRunID string) ([]SkillRun, error) { func (m *Memory) ListChildrenByParent(_ context.Context, parentRunID string) ([]SkillRun, error) {
m.mu.RLock() m.mu.RLock()
defer m.mu.RUnlock() defer m.mu.RUnlock()
return m.newestFirst(func(r SkillRun) bool { return r.ParentRunID == parentRunID }), nil return m.oldestFirst(func(r SkillRun) bool { return r.ParentRunID == parentRunID }), nil
} }
func (m *Memory) WalkParentChain(_ context.Context, runID string) ([]SkillRun, error) { func (m *Memory) WalkParentChain(_ context.Context, runID string) ([]SkillRun, error) {
@@ -211,7 +229,7 @@ func (m *Memory) WalkParentChain(_ context.Context, runID string) ([]SkillRun, e
defer m.mu.RUnlock() defer m.mu.RUnlock()
var chain []SkillRun var chain []SkillRun
seen := map[string]bool{} seen := map[string]bool{}
for id := runID; id != ""; { for id := runID; id != "" && len(chain) < MaxParentChainDepth; {
r, ok := m.runs[id] r, ok := m.runs[id]
if !ok || seen[id] { if !ok || seen[id] {
break break
@@ -220,13 +238,20 @@ func (m *Memory) WalkParentChain(_ context.Context, runID string) ([]SkillRun, e
chain = append(chain, r) chain = append(chain, r)
id = r.ParentRunID id = r.ParentRunID
} }
// Contract: root first, the queried run last. We walked child→root, so reverse.
for i, j := 0, len(chain)-1; i < j; i, j = i+1, j-1 {
chain[i], chain[j] = chain[j], chain[i]
}
return chain, nil return chain, nil
} }
func (m *Memory) ListFinishedRunsBefore(_ context.Context, cutoff time.Time, limit int) ([]SkillRun, error) { func (m *Memory) ListFinishedRunsBefore(_ context.Context, cutoff time.Time, limit int) ([]SkillRun, error) {
if limit <= 0 {
return nil, nil // contract: a real bound is required
}
m.mu.RLock() m.mu.RLock()
defer m.mu.RUnlock() defer m.mu.RUnlock()
return page(m.newestFirst(func(r SkillRun) bool { return page(m.oldestFirst(func(r SkillRun) bool {
return r.FinishedAt != nil && r.FinishedAt.Before(cutoff) return r.FinishedAt != nil && r.FinishedAt.Before(cutoff)
}), 0, limit), nil }), 0, limit), nil
} }
@@ -244,8 +269,8 @@ func (m *Memory) LastRunBySkills(_ context.Context, skillIDs []string, includeFa
if !want[r.SkillID] { if !want[r.SkillID] {
continue continue
} }
if !includeFailed && (r.Status == "error" || r.Status == "timeout") { if !includeFailed && r.Status != "ok" {
continue continue // contract: only status=="ok" counts unless includeFailed
} }
if r.StartedAt.After(out[r.SkillID]) { if r.StartedAt.After(out[r.SkillID]) {
out[r.SkillID] = r.StartedAt out[r.SkillID] = r.StartedAt
+58
View File
@@ -0,0 +1,58 @@
package audit
import (
"context"
"strings"
"testing"
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
)
// TestOnToolRedactsSecretTools: a secret-bearing tool's args/result must NOT be
// persisted verbatim in the audit log.
func TestOnToolRedactsSecretTools(t *testing.T) {
ctx := context.Background()
mem := NewMemory()
mem.StartRun(ctx, SkillRun{ID: "r1"})
w := NewWriter(mem, "r1")
secret := `{"url":"https://x","headers":{"Authorization":"Bearer SUPERSECRET"}}`
w.OnTool(llm.ToolCall{Name: "http_get", ID: "1", Arguments: []byte(secret)}, "TOPSECRETBODY")
// a non-secret tool is logged verbatim
w.OnTool(llm.ToolCall{Name: "think", ID: "2", Arguments: []byte(`{"thought":"hi"}`)}, "ok")
logs, _ := mem.ListLogsByRun(ctx, "r1")
var dump strings.Builder
for _, l := range logs {
for k, v := range l.Payload {
dump.WriteString(k)
dump.WriteString("=")
if s, ok := v.(string); ok {
dump.WriteString(s)
}
dump.WriteString(" ")
}
}
all := dump.String()
if strings.Contains(all, "SUPERSECRET") || strings.Contains(all, "TOPSECRETBODY") {
t.Fatalf("secret leaked into audit log: %s", all)
}
// the redaction marker is present, and the non-secret tool's args survive
foundRedacted, foundThink := false, false
for _, l := range logs {
if l.EventType == "tool_call" {
if r, _ := l.Payload["args_redacted"].(bool); r {
foundRedacted = true
}
if a, _ := l.Payload["args"].(string); strings.Contains(a, "thought") {
foundThink = true
}
}
}
if !foundRedacted {
t.Error("secret tool_call should carry args_redacted=true")
}
if !foundThink {
t.Error("non-secret tool args should be logged verbatim")
}
}
+8 -3
View File
@@ -2,6 +2,7 @@ package audit
import ( import (
"context" "context"
"log/slog"
"time" "time"
"gitea.stevedudenhoeffer.com/steve/majordomo/llm" "gitea.stevedudenhoeffer.com/steve/majordomo/llm"
@@ -36,8 +37,9 @@ func (s *Sink) StartRun(ctx context.Context, info run.RunInfo) run.RunRecorder {
if started.IsZero() { if started.IsZero() {
started = time.Now() started = time.Now()
} }
// Best-effort: a failed StartRun must not break the user-visible run. // Best-effort: a failed StartRun must not break the user-visible run, but we
_ = s.storage.StartRun(ctx, SkillRun{ // surface it (a swallowed failure leaves orphan log events with no run row).
if err := s.storage.StartRun(ctx, SkillRun{
ID: info.RunID, ID: info.RunID,
SkillID: info.SubjectID, SkillID: info.SubjectID,
CallerID: info.CallerID, CallerID: info.CallerID,
@@ -46,7 +48,10 @@ func (s *Sink) StartRun(ctx context.Context, info run.RunInfo) run.RunRecorder {
Inputs: info.Inputs, Inputs: info.Inputs,
StartedAt: started, StartedAt: started,
Status: "running", Status: "running",
}) }); err != nil {
slog.Warn("audit: StartRun failed; the run row is missing so its log events will orphan",
"run_id", info.RunID, "error", err)
}
return &recorder{w: NewWriter(s.storage, info.RunID)} return &recorder{w: NewWriter(s.storage, info.RunID)}
} }
+43 -6
View File
@@ -168,16 +168,26 @@ func (w *Writer) OnStep(iter int, resp *llm.Response) {
// surrounding narration could leak a secret (MCP args, email body/ // surrounding narration could leak a secret (MCP args, email body/
// recipients, raw HTTP request). Mirrors the steps.go redaction list so // recipients, raw HTTP request). Mirrors the steps.go redaction list so
// the audit trace never persists secret-adjacent assistant text. // the audit trace never persists secret-adjacent assistant text.
// isSecretTool reports whether a tool's arguments/results may carry secrets
// (MCP args, email bodies/recipients, HTTP auth headers/bodies) and so must be
// redacted from the persisted audit log. Single source of truth for both the
// step-narration redaction and the OnTool arg/result redaction. NOTE: this is
// a name-prefix allowlist — a NEW secret-bearing tool must be added here or its
// args/results will be logged verbatim.
func isSecretTool(name string) bool {
switch name {
case "mcp_call", "email_send":
return true
}
return strings.HasPrefix(name, "http_")
}
func stepHasSecretTool(resp *llm.Response) bool { func stepHasSecretTool(resp *llm.Response) bool {
if resp == nil { if resp == nil {
return false return false
} }
for _, c := range resp.ToolCalls { for _, c := range resp.ToolCalls {
switch c.Name { if isSecretTool(c.Name) {
case "mcp_call", "email_send":
return true
}
if strings.HasPrefix(c.Name, "http_") {
return true return true
} }
} }
@@ -211,6 +221,24 @@ func (w *Writer) OnTool(call llm.ToolCall, result string) {
return return
} }
w.calls.Add(1) w.calls.Add(1)
// Redact the args/result of secret-bearing tools — these fields actually
// CARRY the secret (MCP args, email body/recipients, HTTP auth/body), so
// logging them verbatim would defeat the OnStep narration redaction.
if isSecretTool(call.Name) {
w.appendLog("tool_call", map[string]any{
"name": call.Name,
"id": call.ID,
"args_redacted": true,
"args_len": len(call.Arguments),
})
w.appendLog("tool_result", map[string]any{
"name": call.Name,
"id": call.ID,
"result_redacted": true,
"result_len": len(result),
})
return
}
w.appendLog("tool_call", map[string]any{ w.appendLog("tool_call", map[string]any{
"name": call.Name, "name": call.Name,
"args": string(call.Arguments), "args": string(call.Arguments),
@@ -296,6 +324,10 @@ func (w *Writer) Close(ctx context.Context, stats RunStats) {
// hung connection that the run goroutine shouldn't keep waiting on. // hung connection that the run goroutine shouldn't keep waiting on.
const auditFinishTimeout = 10 * time.Second const auditFinishTimeout = 10 * time.Second
// auditAppendTimeout bounds each per-event AppendLog on the hot path so a hung
// storage backend can't block the run goroutine.
const auditAppendTimeout = 3 * time.Second
// ToolCallsCount returns how many tool invocations OnTool has seen so // ToolCallsCount returns how many tool invocations OnTool has seen so
// far. Useful for budget enforcement. // far. Useful for budget enforcement.
func (w *Writer) ToolCallsCount() int { return int(w.calls.Load()) } func (w *Writer) ToolCallsCount() int { return int(w.calls.Load()) }
@@ -309,7 +341,12 @@ func (w *Writer) appendLog(eventType string, payload map[string]any) {
Payload: payload, Payload: payload,
CreatedAt: time.Now(), CreatedAt: time.Now(),
} }
if err := w.storage.AppendLog(context.Background(), log); err != nil { // Bound the write: a hung storage backend must not block the run goroutine
// on the hot path (every step/tool event flows through here). Detached from
// any caller deadline — the log write is independent of the run's context.
ctx, cancel := context.WithTimeout(context.Background(), auditAppendTimeout)
defer cancel()
if err := w.storage.AppendLog(ctx, log); err != nil {
slog.Warn("skillaudit: AppendLog failed", "run_id", w.runID, "seq", seq, "type", eventType, "error", err) slog.Warn("skillaudit: AppendLog failed", "run_id", w.runID, "seq", seq, "type", eventType, "error", err)
} }
} }
+4 -2
View File
@@ -1,4 +1,6 @@
// Package skillexec runs saved Skill definitions via majordomo's agent // Package budget gates and meters per-caller resource use over a rolling
// 7-day window (run.Ports.Budget). DBBudget is the durable tracker; NoOpBudget
// disables metering; the BudgetStorage seam backs it (Memory / contrib SQLite).
// loop (gitea.stevedudenhoeffer.com/steve/majordomo/agent). // loop (gitea.stevedudenhoeffer.com/steve/majordomo/agent).
// //
// Why: a Skill is data; the executor turns data into a running agent // Why: a Skill is data; the executor turns data into a running agent
@@ -130,7 +132,7 @@ func (b *DBBudget) Check(ctx context.Context, callerID string) error {
return fmt.Errorf("budget: %w", err) return fmt.Errorf("budget: %w", err)
} }
if bud != nil { if bud != nil {
if b.now().Sub(bud.WindowStart) < 7*24*time.Hour { if b.now().Sub(bud.WindowStart) < budgetWindow {
cap := b.weeklyLimit() cap := b.weeklyLimit()
if cap > 0 && bud.SecondsUsed >= cap { if cap > 0 && bud.SecondsUsed >= cap {
if b.notify != nil { if b.notify != nil {
+3 -3
View File
@@ -10,7 +10,7 @@ import (
// usage held in memory (lost on restart). The default behind DBBudget for a // usage held in memory (lost on restart). The default behind DBBudget for a
// light host or tests; mort uses its GORM Storage, contrib/store adds SQLite. // light host or tests; mort uses its GORM Storage, contrib/store adds SQLite.
type Memory struct { type Memory struct {
mu sync.Mutex mu sync.RWMutex
rows map[string]*SkillBudget rows map[string]*SkillBudget
} }
@@ -22,8 +22,8 @@ var _ BudgetStorage = (*Memory)(nil)
func (m *Memory) Initialize(context.Context) error { return nil } func (m *Memory) Initialize(context.Context) error { return nil }
func (m *Memory) Get(_ context.Context, userID string) (*SkillBudget, error) { func (m *Memory) Get(_ context.Context, userID string) (*SkillBudget, error) {
m.mu.Lock() m.mu.RLock()
defer m.mu.Unlock() defer m.mu.RUnlock()
r, ok := m.rows[userID] r, ok := m.rows[userID]
if !ok { if !ok {
return nil, nil return nil, nil
+30 -1
View File
@@ -35,6 +35,7 @@ import (
"fmt" "fmt"
"io/fs" "io/fs"
"log/slog" "log/slog"
"net"
"path" "path"
"strings" "strings"
"time" "time"
@@ -540,6 +541,10 @@ func decodeAgentManifest(data []byte) (*Agent, error) {
}) })
} }
// Validate the webhook IP allow-list (CIDR or bare IP); drop + warn on
// malformed entries so a typo can't silently widen or void the allow-list.
allowlist := validateIPAllowlist(m.WebhookIPAllowlist, m.Name)
ag := &Agent{ ag := &Agent{
Name: strings.TrimSpace(m.Name), Name: strings.TrimSpace(m.Name),
Description: m.Description, Description: m.Description,
@@ -559,7 +564,7 @@ func decodeAgentManifest(data []byte) (*Agent, error) {
LowLevelTools: m.LowLevelTools, LowLevelTools: m.LowLevelTools,
PersonalizationSources: m.PersonalizationSources, PersonalizationSources: m.PersonalizationSources,
Schedule: strings.TrimSpace(m.Schedule), Schedule: strings.TrimSpace(m.Schedule),
WebhookIPAllowlist: m.WebhookIPAllowlist, WebhookIPAllowlist: allowlist,
ChatbotChannelFilter: strings.TrimSpace(m.ChatbotChannelFilter), ChatbotChannelFilter: strings.TrimSpace(m.ChatbotChannelFilter),
DefaultEmoji: m.DefaultEmoji, DefaultEmoji: m.DefaultEmoji,
StateReactEmoji: m.StateReact, StateReactEmoji: m.StateReact,
@@ -568,3 +573,27 @@ func decodeAgentManifest(data []byte) (*Agent, error) {
} }
return ag, nil return ag, nil
} }
// validateIPAllowlist keeps only entries that parse as a CIDR block or a bare
// IP; malformed entries are dropped with a warning (a typo must not silently
// widen or void the webhook allow-list). The struct field documents "CIDR
// strings", so this enforces it at load time.
func validateIPAllowlist(entries []string, agent string) []string {
var out []string
for _, e := range entries {
e = strings.TrimSpace(e)
if e == "" {
continue
}
if _, _, err := net.ParseCIDR(e); err == nil {
out = append(out, e)
continue
}
if ip := net.ParseIP(e); ip != nil {
out = append(out, e)
continue
}
slog.Warn("agents: dropping malformed webhook_ip_allowlist entry (not a CIDR or IP)", "agent", agent, "entry", e)
}
return out
}
+17
View File
@@ -0,0 +1,17 @@
package persona
import "testing"
func TestValidateIPAllowlist(t *testing.T) {
in := []string{"10.0.0.0/8", " 192.168.1.5 ", "not-an-ip", "", "2001:db8::/32", "garbage/99"}
got := validateIPAllowlist(in, "test")
want := map[string]bool{"10.0.0.0/8": true, "192.168.1.5": true, "2001:db8::/32": true}
if len(got) != len(want) {
t.Fatalf("got %v, want %d valid entries", got, len(want))
}
for _, e := range got {
if !want[e] {
t.Errorf("unexpected entry kept: %q", e)
}
}
}