From d82cef46b46ff760644f1bea204c956976df3203 Mon Sep 17 00:00:00 2001 From: Steve Dudenhoeffer Date: Fri, 26 Jun 2026 23:44:34 -0400 Subject: [PATCH] fix: address verified gadfly P4/#4 findings (audit/budget/persona) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Security (all 3 models — HIGH): audit OnTool persisted raw tool args + results verbatim for the very tools the OnStep narration-redaction flags as secret (mcp_call/email_send/http_*) — the args/results are what CARRY the secret, so they landed in skill_run_logs unredacted. Factored the predicate into isSecretTool() (single source of truth) and OnTool now emits args_redacted/result_redacted (+ lengths) for secret tools. Test asserts no secret reaches the log. (persona) webhook_ip_allowlist entries are now CIDR/IP-validated at load (malformed dropped + warned) instead of accepted raw. Contract correctness (glm-5.2 + deepseek) — audit Memory now honors its documented Storage contract: ListChildrenByParent/ListFinishedRunsBefore return oldest-first; WalkParentChain returns root-first and honors MaxParentChainDepth; ListRunsFiltered clamps limit (<=0 or >500 -> 50); ListFinishedRunsBefore with limit<=0 returns none; an explicit RunFilter.Status (incl. "dry_run") matches regardless of IncludeDryRun; LastRunBySkills counts only status=="ok" unless includeFailed. (PurgeOlderThan's FinishedAt key is the SAFE behavior — in-flight runs retained — so the doc was aligned to it, not the impl.) Error-handling: appendLog now uses a bounded context (auditAppendTimeout=3s) so a hung backend can't block the run goroutine on the hot path; Sink.StartRun logs its (still best-effort) failure instead of swallowing it; budget Memory.Get uses RLock (RWMutex); budget package doc fixed (was skillexec's); Check uses the budgetWindow constant, not a duplicated literal. Triaged false-positive: NewNoOpBudget returning BudgetTracker is assignable to run.Budget (identical method sets) — no change needed. Core go.sum still free of host/DB deps. Co-Authored-By: Claude Opus 4.8 (1M context) --- audit/memory.go | 43 +++++++++++++++++++++------ audit/redaction_test.go | 58 +++++++++++++++++++++++++++++++++++++ audit/sink.go | 11 +++++-- audit/writer.go | 49 +++++++++++++++++++++++++++---- budget/budget.go | 6 ++-- budget/memory.go | 6 ++-- persona/builtin_loader.go | 31 +++++++++++++++++++- persona/ipallowlist_test.go | 17 +++++++++++ 8 files changed, 197 insertions(+), 24 deletions(-) create mode 100644 audit/redaction_test.go create mode 100644 persona/ipallowlist_test.go diff --git a/audit/memory.go b/audit/memory.go index 00e0cfe..7674d85 100644 --- a/audit/memory.go +++ b/audit/memory.go @@ -99,6 +99,19 @@ func (m *Memory) newestFirst(keep func(SkillRun) bool) []SkillRun { return out } +// oldestFirst returns the retained runs in insertion (oldest-first) order, +// optionally filtered. Caller holds at least RLock. +func (m *Memory) oldestFirst(keep func(SkillRun) bool) []SkillRun { + out := make([]SkillRun, 0, len(m.order)) + for _, id := range m.order { + r := m.runs[id] + if keep == nil || keep(r) { + out = append(out, r) + } + } + return out +} + func page(rs []SkillRun, offset, limit int) []SkillRun { if offset < 0 { offset = 0 @@ -142,10 +155,12 @@ func (m *Memory) ListRunsByCaller(_ context.Context, callerID string, limit int) } func (m *Memory) matchesFilter(r SkillRun, f RunFilter) bool { - if !f.IncludeDryRun && r.Status == "dry_run" { - return false - } - if f.Status != "" && r.Status != f.Status { + if f.Status != "" { + if r.Status != f.Status { + return false + } + // An explicit Status (even "dry_run") matches regardless of IncludeDryRun. + } else if !f.IncludeDryRun && r.Status == "dry_run" { return false } if f.SkillID != "" && r.SkillID != f.SkillID { @@ -170,6 +185,9 @@ func (m *Memory) matchesFilter(r SkillRun, f RunFilter) bool { } func (m *Memory) ListRunsFiltered(_ context.Context, f RunFilter, offset, limit int) ([]SkillRun, error) { + if limit <= 0 || limit > 500 { + limit = 50 // bound admin scans, per the Storage contract + } m.mu.RLock() defer m.mu.RUnlock() return page(m.newestFirst(func(r SkillRun) bool { return m.matchesFilter(r, f) }), offset, limit), nil @@ -203,7 +221,7 @@ func (m *Memory) PurgeOlderThan(_ context.Context, t time.Time) (int64, error) { func (m *Memory) ListChildrenByParent(_ context.Context, parentRunID string) ([]SkillRun, error) { m.mu.RLock() defer m.mu.RUnlock() - return m.newestFirst(func(r SkillRun) bool { return r.ParentRunID == parentRunID }), nil + return m.oldestFirst(func(r SkillRun) bool { return r.ParentRunID == parentRunID }), nil } func (m *Memory) WalkParentChain(_ context.Context, runID string) ([]SkillRun, error) { @@ -211,7 +229,7 @@ func (m *Memory) WalkParentChain(_ context.Context, runID string) ([]SkillRun, e defer m.mu.RUnlock() var chain []SkillRun seen := map[string]bool{} - for id := runID; id != ""; { + for id := runID; id != "" && len(chain) < MaxParentChainDepth; { r, ok := m.runs[id] if !ok || seen[id] { break @@ -220,13 +238,20 @@ func (m *Memory) WalkParentChain(_ context.Context, runID string) ([]SkillRun, e chain = append(chain, r) id = r.ParentRunID } + // Contract: root first, the queried run last. We walked child→root, so reverse. + for i, j := 0, len(chain)-1; i < j; i, j = i+1, j-1 { + chain[i], chain[j] = chain[j], chain[i] + } return chain, nil } func (m *Memory) ListFinishedRunsBefore(_ context.Context, cutoff time.Time, limit int) ([]SkillRun, error) { + if limit <= 0 { + return nil, nil // contract: a real bound is required + } m.mu.RLock() defer m.mu.RUnlock() - return page(m.newestFirst(func(r SkillRun) bool { + return page(m.oldestFirst(func(r SkillRun) bool { return r.FinishedAt != nil && r.FinishedAt.Before(cutoff) }), 0, limit), nil } @@ -244,8 +269,8 @@ func (m *Memory) LastRunBySkills(_ context.Context, skillIDs []string, includeFa if !want[r.SkillID] { continue } - if !includeFailed && (r.Status == "error" || r.Status == "timeout") { - continue + if !includeFailed && r.Status != "ok" { + continue // contract: only status=="ok" counts unless includeFailed } if r.StartedAt.After(out[r.SkillID]) { out[r.SkillID] = r.StartedAt diff --git a/audit/redaction_test.go b/audit/redaction_test.go new file mode 100644 index 0000000..0664ed5 --- /dev/null +++ b/audit/redaction_test.go @@ -0,0 +1,58 @@ +package audit + +import ( + "context" + "strings" + "testing" + + "gitea.stevedudenhoeffer.com/steve/majordomo/llm" +) + +// TestOnToolRedactsSecretTools: a secret-bearing tool's args/result must NOT be +// persisted verbatim in the audit log. +func TestOnToolRedactsSecretTools(t *testing.T) { + ctx := context.Background() + mem := NewMemory() + mem.StartRun(ctx, SkillRun{ID: "r1"}) + w := NewWriter(mem, "r1") + + secret := `{"url":"https://x","headers":{"Authorization":"Bearer SUPERSECRET"}}` + w.OnTool(llm.ToolCall{Name: "http_get", ID: "1", Arguments: []byte(secret)}, "TOPSECRETBODY") + // a non-secret tool is logged verbatim + w.OnTool(llm.ToolCall{Name: "think", ID: "2", Arguments: []byte(`{"thought":"hi"}`)}, "ok") + + logs, _ := mem.ListLogsByRun(ctx, "r1") + var dump strings.Builder + for _, l := range logs { + for k, v := range l.Payload { + dump.WriteString(k) + dump.WriteString("=") + if s, ok := v.(string); ok { + dump.WriteString(s) + } + dump.WriteString(" ") + } + } + all := dump.String() + if strings.Contains(all, "SUPERSECRET") || strings.Contains(all, "TOPSECRETBODY") { + t.Fatalf("secret leaked into audit log: %s", all) + } + // the redaction marker is present, and the non-secret tool's args survive + foundRedacted, foundThink := false, false + for _, l := range logs { + if l.EventType == "tool_call" { + if r, _ := l.Payload["args_redacted"].(bool); r { + foundRedacted = true + } + if a, _ := l.Payload["args"].(string); strings.Contains(a, "thought") { + foundThink = true + } + } + } + if !foundRedacted { + t.Error("secret tool_call should carry args_redacted=true") + } + if !foundThink { + t.Error("non-secret tool args should be logged verbatim") + } +} diff --git a/audit/sink.go b/audit/sink.go index b5071f3..44e1802 100644 --- a/audit/sink.go +++ b/audit/sink.go @@ -2,6 +2,7 @@ package audit import ( "context" + "log/slog" "time" "gitea.stevedudenhoeffer.com/steve/majordomo/llm" @@ -36,8 +37,9 @@ func (s *Sink) StartRun(ctx context.Context, info run.RunInfo) run.RunRecorder { if started.IsZero() { started = time.Now() } - // Best-effort: a failed StartRun must not break the user-visible run. - _ = s.storage.StartRun(ctx, SkillRun{ + // Best-effort: a failed StartRun must not break the user-visible run, but we + // surface it (a swallowed failure leaves orphan log events with no run row). + if err := s.storage.StartRun(ctx, SkillRun{ ID: info.RunID, SkillID: info.SubjectID, CallerID: info.CallerID, @@ -46,7 +48,10 @@ func (s *Sink) StartRun(ctx context.Context, info run.RunInfo) run.RunRecorder { Inputs: info.Inputs, StartedAt: started, Status: "running", - }) + }); err != nil { + slog.Warn("audit: StartRun failed; the run row is missing so its log events will orphan", + "run_id", info.RunID, "error", err) + } return &recorder{w: NewWriter(s.storage, info.RunID)} } diff --git a/audit/writer.go b/audit/writer.go index 801c89c..986e3c3 100644 --- a/audit/writer.go +++ b/audit/writer.go @@ -168,16 +168,26 @@ func (w *Writer) OnStep(iter int, resp *llm.Response) { // surrounding narration could leak a secret (MCP args, email body/ // recipients, raw HTTP request). Mirrors the steps.go redaction list so // the audit trace never persists secret-adjacent assistant text. +// isSecretTool reports whether a tool's arguments/results may carry secrets +// (MCP args, email bodies/recipients, HTTP auth headers/bodies) and so must be +// redacted from the persisted audit log. Single source of truth for both the +// step-narration redaction and the OnTool arg/result redaction. NOTE: this is +// a name-prefix allowlist — a NEW secret-bearing tool must be added here or its +// args/results will be logged verbatim. +func isSecretTool(name string) bool { + switch name { + case "mcp_call", "email_send": + return true + } + return strings.HasPrefix(name, "http_") +} + func stepHasSecretTool(resp *llm.Response) bool { if resp == nil { return false } for _, c := range resp.ToolCalls { - switch c.Name { - case "mcp_call", "email_send": - return true - } - if strings.HasPrefix(c.Name, "http_") { + if isSecretTool(c.Name) { return true } } @@ -211,6 +221,24 @@ func (w *Writer) OnTool(call llm.ToolCall, result string) { return } w.calls.Add(1) + // Redact the args/result of secret-bearing tools — these fields actually + // CARRY the secret (MCP args, email body/recipients, HTTP auth/body), so + // logging them verbatim would defeat the OnStep narration redaction. + if isSecretTool(call.Name) { + w.appendLog("tool_call", map[string]any{ + "name": call.Name, + "id": call.ID, + "args_redacted": true, + "args_len": len(call.Arguments), + }) + w.appendLog("tool_result", map[string]any{ + "name": call.Name, + "id": call.ID, + "result_redacted": true, + "result_len": len(result), + }) + return + } w.appendLog("tool_call", map[string]any{ "name": call.Name, "args": string(call.Arguments), @@ -296,6 +324,10 @@ func (w *Writer) Close(ctx context.Context, stats RunStats) { // hung connection that the run goroutine shouldn't keep waiting on. const auditFinishTimeout = 10 * time.Second +// auditAppendTimeout bounds each per-event AppendLog on the hot path so a hung +// storage backend can't block the run goroutine. +const auditAppendTimeout = 3 * time.Second + // ToolCallsCount returns how many tool invocations OnTool has seen so // far. Useful for budget enforcement. func (w *Writer) ToolCallsCount() int { return int(w.calls.Load()) } @@ -309,7 +341,12 @@ func (w *Writer) appendLog(eventType string, payload map[string]any) { Payload: payload, CreatedAt: time.Now(), } - if err := w.storage.AppendLog(context.Background(), log); err != nil { + // Bound the write: a hung storage backend must not block the run goroutine + // on the hot path (every step/tool event flows through here). Detached from + // any caller deadline — the log write is independent of the run's context. + ctx, cancel := context.WithTimeout(context.Background(), auditAppendTimeout) + defer cancel() + if err := w.storage.AppendLog(ctx, log); err != nil { slog.Warn("skillaudit: AppendLog failed", "run_id", w.runID, "seq", seq, "type", eventType, "error", err) } } diff --git a/budget/budget.go b/budget/budget.go index 40c7e1e..5359539 100644 --- a/budget/budget.go +++ b/budget/budget.go @@ -1,4 +1,6 @@ -// Package skillexec runs saved Skill definitions via majordomo's agent +// Package budget gates and meters per-caller resource use over a rolling +// 7-day window (run.Ports.Budget). DBBudget is the durable tracker; NoOpBudget +// disables metering; the BudgetStorage seam backs it (Memory / contrib SQLite). // loop (gitea.stevedudenhoeffer.com/steve/majordomo/agent). // // Why: a Skill is data; the executor turns data into a running agent @@ -130,7 +132,7 @@ func (b *DBBudget) Check(ctx context.Context, callerID string) error { return fmt.Errorf("budget: %w", err) } if bud != nil { - if b.now().Sub(bud.WindowStart) < 7*24*time.Hour { + if b.now().Sub(bud.WindowStart) < budgetWindow { cap := b.weeklyLimit() if cap > 0 && bud.SecondsUsed >= cap { if b.notify != nil { diff --git a/budget/memory.go b/budget/memory.go index 953aa3f..30745b5 100644 --- a/budget/memory.go +++ b/budget/memory.go @@ -10,7 +10,7 @@ import ( // usage held in memory (lost on restart). The default behind DBBudget for a // light host or tests; mort uses its GORM Storage, contrib/store adds SQLite. type Memory struct { - mu sync.Mutex + mu sync.RWMutex rows map[string]*SkillBudget } @@ -22,8 +22,8 @@ var _ BudgetStorage = (*Memory)(nil) func (m *Memory) Initialize(context.Context) error { return nil } func (m *Memory) Get(_ context.Context, userID string) (*SkillBudget, error) { - m.mu.Lock() - defer m.mu.Unlock() + m.mu.RLock() + defer m.mu.RUnlock() r, ok := m.rows[userID] if !ok { return nil, nil diff --git a/persona/builtin_loader.go b/persona/builtin_loader.go index ae31f22..6e0b71d 100644 --- a/persona/builtin_loader.go +++ b/persona/builtin_loader.go @@ -35,6 +35,7 @@ import ( "fmt" "io/fs" "log/slog" + "net" "path" "strings" "time" @@ -540,6 +541,10 @@ func decodeAgentManifest(data []byte) (*Agent, error) { }) } + // Validate the webhook IP allow-list (CIDR or bare IP); drop + warn on + // malformed entries so a typo can't silently widen or void the allow-list. + allowlist := validateIPAllowlist(m.WebhookIPAllowlist, m.Name) + ag := &Agent{ Name: strings.TrimSpace(m.Name), Description: m.Description, @@ -559,7 +564,7 @@ func decodeAgentManifest(data []byte) (*Agent, error) { LowLevelTools: m.LowLevelTools, PersonalizationSources: m.PersonalizationSources, Schedule: strings.TrimSpace(m.Schedule), - WebhookIPAllowlist: m.WebhookIPAllowlist, + WebhookIPAllowlist: allowlist, ChatbotChannelFilter: strings.TrimSpace(m.ChatbotChannelFilter), DefaultEmoji: m.DefaultEmoji, StateReactEmoji: m.StateReact, @@ -568,3 +573,27 @@ func decodeAgentManifest(data []byte) (*Agent, error) { } return ag, nil } + +// validateIPAllowlist keeps only entries that parse as a CIDR block or a bare +// IP; malformed entries are dropped with a warning (a typo must not silently +// widen or void the webhook allow-list). The struct field documents "CIDR +// strings", so this enforces it at load time. +func validateIPAllowlist(entries []string, agent string) []string { + var out []string + for _, e := range entries { + e = strings.TrimSpace(e) + if e == "" { + continue + } + if _, _, err := net.ParseCIDR(e); err == nil { + out = append(out, e) + continue + } + if ip := net.ParseIP(e); ip != nil { + out = append(out, e) + continue + } + slog.Warn("agents: dropping malformed webhook_ip_allowlist entry (not a CIDR or IP)", "agent", agent, "entry", e) + } + return out +} diff --git a/persona/ipallowlist_test.go b/persona/ipallowlist_test.go new file mode 100644 index 0000000..c77d7fa --- /dev/null +++ b/persona/ipallowlist_test.go @@ -0,0 +1,17 @@ +package persona + +import "testing" + +func TestValidateIPAllowlist(t *testing.T) { + in := []string{"10.0.0.0/8", " 192.168.1.5 ", "not-an-ip", "", "2001:db8::/32", "garbage/99"} + got := validateIPAllowlist(in, "test") + want := map[string]bool{"10.0.0.0/8": true, "192.168.1.5": true, "2001:db8::/32": true} + if len(got) != len(want) { + t.Fatalf("got %v, want %d valid entries", got, len(want)) + } + for _, e := range got { + if !want[e] { + t.Errorf("unexpected entry kept: %q", e) + } + } +}