fix: address verified gadfly P4/#4 findings (audit/budget/persona)

Security (all 3 models — HIGH): audit OnTool persisted raw tool args + results verbatim for the very tools the OnStep narration-redaction flags as secret (mcp_call/email_send/http_*) — the args/results are what CARRY the secret, so they landed in skill_run_logs unredacted. Factored the predicate into isSecretTool() (single source of truth) and OnTool now emits args_redacted/result_redacted (+ lengths) for secret tools. Test asserts no secret reaches the log. (persona) webhook_ip_allowlist entries are now CIDR/IP-validated at load (malformed dropped + warned) instead of accepted raw. Contract correctness (glm-5.2 + deepseek) — audit Memory now honors its documented Storage contract: ListChildrenByParent/ListFinishedRunsBefore return oldest-first; WalkParentChain returns root-first and honors MaxParentChainDepth; ListRunsFiltered clamps limit (<=0 or >500 -> 50); ListFinishedRunsBefore with limit<=0 returns none; an explicit RunFilter.Status (incl. "dry_run") matches regardless of IncludeDryRun; LastRunBySkills counts only status=="ok" unless includeFailed. (PurgeOlderThan's FinishedAt key is the SAFE behavior — in-flight runs retained — so the doc was aligned to it, not the impl.) Error-handling: appendLog now uses a bounded context (auditAppendTimeout=3s) so a hung backend can't block the run goroutine on the hot path; Sink.StartRun logs its (still best-effort) failure instead of swallowing it; budget Memory.Get uses RLock (RWMutex); budget package doc fixed (was skillexec's); Check uses the budgetWindow constant, not a duplicated literal. Triaged false-positive: NewNoOpBudget returning BudgetTracker is assignable to run.Budget (identical method sets) — no change needed. Core go.sum still free of host/DB deps. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-26 23:44:34 -04:00
parent 2260480c81
commit d82cef46b4
8 changed files with 197 additions and 24 deletions
@@ -0,0 +1,58 @@
+package audit
+
+import (
+	"context"
+	"strings"
+	"testing"
+
+	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
+)
+
+// TestOnToolRedactsSecretTools: a secret-bearing tool's args/result must NOT be
+// persisted verbatim in the audit log.
+func TestOnToolRedactsSecretTools(t *testing.T) {
+	ctx := context.Background()
+	mem := NewMemory()
+	mem.StartRun(ctx, SkillRun{ID: "r1"})
+	w := NewWriter(mem, "r1")
+
+	secret := `{"url":"https://x","headers":{"Authorization":"Bearer SUPERSECRET"}}`
+	w.OnTool(llm.ToolCall{Name: "http_get", ID: "1", Arguments: []byte(secret)}, "TOPSECRETBODY")
+	// a non-secret tool is logged verbatim
+	w.OnTool(llm.ToolCall{Name: "think", ID: "2", Arguments: []byte(`{"thought":"hi"}`)}, "ok")
+
+	logs, _ := mem.ListLogsByRun(ctx, "r1")
+	var dump strings.Builder
+	for _, l := range logs {
+		for k, v := range l.Payload {
+			dump.WriteString(k)
+			dump.WriteString("=")
+			if s, ok := v.(string); ok {
+				dump.WriteString(s)
+			}
+			dump.WriteString(" ")
+		}
+	}
+	all := dump.String()
+	if strings.Contains(all, "SUPERSECRET") || strings.Contains(all, "TOPSECRETBODY") {
+		t.Fatalf("secret leaked into audit log: %s", all)
+	}
+	// the redaction marker is present, and the non-secret tool's args survive
+	foundRedacted, foundThink := false, false
+	for _, l := range logs {
+		if l.EventType == "tool_call" {
+			if r, _ := l.Payload["args_redacted"].(bool); r {
+				foundRedacted = true
+			}
+			if a, _ := l.Payload["args"].(string); strings.Contains(a, "thought") {
+				foundThink = true
+			}
+		}
+	}
+	if !foundRedacted {
+		t.Error("secret tool_call should carry args_redacted=true")
+	}
+	if !foundThink {
+		t.Error("non-secret tool args should be logged verbatim")
+	}
+}