fix: address verified gadfly P4/#4 findings (audit/budget/persona)
executus CI / test (push) Failing after 1m4s
executus CI / test (push) Failing after 1m4s
Security (all 3 models — HIGH): audit OnTool persisted raw tool args + results verbatim for the very tools the OnStep narration-redaction flags as secret (mcp_call/email_send/http_*) — the args/results are what CARRY the secret, so they landed in skill_run_logs unredacted. Factored the predicate into isSecretTool() (single source of truth) and OnTool now emits args_redacted/result_redacted (+ lengths) for secret tools. Test asserts no secret reaches the log. (persona) webhook_ip_allowlist entries are now CIDR/IP-validated at load (malformed dropped + warned) instead of accepted raw. Contract correctness (glm-5.2 + deepseek) — audit Memory now honors its documented Storage contract: ListChildrenByParent/ListFinishedRunsBefore return oldest-first; WalkParentChain returns root-first and honors MaxParentChainDepth; ListRunsFiltered clamps limit (<=0 or >500 -> 50); ListFinishedRunsBefore with limit<=0 returns none; an explicit RunFilter.Status (incl. "dry_run") matches regardless of IncludeDryRun; LastRunBySkills counts only status=="ok" unless includeFailed. (PurgeOlderThan's FinishedAt key is the SAFE behavior — in-flight runs retained — so the doc was aligned to it, not the impl.) Error-handling: appendLog now uses a bounded context (auditAppendTimeout=3s) so a hung backend can't block the run goroutine on the hot path; Sink.StartRun logs its (still best-effort) failure instead of swallowing it; budget Memory.Get uses RLock (RWMutex); budget package doc fixed (was skillexec's); Check uses the budgetWindow constant, not a duplicated literal. Triaged false-positive: NewNoOpBudget returning BudgetTracker is assignable to run.Budget (identical method sets) — no change needed. Core go.sum still free of host/DB deps. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,58 @@
|
||||
package audit
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
||||
)
|
||||
|
||||
// TestOnToolRedactsSecretTools: a secret-bearing tool's args/result must NOT be
|
||||
// persisted verbatim in the audit log.
|
||||
func TestOnToolRedactsSecretTools(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
mem := NewMemory()
|
||||
mem.StartRun(ctx, SkillRun{ID: "r1"})
|
||||
w := NewWriter(mem, "r1")
|
||||
|
||||
secret := `{"url":"https://x","headers":{"Authorization":"Bearer SUPERSECRET"}}`
|
||||
w.OnTool(llm.ToolCall{Name: "http_get", ID: "1", Arguments: []byte(secret)}, "TOPSECRETBODY")
|
||||
// a non-secret tool is logged verbatim
|
||||
w.OnTool(llm.ToolCall{Name: "think", ID: "2", Arguments: []byte(`{"thought":"hi"}`)}, "ok")
|
||||
|
||||
logs, _ := mem.ListLogsByRun(ctx, "r1")
|
||||
var dump strings.Builder
|
||||
for _, l := range logs {
|
||||
for k, v := range l.Payload {
|
||||
dump.WriteString(k)
|
||||
dump.WriteString("=")
|
||||
if s, ok := v.(string); ok {
|
||||
dump.WriteString(s)
|
||||
}
|
||||
dump.WriteString(" ")
|
||||
}
|
||||
}
|
||||
all := dump.String()
|
||||
if strings.Contains(all, "SUPERSECRET") || strings.Contains(all, "TOPSECRETBODY") {
|
||||
t.Fatalf("secret leaked into audit log: %s", all)
|
||||
}
|
||||
// the redaction marker is present, and the non-secret tool's args survive
|
||||
foundRedacted, foundThink := false, false
|
||||
for _, l := range logs {
|
||||
if l.EventType == "tool_call" {
|
||||
if r, _ := l.Payload["args_redacted"].(bool); r {
|
||||
foundRedacted = true
|
||||
}
|
||||
if a, _ := l.Payload["args"].(string); strings.Contains(a, "thought") {
|
||||
foundThink = true
|
||||
}
|
||||
}
|
||||
}
|
||||
if !foundRedacted {
|
||||
t.Error("secret tool_call should carry args_redacted=true")
|
||||
}
|
||||
if !foundThink {
|
||||
t.Error("non-secret tool args should be logged verbatim")
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user