d82cef46b4
executus CI / test (push) Failing after 1m4s
Security (all 3 models — HIGH): audit OnTool persisted raw tool args + results verbatim for the very tools the OnStep narration-redaction flags as secret (mcp_call/email_send/http_*) — the args/results are what CARRY the secret, so they landed in skill_run_logs unredacted. Factored the predicate into isSecretTool() (single source of truth) and OnTool now emits args_redacted/result_redacted (+ lengths) for secret tools. Test asserts no secret reaches the log. (persona) webhook_ip_allowlist entries are now CIDR/IP-validated at load (malformed dropped + warned) instead of accepted raw. Contract correctness (glm-5.2 + deepseek) — audit Memory now honors its documented Storage contract: ListChildrenByParent/ListFinishedRunsBefore return oldest-first; WalkParentChain returns root-first and honors MaxParentChainDepth; ListRunsFiltered clamps limit (<=0 or >500 -> 50); ListFinishedRunsBefore with limit<=0 returns none; an explicit RunFilter.Status (incl. "dry_run") matches regardless of IncludeDryRun; LastRunBySkills counts only status=="ok" unless includeFailed. (PurgeOlderThan's FinishedAt key is the SAFE behavior — in-flight runs retained — so the doc was aligned to it, not the impl.) Error-handling: appendLog now uses a bounded context (auditAppendTimeout=3s) so a hung backend can't block the run goroutine on the hot path; Sink.StartRun logs its (still best-effort) failure instead of swallowing it; budget Memory.Get uses RLock (RWMutex); budget package doc fixed (was skillexec's); Check uses the budgetWindow constant, not a duplicated literal. Triaged false-positive: NewNoOpBudget returning BudgetTracker is assignable to run.Budget (identical method sets) — no change needed. Core go.sum still free of host/DB deps. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
59 lines
1.6 KiB
Go
59 lines
1.6 KiB
Go
package audit
|
|
|
|
import (
|
|
"context"
|
|
"strings"
|
|
"testing"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
|
)
|
|
|
|
// TestOnToolRedactsSecretTools: a secret-bearing tool's args/result must NOT be
|
|
// persisted verbatim in the audit log.
|
|
func TestOnToolRedactsSecretTools(t *testing.T) {
|
|
ctx := context.Background()
|
|
mem := NewMemory()
|
|
mem.StartRun(ctx, SkillRun{ID: "r1"})
|
|
w := NewWriter(mem, "r1")
|
|
|
|
secret := `{"url":"https://x","headers":{"Authorization":"Bearer SUPERSECRET"}}`
|
|
w.OnTool(llm.ToolCall{Name: "http_get", ID: "1", Arguments: []byte(secret)}, "TOPSECRETBODY")
|
|
// a non-secret tool is logged verbatim
|
|
w.OnTool(llm.ToolCall{Name: "think", ID: "2", Arguments: []byte(`{"thought":"hi"}`)}, "ok")
|
|
|
|
logs, _ := mem.ListLogsByRun(ctx, "r1")
|
|
var dump strings.Builder
|
|
for _, l := range logs {
|
|
for k, v := range l.Payload {
|
|
dump.WriteString(k)
|
|
dump.WriteString("=")
|
|
if s, ok := v.(string); ok {
|
|
dump.WriteString(s)
|
|
}
|
|
dump.WriteString(" ")
|
|
}
|
|
}
|
|
all := dump.String()
|
|
if strings.Contains(all, "SUPERSECRET") || strings.Contains(all, "TOPSECRETBODY") {
|
|
t.Fatalf("secret leaked into audit log: %s", all)
|
|
}
|
|
// the redaction marker is present, and the non-secret tool's args survive
|
|
foundRedacted, foundThink := false, false
|
|
for _, l := range logs {
|
|
if l.EventType == "tool_call" {
|
|
if r, _ := l.Payload["args_redacted"].(bool); r {
|
|
foundRedacted = true
|
|
}
|
|
if a, _ := l.Payload["args"].(string); strings.Contains(a, "thought") {
|
|
foundThink = true
|
|
}
|
|
}
|
|
}
|
|
if !foundRedacted {
|
|
t.Error("secret tool_call should carry args_redacted=true")
|
|
}
|
|
if !foundThink {
|
|
t.Error("non-secret tool args should be logged verbatim")
|
|
}
|
|
}
|