fix: address verified gadfly P2 findings (9 real of 18)
Independently verified all 18 gadfly findings against the code (18-agent
fan-out). Fixed the 9 real ones; the other 9 were false-positive /
hallucinated / valid-tradeoff (no change).
High:
- F1 nil model: a Models resolver returning (ctx,nil,nil) flowed into the
agent loop and nil-panicked. Run now returns a clean error instead (Run never panics). +test.
- F9 compactor data-leak: renderTranscript sent tool-call args verbatim to
the summarizer (a possibly-different provider/tier); secret-bearing tool
args (mcp_call/email_send/http_*/webhook_*) are now redacted, with a doc
note that result bodies still flow (summary needs them).
Medium/minor:
- F2 compactor error path returned the folded slice, not the original msgs
(contradicting the documented non-fatal contract) -> return msgs.
- F3 RunStats.Status only reported ok/error; it now also reports timeout
(DeadlineExceeded) and cancelled (Canceled) via statusFor. +test.
- F4 step-zip emitted empty-name "ghost" steps when results>calls; now pairs
min(calls,results) only.
- F5 SetIteration was never called -> RunState.Iteration always 0; the step
observer now updates it each loop.
- F6 matchPending fallback was LIFO; now FIFO (matches the per-key queue).
- F7 estimateTokens had no default arm (future Part kinds counted as 0);
unknown parts now counted conservatively.
- F8 cloud_sync silently truncated >1MiB responses -> opaque JSON error; now
a clear "response exceeded N bytes" via readCapped.
- F12 step observer captured the caller ctx; now the merged runCtx.
- F13 compaction onFire was nil (doc claimed it logged); now wired to
audit LogEvent("compaction_fired").
- F11 (no pre-dispatch hook in majordomo) documented honestly as a known
limitation; F18 UsageSink doc clarified cache tokens are subsets of input.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
+42
-4
@@ -180,8 +180,11 @@ func compactIfNeeded(ctx context.Context, cfg CompactorConfig, st *compactionSta
|
||||
|
||||
summary, err := summariseMiddle(ctx, cfg, st.summaryText, middle)
|
||||
if err != nil {
|
||||
// Non-fatal upstream: the agent loop sends the original slice.
|
||||
return rendered, fmt.Errorf("compactor: summarise middle: %w", err)
|
||||
// Non-fatal upstream: the agent loop sends the ORIGINAL slice. Return
|
||||
// msgs, not `rendered` — on a second+ compaction `rendered` already
|
||||
// carries a prior synthetic summary, which is not the documented
|
||||
// "original slice" the loop expects on a compactor error.
|
||||
return msgs, fmt.Errorf("compactor: summarise middle: %w", err)
|
||||
}
|
||||
st.summaryText = summary
|
||||
st.prefixEnd = endMiddle
|
||||
@@ -285,6 +288,14 @@ func estimateTokens(msgs []llm.Message) int {
|
||||
chars += len(v.Text)
|
||||
case llm.ImagePart:
|
||||
chars += 4096
|
||||
default:
|
||||
// llm.Part is a sealed-but-extensible interface (future media
|
||||
// kinds). Count an unknown part conservatively (like an image)
|
||||
// rather than 0, so a transcript of unrecognised content can't
|
||||
// silently slip under the compaction threshold and 400 the
|
||||
// model. Bump this if a large new part kind lands.
|
||||
_ = v
|
||||
chars += 4096
|
||||
}
|
||||
}
|
||||
for _, tc := range m.ToolCalls {
|
||||
@@ -302,9 +313,36 @@ func estimateTokens(msgs []llm.Message) int {
|
||||
// summarizer.
|
||||
const transcriptMessageCap = 2048
|
||||
|
||||
// secretBearingTools names tools whose ARGUMENTS routinely carry credentials or
// message bodies (bearer tokens, API keys, recipients, request bodies). Their
// args are dropped before the transcript reaches the summarizer model — which
// may be a different provider/tier than the run model — mirroring the redaction
// run/steps.go applies to user-facing step summaries. Keys must be lower-case:
// redactToolArgs normalises the tool name before the lookup. http_* and
// webhook_* are matched by prefix in redactToolArgs.
var secretBearingTools = map[string]bool{
	"mcp_call":   true,
	"email_send": true,
}

// redactToolArgs returns a summariser-safe rendering of a tool call's args:
// "[redacted]" for known secret-bearing tools, the args verbatim otherwise.
//
// name is the tool name as it appears on the tool call; args is the raw
// argument payload rendered as a string. Matching is case-insensitive and
// ignores surrounding whitespace so that a name variant (e.g. "HTTP_post" or
// " Email_Send ") still has its args withheld; the redaction fails closed
// rather than leaking on a spelling difference.
func redactToolArgs(name, args string) string {
	// Normalise once so the exact-name lookup and both prefix checks all see
	// the same canonical form.
	key := strings.ToLower(strings.TrimSpace(name))
	if secretBearingTools[key] ||
		strings.HasPrefix(key, "http_") ||
		strings.HasPrefix(key, "webhook_") {
		return "[redacted]"
	}
	return args
}
|
||||
|
||||
// renderTranscript flattens a message slice to a plain-text transcript
|
||||
// suitable for the summarisation prompt. Tool calls show name + args,
|
||||
// suitable for the summarisation prompt. Tool calls show name + (redacted) args,
|
||||
// tool results show name + body. Empty fields are skipped.
|
||||
//
|
||||
// NOTE: tool-RESULT bodies are forwarded to the summarizer (the summary needs
|
||||
// the findings). A host whose tool results may contain secrets and whose
|
||||
// summarizer tier resolves to an untrusted provider should ensure that tier is
|
||||
// trusted, or pre-sanitise results before they reach the agent loop.
|
||||
func renderTranscript(msgs []llm.Message) string {
|
||||
var sb strings.Builder
|
||||
for i, m := range msgs {
|
||||
@@ -314,7 +352,7 @@ func renderTranscript(msgs []llm.Message) string {
|
||||
sb.WriteString("\n")
|
||||
}
|
||||
for _, tc := range m.ToolCalls {
|
||||
fmt.Fprintf(&sb, "tool_call name=%s args=%s\n", tc.Name, truncate(string(tc.Arguments), transcriptMessageCap))
|
||||
fmt.Fprintf(&sb, "tool_call name=%s args=%s\n", tc.Name, truncate(redactToolArgs(tc.Name, string(tc.Arguments)), transcriptMessageCap))
|
||||
}
|
||||
for _, tr := range m.ToolResults {
|
||||
fmt.Fprintf(&sb, "tool_result name=%s body=%s\n", tr.Name, truncate(tr.Content, transcriptMessageCap))
|
||||
|
||||
Reference in New Issue
Block a user