fix: address verified gadfly P2 findings (9 real of 18)
Independently verified all 18 gadfly findings against the code (18-agent
fan-out). Fixed the 9 real ones; the other 9 were false-positive /
hallucinated / valid-tradeoff (no change).
High:
- F1 nil model: a Models resolver returning (ctx,nil,nil) flowed into the
agent loop and nil-panicked. Now a clean error (Run never panics). +test.
- F9 compactor data-leak: renderTranscript sent tool-call args verbatim to
the summarizer (a possibly-different provider/tier); secret-bearing tool
args (mcp_call/email_send/http_*/webhook_*) are now redacted, with a doc
note that result bodies still flow (summary needs them).
Medium/minor:
- F2 compactor error path returned the folded slice, not the original msgs
(contradicting the documented non-fatal contract) -> return msgs.
- F3 RunStats.Status was only ever ok/error; it now also reports timeout
(DeadlineExceeded) and cancelled (Canceled) via statusFor. +test.
- F4 step-zip emitted empty-name "ghost" steps when results>calls; now pairs
min(calls,results) only.
- F5 SetIteration was never called -> RunState.Iteration always 0; the step
observer now updates it each loop.
- F6 matchPending fallback was LIFO; now FIFO (matches the per-key queue).
- F7 estimateTokens had no default arm (future Part kinds counted as 0);
unknown parts now counted conservatively.
- F8 cloud_sync silently truncated >1MiB responses -> opaque JSON error; now
a clear "response exceeded N bytes" via readCapped.
- F12 step observer captured the caller ctx; now the merged runCtx.
- F13 compaction onFire was nil (doc claimed it logged); now wired to
audit LogEvent("compaction_fired").
- F11 (no pre-dispatch hook in majordomo) documented honestly as a known
limitation; F18 UsageSink doc clarified cache tokens are subsets of input.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
+68
-24
@@ -2,6 +2,7 @@ package run
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
@@ -130,11 +131,18 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
|
||||
res.Err = fmt.Errorf("resolve model %q: %w", tier, err)
|
||||
return res
|
||||
}
|
||||
if model == nil {
|
||||
// A resolver returning (ctx, nil, nil) would otherwise nil-panic inside
|
||||
// the agent loop; surface it as a clean error (Run never panics out).
|
||||
res.Err = fmt.Errorf("resolve model %q: resolver returned a nil model", tier)
|
||||
return res
|
||||
}
|
||||
ctx = modelCtx
|
||||
|
||||
// Audit start (optional). The recorder satisfies RunTally; stamp it on the
|
||||
// invocation so a self-status tool can read live progress.
|
||||
var rec RunRecorder
|
||||
var stateAcc *RunStateAccessor
|
||||
if e.cfg.Ports.Audit != nil {
|
||||
rec = e.cfg.Ports.Audit.StartRun(ctx, RunInfo{
|
||||
RunID: inv.RunID,
|
||||
@@ -148,7 +156,8 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
|
||||
})
|
||||
}
|
||||
if rec != nil {
|
||||
inv.RunState = NewRunStateAccessor(rec, maxIter, 0, started)
|
||||
stateAcc = NewRunStateAccessor(rec, maxIter, 0, started)
|
||||
inv.RunState = stateAcc
|
||||
}
|
||||
|
||||
// Build the toolbox from the agent's low-level tools.
|
||||
@@ -159,11 +168,27 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
|
||||
return res
|
||||
}
|
||||
|
||||
// Step instrumentation: accumulate Result.Steps + fire inv.OnStep, and feed
|
||||
// the audit recorder. majordomo's step observer hands us each completed
|
||||
// iteration; we zip the model's tool calls with their executed results.
|
||||
// Run context: bound by MaxRuntime, detached from the caller's deadline so a
|
||||
// lane/queue wait doesn't eat the run budget (mort's V10 lesson). Caller
|
||||
// cancellation still propagates via MergeCancellation. Created BEFORE the
|
||||
// step observer so the observer forwards the merged run context (not a
|
||||
// possibly-cancelled caller ctx) to OnStep consumers.
|
||||
runCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), maxRuntime)
|
||||
defer cancel()
|
||||
runCtx, mergeCancel := MergeCancellation(runCtx, ctx)
|
||||
defer mergeCancel()
|
||||
|
||||
// Step instrumentation: accumulate Result.Steps + fire inv.OnStep, feed the
|
||||
// audit recorder, and keep the live iteration counter fresh. majordomo's
|
||||
// step observer hands us each completed iteration; we zip the model's tool
|
||||
// calls with their executed results PAIRWISE — a result without a matching
|
||||
// call (or a call without a result) is skipped rather than recorded as an
|
||||
// empty-name "ghost" step.
|
||||
emitter := newStepEmitter(inv.OnStep)
|
||||
stepObserver := func(s agent.Step) {
|
||||
if stateAcc != nil {
|
||||
stateAcc.SetIteration(s.Index)
|
||||
}
|
||||
if rec != nil {
|
||||
rec.OnStep(s.Index, s.Response)
|
||||
}
|
||||
@@ -171,27 +196,20 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
|
||||
if s.Response != nil {
|
||||
calls = s.Response.ToolCalls
|
||||
}
|
||||
for i, r := range s.Results {
|
||||
var call llm.ToolCall
|
||||
if i < len(calls) {
|
||||
call = calls[i]
|
||||
}
|
||||
emitter.toolStart(ctx, call.Name, call.Arguments)
|
||||
emitter.toolEnd(ctx, call, r.Content, r.IsError)
|
||||
n := len(s.Results)
|
||||
if len(calls) < n {
|
||||
n = len(calls)
|
||||
}
|
||||
for i := 0; i < n; i++ {
|
||||
call, r := calls[i], s.Results[i]
|
||||
emitter.toolStart(runCtx, call.Name, call.Arguments)
|
||||
emitter.toolEnd(runCtx, call, r.Content, r.IsError)
|
||||
if rec != nil {
|
||||
rec.OnTool(call, r.Content)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Run context: bound by MaxRuntime, detached from the caller's deadline so a
|
||||
// lane/queue wait doesn't eat the run budget (mort's V10 lesson). Caller
|
||||
// cancellation still propagates via MergeCancellation.
|
||||
runCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), maxRuntime)
|
||||
defer cancel()
|
||||
runCtx, mergeCancel := MergeCancellation(runCtx, ctx)
|
||||
defer mergeCancel()
|
||||
|
||||
opts := []agent.Option{
|
||||
agent.WithToolbox(toolbox),
|
||||
agent.WithMaxSteps(maxIter),
|
||||
@@ -200,17 +218,27 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
|
||||
}
|
||||
if e.cfg.Compactor != nil && e.cfg.ContextTokens != nil {
|
||||
if threshold := e.compactionThreshold(tier); threshold > 0 {
|
||||
opts = append(opts, agent.WithCompactor(e.cfg.Compactor(threshold, nil)))
|
||||
// Forward compaction events to the audit log (makes the
|
||||
// CompactionEvent doc's "logged to the run trace" promise true).
|
||||
var onFire func(compact.CompactionEvent)
|
||||
if rec != nil {
|
||||
onFire = func(ev compact.CompactionEvent) {
|
||||
rec.LogEvent("compaction_fired", map[string]any{
|
||||
"messages_before": ev.MessagesBefore,
|
||||
"messages_after": ev.MessagesAfter,
|
||||
"tokens_before": ev.TokensBefore,
|
||||
"tokens_after": ev.TokensAfter,
|
||||
})
|
||||
}
|
||||
}
|
||||
opts = append(opts, agent.WithCompactor(e.cfg.Compactor(threshold, onFire)))
|
||||
}
|
||||
}
|
||||
|
||||
ag := agent.New(model, e.systemPrompt(ra), opts...)
|
||||
runRes, runErr := ag.Run(runCtx, input)
|
||||
|
||||
status := "ok"
|
||||
if runErr != nil {
|
||||
status = "error"
|
||||
}
|
||||
status := statusFor(runErr)
|
||||
if runRes != nil {
|
||||
res.Output = runRes.Output
|
||||
res.Usage = runRes.Usage
|
||||
@@ -225,6 +253,22 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
|
||||
return res
|
||||
}
|
||||
|
||||
// statusFor translates the error returned by an agent run into the string
// stored in RunStats.Status. Deadlines and cancellations get their own labels
// so audit consumers can tell them apart from ordinary failures:
//
//   - nil error                          -> "ok"
//   - wraps context.DeadlineExceeded     -> "timeout"
//   - wraps context.Canceled             -> "cancelled"
//   - anything else                      -> "error"
//
// The deadline check runs before the cancellation check, so an error that
// matches both is reported as "timeout".
func statusFor(runErr error) string {
	if runErr == nil {
		return "ok"
	}
	// errors.Is walks the wrap chain, so a context error wrapped by the agent
	// loop is still recognised.
	if errors.Is(runErr, context.DeadlineExceeded) {
		return "timeout"
	}
	if errors.Is(runErr, context.Canceled) {
		return "cancelled"
	}
	return "error"
}
|
||||
|
||||
// finishAudit writes the terminal roll-up on a detached context so a cancelled
|
||||
// run still records (mort's CleanupContextTimeout lesson).
|
||||
func (e *Executor) finishAudit(ctx context.Context, rec RunRecorder, status string, res Result, started time.Time, runErr error) {
|
||||
|
||||
Reference in New Issue
Block a user