fix: address verified gadfly P2 findings (9 real of 18)
Independently verified all 18 gadfly findings against the code (18-agent
fan-out). Fixed the 9 real ones; the other 9 were false-positive /
hallucinated / valid-tradeoff (no change).
High:
- F1 nil model: a Models resolver returning (ctx,nil,nil) flowed into the
agent loop and nil-panicked. Now a clean error (Run never panics). +test.
- F9 compactor data-leak: renderTranscript sent tool-call args verbatim to
the summarizer (a possibly-different provider/tier); secret-bearing tool
args (mcp_call/email_send/http_*/webhook_*) are now redacted, with a doc
note that result bodies still flow (summary needs them).
Medium/minor:
- F2 compactor error path returned the folded slice, not the original msgs
(contradicting the documented non-fatal contract) -> return msgs.
- F3 RunStats.Status was only ever ok/error; it now also reports timeout
(DeadlineExceeded) / cancelled (Canceled) via statusFor. +test.
- F4 step-zip emitted empty-name "ghost" steps when results>calls; now pairs
min(calls,results) only.
- F5 SetIteration was never called -> RunState.Iteration always 0; the step
observer now updates it each loop.
- F6 matchPending fallback was LIFO; now FIFO (matches the per-key queue).
- F7 estimateTokens had no default arm (future Part kinds counted as 0);
unknown parts now counted conservatively.
- F8 cloud_sync silently truncated >1MiB responses -> opaque JSON error; now
a clear "response exceeded N bytes" via readCapped.
- F12 step observer captured the caller ctx; now the merged runCtx.
- F13 compaction onFire was nil (doc claimed it logged); now wired to
audit LogEvent("compaction_fired").
- F11 (no pre-dispatch hook in majordomo) documented honestly as a known
limitation; F18 UsageSink doc clarified cache tokens are subsets of input.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -3,6 +3,7 @@ package run
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
||||
@@ -130,3 +131,38 @@ func (r *captureRecorder) OnTool(llm.ToolCall, string) { r.tools++ }
|
||||
// LogEvent is a no-op: the event name and its fields are discarded.
func (r *captureRecorder) LogEvent(string, map[string]any) {}
|
||||
// LogError is a no-op: the error message is discarded.
func (r *captureRecorder) LogError(string) {}
|
||||
// Close stores the final RunStats on the recorder and marks it as closed.
// The context argument is ignored.
func (r *captureRecorder) Close(_ context.Context, s RunStats) {
	r.stats = s
	r.closed = true
}
|
||||
|
||||
// TestExecutorNilModelNoPanic: a resolver returning (ctx, nil, nil) yields a
|
||||
// clean error, not a nil-pointer panic (gadfly F1, high severity).
|
||||
func TestExecutorNilModelNoPanic(t *testing.T) {
|
||||
ex := New(Config{
|
||||
Registry: tool.NewRegistry(),
|
||||
Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) {
|
||||
return ctx, nil, nil // nil model, nil error
|
||||
},
|
||||
})
|
||||
res := ex.Run(context.Background(),
|
||||
RunnableAgent{ModelTier: "x"}, tool.Invocation{RunID: "r"}, "hi")
|
||||
if res.Err == nil {
|
||||
t.Fatal("expected an error for a nil model, got nil (would have panicked in the loop)")
|
||||
}
|
||||
}
|
||||
|
||||
// TestStatusFor maps run errors to RunStats.Status (gadfly F3).
|
||||
func TestStatusFor(t *testing.T) {
|
||||
cases := []struct {
|
||||
err error
|
||||
want string
|
||||
}{
|
||||
{nil, "ok"},
|
||||
{context.DeadlineExceeded, "timeout"},
|
||||
{context.Canceled, "cancelled"},
|
||||
{fmt.Errorf("wrapped: %w", context.DeadlineExceeded), "timeout"},
|
||||
{errors.New("boom"), "error"},
|
||||
}
|
||||
for _, c := range cases {
|
||||
if got := statusFor(c.err); got != c.want {
|
||||
t.Errorf("statusFor(%v) = %q, want %q", c.err, got, c.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user