fix: address verified gadfly P5 findings (canary robustness)
executus CI / test (pull_request) Failing after 3s
executus CI / test (push) Failing after 1m9s

All 3 cloud models converged (all "minor" — example code, no blocking):
- Consolidate: a model whose every lens errored now reads "review incomplete",
  not a misleading "no issues found" (all 3 models). + test.
- Consolidate: swarm-cancelled (unattributed) cells now surface a "swarm
  cancelled — N cell(s) did not run" banner instead of vanishing (all 3). + test.
- main: io.ReadAll(os.Stdin) error is surfaced (all 3); a TTY stdin no longer
  hangs forever (TTY guard, minimax).
- providerOf: a bare tier name now keys its own PerKey bucket instead of all
  bare tiers collapsing onto "tier" (minimax, glm-5.2) — distinct tiers throttle
  independently.
- Review doc reworded (the closure, not fanout, carries per-cell errors).

Left as documented example-scope behavior: no per-cell timeout (caller supplies
ctx), unknown-severity → lowest rank (no crash).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-27 00:34:01 -04:00
parent ea9475da54
commit a87e7d2c72
3 changed files with 54 additions and 6 deletions
+12 -3
View File
@@ -43,7 +43,16 @@ func main() {
flag.Parse()
diff := *diffFlag
if strings.TrimSpace(diff) == "" {
b, _ := io.ReadAll(os.Stdin)
// Guard against blocking forever on an interactive TTY (no piped input).
if fi, _ := os.Stdin.Stat(); fi != nil && fi.Mode()&os.ModeCharDevice != 0 {
fmt.Fprintln(os.Stderr, "reviewer: no diff (pass -diff or pipe one on stdin)")
os.Exit(2)
}
b, err := io.ReadAll(os.Stdin)
if err != nil {
fmt.Fprintf(os.Stderr, "reviewer: reading stdin: %v\n", err)
os.Exit(2)
}
diff = string(b)
}
if strings.TrimSpace(diff) == "" {
@@ -80,12 +89,12 @@ func splitCSV(s string) []string {
}
// providerOf returns a model spec's provider (the first path segment, e.g.
// "anthropic/claude-…" → "anthropic"; a bare tier name → "tier").
// "anthropic/claude-…" → "anthropic"; a bare tier name → itself).
func providerOf(spec string) string {
if i := strings.IndexByte(spec, '/'); i > 0 {
return spec[:i]
}
return "tier"
return spec // bare tier name → its own bucket (don't collapse distinct tiers)
}
// perKeyCaps builds the PerKey map: each distinct provider capped at perProvider.
+16 -3
View File
@@ -92,7 +92,7 @@ const baseSystemPrompt = "You are an adversarial code reviewer. Review the diff
// Review runs every (model × lens) cell of the swarm concurrently, bounded by
// opts (total + per-provider caps), and returns one LensResult per cell. A cell
// whose model call fails carries the error in LensResult.Err — one bad cell
// never aborts the swarm (fanout captures per-item errors).
// never aborts the swarm (the closure embeds per-cell errors in LensResult.Err).
func Review(ctx context.Context, models []NamedModel, lenses []Lens, diff string, opts fanout.Options[cell]) []LensResult {
cells := make([]cell, 0, len(models)*len(lenses))
for _, m := range models {
@@ -139,8 +139,12 @@ type cell struct {
func Consolidate(results []LensResult) string {
byModel := map[string][]LensResult{}
var order []string
aborted := 0 // cells dropped before running (swarm cancelled) — no model attribution
for _, r := range results {
if r.Model == "" {
if r.Err != nil {
aborted++
}
continue
}
if _, ok := byModel[r.Model]; !ok {
@@ -151,6 +155,9 @@ func Consolidate(results []LensResult) string {
sort.Strings(order)
var b strings.Builder
if aborted > 0 {
fmt.Fprintf(&b, "> ⚠ swarm cancelled — %d cell(s) did not run; results below are partial.\n\n", aborted)
}
for _, m := range order {
rs := byModel[m]
var all []Finding
@@ -168,10 +175,16 @@ func Consolidate(results []LensResult) string {
}
}
}
// A model whose every lens errored produced NO data — saying "no issues
// found" would be misleading, so it gets its own verdict.
successful := len(rs) - errored
verdict := "no issues found"
if worst >= severityRank(SevHigh) {
switch {
case successful == 0 && errored > 0:
verdict = "review incomplete"
case worst >= severityRank(SevHigh):
verdict = "blocking issues found"
} else if worst >= 0 {
case worst >= 0:
verdict = "minor issues"
}
fmt.Fprintf(&b, "## %s — %s", m, verdict)
+26
View File
@@ -100,3 +100,29 @@ func TestConsolidateVerdicts(t *testing.T) {
t.Errorf("critical + errored lens header wrong:\n%s", got)
}
}
// TestConsolidateAllErrored: a model whose every lens errored must NOT be
// labelled "no issues found" (the gadfly P5 finding).
func TestConsolidateAllErrored(t *testing.T) {
got := Consolidate([]LensResult{
{Model: "m", Lens: "a", Err: context.DeadlineExceeded},
{Model: "m", Lens: "b", Err: context.DeadlineExceeded},
})
if !strings.Contains(got, "m — review incomplete") {
t.Errorf("all-errored model should be 'review incomplete', got:\n%s", got)
}
if strings.Contains(got, "no issues found") {
t.Errorf("all-errored model must not say 'no issues found':\n%s", got)
}
}
// TestConsolidateSwarmCancelled: dropped (unattributed) cells surface a banner.
func TestConsolidateSwarmCancelled(t *testing.T) {
got := Consolidate([]LensResult{
{Err: context.Canceled}, // dropped cell, no model
{Model: "m", Lens: "a", Findings: []Finding{{Severity: SevSmall, Title: "x"}}},
})
if !strings.Contains(got, "swarm cancelled") {
t.Errorf("dropped cells should surface a cancellation banner:\n%s", got)
}
}