fix: address verified gadfly P5 findings (canary robustness)
All 3 cloud models converged (all "minor" — example code, no blocking): - Consolidate: a model whose every lens errored now reads "review incomplete", not a misleading "no issues found" (all 3 models). + test. - Consolidate: swarm-cancelled (unattributed) cells now surface a "swarm cancelled — N cell(s) did not run" banner instead of vanishing (all 3). + test. - main: io.ReadAll(os.Stdin) error is surfaced (all 3); a TTY stdin no longer hangs forever (TTY guard, minimax). - providerOf: a bare tier name now keys its own PerKey bucket instead of all bare tiers collapsing onto "tier" (minimax, glm-5.2) — distinct tiers throttle independently. - Review doc reworded (the closure, not fanout, carries per-cell errors). Left as documented example-scope behavior: no per-cell timeout (caller supplies ctx), unknown-severity → lowest rank (no crash). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -43,7 +43,16 @@ func main() {
|
||||
flag.Parse()
|
||||
diff := *diffFlag
|
||||
if strings.TrimSpace(diff) == "" {
|
||||
b, _ := io.ReadAll(os.Stdin)
|
||||
// Guard against blocking forever on an interactive TTY (no piped input).
|
||||
if fi, _ := os.Stdin.Stat(); fi != nil && fi.Mode()&os.ModeCharDevice != 0 {
|
||||
fmt.Fprintln(os.Stderr, "reviewer: no diff (pass -diff or pipe one on stdin)")
|
||||
os.Exit(2)
|
||||
}
|
||||
b, err := io.ReadAll(os.Stdin)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "reviewer: reading stdin: %v\n", err)
|
||||
os.Exit(2)
|
||||
}
|
||||
diff = string(b)
|
||||
}
|
||||
if strings.TrimSpace(diff) == "" {
|
||||
@@ -80,12 +89,12 @@ func splitCSV(s string) []string {
|
||||
}
|
||||
|
||||
// providerOf returns a model spec's provider (the first path segment, e.g.
|
||||
// "anthropic/claude-…" → "anthropic"; a bare tier name → "tier").
|
||||
// "anthropic/claude-…" → "anthropic"; a bare tier name → itself).
|
||||
func providerOf(spec string) string {
|
||||
if i := strings.IndexByte(spec, '/'); i > 0 {
|
||||
return spec[:i]
|
||||
}
|
||||
return "tier"
|
||||
return spec // bare tier name → its own bucket (don't collapse distinct tiers)
|
||||
}
|
||||
|
||||
// perKeyCaps builds the PerKey map: each distinct provider capped at perProvider.
|
||||
|
||||
@@ -92,7 +92,7 @@ const baseSystemPrompt = "You are an adversarial code reviewer. Review the diff
|
||||
// Review runs every (model × lens) cell of the swarm concurrently, bounded by
|
||||
// opts (total + per-provider caps), and returns one LensResult per cell. A cell
|
||||
// whose model call fails carries the error in LensResult.Err — one bad cell
|
||||
// never aborts the swarm (fanout captures per-item errors).
|
||||
// never aborts the swarm (the closure embeds per-cell errors in LensResult.Err).
|
||||
func Review(ctx context.Context, models []NamedModel, lenses []Lens, diff string, opts fanout.Options[cell]) []LensResult {
|
||||
cells := make([]cell, 0, len(models)*len(lenses))
|
||||
for _, m := range models {
|
||||
@@ -139,8 +139,12 @@ type cell struct {
|
||||
func Consolidate(results []LensResult) string {
|
||||
byModel := map[string][]LensResult{}
|
||||
var order []string
|
||||
aborted := 0 // cells dropped before running (swarm cancelled) — no model attribution
|
||||
for _, r := range results {
|
||||
if r.Model == "" {
|
||||
if r.Err != nil {
|
||||
aborted++
|
||||
}
|
||||
continue
|
||||
}
|
||||
if _, ok := byModel[r.Model]; !ok {
|
||||
@@ -151,6 +155,9 @@ func Consolidate(results []LensResult) string {
|
||||
sort.Strings(order)
|
||||
|
||||
var b strings.Builder
|
||||
if aborted > 0 {
|
||||
fmt.Fprintf(&b, "> ⚠ swarm cancelled — %d cell(s) did not run; results below are partial.\n\n", aborted)
|
||||
}
|
||||
for _, m := range order {
|
||||
rs := byModel[m]
|
||||
var all []Finding
|
||||
@@ -168,10 +175,16 @@ func Consolidate(results []LensResult) string {
|
||||
}
|
||||
}
|
||||
}
|
||||
// A model whose every lens errored produced NO data — saying "no issues
|
||||
// found" would be misleading, so it gets its own verdict.
|
||||
successful := len(rs) - errored
|
||||
verdict := "no issues found"
|
||||
if worst >= severityRank(SevHigh) {
|
||||
switch {
|
||||
case successful == 0 && errored > 0:
|
||||
verdict = "review incomplete"
|
||||
case worst >= severityRank(SevHigh):
|
||||
verdict = "blocking issues found"
|
||||
} else if worst >= 0 {
|
||||
case worst >= 0:
|
||||
verdict = "minor issues"
|
||||
}
|
||||
fmt.Fprintf(&b, "## %s — %s", m, verdict)
|
||||
|
||||
@@ -100,3 +100,29 @@ func TestConsolidateVerdicts(t *testing.T) {
|
||||
t.Errorf("critical + errored lens header wrong:\n%s", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestConsolidateAllErrored: a model whose every lens errored must NOT be
|
||||
// labelled "no issues found" (the gadfly P5 finding).
|
||||
func TestConsolidateAllErrored(t *testing.T) {
|
||||
got := Consolidate([]LensResult{
|
||||
{Model: "m", Lens: "a", Err: context.DeadlineExceeded},
|
||||
{Model: "m", Lens: "b", Err: context.DeadlineExceeded},
|
||||
})
|
||||
if !strings.Contains(got, "m — review incomplete") {
|
||||
t.Errorf("all-errored model should be 'review incomplete', got:\n%s", got)
|
||||
}
|
||||
if strings.Contains(got, "no issues found") {
|
||||
t.Errorf("all-errored model must not say 'no issues found':\n%s", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestConsolidateSwarmCancelled: dropped (unattributed) cells surface a banner.
|
||||
func TestConsolidateSwarmCancelled(t *testing.T) {
|
||||
got := Consolidate([]LensResult{
|
||||
{Err: context.Canceled}, // dropped cell, no model
|
||||
{Model: "m", Lens: "a", Findings: []Finding{{Severity: SevSmall, Title: "x"}}},
|
||||
})
|
||||
if !strings.Contains(got, "swarm cancelled") {
|
||||
t.Errorf("dropped cells should surface a cancellation banner:\n%s", got)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user