fix: address verified gadfly P5 findings (canary robustness)
All 3 cloud models converged (all "minor" — example code, no blocking): - Consolidate: a model whose every lens errored now reads "review incomplete", not a misleading "no issues found" (all 3 models). + test. - Consolidate: swarm-cancelled (unattributed) cells now surface a "swarm cancelled — N cell(s) did not run" banner instead of vanishing (all 3). + test. - main: io.ReadAll(os.Stdin) error is surfaced (all 3); a TTY stdin no longer hangs forever (TTY guard, minimax). - providerOf: a bare tier name now keys its own PerKey bucket instead of all bare tiers collapsing onto "tier" (minimax, glm-5.2) — distinct tiers throttle independently. - Review doc reworded (the closure, not fanout, carries per-cell errors). Left as documented example-scope behavior: no per-cell timeout (caller supplies ctx), unknown-severity → lowest rank (no crash). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -43,7 +43,16 @@ func main() {
|
|||||||
flag.Parse()
|
flag.Parse()
|
||||||
diff := *diffFlag
|
diff := *diffFlag
|
||||||
if strings.TrimSpace(diff) == "" {
|
if strings.TrimSpace(diff) == "" {
|
||||||
b, _ := io.ReadAll(os.Stdin)
|
// Guard against blocking forever on an interactive TTY (no piped input).
|
||||||
|
if fi, _ := os.Stdin.Stat(); fi != nil && fi.Mode()&os.ModeCharDevice != 0 {
|
||||||
|
fmt.Fprintln(os.Stderr, "reviewer: no diff (pass -diff or pipe one on stdin)")
|
||||||
|
os.Exit(2)
|
||||||
|
}
|
||||||
|
b, err := io.ReadAll(os.Stdin)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "reviewer: reading stdin: %v\n", err)
|
||||||
|
os.Exit(2)
|
||||||
|
}
|
||||||
diff = string(b)
|
diff = string(b)
|
||||||
}
|
}
|
||||||
if strings.TrimSpace(diff) == "" {
|
if strings.TrimSpace(diff) == "" {
|
||||||
@@ -80,12 +89,12 @@ func splitCSV(s string) []string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// providerOf returns a model spec's provider (the first path segment, e.g.
|
// providerOf returns a model spec's provider (the first path segment, e.g.
|
||||||
// "anthropic/claude-…" → "anthropic"; a bare tier name → "tier").
|
// "anthropic/claude-…" → "anthropic"; a bare tier name → itself).
|
||||||
func providerOf(spec string) string {
|
func providerOf(spec string) string {
|
||||||
if i := strings.IndexByte(spec, '/'); i > 0 {
|
if i := strings.IndexByte(spec, '/'); i > 0 {
|
||||||
return spec[:i]
|
return spec[:i]
|
||||||
}
|
}
|
||||||
return "tier"
|
return spec // bare tier name → its own bucket (don't collapse distinct tiers)
|
||||||
}
|
}
|
||||||
|
|
||||||
// perKeyCaps builds the PerKey map: each distinct provider capped at perProvider.
|
// perKeyCaps builds the PerKey map: each distinct provider capped at perProvider.
|
||||||
|
|||||||
@@ -92,7 +92,7 @@ const baseSystemPrompt = "You are an adversarial code reviewer. Review the diff
|
|||||||
// Review runs every (model × lens) cell of the swarm concurrently, bounded by
|
// Review runs every (model × lens) cell of the swarm concurrently, bounded by
|
||||||
// opts (total + per-provider caps), and returns one LensResult per cell. A cell
|
// opts (total + per-provider caps), and returns one LensResult per cell. A cell
|
||||||
// whose model call fails carries the error in LensResult.Err — one bad cell
|
// whose model call fails carries the error in LensResult.Err — one bad cell
|
||||||
// never aborts the swarm (fanout captures per-item errors).
|
// never aborts the swarm (the closure embeds per-cell errors in LensResult.Err).
|
||||||
func Review(ctx context.Context, models []NamedModel, lenses []Lens, diff string, opts fanout.Options[cell]) []LensResult {
|
func Review(ctx context.Context, models []NamedModel, lenses []Lens, diff string, opts fanout.Options[cell]) []LensResult {
|
||||||
cells := make([]cell, 0, len(models)*len(lenses))
|
cells := make([]cell, 0, len(models)*len(lenses))
|
||||||
for _, m := range models {
|
for _, m := range models {
|
||||||
@@ -139,8 +139,12 @@ type cell struct {
|
|||||||
func Consolidate(results []LensResult) string {
|
func Consolidate(results []LensResult) string {
|
||||||
byModel := map[string][]LensResult{}
|
byModel := map[string][]LensResult{}
|
||||||
var order []string
|
var order []string
|
||||||
|
aborted := 0 // cells dropped before running (swarm cancelled) — no model attribution
|
||||||
for _, r := range results {
|
for _, r := range results {
|
||||||
if r.Model == "" {
|
if r.Model == "" {
|
||||||
|
if r.Err != nil {
|
||||||
|
aborted++
|
||||||
|
}
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if _, ok := byModel[r.Model]; !ok {
|
if _, ok := byModel[r.Model]; !ok {
|
||||||
@@ -151,6 +155,9 @@ func Consolidate(results []LensResult) string {
|
|||||||
sort.Strings(order)
|
sort.Strings(order)
|
||||||
|
|
||||||
var b strings.Builder
|
var b strings.Builder
|
||||||
|
if aborted > 0 {
|
||||||
|
fmt.Fprintf(&b, "> ⚠ swarm cancelled — %d cell(s) did not run; results below are partial.\n\n", aborted)
|
||||||
|
}
|
||||||
for _, m := range order {
|
for _, m := range order {
|
||||||
rs := byModel[m]
|
rs := byModel[m]
|
||||||
var all []Finding
|
var all []Finding
|
||||||
@@ -168,10 +175,16 @@ func Consolidate(results []LensResult) string {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// A model whose every lens errored produced NO data — saying "no issues
|
||||||
|
// found" would be misleading, so it gets its own verdict.
|
||||||
|
successful := len(rs) - errored
|
||||||
verdict := "no issues found"
|
verdict := "no issues found"
|
||||||
if worst >= severityRank(SevHigh) {
|
switch {
|
||||||
|
case successful == 0 && errored > 0:
|
||||||
|
verdict = "review incomplete"
|
||||||
|
case worst >= severityRank(SevHigh):
|
||||||
verdict = "blocking issues found"
|
verdict = "blocking issues found"
|
||||||
} else if worst >= 0 {
|
case worst >= 0:
|
||||||
verdict = "minor issues"
|
verdict = "minor issues"
|
||||||
}
|
}
|
||||||
fmt.Fprintf(&b, "## %s — %s", m, verdict)
|
fmt.Fprintf(&b, "## %s — %s", m, verdict)
|
||||||
|
|||||||
@@ -100,3 +100,29 @@ func TestConsolidateVerdicts(t *testing.T) {
|
|||||||
t.Errorf("critical + errored lens header wrong:\n%s", got)
|
t.Errorf("critical + errored lens header wrong:\n%s", got)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestConsolidateAllErrored: a model whose every lens errored must NOT be
|
||||||
|
// labelled "no issues found" (the gadfly P5 finding).
|
||||||
|
func TestConsolidateAllErrored(t *testing.T) {
|
||||||
|
got := Consolidate([]LensResult{
|
||||||
|
{Model: "m", Lens: "a", Err: context.DeadlineExceeded},
|
||||||
|
{Model: "m", Lens: "b", Err: context.DeadlineExceeded},
|
||||||
|
})
|
||||||
|
if !strings.Contains(got, "m — review incomplete") {
|
||||||
|
t.Errorf("all-errored model should be 'review incomplete', got:\n%s", got)
|
||||||
|
}
|
||||||
|
if strings.Contains(got, "no issues found") {
|
||||||
|
t.Errorf("all-errored model must not say 'no issues found':\n%s", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestConsolidateSwarmCancelled: dropped (unattributed) cells surface a banner.
|
||||||
|
func TestConsolidateSwarmCancelled(t *testing.T) {
|
||||||
|
got := Consolidate([]LensResult{
|
||||||
|
{Err: context.Canceled}, // dropped cell, no model
|
||||||
|
{Model: "m", Lens: "a", Findings: []Finding{{Severity: SevSmall, Title: "x"}}},
|
||||||
|
})
|
||||||
|
if !strings.Contains(got, "swarm cancelled") {
|
||||||
|
t.Errorf("dropped cells should surface a cancellation banner:\n%s", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user