Files
gadfly/cmd/gadfly/consensus_test.go
T
steve 7577c21039
Build & push image / build-and-push (pull_request) Successful in 7s
Adversarial Review (Gadfly) / review (pull_request) Successful in 15m18s
feat: inline COMMENT-state PR review (findings anchored to changed lines)
Phase 3. Alongside the consensus comment, the consolidator now posts ONE Gitea
pull review (state COMMENT — advisory, never request-changes/approve, so it can
never block a merge) whose inline comments anchor each consensus finding to the
exact changed line. The "reviewer integrated with Gitea" the project wanted,
without the blocking.

- review.go: a unified-diff parser (parseDiffNewLines — hunk-length-bounded, so a
  content line that looks like "+++ "/"@@" isn't mistaken for a header), inline
  comment building filtered to lines actually in the diff, and the Gitea reviews
  API client (raw new_position/event=COMMENT fields). Re-runs delete the prior
  gadfly review (body marker) instead of stacking.
- consensus.go: cluster.detail restored (now consumed by the inline comment body);
  renderConsensus takes precomputed clusters; runConsolidate clusters once and
  drives both the consensus comment and the inline review.
- entrypoint.sh: fetch the PR diff and pass GADFLY_DIFF_FILE to the consolidator
  (GITEA_API/TOKEN/PR already in its env).
- README + reusable workflow (`inline_review` input) + entrypoint docs.

Best-effort throughout: no diff / API creds, or any post error → the consensus
comment still stands; the review is never required and never blocks. Validated
the Gitea reviews API + line anchoring live before building. Tests: diff parser
(incl. header-like content + multi-file), inline filtering. gofmt/vet/bash -n clean.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-28 19:07:32 -04:00

218 lines
8.1 KiB
Go

package main
import (
"encoding/json"
"os"
"path/filepath"
"strings"
"testing"
)
func TestClusterFindingsAgreementAndTolerance(t *testing.T) {
models := []modelFindings{
{Model: "m1", Verdict: "Blocking issues found", Findings: []outFinding{
{Lens: "security", File: "a.go", Line: 10, Severity: "high", Title: "auth bypass"},
{Lens: "perf", File: "b.go", Line: 5, Severity: "trivial", Title: "tiny nit"},
}},
{Model: "m2", Verdict: "Minor issues", Findings: []outFinding{
{Lens: "security", File: "a.go", Line: 11, Severity: "critical", Title: "auth bypass (crit)"}, // within tolerance of a.go:10
}},
{Model: "m3", Verdict: "Minor issues", Findings: []outFinding{
{Lens: "correctness", File: "a.go", Line: 10, Severity: "medium", Title: "auth bypass"},
}},
}
clusters := clusterFindings(models)
if len(clusters) != 2 {
t.Fatalf("want 2 clusters (a.go:10±, b.go:5), got %d: %+v", len(clusters), clusters)
}
// First cluster (highest agreement) is the a.go auth one: 3 models, severity
// escalated to critical, representative line the smallest (10).
c := clusters[0]
if len(c.models) != 3 {
t.Errorf("want 3 models on the top cluster, got %d", len(c.models))
}
if c.severity != "critical" {
t.Errorf("want escalated severity critical, got %q", c.severity)
}
if c.line != 10 {
t.Errorf("want representative line 10, got %d", c.line)
}
if !c.lenses["security"] || !c.lenses["correctness"] {
t.Errorf("want union of lenses, got %v", c.lenses)
}
}
func TestRenderConsensusFoldsSingleModelNits(t *testing.T) {
models := []modelFindings{
{Model: "m1", Provider: "p", Verdict: "Blocking issues found", Markdown: "m1 detail", Findings: []outFinding{
{Lens: "security", File: "a.go", Line: 10, Severity: "high", Title: "auth bypass"},
{Lens: "perf", File: "b.go", Line: 5, Severity: "trivial", Title: "tiny nit"},
}},
{Model: "m2", Provider: "p", Verdict: "Minor issues", Markdown: "m2 detail", Findings: []outFinding{
{Lens: "security", File: "a.go", Line: 10, Severity: "high", Title: "auth bypass"},
}},
}
out := renderConsensus(models, clusterFindings(models))
// Headline table: the agreed finding with a 2/2 badge.
if !strings.Contains(out, "2/2") {
t.Errorf("expected a 2/2 agreement badge in headline:\n%s", out)
}
if !strings.Contains(out, "auth bypass") || !strings.Contains(out, "a.go:10") {
t.Errorf("headline missing the consensus finding:\n%s", out)
}
// The lone trivial finding is folded, not in the headline table.
if !strings.Contains(out, "single-model finding") {
t.Errorf("expected a folded single-model section:\n%s", out)
}
// Per-model detail is preserved (folded).
if !strings.Contains(out, "m1 detail") || !strings.Contains(out, "m2 detail") {
t.Errorf("per-model detail not preserved:\n%s", out)
}
}
func TestRenderConsensusHighSeverityLoneFindingStaysHeadline(t *testing.T) {
// A single model, single critical finding must still surface in the headline
// (not be folded as "low confidence").
models := []modelFindings{
{Model: "solo", Verdict: "Blocking issues found", Markdown: "x", Findings: []outFinding{
{Lens: "security", File: "a.go", Line: 1, Severity: "critical", Title: "rce"},
}},
}
out := renderConsensus(models, clusterFindings(models))
headline := out
if i := strings.Index(out, "single-model finding"); i >= 0 {
headline = out[:i]
}
if !strings.Contains(headline, "rce") {
t.Errorf("lone critical should be in the headline, not folded:\n%s", out)
}
}
// TestClusterSlidingWindowMergesChain reports the same finding from three
// models at a.go lines 10, 13 and 16 (each 3 apart) and requires a single
// merged cluster — the window slides with the span instead of anchoring at
// line 10 — that lists all three models.
func TestClusterSlidingWindowMergesChain(t *testing.T) {
	chain := []modelFindings{
		{Model: "m1", Findings: []outFinding{{Lens: "x", File: "a.go", Line: 10, Severity: "medium", Title: "t"}}},
		{Model: "m2", Findings: []outFinding{{Lens: "x", File: "a.go", Line: 13, Severity: "medium", Title: "t"}}},
		{Model: "m3", Findings: []outFinding{{Lens: "x", File: "a.go", Line: 16, Severity: "medium", Title: "t"}}},
	}

	merged := clusterFindings(chain)
	if n := len(merged); n != 1 {
		t.Fatalf("chain 10/13/16 should merge into 1 cluster, got %d", n)
	}
	if n := len(merged[0].models); n != 3 {
		t.Errorf("want 3 models in the merged cluster, got %d", n)
	}
}
func TestRenderConsensusExcludesErroredFromDenominator(t *testing.T) {
models := []modelFindings{
{Model: "m1", Verdict: "Minor issues", Markdown: "a", Findings: []outFinding{
{Lens: "security", File: "a.go", Line: 9, Severity: "medium", Title: "leak"}}},
{Model: "m2", Verdict: "Minor issues", Markdown: "b", Findings: []outFinding{
{Lens: "security", File: "a.go", Line: 9, Severity: "medium", Title: "leak"}}},
{Model: "broken", Verdict: "reviewer failed", Errored: true, Markdown: "boom"},
}
out := renderConsensus(models, clusterFindings(models))
// Denominator is the 2 effective models, not 3; the failure is noted.
if !strings.Contains(out, "2/2") {
t.Errorf("errored model must be excluded from the denominator (want 2/2):\n%s", out)
}
if !strings.Contains(out, "1 failed") {
t.Errorf("expected a '1 failed' note:\n%s", out)
}
if !strings.Contains(out, "reviewer failed") {
t.Errorf("errored model should still appear (folded) as failed:\n%s", out)
}
}
func TestRenderConsensusLoneHighFolds(t *testing.T) {
// A single-model HIGH (not critical) folds — only consensus or a lone CRITICAL
// earns the headline, so a lone Blocking-lens finding doesn't reintroduce noise.
models := []modelFindings{
{Model: "solo", Verdict: "Blocking issues found", Markdown: "x", Findings: []outFinding{
{Lens: "security", File: "a.go", Line: 1, Severity: "high", Title: "maybe-bug"}}},
}
out := renderConsensus(models, clusterFindings(models))
head := out
if i := strings.Index(out, "single-model finding"); i >= 0 {
head = out[:i]
}
if strings.Contains(head, "maybe-bug") {
t.Errorf("a lone HIGH should fold, not headline:\n%s", out)
}
}
// TestWriteAndConsolidateRoundTrip writes two per-model JSON artifacts (plus
// one non-JSON junk file) into a temp directory, points
// GADFLY_CONSOLIDATE_DIR at it, runs runConsolidate, and checks that the text
// it prints leads with consensusMarker and reports the shared x.go:3 finding
// at 2/2.
func TestWriteAndConsolidateRoundTrip(t *testing.T) {
	dir := t.TempDir()

	// write serialises one model artifact into dir. Both the marshal and the
	// file write are fatal on error so a broken fixture cannot masquerade as a
	// consolidation bug.
	write := func(name string, mf modelFindings) {
		t.Helper()
		data, err := json.Marshal(mf)
		if err != nil {
			t.Fatalf("marshal %s: %v", name, err)
		}
		if err := os.WriteFile(filepath.Join(dir, name), data, 0o644); err != nil {
			t.Fatal(err)
		}
	}

	// Two model artifacts on disk.
	write("m1.json", modelFindings{Model: "m1", Provider: "ollama", Verdict: "Minor issues", Markdown: "md1",
		Findings: []outFinding{{Lens: "security", File: "x.go", Line: 3, Severity: "medium", Title: "leak"}}})
	write("m2.json", modelFindings{Model: "m2", Provider: "ollama", Verdict: "Minor issues", Markdown: "md2",
		Findings: []outFinding{{Lens: "security", File: "x.go", Line: 3, Severity: "high", Title: "leak"}}})

	// A junk file must be skipped, not crash consolidation.
	if err := os.WriteFile(filepath.Join(dir, "notes.txt"), []byte("ignore me"), 0o644); err != nil {
		t.Fatal(err)
	}

	t.Setenv("GADFLY_CONSOLIDATE_DIR", dir)

	// runConsolidate prints to stdout; capture it. The error is recorded
	// inside the callback and checked afterwards, so the test never aborts
	// (t.Fatalf exits the goroutine) while stdout is still redirected.
	var runErr error
	out := captureStdout(t, func() {
		runErr = runConsolidate()
	})
	if runErr != nil {
		t.Fatalf("runConsolidate: %v", runErr)
	}

	if !strings.HasPrefix(strings.TrimSpace(out), consensusMarker) {
		t.Errorf("consolidated output must lead with the marker:\n%s", out)
	}
	if !strings.Contains(out, "2/2") || !strings.Contains(out, "x.go:3") {
		t.Errorf("expected the agreed x.go:3 finding at 2/2:\n%s", out)
	}
}
// TestRunConsolidateEmptyDirErrors points GADFLY_CONSOLIDATE_DIR at a fresh,
// empty temp directory and requires runConsolidate to return a non-nil error.
func TestRunConsolidateEmptyDirErrors(t *testing.T) {
	emptyDir := t.TempDir()
	t.Setenv("GADFLY_CONSOLIDATE_DIR", emptyDir)

	err := runConsolidate()
	if err != nil {
		return
	}
	t.Error("want an error for an empty consolidate dir (entrypoint falls back)")
}
// captureStdout redirects os.Stdout to a pipe for the duration of fn and
// returns everything written to it.
//
// A background goroutine drains the pipe while fn runs, so fn can emit more
// than one pipe buffer's worth of output without blocking. os.Stdout is
// restored and the pipe's write end is closed as soon as fn finishes — whether
// it returns normally, panics, or ends the test via t.Fatal/t.FailNow — so the
// reader goroutine always reaches EOF and exits instead of leaking.
//
// If the pipe cannot be created the test is failed immediately with t.Fatal.
func captureStdout(t *testing.T, fn func()) string {
	t.Helper()

	r, w, err := os.Pipe()
	if err != nil {
		t.Fatal(err)
	}

	orig := os.Stdout
	os.Stdout = w

	// Buffered: when fn panics or exits the goroutine nobody receives, and the
	// reader must still be able to hand off its result and terminate.
	done := make(chan string, 1)
	go func() {
		var sb strings.Builder
		buf := make([]byte, 4096)
		for {
			n, err := r.Read(buf)
			if n > 0 {
				sb.Write(buf[:n])
			}
			if err != nil {
				break // EOF once the write end is closed, or a real read error
			}
		}
		r.Close()
		done <- sb.String()
	}()

	// Run fn in its own frame so the cleanup fires on every exit path before
	// we wait for the reader.
	func() {
		defer func() {
			os.Stdout = orig
			w.Close() // signals EOF to the reader goroutine
		}()
		fn()
	}()

	return <-done
}