fix: address swarm review of the structured-findings contract

From PR #16's own adversarial review (graded in gadfly-reports): - Empty/empty-result fallback: emit now scrapes the prose when the structured block is absent, unterminated, malformed, OR parses to zero usable findings (e.g. an empty [] emitted alongside real prose) — the safety net is no longer defeated by a contradictory empty block. (extractStructuredFindingsOrScrape) - Unterminated fence no longer swallows the comment: extract + strip share findingsSpan, which only recognizes a TERMINATED block; an unterminated fence is left in place. - confidence is now consumed: normalized (normalizeConfidence) and emitted to the telemetry store (reportPayload.confidence), not write-only. - Canonical raw_severity everywhere: heuristic findings derive a canonical word from the verdict (verdict.severity) instead of the full verdict phrase. - normalizeSeverity table made consistent ("minor"/"low" -> small). - Exact info-string match (isFindingsOpen) instead of substring; shared fenceInfo predicate removes the duplicated open/close fence logic. - Dedupe structured findings by file:line; never emit an empty title (fall back to detail, then the location); validate the parsed line number. - Skip the prose scan when findings already carry detail; refresh stale emit.go doc comments. Tests added for the empty-[] fallback, unterminated-fence strip, confidence normalization, and verdict.severity. All green, gofmt clean, vet quiet. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-28 18:22:43 -04:00
parent 9bcd7b9696
commit 66682aa1d5
5 changed files with 210 additions and 94 deletions
@@ -28,6 +28,22 @@ func (v verdict) label() string {
 	}
 }
 // severity maps a verdict to a canonical severity word (the same vocabulary as
 // the structured findings: critical/high/medium/small/trivial). Used as the
 // raw_severity for heuristic-scraped findings, which carry no per-finding
 // severity of their own — so the telemetry store sees one consistent vocabulary
 // instead of a mix of canonical words and full verdict phrases.
 func (v verdict) severity() string {
 	switch v {
 	case verdictBlocking:
 		return "high"
 	case verdictMinor:
 		return "small"
 	default:
 		return "trivial"
 	}
 }
 // parseVerdict extracts a specialist's self-reported verdict from its output.
 // The base prompt tells each lens to lead with one of the three phrases.
 func parseVerdict(out string) verdict {
@@ -10,11 +10,14 @@ package main
 //     the review markdown consumed by run.sh) and never changes the exit code.
 //   - It depends only on the Go stdlib (net/http).
 //
-// Findings are extracted heuristically from each lens's markdown: a "path:line"
+// Findings come PRIMARILY from each lens's machine-readable ```gadfly-findings
-// reference (e.g. run/executor.go:166) anchors a finding, whose title is the
+// block (findings.go: exact file/line + per-finding severity/confidence). When a
-// nearest preceding markdown heading / numbered item / bold lead-in (else the
+// model emits no parseable block, emit falls back to a heuristic prose scrape: a
-// first sentence of the finding's paragraph). This is best-effort signal for the
+// "path:line" reference (e.g. run/executor.go:166) anchors a finding, whose title
-// store to aggregate — not a structured contract the reviewer guarantees.
+// is the nearest preceding markdown heading / numbered item / bold lead-in (else
 // the first sentence of the finding's paragraph). The scrape is best-effort
 // signal for the store to aggregate, not a structured contract the reviewer
 // guarantees.
 import (
 	"bytes"
@@ -91,6 +94,7 @@ type reportPayload struct {
 	Provider    string `json:"provider"`
 	RunID       string `json:"run_id"`
 	RawSeverity string `json:"raw_severity"`
 	Confidence  string `json:"confidence"` // per-finding high/medium/low ("" if heuristic)
 	Detail      string `json:"detail"`
 }
@@ -135,17 +139,17 @@ func emit(results []specialistResult, elapsed time.Duration) {
 			continue
 		}
 		// Prefer the model's machine-readable ```gadfly-findings block (exact
-		// location + per-finding severity); fall back to the prose scrape when a
+		// location + per-finding severity). Fall back to the prose scrape when it
-		// model didn't emit a parseable one.
+		// yields NOTHING — no block, an unterminated/malformed one, or a block that
-		fs, structured := extractStructuredFindings(r.out)
+		// parsed to zero usable findings on a non-clean lens (e.g. an empty []
-		if !structured {
+		// alongside prose findings). Falling back on empty, not just on absent,
-			fs = parseFindings(r.spec, r.out)
+		// keeps the safety net from being defeated by a contradictory empty block.
-		}
+		fs := extractStructuredFindingsOrScrape(r)
-		lensSev := r.verdict.label()
+		lensSev := r.verdict.severity() // canonical word for heuristic findings (no per-finding severity)
 		for _, f := range fs {
 			sev := f.severity
 			if sev == "" {
-				sev = lensSev // heuristic finding: best signal is the lens verdict
+				sev = lensSev
 			}
 			reports = append(reports, reportPayload{
 				Repo:        repo,
@@ -158,6 +162,7 @@ func emit(results []specialistResult, elapsed time.Duration) {
 				Provider:    provider,
 				RunID:       runID,
 				RawSeverity: sev,
 				Confidence:  f.confidence,
 				Detail:      f.detail,
 			})
 		}
@@ -157,7 +157,7 @@ func TestEmit_PostsRunsAndReports(t *testing.T) {
 	if len(reportBody) != 2 {
 		t.Fatalf("/reports array length = %d, want 2", len(reportBody))
 	}
-	for _, k := range []string{"repo", "pr", "lens", "file", "line", "title", "model", "provider", "run_id", "raw_severity", "detail"} {
+	for _, k := range []string{"repo", "pr", "lens", "file", "line", "title", "model", "provider", "run_id", "raw_severity", "confidence", "detail"} {
 		if _, ok := reportBody[0][k]; !ok {
 			t.Errorf("/reports[0] missing field %q (got keys %v)", k, keysOf(reportBody[0]))
 		}
@@ -171,8 +171,11 @@ func TestEmit_PostsRunsAndReports(t *testing.T) {
 	if reportBody[0]["line"] != float64(166) {
 		t.Errorf("reports[0].line = %v, want 166", reportBody[0]["line"])
 	}
-	if reportBody[0]["raw_severity"] != "Blocking issues found" {
+	// No structured block in sampleLensMarkdown, so this is a heuristic-scraped
-		t.Errorf("reports[0].raw_severity = %v, want 'Blocking issues found'", reportBody[0]["raw_severity"])
+	// finding: raw_severity is the canonical word derived from the lens verdict
 	// (Blocking -> "high"), not the full verdict phrase.
 	if reportBody[0]["raw_severity"] != "high" {
 		t.Errorf("reports[0].raw_severity = %v, want 'high'", reportBody[0]["raw_severity"])
 	}
 	if reportBody[0]["run_id"] != "owner/repo#7:ollama-cloud/qwen3" {
 		t.Errorf("reports[0].run_id = %v, want owner/repo#7:ollama-cloud/qwen3", reportBody[0]["run_id"])
@@ -7,10 +7,10 @@ package main
 // reliable than scraping prose with a path:line regex (emit.go's heuristic),
 // and it carries PER-FINDING severity + confidence the prose verdict can't.
 //
-// Everything here degrades gracefully: a missing or malformed block makes
+// Everything here degrades gracefully: a missing, unterminated, or malformed
-// extractStructuredFindings return ok=false, and the caller falls back to the
+// block makes extractStructuredFindings return ok=false (and yield no findings),
-// heuristic scrape — so a weak model that ignores the contract still contributes
+// so the caller falls back to the heuristic scrape — a weak model that ignores
-// findings, exactly as before.
+// the contract still contributes findings, exactly as before.
 import (
 	"encoding/json"
@@ -26,49 +26,68 @@ type structuredFinding struct {
 	Severity   string      `json:"severity"`
 	Confidence string      `json:"confidence"`
 	Title      string      `json:"title"`
-	Detail     string      `json:"detail"` // optional; prose paragraph is used when absent
+	Detail     string      `json:"detail"` // optional; the prose paragraph is used when absent
 }
 // findingsFence is the info-string that tags the machine-readable block.
 const findingsFence = "gadfly-findings"
 // extractStructuredFindings parses the ```gadfly-findings JSON block out of a
-// lens's markdown. It returns the findings and ok=true when a block is present
+// lens's markdown. It returns the findings and ok=true when a TERMINATED block is
-// AND parses as a JSON array (an empty array is a valid "nothing found" — ok is
+// present AND parses as a JSON array (an empty array is valid "nothing found" —
-// still true so the caller does NOT fall back to the regex scrape). A missing or
+// ok is still true). A missing, unterminated, or unparseable block returns
-// unparseable block returns ok=false, signalling the caller to use the heuristic.
+// ok=false so the caller falls back to the heuristic scrape.
 //
-// detail for each finding is filled from the JSON when present, else from the
+// Findings are deduped by file:line (keeping the first, matching parseFindings),
-// prose paragraph that references the same file:line (best of both: exact
+// findings with no usable file are dropped, and each title/detail is backfilled
-// location + rich human context). Findings with no usable file are dropped.
+// from the prose when the JSON omits it (best of both: exact location + the human
 // context the model already wrote).
 func extractStructuredFindings(out string) ([]finding, bool) {
-	body, ok := findingsBlock(out)
+	lines := strings.Split(out, "\n")
 	start, end, ok := findingsSpan(lines)
 	if !ok {
 		return nil, false
 	}
 	var raw []structuredFinding
-	if err := json.Unmarshal([]byte(body), &raw); err != nil {
+	if err := json.Unmarshal([]byte(strings.Join(lines[start+1:end], "\n")), &raw); err != nil {
 		return nil, false
 	}
 	prose := proseParagraphs(out)
 	findings := make([]finding, 0, len(raw))
 	seen := map[string]bool{}
 	var prose map[string]string // built lazily — only if a finding lacks its own detail
 	for _, sf := range raw {
 		file := strings.TrimSpace(sf.File)
 		if file == "" {
 			continue
 		}
 		ln := 0
-		if n, err := strconv.Atoi(strings.TrimSpace(sf.Line.String())); err == nil {
+		if n, err := strconv.Atoi(strings.TrimSpace(sf.Line.String())); err == nil && n > 0 {
 			ln = n
 		}
 		key := file + ":" + strconv.Itoa(ln)
 		if ln > 0 { // only dedupe concrete locations; unknown-line findings are kept
 			if seen[key] {
 				continue
 			}
 			seen[key] = true
 		}
 		detail := strings.TrimSpace(sf.Detail)
-		if detail == "" {
+		if detail == "" && ln > 0 {
-			detail = prose[file+":"+strconv.Itoa(ln)]
+			if prose == nil {
 				prose = proseParagraphs(out)
 			}
 			detail = prose[key]
 		}
 		title := strings.TrimSpace(sf.Title)
-		if title == "" {
+		if title == "" { // never empty: fall back to the prose detail, then the location
 			if detail != "" {
 				title = truncate(detail, 120)
 			} else if ln > 0 {
 				title = key
 			} else {
 				title = file
 			}
 		}
 		findings = append(findings, finding{
 			file:       file,
@@ -76,7 +95,7 @@ func extractStructuredFindings(out string) ([]finding, bool) {
 			title:      title,
 			detail:     truncate(detail, 500),
 			severity:   normalizeSeverity(sf.Severity),
-			confidence: strings.ToLower(strings.TrimSpace(sf.Confidence)),
+			confidence: normalizeConfidence(sf.Confidence),
 		})
 		if len(findings) >= maxFindingsPerLens {
 			break
@@ -85,58 +104,81 @@ func extractStructuredFindings(out string) ([]finding, bool) {
 	return findings, true
 }
-// findingsBlock returns the inner JSON text of the first ```gadfly-findings fenced
+// extractStructuredFindingsOrScrape returns a lens's findings, preferring the
-// block in out (ok=false if there is none). It scans line-by-line so it tolerates
+// structured ```gadfly-findings block and falling back to the heuristic prose
-// 3+ backticks and trailing whitespace on the fence lines.
+// scrape when the block is absent, unterminated/malformed, OR parsed to zero
-func findingsBlock(out string) (string, bool) {
+// usable findings (e.g. an empty [] emitted alongside real prose findings).
 // Factored out of emit() so the fallback rule is unit-testable.
 func extractStructuredFindingsOrScrape(r specialistResult) []finding {
 	if fs, _ := extractStructuredFindings(r.out); len(fs) > 0 {
 		return fs
 	}
 	return parseFindings(r.spec, r.out)
 }
 // stripFindingsBlock removes every TERMINATED ```gadfly-findings block from out so
 // the machine-readable JSON never shows in the rendered comment. An UNTERMINATED
 // fence is left in place — treating it as a block would swallow the rest of the
 // comment (e.g. when a model's output was truncated mid-block). Trailing
 // whitespace is trimmed.
 func stripFindingsBlock(out string) string {
 	lines := strings.Split(out, "\n")
-	start := -1
+	for {
 		start, end, ok := findingsSpan(lines)
 		if !ok {
 			break
 		}
 		lines = append(lines[:start:start], lines[end+1:]...)
 	}
 	return strings.TrimRight(strings.Join(lines, "\n"), "\n")
 }
 // findingsSpan returns the [start,end] inclusive line indices of the first
 // TERMINATED ```gadfly-findings block in lines (start = the opening fence, end =
 // the closing fence), or ok=false when there is none or it is unterminated.
 // extract and strip share it so they always agree on what is (and isn't) a block.
 func findingsSpan(lines []string) (start, end int, ok bool) {
 	start = -1
 	for i, ln := range lines {
-		t := strings.TrimSpace(ln)
+		if isFindingsOpen(ln) {
 		if strings.HasPrefix(t, "```") && strings.Contains(t, findingsFence) {
 			start = i
 			break
 		}
 	}
 	if start < 0 {
-		return "", false
+		return 0, 0, false
 	}
 	for j := start + 1; j < len(lines); j++ {
 		if isFenceClose(lines[j]) {
-			return strings.Join(lines[start+1:j], "\n"), true
+			return start, j, true
 		}
 	}
-	return "", false // unterminated block
+	return 0, 0, false // unterminated
 }
-// stripFindingsBlock removes every ```gadfly-findings fenced block from out so the
+// fenceInfo returns the info-string (text after the backticks) of a code-fence
-// machine-readable JSON never shows in the rendered comment. Other content is
+// line and whether the line opens/closes a fence at all. A bare ``` yields ("",
-// preserved verbatim.
+// true); ```gadfly-findings yields ("gadfly-findings", true).
-func stripFindingsBlock(out string) string {
+func fenceInfo(line string) (string, bool) {
 	lines := strings.Split(out, "\n")
 	var kept []string
 	skipping := false
 	for _, ln := range lines {
 		t := strings.TrimSpace(ln)
 		switch {
 		case skipping:
 			if isFenceClose(ln) {
 				skipping = false
 			}
 		case strings.HasPrefix(t, "```") && strings.Contains(t, findingsFence):
 			skipping = true
 		default:
 			kept = append(kept, ln)
 		}
 	}
 	return strings.TrimRight(strings.Join(kept, "\n"), "\n")
 }
 // isFenceClose reports whether a line is a bare code-fence close (``` plus only
 // whitespace), i.e. not an opening fence with an info string.
 func isFenceClose(line string) bool {
 	t := strings.TrimSpace(line)
-	return strings.HasPrefix(t, "```") && strings.TrimLeft(t, "`") == ""
+	if !strings.HasPrefix(t, "```") {
 		return "", false
 	}
 	return strings.TrimSpace(strings.TrimLeft(t, "`")), true
 }
 // isFindingsOpen reports whether line opens a ```gadfly-findings block, matching
 // the info-string EXACTLY (not as a substring) so a fence like ```not-findings
 // can't masquerade as ours.
 func isFindingsOpen(line string) bool {
 	info, ok := fenceInfo(line)
 	return ok && info == findingsFence
 }
 // isFenceClose reports whether line is a bare closing fence (``` with no info).
 func isFenceClose(line string) bool {
 	info, ok := fenceInfo(line)
 	return ok && info == ""
 }
 // proseParagraphs maps "file:line" -> the prose paragraph that first references
@@ -149,7 +191,7 @@ func proseParagraphs(out string) map[string]string {
 	m := map[string]string{}
 	for _, loc := range pathLineRe.FindAllStringSubmatchIndex(prose, -1) {
 		key := prose[loc[2]:loc[3]] + ":" + prose[loc[4]:loc[5]]
-		if _, seen := m[key]; seen {
+		if _, dup := m[key]; dup {
 			continue
 		}
 		li := strings.Count(prose[:loc[0]], "\n")
@@ -162,18 +204,33 @@ func proseParagraphs(out string) map[string]string {
 // (critical/high/medium/small/trivial), accepting common synonyms. An
 // unrecognized value is returned lowercased so the store still sees the raw word.
 func normalizeSeverity(s string) string {
-	switch strings.ToLower(strings.TrimSpace(s)) {
+	switch t := strings.ToLower(strings.TrimSpace(s)); t {
 	case "critical", "crit", "blocker", "blocking":
 		return "critical"
-	case "high", "major":
+	case "high", "major", "severe":
 		return "high"
-	case "medium", "moderate", "minor":
+	case "medium", "moderate":
 		return "medium"
-	case "small", "low", "minor-issue":
+	case "small", "low", "minor":
 		return "small"
-	case "trivial", "nit", "nitpick", "info", "informational", "style":
+	case "trivial", "nit", "nitpick", "info", "informational", "style", "cosmetic":
 		return "trivial"
 	default:
-		return strings.ToLower(strings.TrimSpace(s))
+		return t
 	}
 }
 // normalizeConfidence maps a model's confidence word onto high/medium/low,
 // accepting common synonyms; an unrecognized value is returned lowercased.
 func normalizeConfidence(s string) string {
 	switch t := strings.ToLower(strings.TrimSpace(s)); t {
 	case "high", "certain", "confirmed", "verified":
 		return "high"
 	case "medium", "med", "moderate":
 		return "medium"
 	case "low", "unsure", "tentative", "unverified", "speculative":
 		return "low"
 	default:
 		return t
 	}
 }
@@ -1,6 +1,9 @@
 package main
-import "testing"
+import (
 	"strings"
 	"testing"
 )
 const sampleReview = "**Blocking issues found**\n\n" +
 	"- **Unauthenticated endpoint** — `model.go:184` leaks PR content to a third party.\n" +
@@ -67,15 +70,24 @@ func TestExtractStructuredFindingsStringLine(t *testing.T) {
 func TestStripFindingsBlock(t *testing.T) {
 	stripped := stripFindingsBlock(sampleReview)
-	if got := stripped; containsFence(got) {
+	if strings.Contains(stripped, findingsFence) {
-		t.Errorf("block not stripped: %q", got)
+		t.Errorf("block not stripped: %q", stripped)
 	}
 	// The prose findings must survive.
-	if !contains(stripped, "Unauthenticated endpoint") || !contains(stripped, "util.go:12") {
+	if !strings.Contains(stripped, "Unauthenticated endpoint") || !strings.Contains(stripped, "util.go:12") {
 		t.Errorf("prose lost during strip: %q", stripped)
 	}
 }
 func TestStripFindingsBlockUnterminated(t *testing.T) {
 	// A truncated, unterminated block must NOT swallow the prose before it.
 	out := "Minor issues\n\n- real finding at `x.go:1`\n\n```gadfly-findings\n[{\"file\":\"x.go\""
 	got := stripFindingsBlock(out)
 	if !strings.Contains(got, "real finding at `x.go:1`") {
 		t.Errorf("unterminated block swallowed the prose: %q", got)
 	}
 }
 func TestStripFindingsBlockNoBlock(t *testing.T) {
 	in := "Minor issues\n\n- finding at `x.go:9`"
 	if out := stripFindingsBlock(in); out != in {
@@ -87,8 +99,8 @@ func TestNormalizeSeverity(t *testing.T) {
 	cases := map[string]string{
 		"Critical": "critical", "blocker": "critical",
 		"major": "high", "HIGH": "high",
-		"moderate": "medium", "minor": "medium",
+		"moderate": "medium",
-		"low": "small",
+		"minor":    "small", "low": "small", // "minor" and "low" both map to small (consistently)
 		"nit": "trivial", "Style": "trivial",
 		"weird": "weird", // unknown passes through, lowercased
 	}
@@ -99,13 +111,36 @@ func TestNormalizeSeverity(t *testing.T) {
 	}
 }
-func containsFence(s string) bool { return contains(s, findingsFence) }
+func TestNormalizeConfidence(t *testing.T) {
-
+	cases := map[string]string{
-func contains(s, sub string) bool {
+		"High": "high", "confirmed": "high",
-	for i := 0; i+len(sub) <= len(s); i++ {
+		"MEDIUM": "medium", "moderate": "medium",
-		if s[i:i+len(sub)] == sub {
+		"low": "low", "unverified": "low",
-			return true
+		"hunch": "hunch", // unknown passes through, lowercased
 	}
 	for in, want := range cases {
 		if got := normalizeConfidence(in); got != want {
 			t.Errorf("normalizeConfidence(%q) = %q, want %q", in, got, want)
 		}
 	}
-	return false
+}
 func TestExtractStructuredFindingsFallbackOnEmpty(t *testing.T) {
 	// A non-clean lens that emitted an empty [] but listed prose findings must
 	// fall through to the heuristic scrape, not silently drop everything.
 	out := "Minor issues\n\n- bug at `pkg/a.go:7`\n\n```gadfly-findings\n[]\n```\n"
 	r := specialistResult{spec: Specialist{Name: "correctness"}, out: out, verdict: verdictMinor}
 	fs := extractStructuredFindingsOrScrape(r)
 	if len(fs) == 0 {
 		t.Fatal("empty [] must fall back to the heuristic scrape, got no findings")
 	}
 	if fs[0].file != "pkg/a.go" || fs[0].line != 7 {
 		t.Errorf("heuristic fallback wrong: %+v", fs[0])
 	}
 }
 func TestVerdictSeverity(t *testing.T) {
 	if verdictBlocking.severity() != "high" || verdictMinor.severity() != "small" || verdictUnknown.severity() != "trivial" {
 		t.Error("verdict.severity mapping changed unexpectedly")
 	}
 }