feat: structured findings contract (machine-readable gadfly-findings block)
Each lens now appends a fenced ```gadfly-findings JSON array of its findings
({file, line, severity, confidence, title}) after the prose review. The binary
parses it exactly instead of scraping prose with a path:line regex, so telemetry
carries PER-FINDING severity (not just the lens-level verdict) and downstream
consolidation gets a clean contract to cluster on.
- system-prompt.txt + recheck prompt: ask for / regenerate the block.
- findings.go: extractStructuredFindings + stripFindingsBlock, with severity
normalization and prose-paragraph detail enrichment.
- emit.go: prefer the structured block; gracefully fall back to the heuristic
scrape when a model doesn't emit a parseable one (provider-agnostic, robust).
- consolidate.go: strip the block from the rendered comment (tooling-only).
This is the enabling refactor for consensus consolidation + inline reviews.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -100,7 +100,9 @@ func renderConsolidated(results []specialistResult) string {
|
|||||||
headline, len(results), strings.Join(specialistNames(results), ", "))
|
headline, len(results), strings.Join(specialistNames(results), ", "))
|
||||||
|
|
||||||
for _, r := range results {
|
for _, r := range results {
|
||||||
body := strings.TrimSpace(r.out)
|
// Strip the machine-readable ```gadfly-findings block — it's for tooling
|
||||||
|
// (telemetry/consolidation), not for human readers of the comment.
|
||||||
|
body := strings.TrimSpace(stripFindingsBlock(r.out))
|
||||||
if body == "" {
|
if body == "" {
|
||||||
body = "_(no output)_"
|
body = "_(no output)_"
|
||||||
}
|
}
|
||||||
|
|||||||
+22
-6
@@ -50,11 +50,16 @@ var pathLineRe = regexp.MustCompile(`([A-Za-z0-9_./-]+\.[A-Za-z0-9]+):(\d+)`)
|
|||||||
var numberedRe = regexp.MustCompile(`^\d+[.)]\s+(.+)$`)
|
var numberedRe = regexp.MustCompile(`^\d+[.)]\s+(.+)$`)
|
||||||
|
|
||||||
// finding is one extracted issue: where it points and a derived human title.
|
// finding is one extracted issue: where it points and a derived human title.
|
||||||
|
// severity/confidence are populated from the structured ```gadfly-findings block
|
||||||
|
// (extractStructuredFindings); they are empty for findings recovered by the
|
||||||
|
// heuristic prose scrape (parseFindings), which has no per-finding signal.
|
||||||
type finding struct {
|
type finding struct {
|
||||||
file string
|
file string
|
||||||
line int
|
line int
|
||||||
title string
|
title string
|
||||||
detail string
|
detail string
|
||||||
|
severity string // per-finding: critical/high/medium/small/trivial ("" if heuristic)
|
||||||
|
confidence string // per-finding: high/medium/low ("" if heuristic)
|
||||||
}
|
}
|
||||||
|
|
||||||
// runPayload is the POST /runs body. Field names match the gadfly-reports API EXACTLY.
|
// runPayload is the POST /runs body. Field names match the gadfly-reports API EXACTLY.
|
||||||
@@ -129,8 +134,19 @@ func emit(results []specialistResult, elapsed time.Duration) {
|
|||||||
if r.verdict == verdictClean {
|
if r.verdict == verdictClean {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
sev := r.verdict.label()
|
// Prefer the model's machine-readable ```gadfly-findings block (exact
|
||||||
for _, f := range parseFindings(r.spec, r.out) {
|
// location + per-finding severity); fall back to the prose scrape when a
|
||||||
|
// model didn't emit a parseable one.
|
||||||
|
fs, structured := extractStructuredFindings(r.out)
|
||||||
|
if !structured {
|
||||||
|
fs = parseFindings(r.spec, r.out)
|
||||||
|
}
|
||||||
|
lensSev := r.verdict.label()
|
||||||
|
for _, f := range fs {
|
||||||
|
sev := f.severity
|
||||||
|
if sev == "" {
|
||||||
|
sev = lensSev // heuristic finding: best signal is the lens verdict
|
||||||
|
}
|
||||||
reports = append(reports, reportPayload{
|
reports = append(reports, reportPayload{
|
||||||
Repo: repo,
|
Repo: repo,
|
||||||
PR: pr,
|
PR: pr,
|
||||||
|
|||||||
@@ -0,0 +1,179 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
// Structured findings: the machine-readable contract between a lens's output and
|
||||||
|
// the telemetry/consolidation pipeline. Each lens is asked (see
|
||||||
|
// scripts/system-prompt.txt) to append a fenced ```gadfly-findings code block
|
||||||
|
// holding a JSON array of its findings. Parsing that exact block is far more
|
||||||
|
// reliable than scraping prose with a path:line regex (emit.go's heuristic),
|
||||||
|
// and it carries PER-FINDING severity + confidence the prose verdict can't.
|
||||||
|
//
|
||||||
|
// Everything here degrades gracefully: a missing or malformed block makes
|
||||||
|
// extractStructuredFindings return ok=false, and the caller falls back to the
|
||||||
|
// heuristic scrape — so a weak model that ignores the contract still contributes
|
||||||
|
// findings, exactly as before.
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// structuredFinding mirrors one element of the ```gadfly-findings JSON array.
|
||||||
|
// Line is json.Number so we tolerate both 123 and "123" from less-precise models.
|
||||||
|
type structuredFinding struct {
|
||||||
|
File string `json:"file"`
|
||||||
|
Line json.Number `json:"line"`
|
||||||
|
Severity string `json:"severity"`
|
||||||
|
Confidence string `json:"confidence"`
|
||||||
|
Title string `json:"title"`
|
||||||
|
Detail string `json:"detail"` // optional; prose paragraph is used when absent
|
||||||
|
}
|
||||||
|
|
||||||
|
// findingsFence is the info-string that tags the machine-readable block.
|
||||||
|
const findingsFence = "gadfly-findings"
|
||||||
|
|
||||||
|
// extractStructuredFindings parses the ```gadfly-findings JSON block out of a
|
||||||
|
// lens's markdown. It returns the findings and ok=true when a block is present
|
||||||
|
// AND parses as a JSON array (an empty array is a valid "nothing found" — ok is
|
||||||
|
// still true so the caller does NOT fall back to the regex scrape). A missing or
|
||||||
|
// unparseable block returns ok=false, signalling the caller to use the heuristic.
|
||||||
|
//
|
||||||
|
// detail for each finding is filled from the JSON when present, else from the
|
||||||
|
// prose paragraph that references the same file:line (best of both: exact
|
||||||
|
// location + rich human context). Findings with no usable file are dropped.
|
||||||
|
func extractStructuredFindings(out string) ([]finding, bool) {
|
||||||
|
body, ok := findingsBlock(out)
|
||||||
|
if !ok {
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
var raw []structuredFinding
|
||||||
|
if err := json.Unmarshal([]byte(body), &raw); err != nil {
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
|
||||||
|
prose := proseParagraphs(out)
|
||||||
|
findings := make([]finding, 0, len(raw))
|
||||||
|
for _, sf := range raw {
|
||||||
|
file := strings.TrimSpace(sf.File)
|
||||||
|
if file == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
ln := 0
|
||||||
|
if n, err := strconv.Atoi(strings.TrimSpace(sf.Line.String())); err == nil {
|
||||||
|
ln = n
|
||||||
|
}
|
||||||
|
detail := strings.TrimSpace(sf.Detail)
|
||||||
|
if detail == "" {
|
||||||
|
detail = prose[file+":"+strconv.Itoa(ln)]
|
||||||
|
}
|
||||||
|
title := strings.TrimSpace(sf.Title)
|
||||||
|
if title == "" {
|
||||||
|
title = truncate(detail, 120)
|
||||||
|
}
|
||||||
|
findings = append(findings, finding{
|
||||||
|
file: file,
|
||||||
|
line: ln,
|
||||||
|
title: title,
|
||||||
|
detail: truncate(detail, 500),
|
||||||
|
severity: normalizeSeverity(sf.Severity),
|
||||||
|
confidence: strings.ToLower(strings.TrimSpace(sf.Confidence)),
|
||||||
|
})
|
||||||
|
if len(findings) >= maxFindingsPerLens {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return findings, true
|
||||||
|
}
|
||||||
|
|
||||||
|
// findingsBlock returns the inner JSON text of the first ```gadfly-findings fenced
|
||||||
|
// block in out (ok=false if there is none). It scans line-by-line so it tolerates
|
||||||
|
// 3+ backticks and trailing whitespace on the fence lines.
|
||||||
|
func findingsBlock(out string) (string, bool) {
|
||||||
|
lines := strings.Split(out, "\n")
|
||||||
|
start := -1
|
||||||
|
for i, ln := range lines {
|
||||||
|
t := strings.TrimSpace(ln)
|
||||||
|
if strings.HasPrefix(t, "```") && strings.Contains(t, findingsFence) {
|
||||||
|
start = i
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if start < 0 {
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
|
for j := start + 1; j < len(lines); j++ {
|
||||||
|
if isFenceClose(lines[j]) {
|
||||||
|
return strings.Join(lines[start+1:j], "\n"), true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return "", false // unterminated block
|
||||||
|
}
|
||||||
|
|
||||||
|
// stripFindingsBlock removes every ```gadfly-findings fenced block from out so the
|
||||||
|
// machine-readable JSON never shows in the rendered comment. Other content is
|
||||||
|
// preserved verbatim.
|
||||||
|
func stripFindingsBlock(out string) string {
|
||||||
|
lines := strings.Split(out, "\n")
|
||||||
|
var kept []string
|
||||||
|
skipping := false
|
||||||
|
for _, ln := range lines {
|
||||||
|
t := strings.TrimSpace(ln)
|
||||||
|
switch {
|
||||||
|
case skipping:
|
||||||
|
if isFenceClose(ln) {
|
||||||
|
skipping = false
|
||||||
|
}
|
||||||
|
case strings.HasPrefix(t, "```") && strings.Contains(t, findingsFence):
|
||||||
|
skipping = true
|
||||||
|
default:
|
||||||
|
kept = append(kept, ln)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return strings.TrimRight(strings.Join(kept, "\n"), "\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
// isFenceClose reports whether a line is a bare code-fence close (``` plus only
|
||||||
|
// whitespace), i.e. not an opening fence with an info string.
|
||||||
|
func isFenceClose(line string) bool {
|
||||||
|
t := strings.TrimSpace(line)
|
||||||
|
return strings.HasPrefix(t, "```") && strings.TrimLeft(t, "`") == ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// proseParagraphs maps "file:line" -> the prose paragraph that first references
|
||||||
|
// it, so a structured finding without its own detail can borrow the human
|
||||||
|
// context the model already wrote. Built from the markdown OUTSIDE the findings
|
||||||
|
// block (the block is JSON, not prose).
|
||||||
|
func proseParagraphs(out string) map[string]string {
|
||||||
|
prose := stripFindingsBlock(out)
|
||||||
|
lines := strings.Split(prose, "\n")
|
||||||
|
m := map[string]string{}
|
||||||
|
for _, loc := range pathLineRe.FindAllStringSubmatchIndex(prose, -1) {
|
||||||
|
key := prose[loc[2]:loc[3]] + ":" + prose[loc[4]:loc[5]]
|
||||||
|
if _, seen := m[key]; seen {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
li := strings.Count(prose[:loc[0]], "\n")
|
||||||
|
m[key] = paragraphAt(lines, li)
|
||||||
|
}
|
||||||
|
return m
|
||||||
|
}
|
||||||
|
|
||||||
|
// normalizeSeverity maps a model's severity word onto the canonical set
|
||||||
|
// (critical/high/medium/small/trivial), accepting common synonyms. An
|
||||||
|
// unrecognized value is returned lowercased so the store still sees the raw word.
|
||||||
|
func normalizeSeverity(s string) string {
|
||||||
|
switch strings.ToLower(strings.TrimSpace(s)) {
|
||||||
|
case "critical", "crit", "blocker", "blocking":
|
||||||
|
return "critical"
|
||||||
|
case "high", "major":
|
||||||
|
return "high"
|
||||||
|
case "medium", "moderate", "minor":
|
||||||
|
return "medium"
|
||||||
|
case "small", "low", "minor-issue":
|
||||||
|
return "small"
|
||||||
|
case "trivial", "nit", "nitpick", "info", "informational", "style":
|
||||||
|
return "trivial"
|
||||||
|
default:
|
||||||
|
return strings.ToLower(strings.TrimSpace(s))
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,111 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
const sampleReview = "**Blocking issues found**\n\n" +
|
||||||
|
"- **Unauthenticated endpoint** — `model.go:184` leaks PR content to a third party.\n" +
|
||||||
|
"- A nit about naming in `util.go:12`.\n\n" +
|
||||||
|
"```gadfly-findings\n" +
|
||||||
|
"[\n" +
|
||||||
|
" {\"file\": \"model.go\", \"line\": 184, \"severity\": \"high\", \"confidence\": \"high\", \"title\": \"Unauthenticated endpoint\"},\n" +
|
||||||
|
" {\"file\": \"util.go\", \"line\": 12, \"severity\": \"nit\", \"confidence\": \"medium\", \"title\": \"naming\"}\n" +
|
||||||
|
"]\n" +
|
||||||
|
"```\n"
|
||||||
|
|
||||||
|
func TestExtractStructuredFindings(t *testing.T) {
|
||||||
|
fs, ok := extractStructuredFindings(sampleReview)
|
||||||
|
if !ok {
|
||||||
|
t.Fatal("expected ok=true for a well-formed block")
|
||||||
|
}
|
||||||
|
if len(fs) != 2 {
|
||||||
|
t.Fatalf("want 2 findings, got %d", len(fs))
|
||||||
|
}
|
||||||
|
if fs[0].file != "model.go" || fs[0].line != 184 || fs[0].severity != "high" || fs[0].confidence != "high" {
|
||||||
|
t.Errorf("finding[0] mismatch: %+v", fs[0])
|
||||||
|
}
|
||||||
|
// "nit" must normalize to the canonical "trivial".
|
||||||
|
if fs[1].severity != "trivial" {
|
||||||
|
t.Errorf("want severity normalized to trivial, got %q", fs[1].severity)
|
||||||
|
}
|
||||||
|
// Detail is borrowed from the prose paragraph referencing the same file:line.
|
||||||
|
if fs[0].detail == "" {
|
||||||
|
t.Error("expected detail borrowed from prose, got empty")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestExtractStructuredFindingsEmptyArray(t *testing.T) {
|
||||||
|
out := "No material issues found\n\n```gadfly-findings\n[]\n```\n"
|
||||||
|
fs, ok := extractStructuredFindings(out)
|
||||||
|
if !ok {
|
||||||
|
t.Fatal("an empty array is a valid block; want ok=true")
|
||||||
|
}
|
||||||
|
if len(fs) != 0 {
|
||||||
|
t.Fatalf("want 0 findings, got %d", len(fs))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestExtractStructuredFindingsFallback(t *testing.T) {
|
||||||
|
// No block at all -> ok=false so the caller uses the heuristic scrape.
|
||||||
|
if _, ok := extractStructuredFindings("Minor issues\n\n- something at `x.go:1`\n"); ok {
|
||||||
|
t.Error("want ok=false when there is no block")
|
||||||
|
}
|
||||||
|
// Malformed JSON -> ok=false (graceful fallback).
|
||||||
|
bad := "Minor issues\n\n```gadfly-findings\n{not json}\n```\n"
|
||||||
|
if _, ok := extractStructuredFindings(bad); ok {
|
||||||
|
t.Error("want ok=false for malformed JSON")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestExtractStructuredFindingsStringLine(t *testing.T) {
|
||||||
|
// Tolerate a quoted line number from a less-precise model.
|
||||||
|
out := "Minor issues\n\n```gadfly-findings\n[{\"file\":\"a.go\",\"line\":\"42\",\"severity\":\"medium\",\"title\":\"x\"}]\n```\n"
|
||||||
|
fs, ok := extractStructuredFindings(out)
|
||||||
|
if !ok || len(fs) != 1 || fs[0].line != 42 {
|
||||||
|
t.Fatalf("want one finding at line 42, got ok=%v %+v", ok, fs)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStripFindingsBlock(t *testing.T) {
|
||||||
|
stripped := stripFindingsBlock(sampleReview)
|
||||||
|
if got := stripped; containsFence(got) {
|
||||||
|
t.Errorf("block not stripped: %q", got)
|
||||||
|
}
|
||||||
|
// The prose findings must survive.
|
||||||
|
if !contains(stripped, "Unauthenticated endpoint") || !contains(stripped, "util.go:12") {
|
||||||
|
t.Errorf("prose lost during strip: %q", stripped)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStripFindingsBlockNoBlock(t *testing.T) {
|
||||||
|
in := "Minor issues\n\n- finding at `x.go:9`"
|
||||||
|
if out := stripFindingsBlock(in); out != in {
|
||||||
|
t.Errorf("strip changed block-free text: %q != %q", out, in)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNormalizeSeverity(t *testing.T) {
|
||||||
|
cases := map[string]string{
|
||||||
|
"Critical": "critical", "blocker": "critical",
|
||||||
|
"major": "high", "HIGH": "high",
|
||||||
|
"moderate": "medium", "minor": "medium",
|
||||||
|
"low": "small",
|
||||||
|
"nit": "trivial", "Style": "trivial",
|
||||||
|
"weird": "weird", // unknown passes through, lowercased
|
||||||
|
}
|
||||||
|
for in, want := range cases {
|
||||||
|
if got := normalizeSeverity(in); got != want {
|
||||||
|
t.Errorf("normalizeSeverity(%q) = %q, want %q", in, got, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func containsFence(s string) bool { return contains(s, findingsFence) }
|
||||||
|
|
||||||
|
func contains(s, sub string) bool {
|
||||||
|
for i := 0; i+len(sub) <= len(s); i++ {
|
||||||
|
if s[i:i+len(sub)] == sub {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
@@ -49,6 +49,11 @@ Output rules:
|
|||||||
- Do NOT invent new findings; this is a verification gate, not a fresh review.
|
- Do NOT invent new findings; this is a verification gate, not a fresh review.
|
||||||
- Do NOT include meta-commentary about the verification process or which
|
- Do NOT include meta-commentary about the verification process or which
|
||||||
findings you dropped — output only the final, corrected review markdown.
|
findings you dropped — output only the final, corrected review markdown.
|
||||||
|
- The draft ends with a fenced ` + "`gadfly-findings`" + ` JSON block. Regenerate it
|
||||||
|
so it lists ONLY the findings that SURVIVED your verification, in the same schema
|
||||||
|
({"file","line","severity","confidence","title"}; severity one of
|
||||||
|
critical/high/medium/small/trivial, confidence one of high/medium/low). If every
|
||||||
|
finding was dropped, emit an empty array ` + "`[]`" + `. Keep the block last.
|
||||||
- When done investigating, STOP calling tools and reply with the review.`
|
- When done investigating, STOP calling tools and reply with the review.`
|
||||||
|
|
||||||
// recheckEnabled reports whether the verification pass should run. On unless
|
// recheckEnabled reports whether the verification pass should run. On unless
|
||||||
|
|||||||
@@ -43,3 +43,21 @@ Output rules:
|
|||||||
- Only report issues you are reasonably confident are real after checking. If the diff
|
- Only report issues you are reasonably confident are real after checking. If the diff
|
||||||
is clean, say so plainly rather than inventing nits.
|
is clean, say so plainly rather than inventing nits.
|
||||||
- When you are done investigating, STOP calling tools and reply with the final review.
|
- When you are done investigating, STOP calling tools and reply with the final review.
|
||||||
|
|
||||||
|
Machine-readable findings — AFTER the prose review, append ONE fenced code block,
|
||||||
|
tagged `gadfly-findings`, holding a JSON array of the SAME findings you described above
|
||||||
|
(this block is consumed by tooling and hidden from the rendered comment):
|
||||||
|
|
||||||
|
```gadfly-findings
|
||||||
|
[
|
||||||
|
{"file": "path/to/file.go", "line": 123, "severity": "high", "confidence": "high", "title": "one-line summary of the issue"}
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
- One object per real finding, in the same order as your prose. `file`/`line` must be a
|
||||||
|
concrete location you verified (the line the issue is at). `severity` is one of
|
||||||
|
`critical`, `high`, `medium`, `small`, `trivial`. `confidence` is your post-verification
|
||||||
|
confidence the issue is real: one of `high`, `medium`, `low`.
|
||||||
|
- Include ONLY genuine problems — never verification notes ("confirmed X is safe at f:line"),
|
||||||
|
and never an "Outside my lens:" aside. If your lens is clean, emit an empty array `[]`.
|
||||||
|
- This block is in ADDITION to the prose; do not drop the human-readable findings.
|
||||||
|
|||||||
Reference in New Issue
Block a user