diff --git a/cmd/gadfly/consolidate.go b/cmd/gadfly/consolidate.go index 47f2e26..cf254c8 100644 --- a/cmd/gadfly/consolidate.go +++ b/cmd/gadfly/consolidate.go @@ -100,7 +100,9 @@ func renderConsolidated(results []specialistResult) string { headline, len(results), strings.Join(specialistNames(results), ", ")) for _, r := range results { - body := strings.TrimSpace(r.out) + // Strip the machine-readable ```gadfly-findings block — it's for tooling + // (telemetry/consolidation), not for human readers of the comment. + body := strings.TrimSpace(stripFindingsBlock(r.out)) if body == "" { body = "_(no output)_" } diff --git a/cmd/gadfly/emit.go b/cmd/gadfly/emit.go index 2cfc9c0..5a43ca4 100644 --- a/cmd/gadfly/emit.go +++ b/cmd/gadfly/emit.go @@ -50,11 +50,16 @@ var pathLineRe = regexp.MustCompile(`([A-Za-z0-9_./-]+\.[A-Za-z0-9]+):(\d+)`) var numberedRe = regexp.MustCompile(`^\d+[.)]\s+(.+)$`) // finding is one extracted issue: where it points and a derived human title. +// severity/confidence are populated from the structured ```gadfly-findings block +// (extractStructuredFindings); they are empty for findings recovered by the +// heuristic prose scrape (parseFindings), which has no per-finding signal. type finding struct { - file string - line int - title string - detail string + file string + line int + title string + detail string + severity string // per-finding: critical/high/medium/small/trivial ("" if heuristic) + confidence string // per-finding: high/medium/low ("" if heuristic) } // runPayload is the POST /runs body. Field names match the gadfly-reports API EXACTLY. @@ -129,8 +134,19 @@ func emit(results []specialistResult, elapsed time.Duration) { if r.verdict == verdictClean { continue } - sev := r.verdict.label() - for _, f := range parseFindings(r.spec, r.out) { + // Prefer the model's machine-readable ```gadfly-findings block (exact + // location + per-finding severity); fall back to the prose scrape when a + // model didn't emit a parseable one. + fs, structured := extractStructuredFindings(r.out) + if !structured { + fs = parseFindings(r.spec, r.out) + } + lensSev := r.verdict.label() + for _, f := range fs { + sev := f.severity + if sev == "" { + sev = lensSev // heuristic finding: best signal is the lens verdict + } reports = append(reports, reportPayload{ Repo: repo, PR: pr, diff --git a/cmd/gadfly/findings.go b/cmd/gadfly/findings.go new file mode 100644 index 0000000..4e3ba9f --- /dev/null +++ b/cmd/gadfly/findings.go @@ -0,0 +1,179 @@ +package main + +// Structured findings: the machine-readable contract between a lens's output and +// the telemetry/consolidation pipeline. Each lens is asked (see +// scripts/system-prompt.txt) to append a fenced ```gadfly-findings code block +// holding a JSON array of its findings. Parsing that exact block is far more +// reliable than scraping prose with a path:line regex (emit.go's heuristic), +// and it carries PER-FINDING severity + confidence the prose verdict can't. +// +// Everything here degrades gracefully: a missing or malformed block makes +// extractStructuredFindings return ok=false, and the caller falls back to the +// heuristic scrape — so a weak model that ignores the contract still contributes +// findings, exactly as before. + +import ( + "encoding/json" + "strconv" + "strings" +) + +// structuredFinding mirrors one element of the ```gadfly-findings JSON array. +// Line is json.Number so we tolerate both 123 and "123" from less-precise models. +type structuredFinding struct { + File string `json:"file"` + Line json.Number `json:"line"` + Severity string `json:"severity"` + Confidence string `json:"confidence"` + Title string `json:"title"` + Detail string `json:"detail"` // optional; prose paragraph is used when absent +} + +// findingsFence is the info-string that tags the machine-readable block. +const findingsFence = "gadfly-findings" + +// extractStructuredFindings parses the ```gadfly-findings JSON block out of a +// lens's markdown. It returns the findings and ok=true when a block is present +// AND parses as a JSON array (an empty array is a valid "nothing found" — ok is +// still true so the caller does NOT fall back to the regex scrape). A missing or +// unparseable block returns ok=false, signalling the caller to use the heuristic. +// +// detail for each finding is filled from the JSON when present, else from the +// prose paragraph that references the same file:line (best of both: exact +// location + rich human context). Findings with no usable file are dropped. +func extractStructuredFindings(out string) ([]finding, bool) { + body, ok := findingsBlock(out) + if !ok { + return nil, false + } + var raw []structuredFinding + if err := json.Unmarshal([]byte(body), &raw); err != nil { + return nil, false + } + + prose := proseParagraphs(out) + findings := make([]finding, 0, len(raw)) + for _, sf := range raw { + file := strings.TrimSpace(sf.File) + if file == "" { + continue + } + ln := 0 + if n, err := strconv.Atoi(strings.TrimSpace(sf.Line.String())); err == nil { + ln = n + } + detail := strings.TrimSpace(sf.Detail) + if detail == "" { + detail = prose[file+":"+strconv.Itoa(ln)] + } + title := strings.TrimSpace(sf.Title) + if title == "" { + title = truncate(detail, 120) + } + findings = append(findings, finding{ + file: file, + line: ln, + title: title, + detail: truncate(detail, 500), + severity: normalizeSeverity(sf.Severity), + confidence: strings.ToLower(strings.TrimSpace(sf.Confidence)), + }) + if len(findings) >= maxFindingsPerLens { + break + } + } + return findings, true +} + +// findingsBlock returns the inner JSON text of the first ```gadfly-findings fenced +// block in out (ok=false if there is none). It scans line-by-line so it tolerates +// 3+ backticks and trailing whitespace on the fence lines. +func findingsBlock(out string) (string, bool) { + lines := strings.Split(out, "\n") + start := -1 + for i, ln := range lines { + t := strings.TrimSpace(ln) + if strings.HasPrefix(t, "```") && strings.Contains(t, findingsFence) { + start = i + break + } + } + if start < 0 { + return "", false + } + for j := start + 1; j < len(lines); j++ { + if isFenceClose(lines[j]) { + return strings.Join(lines[start+1:j], "\n"), true + } + } + return "", false // unterminated block +} + +// stripFindingsBlock removes every ```gadfly-findings fenced block from out so the +// machine-readable JSON never shows in the rendered comment. Other content is +// preserved verbatim. +func stripFindingsBlock(out string) string { + lines := strings.Split(out, "\n") + var kept []string + skipping := false + for _, ln := range lines { + t := strings.TrimSpace(ln) + switch { + case skipping: + if isFenceClose(ln) { + skipping = false + } + case strings.HasPrefix(t, "```") && strings.Contains(t, findingsFence): + skipping = true + default: + kept = append(kept, ln) + } + } + return strings.TrimRight(strings.Join(kept, "\n"), "\n") +} + +// isFenceClose reports whether a line is a bare code-fence close (``` plus only +// whitespace), i.e. not an opening fence with an info string. +func isFenceClose(line string) bool { + t := strings.TrimSpace(line) + return strings.HasPrefix(t, "```") && strings.TrimLeft(t, "`") == "" +} + +// proseParagraphs maps "file:line" -> the prose paragraph that first references +// it, so a structured finding without its own detail can borrow the human +// context the model already wrote. Built from the markdown OUTSIDE the findings +// block (the block is JSON, not prose). +func proseParagraphs(out string) map[string]string { + prose := stripFindingsBlock(out) + lines := strings.Split(prose, "\n") + m := map[string]string{} + for _, loc := range pathLineRe.FindAllStringSubmatchIndex(prose, -1) { + key := prose[loc[2]:loc[3]] + ":" + prose[loc[4]:loc[5]] + if _, seen := m[key]; seen { + continue + } + li := strings.Count(prose[:loc[0]], "\n") + m[key] = paragraphAt(lines, li) + } + return m +} + +// normalizeSeverity maps a model's severity word onto the canonical set +// (critical/high/medium/small/trivial), accepting common synonyms. An +// unrecognized value is returned lowercased so the store still sees the raw word. +func normalizeSeverity(s string) string { + switch strings.ToLower(strings.TrimSpace(s)) { + case "critical", "crit", "blocker", "blocking": + return "critical" + case "high", "major": + return "high" + case "medium", "moderate", "minor": + return "medium" + case "small", "low", "minor-issue": + return "small" + case "trivial", "nit", "nitpick", "info", "informational", "style": + return "trivial" + default: + return strings.ToLower(strings.TrimSpace(s)) + } +} diff --git a/cmd/gadfly/findings_test.go b/cmd/gadfly/findings_test.go new file mode 100644 index 0000000..ac03068 --- /dev/null +++ b/cmd/gadfly/findings_test.go @@ -0,0 +1,111 @@ +package main + +import "testing" + +const sampleReview = "**Blocking issues found**\n\n" + + "- **Unauthenticated endpoint** — `model.go:184` leaks PR content to a third party.\n" + + "- A nit about naming in `util.go:12`.\n\n" + + "```gadfly-findings\n" + + "[\n" + + " {\"file\": \"model.go\", \"line\": 184, \"severity\": \"high\", \"confidence\": \"high\", \"title\": \"Unauthenticated endpoint\"},\n" + + " {\"file\": \"util.go\", \"line\": 12, \"severity\": \"nit\", \"confidence\": \"medium\", \"title\": \"naming\"}\n" + + "]\n" + + "```\n" + +func TestExtractStructuredFindings(t *testing.T) { + fs, ok := extractStructuredFindings(sampleReview) + if !ok { + t.Fatal("expected ok=true for a well-formed block") + } + if len(fs) != 2 { + t.Fatalf("want 2 findings, got %d", len(fs)) + } + if fs[0].file != "model.go" || fs[0].line != 184 || fs[0].severity != "high" || fs[0].confidence != "high" { + t.Errorf("finding[0] mismatch: %+v", fs[0]) + } + // "nit" must normalize to the canonical "trivial". + if fs[1].severity != "trivial" { + t.Errorf("want severity normalized to trivial, got %q", fs[1].severity) + } + // Detail is borrowed from the prose paragraph referencing the same file:line. + if fs[0].detail == "" { + t.Error("expected detail borrowed from prose, got empty") + } +} + +func TestExtractStructuredFindingsEmptyArray(t *testing.T) { + out := "No material issues found\n\n```gadfly-findings\n[]\n```\n" + fs, ok := extractStructuredFindings(out) + if !ok { + t.Fatal("an empty array is a valid block; want ok=true") + } + if len(fs) != 0 { + t.Fatalf("want 0 findings, got %d", len(fs)) + } +} + +func TestExtractStructuredFindingsFallback(t *testing.T) { + // No block at all -> ok=false so the caller uses the heuristic scrape. + if _, ok := extractStructuredFindings("Minor issues\n\n- something at `x.go:1`\n"); ok { + t.Error("want ok=false when there is no block") + } + // Malformed JSON -> ok=false (graceful fallback). + bad := "Minor issues\n\n```gadfly-findings\n{not json}\n```\n" + if _, ok := extractStructuredFindings(bad); ok { + t.Error("want ok=false for malformed JSON") + } +} + +func TestExtractStructuredFindingsStringLine(t *testing.T) { + // Tolerate a quoted line number from a less-precise model. + out := "Minor issues\n\n```gadfly-findings\n[{\"file\":\"a.go\",\"line\":\"42\",\"severity\":\"medium\",\"title\":\"x\"}]\n```\n" + fs, ok := extractStructuredFindings(out) + if !ok || len(fs) != 1 || fs[0].line != 42 { + t.Fatalf("want one finding at line 42, got ok=%v %+v", ok, fs) + } +} + +func TestStripFindingsBlock(t *testing.T) { + stripped := stripFindingsBlock(sampleReview) + if got := stripped; containsFence(got) { + t.Errorf("block not stripped: %q", got) + } + // The prose findings must survive. + if !contains(stripped, "Unauthenticated endpoint") || !contains(stripped, "util.go:12") { + t.Errorf("prose lost during strip: %q", stripped) + } +} + +func TestStripFindingsBlockNoBlock(t *testing.T) { + in := "Minor issues\n\n- finding at `x.go:9`" + if out := stripFindingsBlock(in); out != in { + t.Errorf("strip changed block-free text: %q != %q", out, in) + } +} + +func TestNormalizeSeverity(t *testing.T) { + cases := map[string]string{ + "Critical": "critical", "blocker": "critical", + "major": "high", "HIGH": "high", + "moderate": "medium", "minor": "medium", + "low": "small", + "nit": "trivial", "Style": "trivial", + "weird": "weird", // unknown passes through, lowercased + } + for in, want := range cases { + if got := normalizeSeverity(in); got != want { + t.Errorf("normalizeSeverity(%q) = %q, want %q", in, got, want) + } + } +} + +func containsFence(s string) bool { return contains(s, findingsFence) } + +func contains(s, sub string) bool { + for i := 0; i+len(sub) <= len(s); i++ { + if s[i:i+len(sub)] == sub { + return true + } + } + return false +} diff --git a/cmd/gadfly/recheck.go b/cmd/gadfly/recheck.go index 0255744..d23502c 100644 --- a/cmd/gadfly/recheck.go +++ b/cmd/gadfly/recheck.go @@ -49,6 +49,11 @@ Output rules: - Do NOT invent new findings; this is a verification gate, not a fresh review. - Do NOT include meta-commentary about the verification process or which findings you dropped — output only the final, corrected review markdown. +- The draft ends with a fenced ` + "`gadfly-findings`" + ` JSON block. Regenerate it + so it lists ONLY the findings that SURVIVED your verification, in the same schema + ({"file","line","severity","confidence","title"}; severity one of + critical/high/medium/small/trivial, confidence one of high/medium/low). If every + finding was dropped, emit an empty array ` + "`[]`" + `. Keep the block last. - When done investigating, STOP calling tools and reply with the review.` // recheckEnabled reports whether the verification pass should run. On unless diff --git a/scripts/system-prompt.txt b/scripts/system-prompt.txt index 28960c8..ff4ebe8 100644 --- a/scripts/system-prompt.txt +++ b/scripts/system-prompt.txt @@ -43,3 +43,21 @@ Output rules: - Only report issues you are reasonably confident are real after checking. If the diff is clean, say so plainly rather than inventing nits. - When you are done investigating, STOP calling tools and reply with the final review. + +Machine-readable findings — AFTER the prose review, append ONE fenced code block, +tagged `gadfly-findings`, holding a JSON array of the SAME findings you described above +(this block is consumed by tooling and hidden from the rendered comment): + +```gadfly-findings +[ + {"file": "path/to/file.go", "line": 123, "severity": "high", "confidence": "high", "title": "one-line summary of the issue"} +] +``` + +- One object per real finding, in the same order as your prose. `file`/`line` must be a + concrete location you verified (the line the issue is at). `severity` is one of + `critical`, `high`, `medium`, `small`, `trivial`. `confidence` is your post-verification + confidence the issue is real: one of `high`, `medium`, `low`. +- Include ONLY genuine problems — never verification notes ("confirmed X is safe at f:line"), + and never an "Outside my lens:" aside. If your lens is clean, emit an empty array `[]`. +- This block is in ADDITION to the prose; do not drop the human-readable findings.