diff --git a/.gitea/workflows/review-reusable.yml b/.gitea/workflows/review-reusable.yml index 0a18ff7..8343fba 100644 --- a/.gitea/workflows/review-reusable.yml +++ b/.gitea/workflows/review-reusable.yml @@ -61,6 +61,7 @@ on: allowed_users: { type: string, default: "" } # GADFLY_ALLOWED_USERS (consumer-specific; set in your stub) trigger_phrase: { type: string, default: "" } # GADFLY_TRIGGER_PHRASE consolidate: { type: string, default: "" } # GADFLY_CONSOLIDATE — "" => auto (one consensus comment for >=2 models); "0" => one comment per model + inline_review: { type: string, default: "" } # GADFLY_INLINE_REVIEW — "" => on (post a COMMENT-state PR review with inline comments on changed lines); "0" => off # Job wall-clock cap. 90 as a default: the 5-lens suite across a slow lane # (claude-code with extended thinking) over two passes can run long. timeout_minutes: { type: number, default: 90 } @@ -145,3 +146,4 @@ jobs: GADFLY_ALLOWED_USERS: ${{ inputs.allowed_users }} GADFLY_TRIGGER_PHRASE: ${{ inputs.trigger_phrase }} GADFLY_CONSOLIDATE: ${{ inputs.consolidate }} + GADFLY_INLINE_REVIEW: ${{ inputs.inline_review }} diff --git a/README.md b/README.md index 59ba535..dedc675 100644 --- a/README.md +++ b/README.md @@ -306,6 +306,14 @@ back to posting the per-model comments. Controlled by `GADFLY_CONSOLIDATE`: `aut for ≥2 models), `1` (force on), `0` (force off, one comment per model). Single-model runs are unaffected. +**Inline PR review.** Alongside the consensus comment, Gadfly also posts a single Gitea **pull +review** (state `COMMENT` — advisory, **never** request-changes or approve, so it can't block a +merge) whose inline comments anchor each consensus finding to the exact changed line it's about. +Only findings that land on a line in the diff are anchored (Gitea rejects comments off the diff); +the rest stay in the consensus comment. A re-run replaces the previous review instead of stacking. +It's the "reviewer integrated with Gitea" without the blocking — turn it off with +`GADFLY_INLINE_REVIEW=0`. + ### Triggers 1. A **new/reopened/ready** non-draft PR — automatic. @@ -392,6 +400,7 @@ The reviewer binary reads these (the stub/entrypoint set sane defaults): | `GADFLY_STATUS_BOARD` | on | set `0` to disable the live status-board comment | | `GADFLY_STATUS_POLL_SECS` | 12 | how often the status board re-renders/upserts | | `GADFLY_CONSOLIDATE` | `auto` | cross-model consensus comment: `auto` (on for ≥2 models), `1` (force on), `0` (off — one comment per model) | +| `GADFLY_INLINE_REVIEW` | on | when consolidating, also post a `COMMENT`-state PR review with inline comments on changed lines; `0` disables | | `GADFLY_TRIGGER_PHRASE` | `@gadfly review` | comment phrase that re-triggers | | `GADFLY_ALLOWED_USERS` | *(collaborators)* | comma-separated allow-list for comment triggers | | `GADFLY_FINDINGS_URL` | — | gadfly-reports store base URL; set to enable findings telemetry (off when empty) | diff --git a/cmd/gadfly/consensus.go b/cmd/gadfly/consensus.go index be04092..cb0bca7 100644 --- a/cmd/gadfly/consensus.go +++ b/cmd/gadfly/consensus.go @@ -170,10 +170,18 @@ func runConsolidate() error { if len(models) == 0 { return errors.New("no model findings to consolidate") } + // Cluster once, then render the consensus comment and (best-effort) the inline + // PR review from the same clusters so the two views can't drift. + clusters := clusterFindings(models) + // Lead with the marker so entrypoint.sh can upsert this comment in place // (same pattern as run.sh's per-model marker); it appends the advisory footer. fmt.Println(consensusMarker) - fmt.Println(renderConsensus(models)) + fmt.Println(renderConsensus(models, clusters)) + + // Inline PR review (COMMENT state) anchoring findings to changed lines. + // Best-effort: no-op without diff/API creds, never affects stdout/exit. + postInlineReview(clusters) return nil } @@ -187,6 +195,7 @@ type cluster struct { maxLine int // largest line in the cluster — the span's upper edge severity string title string + detail string // highest-severity report's detail; rendered in the inline review comment models map[string]bool lenses map[string]bool } @@ -206,7 +215,7 @@ const lineTolerance = 3 // renderConsensus builds the single consolidated comment body from every model's // findings. It does NOT emit the marker or advisory footer — entrypoint.sh wraps // it (mirroring run.sh's per-model framing). -func renderConsensus(models []modelFindings) string { +func renderConsensus(models []modelFindings, clusters []cluster) string { // effective = models that actually produced a review. Errored models are // shown (folded, below) but excluded from the agreement denominator so a // failed model doesn't dilute every ratio. @@ -217,7 +226,6 @@ func renderConsensus(models []modelFindings) string { } } errored := len(models) - effective - clusters := clusterFindings(models) worst := verdictClean for _, m := range models { @@ -333,6 +341,7 @@ func clusterFindings(models []modelFindings) []cluster { maxLine: it.f.Line, severity: it.f.Severity, title: it.f.Title, + detail: it.f.Detail, models: map[string]bool{}, lenses: map[string]bool{}, } @@ -378,6 +387,9 @@ func mergeIntoCluster(c *cluster, f outFinding, model string) { if strings.TrimSpace(f.Title) != "" { c.title = f.Title } + if strings.TrimSpace(f.Detail) != "" { + c.detail = f.Detail + } } } diff --git a/cmd/gadfly/consensus_test.go b/cmd/gadfly/consensus_test.go index 595d54b..81b5a4a 100644 --- a/cmd/gadfly/consensus_test.go +++ b/cmd/gadfly/consensus_test.go @@ -52,7 +52,7 @@ func TestRenderConsensusFoldsSingleModelNits(t *testing.T) { {Lens: "security", File: "a.go", Line: 10, Severity: "high", Title: "auth bypass"}, }}, } - out := renderConsensus(models) + out := renderConsensus(models, clusterFindings(models)) // Headline table: the agreed finding with a 2/2 badge. if !strings.Contains(out, "2/2") { t.Errorf("expected a 2/2 agreement badge in headline:\n%s", out) @@ -78,7 +78,7 @@ func TestRenderConsensusHighSeverityLoneFindingStaysHeadline(t *testing.T) { {Lens: "security", File: "a.go", Line: 1, Severity: "critical", Title: "rce"}, }}, } - out := renderConsensus(models) + out := renderConsensus(models, clusterFindings(models)) headline := out if i := strings.Index(out, "single-model finding"); i >= 0 { headline = out[:i] @@ -113,7 +113,7 @@ func TestRenderConsensusExcludesErroredFromDenominator(t *testing.T) { {Lens: "security", File: "a.go", Line: 9, Severity: "medium", Title: "leak"}}}, {Model: "broken", Verdict: "reviewer failed", Errored: true, Markdown: "boom"}, } - out := renderConsensus(models) + out := renderConsensus(models, clusterFindings(models)) // Denominator is the 2 effective models, not 3; the failure is noted. if !strings.Contains(out, "2/2") { t.Errorf("errored model must be excluded from the denominator (want 2/2):\n%s", out) @@ -133,7 +133,7 @@ func TestRenderConsensusLoneHighFolds(t *testing.T) { {Model: "solo", Verdict: "Blocking issues found", Markdown: "x", Findings: []outFinding{ {Lens: "security", File: "a.go", Line: 1, Severity: "high", Title: "maybe-bug"}}}, } - out := renderConsensus(models) + out := renderConsensus(models, clusterFindings(models)) head := out if i := strings.Index(out, "single-model finding"); i >= 0 { head = out[:i] diff --git a/cmd/gadfly/review.go b/cmd/gadfly/review.go new file mode 100644 index 0000000..588ecfa --- /dev/null +++ b/cmd/gadfly/review.go @@ -0,0 +1,252 @@ +package main + +// Inline PR review. After the consensus comment is rendered, Gadfly also posts a +// single Gitea pull review (state COMMENT — advisory, NEVER request-changes or +// approve) whose inline comments anchor consensus findings to the exact changed +// lines. The issue comment stays the ranked overview; the review puts each +// finding next to the code it's about — the "reviewer integrated with Gitea" the +// project wanted, without ever blocking a merge. +// +// All of this is best-effort: disabled by GADFLY_INLINE_REVIEW=0 or when the diff +// / API creds aren't available, only anchors findings that land on a line present +// in the diff (Gitea rejects comments off the diff), and any error is logged to +// stderr without touching the consensus comment (already on stdout) or the exit +// code. + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "net/http" + "os" + "regexp" + "strconv" + "strings" + "time" +) + +const ( + // inlineReviewMarker tags our review body so a re-run can delete the previous + // one instead of stacking duplicate inline comments. + inlineReviewMarker = "" + // maxInlineComments caps how many inline comments one review carries, so a + // huge diff can't produce a wall of annotations. Clusters are pre-sorted by + // agreement×severity, so the cap keeps the most important. + maxInlineComments = 25 + inlineReviewHTTPTime = 20 * time.Second +) + +// reviewComment is one inline comment in a Gitea pull review. Field names match +// the Gitea API EXACTLY (new_position = line in the new/head file). +type reviewComment struct { + Path string `json:"path"` + Body string `json:"body"` + NewPosition int `json:"new_position"` +} + +// createReview is the POST /pulls/{n}/reviews body. event is ALWAYS "COMMENT". +type createReview struct { + Body string `json:"body"` + Event string `json:"event"` + Comments []reviewComment `json:"comments"` +} + +// postInlineReview posts one COMMENT-state pull review with inline comments for +// consensus findings on changed lines. No-op + best-effort (see file comment). +func postInlineReview(clusters []cluster) { + if strings.EqualFold(strings.TrimSpace(os.Getenv("GADFLY_INLINE_REVIEW")), "0") { + return + } + api := strings.TrimRight(strings.TrimSpace(os.Getenv("GITEA_API")), "/") + token := strings.TrimSpace(os.Getenv("GITEA_TOKEN")) + pr := strings.TrimSpace(os.Getenv("GADFLY_PR")) + diffPath := strings.TrimSpace(os.Getenv("GADFLY_DIFF_FILE")) + if api == "" || token == "" || pr == "" || diffPath == "" { + return + } + diff, err := os.ReadFile(diffPath) + if err != nil { + fmt.Fprintln(os.Stderr, "gadfly: inline review: read diff:", err) + return + } + + comments := inlineComments(clusters, parseDiffNewLines(string(diff))) + if len(comments) == 0 { + return // nothing anchors to a changed line; the consensus comment covers it + } + + client := &http.Client{Timeout: inlineReviewHTTPTime} + base := fmt.Sprintf("%s/pulls/%s/reviews", api, pr) + deletePriorReviews(client, base, token) // avoid stacking on re-runs + + body := fmt.Sprintf("%s\n🪰 **Gadfly consensus review** — %d inline finding%s on changed lines. See the consensus comment for the full ranked summary.\n\nAdvisory only — does not block merge.", + inlineReviewMarker, len(comments), plural(len(comments))) + if err := giteaSend(client, http.MethodPost, base, token, createReview{Body: body, Event: "COMMENT", Comments: comments}); err != nil { + fmt.Fprintln(os.Stderr, "gadfly: inline review post:", err) + } +} + +// inlineComments builds inline comments for the clusters that anchor to a line +// present in the diff, in priority order (clusters are pre-sorted), capped. +func inlineComments(clusters []cluster, addable map[string]map[int]bool) []reviewComment { + var out []reviewComment + for _, c := range clusters { + lines := addable[c.file] + if lines == nil || c.line <= 0 || !lines[c.line] { + continue + } + out = append(out, reviewComment{Path: c.file, NewPosition: c.line, Body: inlineBody(c)}) + if len(out) >= maxInlineComments { + break + } + } + return out +} + +// inlineBody renders one inline comment: severity + title, who flagged it, detail. +func inlineBody(c cluster) string { + var b strings.Builder + fmt.Fprintf(&b, "%s **%s**", sevIcon(c.severity), strings.TrimSpace(c.title)) + fmt.Fprintf(&b, "\n\n_%s · flagged by %d model%s_", lensList(c.lenses), len(c.models), plural(len(c.models))) + if d := strings.TrimSpace(c.detail); d != "" { + fmt.Fprintf(&b, "\n\n%s", d) + } + b.WriteString("\n\n🪰 Gadfly · advisory") + return b.String() +} + +// parseDiffNewLines returns, per file, the set of NEW-file line numbers that are +// present in the unified diff (added '+' or context ' ' lines) — the lines an +// inline comment can anchor to. Hunk lengths from the @@ header bound each hunk, +// so a content line that happens to start with "+++ " or "@@" is still read as +// content, not mistaken for a header. +func parseDiffNewLines(diff string) map[string]map[int]bool { + out := map[string]map[int]bool{} + var file string + var newLine, oldRem, newRem int + inHunk := false + for _, line := range strings.Split(diff, "\n") { + if inHunk && (newRem > 0 || oldRem > 0) { + switch { + case strings.HasPrefix(line, "+"): + record(out, file, newLine) + newLine++ + newRem-- + case strings.HasPrefix(line, "-"): + oldRem-- + case strings.HasPrefix(line, "\\"): // "\ No newline at end of file" + default: // context line (leading space, or an empty line) + record(out, file, newLine) + newLine++ + newRem-- + oldRem-- + } + if newRem <= 0 && oldRem <= 0 { + inHunk = false + } + continue + } + switch { + case strings.HasPrefix(line, "+++ "): + p := strings.TrimPrefix(strings.TrimPrefix(line, "+++ "), "b/") + file = strings.TrimSpace(p) + if file == "/dev/null" { + file = "" + } + case strings.HasPrefix(line, "@@"): + if m := hunkRe.FindStringSubmatch(line); m != nil && file != "" { + newLine, _ = strconv.Atoi(m[3]) + oldRem = atoiOr(m[2], 1) + newRem = atoiOr(m[4], 1) + inHunk = newRem > 0 || oldRem > 0 + } + } + } + return out +} + +// hunkRe captures a unified-diff hunk header's old/new start+length: +// @@ -[,] +[,] @@ +var hunkRe = regexp.MustCompile(`^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@`) + +func record(out map[string]map[int]bool, file string, line int) { + if file == "" { + return + } + if out[file] == nil { + out[file] = map[int]bool{} + } + out[file][line] = true +} + +// atoiOr parses s, returning def when s is empty or unparseable. Used for the +// optional hunk-length fields (absent => length 1). +func atoiOr(s string, def int) int { + if s == "" { + return def + } + if n, err := strconv.Atoi(s); err == nil { + return n + } + return def +} + +// deletePriorReviews removes our previous inline reviews (matched by the body +// marker) so a re-run replaces rather than stacks. Best-effort and quiet. +func deletePriorReviews(client *http.Client, base, token string) { + req, err := http.NewRequest(http.MethodGet, base+"?limit=50", nil) + if err != nil { + return + } + req.Header.Set("Authorization", "token "+token) + resp, err := client.Do(req) + if err != nil { + return + } + var reviews []struct { + ID int `json:"id"` + Body string `json:"body"` + } + _ = json.NewDecoder(resp.Body).Decode(&reviews) + resp.Body.Close() + for _, r := range reviews { + if !strings.Contains(r.Body, inlineReviewMarker) { + continue + } + dreq, err := http.NewRequest(http.MethodDelete, fmt.Sprintf("%s/%d", base, r.ID), nil) + if err != nil { + continue + } + dreq.Header.Set("Authorization", "token "+token) + if dresp, err := client.Do(dreq); err == nil { + io.Copy(io.Discard, dresp.Body) + dresp.Body.Close() + } + } +} + +// giteaSend marshals payload and sends it with Gitea's "token" auth scheme, +// treating a non-2xx response as an error (with a snippet of the body). +func giteaSend(client *http.Client, method, url, token string, payload any) error { + body, err := json.Marshal(payload) + if err != nil { + return err + } + req, err := http.NewRequest(method, url, bytes.NewReader(body)) + if err != nil { + return err + } + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Authorization", "token "+token) + resp, err := client.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + snippet, _ := io.ReadAll(io.LimitReader(resp.Body, 2048)) + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + return fmt.Errorf("%s %s: status %d: %s", method, url, resp.StatusCode, strings.TrimSpace(string(snippet))) + } + return nil +} diff --git a/cmd/gadfly/review_test.go b/cmd/gadfly/review_test.go new file mode 100644 index 0000000..3f51b44 --- /dev/null +++ b/cmd/gadfly/review_test.go @@ -0,0 +1,88 @@ +package main + +import "testing" + +func TestParseDiffNewLines(t *testing.T) { + diff := "diff --git a/a.go b/a.go\n" + + "index 111..222 100644\n" + + "--- a/a.go\n" + + "+++ b/a.go\n" + + "@@ -1,3 +1,4 @@\n" + + " line1\n" + + "-old2\n" + + "+new2\n" + + "+new3\n" + + " line4\n" + got := parseDiffNewLines(diff) + want := map[int]bool{1: true, 2: true, 3: true, 4: true} // context+added new-file lines + if len(got["a.go"]) != len(want) { + t.Fatalf("a.go lines = %v, want %v", got["a.go"], want) + } + for ln := range want { + if !got["a.go"][ln] { + t.Errorf("expected a.go line %d anchorable", ln) + } + } + // The deleted line's old number must NOT be an anchor target beyond the new set. + if got["a.go"][5] { + t.Error("line 5 should not be anchorable") + } +} + +func TestParseDiffNewLinesContentLooksLikeHeader(t *testing.T) { + // An added line whose CONTENT is "++ weird" appears as "+++ weird" in the + // diff. Hunk-length tracking must read it as content, not a file header. + diff := "--- a/x\n+++ b/x\n@@ -1,1 +1,2 @@\n ctx\n+++ weird\n" + got := parseDiffNewLines(diff) + if !got["x"][1] || !got["x"][2] { + t.Errorf("want lines 1 (ctx) and 2 (added) anchorable, got %v", got["x"]) + } +} + +func TestParseDiffNewLinesMultiFile(t *testing.T) { + diff := "diff --git a/one.go b/one.go\n--- a/one.go\n+++ b/one.go\n@@ -0,0 +1,2 @@\n+a\n+b\n" + + "diff --git a/two.go b/two.go\n--- a/two.go\n+++ b/two.go\n@@ -5,0 +6,1 @@\n+c\n" + got := parseDiffNewLines(diff) + if !got["one.go"][1] || !got["one.go"][2] { + t.Errorf("one.go: want 1,2 got %v", got["one.go"]) + } + if !got["two.go"][6] { + t.Errorf("two.go: want 6 got %v", got["two.go"]) + } +} + +func TestInlineCommentsFiltersToDiffLines(t *testing.T) { + addable := map[string]map[int]bool{"a.go": {10: true, 11: true}} + clusters := []cluster{ + {file: "a.go", line: 10, severity: "high", title: "anchored", models: set("m1", "m2"), lenses: set("security"), detail: "d"}, + {file: "a.go", line: 99, severity: "high", title: "off-diff line", models: set("m1"), lenses: set("security")}, + {file: "b.go", line: 1, severity: "high", title: "off-diff file", models: set("m1"), lenses: set("security")}, + } + cs := inlineComments(clusters, addable) + if len(cs) != 1 { + t.Fatalf("want 1 anchorable inline comment, got %d", len(cs)) + } + if cs[0].Path != "a.go" || cs[0].NewPosition != 10 { + t.Errorf("wrong anchor: %+v", cs[0]) + } + if !containsStr(cs[0].Body, "anchored") || !containsStr(cs[0].Body, "2 model") { + t.Errorf("inline body missing title/agreement: %q", cs[0].Body) + } +} + +func set(xs ...string) map[string]bool { + m := map[string]bool{} + for _, x := range xs { + m[x] = true + } + return m +} + +func containsStr(s, sub string) bool { + for i := 0; i+len(sub) <= len(s); i++ { + if s[i:i+len(sub)] == sub { + return true + } + } + return false +} diff --git a/entrypoint.sh b/entrypoint.sh index 46d4e76..034f38f 100644 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -47,6 +47,8 @@ # GADFLY_FINDINGS_TOKEN optional bearer token for the gadfly-reports store # GADFLY_CONSOLIDATE cross-model consensus comment: "auto" (default; on for >=2 # models), "1" force on, "0" force off (one comment per model) +# GADFLY_INLINE_REVIEW when consolidating, also post a COMMENT-state PR review with +# inline comments on changed lines (default on; "0" disables) set -uo pipefail # One model by default: the specialist suite already provides breadth, so a @@ -320,7 +322,14 @@ fi if [ "$CONSOLIDATE" = "1" ]; then n_files="$(ls -1 "${FINDINGS_DIR}"/*.json 2>/dev/null | wc -l | tr -d '[:space:]')" log "consolidating findings from ${n_files} model(s)" - CONSENSUS="$(GADFLY_CONSOLIDATE_DIR="$FINDINGS_DIR" /usr/local/bin/gadfly 2>"${WORKDIR}/consolidate.err" || true)" + # Fetch the PR diff so the consolidator can also post an inline PR review, + # anchoring findings to changed lines (GADFLY_DIFF_FILE). GITEA_API/GITEA_TOKEN/ + # GADFLY_PR are already in the binary's environment. Best-effort: an empty diff + # just means no inline review. + DIFF_FILE="${WORKDIR}/pr.diff" + API "${GITEA_API}/pulls/${PR}.diff" > "$DIFF_FILE" 2>/dev/null || true + CONSENSUS="$(GADFLY_CONSOLIDATE_DIR="$FINDINGS_DIR" GADFLY_DIFF_FILE="$DIFF_FILE" \ + /usr/local/bin/gadfly 2>"${WORKDIR}/consolidate.err" || true)" if [ -n "$CONSENSUS" ]; then BODY="$(printf '%s\n\nAutomated adversarial review by Gadfly — consensus across the model swarm. Advisory only — does not block merge.' "$CONSENSUS")" upsert_comment_body "" "$BODY"