gadfly/cmd/gadfly/emit.go

package main

// Optional findings telemetry. After a review, Gadfly can POST the run and the
// findings it surfaced to a "gadfly-reports" store (gitea.stevedudenhoeffer.com-style
// HTTP API) so model quality can be tracked over time and graded later.
//
// This is strictly ADVISORY plumbing and must never affect the review itself:
//   - It is OFF unless GADFLY_FINDINGS_URL is set.
//   - It writes ONLY to stderr on any error; it never touches stdout (stdout is
//     the review markdown consumed by run.sh) and never changes the exit code.
//   - It depends only on the Go stdlib (net/http).
//
// Findings are extracted heuristically from each lens's markdown: a "path:line"
// reference (e.g. run/executor.go:166) anchors a finding, whose title is the
// nearest preceding markdown heading / numbered item / bold lead-in (else the
// first sentence of the finding's paragraph). This is best-effort signal for the
// store to aggregate — not a structured contract the reviewer guarantees.

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"os"
	"regexp"
	"strconv"
	"strings"
	"time"
)

const (
	// findingsHTTPTimeout bounds each telemetry POST. Short on purpose — a slow
	// or dead store must never delay the review's stdout (already printed) or the
	// process exit.
	findingsHTTPTimeout = 10 * time.Second
	// maxFindingsPerLens caps how many findings a single lens contributes, so a
	// pathological lens output can't blow up the /reports payload.
	maxFindingsPerLens = 20
)

// pathLineRe matches a source-path-with-extension followed by ":<line>", e.g.
// run/executor.go:166 or compact/compactor.go:225. The path may contain
// directories, dots, dashes and underscores; backtracking lets the final ".ext"
// be split off the greedy path run. Surrounding backticks/parens are ignored
// because they are not in the character class.
var pathLineRe = regexp.MustCompile(`([A-Za-z0-9_./-]+\.[A-Za-z0-9]+):(\d+)`)

// numberedRe matches a numbered list/heading lead-in: "1. Title" or "1) Title".
var numberedRe = regexp.MustCompile(`^\d+[.)]\s+(.+)$`)

// finding is one extracted issue: where it points and a derived human title.
type finding struct {
	file   string
	line   int
	title  string
	detail string
}

// runPayload is the POST /runs body. Field names match the gadfly-reports API EXACTLY.
// Token/cost fields are pointers so they marshal to JSON null when unknown
// (Gadfly does not currently meter tokens or cost).
type runPayload struct {
	RunID        string   `json:"run_id"`
	Repo         string   `json:"repo"`
	PR           int      `json:"pr"`
	Model        string   `json:"model"`
	Provider     string   `json:"provider"`
	Lenses       int      `json:"lenses"`
	DurationSecs float64  `json:"duration_secs"`
	InputTokens  *int     `json:"input_tokens"`
	OutputTokens *int     `json:"output_tokens"`
	CostUSD      *float64 `json:"cost_usd"`
}

// reportPayload is one element of the POST /reports JSON array. Field names match
// the gadfly-reports API EXACTLY.
type reportPayload struct {
	Repo        string `json:"repo"`
	PR          int    `json:"pr"`
	Lens        string `json:"lens"`
	File        string `json:"file"`
	Line        int    `json:"line"`
	Title       string `json:"title"`
	Model       string `json:"model"`
	Provider    string `json:"provider"`
	RunID       string `json:"run_id"`
	RawSeverity string `json:"raw_severity"`
	Detail      string `json:"detail"`
}

// emit posts the run and its findings to the gadfly-reports store. It is a no-op unless
// GADFLY_FINDINGS_URL is set. Best-effort: any error is logged to stderr and
// otherwise ignored. It NEVER returns an error, never calls os.Exit, and never
// writes to stdout.
func emit(results []specialistResult, elapsed time.Duration) {
	base := strings.TrimRight(strings.TrimSpace(os.Getenv("GADFLY_FINDINGS_URL")), "/")
	if base == "" {
		return // telemetry disabled
	}

	repo := strings.TrimSpace(os.Getenv("GADFLY_REPO"))
	pr := envAtoi("GADFLY_PR")
	model := strings.TrimSpace(os.Getenv("GADFLY_MODEL"))
	provider := modelProvider()
	runID := fmt.Sprintf("%s#%d:%s", repo, pr, model)

	run := runPayload{
		RunID:        runID,
		Repo:         repo,
		PR:           pr,
		Model:        model,
		Provider:     provider,
		Lenses:       len(results),
		DurationSecs: elapsed.Seconds(),
		// InputTokens/OutputTokens/CostUSD stay nil -> JSON null (not metered).
	}

	var reports []reportPayload
	for _, r := range results {
		if r.errored {
			continue // a failed lens contributes no findings
		}
		// A lens that reports "No material issues found" has nothing to flag —
		// its path:line references are verification notes ("verified X at
		// file:line is safe"), not problems. Extracting them pollutes the
		// findings store with false positives and unfairly penalizes thorough
		// reviewers that do clean passes, so a clean lens emits no findings.
		if r.verdict == verdictClean {
			continue
		}
		sev := r.verdict.label()
		for _, f := range parseFindings(r.spec, r.out) {
			reports = append(reports, reportPayload{
				Repo:        repo,
				PR:          pr,
				Lens:        r.spec.Name,
				File:        f.file,
				Line:        f.line,
				Title:       f.title,
				Model:       model,
				Provider:    provider,
				RunID:       runID,
				RawSeverity: sev,
				Detail:      f.detail,
			})
		}
	}

	client := &http.Client{Timeout: findingsHTTPTimeout}
	token := strings.TrimSpace(os.Getenv("GADFLY_FINDINGS_TOKEN"))

	if err := postJSON(client, base+"/runs", token, run); err != nil {
		fmt.Fprintln(os.Stderr, "gadfly: findings emit (/runs) failed:", err)
	}
	if len(reports) > 0 {
		if err := postJSON(client, base+"/reports", token, reports); err != nil {
			fmt.Fprintln(os.Stderr, "gadfly: findings emit (/reports) failed:", err)
		}
	}
}

// parseFindings extracts findings from one lens's markdown output. A lens whose
// result errored (its output is the inline failure notice) yields nothing.
// Findings are anchored on "path:line" references, deduped by (file,line) keeping
// the first occurrence, and capped at maxFindingsPerLens.
func parseFindings(sp Specialist, out string) []finding {
	_ = sp // signature kept symmetric with the caller; lens identity isn't needed here
	trimmed := strings.TrimSpace(out)
	if trimmed == "" || strings.HasPrefix(trimmed, "⚠️") {
		return nil // empty or a "reviewer failed" notice
	}

	lines := strings.Split(out, "\n")
	var findings []finding
	seen := map[string]bool{}

	for _, loc := range pathLineRe.FindAllStringSubmatchIndex(out, -1) {
		file := out[loc[2]:loc[3]]
		lineStr := out[loc[4]:loc[5]]
		ln, err := strconv.Atoi(lineStr)
		if err != nil {
			continue
		}
		key := file + ":" + lineStr
		if seen[key] {
			continue
		}
		seen[key] = true

		li := strings.Count(out[:loc[0]], "\n") // line index of the match
		findings = append(findings, finding{
			file:   file,
			line:   ln,
			title:  deriveTitle(lines, li),
			detail: paragraphAt(lines, li),
		})
		if len(findings) >= maxFindingsPerLens {
			break
		}
	}
	return findings
}

// deriveTitle finds the nearest preceding markdown heading / numbered item /
// bold lead-in at or above line li; failing that, the first sentence of the
// paragraph containing li.
func deriveTitle(lines []string, li int) string {
	for i := li; i >= 0 && i < len(lines); i-- {
		if t, ok := headingTitle(lines[i]); ok {
			return t
		}
	}
	return paragraphTitle(lines, li)
}

// headingTitle returns the title carried by a markdown heading-like line:
// an ATX "#"/"##"/"###" heading, a numbered "1." lead-in, or a "**bold**"
// lead-in (optionally behind a list bullet). ok is false for ordinary lines.
func headingTitle(line string) (string, bool) {
	t := strings.TrimSpace(line)
	if t == "" {
		return "", false
	}
	if strings.HasPrefix(t, "#") {
		h := strings.TrimSpace(strings.TrimLeft(t, "#"))
		if h != "" {
			return cleanHeading(h), true
		}
		return "", false
	}
	if m := numberedRe.FindStringSubmatch(t); m != nil {
		if rest := strings.TrimSpace(m[1]); rest != "" {
			return cleanHeading(rest), true
		}
		return "", false
	}
	// Bold lead-in, possibly behind a "- "/"* "/"+ " bullet. Strip only the
	// bullet marker (NOT the bold asterisks) before testing for "**".
	b := t
	for _, pfx := range []string{"- ", "* ", "+ "} {
		if strings.HasPrefix(b, pfx) {
			b = strings.TrimSpace(b[len(pfx):])
			break
		}
	}
	if strings.HasPrefix(b, "**") {
		if title := firstBold(b); title != "" {
			return title, true
		}
	}
	return "", false
}

// cleanHeading normalizes an extracted heading into a concise title: prefer a
// leading "**bold**" span, otherwise trim a trailing colon and truncate.
func cleanHeading(s string) string {
	s = strings.TrimSpace(s)
	if strings.HasPrefix(s, "**") {
		if b := firstBold(s); b != "" {
			return b
		}
	}
	s = strings.TrimRight(s, ": ")
	return truncate(s, 120)
}

// paragraphTitle falls back to the first sentence/line of the paragraph (the
// contiguous block of non-blank lines) containing line li, stripped of markdown
// list/emphasis markers.
func paragraphTitle(lines []string, li int) string {
	if li < 0 || li >= len(lines) {
		return ""
	}
	start := li
	for start > 0 && strings.TrimSpace(lines[start-1]) != "" {
		start--
	}
	first := leadingSentence(stripMarkers(lines[start]))
	return truncate(strings.TrimSpace(first), 120)
}

// leadingSentence returns text up to the first sentence terminator that is
// followed by a space (". " / "! " / "? "), so a dot inside a filename like
// "util.go" does not prematurely cut the sentence. With no internal terminator
// it returns the line minus any trailing terminal punctuation. (auto.go's
// firstSentence splits on any ".", which would mis-cut a "path:line" sentence.)
func leadingSentence(s string) string {
	best := -1
	for _, sep := range []string{". ", "! ", "? "} {
		if i := strings.Index(s, sep); i >= 0 && (best == -1 || i < best) {
			best = i
		}
	}
	if best >= 0 {
		return s[:best]
	}
	return strings.TrimRight(s, ".!?")
}

// paragraphAt returns the paragraph (contiguous non-blank lines) containing li,
// joined onto one line and truncated — used as the finding's detail context.
func paragraphAt(lines []string, li int) string {
	if li < 0 || li >= len(lines) {
		return ""
	}
	start, end := li, li
	for start > 0 && strings.TrimSpace(lines[start-1]) != "" {
		start--
	}
	for end < len(lines)-1 && strings.TrimSpace(lines[end+1]) != "" {
		end++
	}
	var parts []string
	for i := start; i <= end; i++ {
		if t := strings.TrimSpace(lines[i]); t != "" {
			parts = append(parts, t)
		}
	}
	return truncate(strings.Join(parts, " "), 500)
}

// stripMarkers removes a leading list bullet / numbered marker and surrounding
// emphasis/backticks from a line, leaving plain text.
func stripMarkers(s string) string {
	s = strings.TrimSpace(s)
	for _, pfx := range []string{"- ", "* ", "+ "} {
		if strings.HasPrefix(s, pfx) {
			s = strings.TrimSpace(s[len(pfx):])
			break
		}
	}
	if m := numberedRe.FindStringSubmatch(s); m != nil {
		s = strings.TrimSpace(m[1])
	}
	return strings.Trim(s, "*`# ")
}

// firstBold returns the text inside the first "**...**" span of s, or "".
func firstBold(s string) string {
	i := strings.Index(s, "**")
	if i < 0 {
		return ""
	}
	rest := s[i+2:]
	j := strings.Index(rest, "**")
	if j < 0 {
		return ""
	}
	return strings.TrimSpace(rest[:j])
}

// truncate shortens s to at most n runes, appending an ellipsis when cut.
func truncate(s string, n int) string {
	r := []rune(s)
	if len(r) <= n {
		return s
	}
	return strings.TrimSpace(string(r[:n])) + "…"
}

// postJSON marshals payload and POSTs it as application/json, attaching a Bearer
// token when non-empty. A non-2xx response is an error. The body is drained and
// closed so the connection can be reused.
func postJSON(client *http.Client, url, token string, payload any) error {
	body, err := json.Marshal(payload)
	if err != nil {
		return err
	}
	req, err := http.NewRequest(http.MethodPost, url, bytes.NewReader(body))
	if err != nil {
		return err
	}
	req.Header.Set("Content-Type", "application/json")
	if token != "" {
		req.Header.Set("Authorization", "Bearer "+token)
	}
	resp, err := client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	_, _ = io.Copy(io.Discard, resp.Body)
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		return fmt.Errorf("%s: status %d", url, resp.StatusCode)
	}
	return nil
}

// envAtoi reads an integer env var, returning 0 when unset or unparseable.
func envAtoi(name string) int {
	n, _ := strconv.Atoi(strings.TrimSpace(os.Getenv(name)))
	return n
}