package main import ( "crypto/sha256" "database/sql" "encoding/hex" "fmt" "sort" "strings" "time" _ "modernc.org/sqlite" ) // gadfly-reports stores only RAW review facts: which model reported which finding, how // long each model's review took, and a human/Claude grade (is_real + severity + // usefulness). It deliberately does NOT compute points or rankings — the // dashboard owns the scoring curve (severity -> points, value-per-minute), so it // can be retuned without re-scoring or migrating stored data. The severity // vocabulary below is the only scoring-related contract. // validSeverities is the closed set a grade may assign to a REAL finding. The // client maps these to points however it likes (e.g. trivial=1 … critical=20). var validSeverities = map[string]bool{ "trivial": true, "small": true, "medium": true, "high": true, "critical": true, } const schema = ` CREATE TABLE IF NOT EXISTS runs ( run_id TEXT PRIMARY KEY, repo TEXT NOT NULL, pr INTEGER NOT NULL, model TEXT NOT NULL, provider TEXT NOT NULL, lenses INTEGER NOT NULL DEFAULT 0, duration_secs REAL NOT NULL DEFAULT 0, input_tokens INTEGER, output_tokens INTEGER, cost_usd REAL, created_at TEXT NOT NULL ); CREATE TABLE IF NOT EXISTS findings ( id TEXT PRIMARY KEY, repo TEXT NOT NULL, pr INTEGER NOT NULL, lens TEXT NOT NULL, file TEXT, line INTEGER, title TEXT NOT NULL, first_seen TEXT NOT NULL ); CREATE TABLE IF NOT EXISTS reports ( id INTEGER PRIMARY KEY AUTOINCREMENT, finding_id TEXT NOT NULL, run_id TEXT NOT NULL, model TEXT NOT NULL, provider TEXT NOT NULL, raw_severity TEXT, detail TEXT, created_at TEXT NOT NULL, UNIQUE(finding_id, run_id) ); CREATE INDEX IF NOT EXISTS idx_reports_finding ON reports(finding_id); CREATE INDEX IF NOT EXISTS idx_reports_model ON reports(model); CREATE TABLE IF NOT EXISTS grades ( id INTEGER PRIMARY KEY AUTOINCREMENT, finding_id TEXT NOT NULL, is_real INTEGER NOT NULL, severity TEXT, usefulness INTEGER, notes TEXT, grader TEXT, created_at TEXT NOT NULL ); CREATE INDEX IF NOT EXISTS idx_grades_finding ON grades(finding_id); -- latest_grades: the most recent grade per finding (grade history is kept; the -- latest wins). Used by every read path so a re-grade supersedes the old one. CREATE VIEW IF NOT EXISTS latest_grades AS SELECT g.* FROM grades g JOIN (SELECT finding_id, MAX(id) AS max_id FROM grades GROUP BY finding_id) m ON g.id = m.max_id; ` // Store is the SQLite-backed fact store. type Store struct{ db *sql.DB } // Open opens (creating if needed) the SQLite database at path and applies the // schema. WAL + a busy timeout keep the single-writer daemon honest under the // occasional concurrent reader. func Open(path string) (*Store, error) { db, err := sql.Open("sqlite", "file:"+path+"?_pragma=busy_timeout(5000)&_pragma=journal_mode(WAL)&_pragma=foreign_keys(on)") if err != nil { return nil, fmt.Errorf("open %s: %w", path, err) } // modernc's pure-Go driver is happiest with a single writer connection. db.SetMaxOpenConns(1) if _, err := db.Exec(schema); err != nil { db.Close() return nil, fmt.Errorf("migrate: %w", err) } return &Store{db: db}, nil } func (s *Store) Close() error { return s.db.Close() } func now() string { return time.Now().UTC().Format(time.RFC3339) } // findingID content-addresses a finding by location, NOT by wording, so the same // issue raised by different models (or re-raised on a re-review) collapses to one // finding with many reports — that collapse is what makes cross-model consensus // and per-model precision measurable. Title is intentionally excluded. func findingID(repo string, pr int, lens, file string, line int) string { key := fmt.Sprintf("%s|%d|%s|%s|%d", strings.TrimSpace(repo), pr, strings.ToLower(strings.TrimSpace(lens)), strings.TrimSpace(file), line) sum := sha256.Sum256([]byte(key)) return hex.EncodeToString(sum[:])[:16] } // Run is one model's review of one PR — the unit run.sh times. type Run struct { RunID string `json:"run_id"` Repo string `json:"repo"` PR int `json:"pr"` Model string `json:"model"` Provider string `json:"provider"` Lenses int `json:"lenses"` DurationSecs float64 `json:"duration_secs"` InputTokens *int64 `json:"input_tokens,omitempty"` OutputTokens *int64 `json:"output_tokens,omitempty"` CostUSD *float64 `json:"cost_usd,omitempty"` CreatedAt string `json:"created_at,omitempty"` // set on read by ListRuns; ignored by AddRun } // AddRun upserts a run by run_id (a re-posted run overwrites timing/tokens). func (s *Store) AddRun(r Run) error { if strings.TrimSpace(r.RunID) == "" || strings.TrimSpace(r.Model) == "" { return fmt.Errorf("run_id and model are required") } _, err := s.db.Exec(` INSERT INTO runs (run_id, repo, pr, model, provider, lenses, duration_secs, input_tokens, output_tokens, cost_usd, created_at) VALUES (?,?,?,?,?,?,?,?,?,?,?) ON CONFLICT(run_id) DO UPDATE SET repo=excluded.repo, pr=excluded.pr, model=excluded.model, provider=excluded.provider, lenses=excluded.lenses, duration_secs=excluded.duration_secs, input_tokens=excluded.input_tokens, output_tokens=excluded.output_tokens, cost_usd=excluded.cost_usd`, r.RunID, r.Repo, r.PR, r.Model, r.Provider, r.Lenses, r.DurationSecs, r.InputTokens, r.OutputTokens, r.CostUSD, now()) return err } // ListRuns returns every run (oldest first), including runs that produced no // findings — so a dashboard can charge a model for all the time it spent, not // just the runs that surfaced something. Read-only. func (s *Store) ListRuns() ([]Run, error) { rows, err := s.db.Query(` SELECT run_id, repo, pr, model, provider, lenses, duration_secs, input_tokens, output_tokens, cost_usd, created_at FROM runs ORDER BY created_at, run_id`) if err != nil { return nil, err } defer rows.Close() var out []Run for rows.Next() { var r Run var in, outTok sql.NullInt64 var cost sql.NullFloat64 if err := rows.Scan(&r.RunID, &r.Repo, &r.PR, &r.Model, &r.Provider, &r.Lenses, &r.DurationSecs, &in, &outTok, &cost, &r.CreatedAt); err != nil { return nil, err } if in.Valid { v := in.Int64 r.InputTokens = &v } if outTok.Valid { v := outTok.Int64 r.OutputTokens = &v } if cost.Valid { v := cost.Float64 r.CostUSD = &v } out = append(out, r) } return out, rows.Err() } // ReportIn is one finding as a single model reported it. type ReportIn struct { Repo string `json:"repo"` PR int `json:"pr"` Lens string `json:"lens"` File string `json:"file"` Line int `json:"line"` Title string `json:"title"` Model string `json:"model"` Provider string `json:"provider"` RunID string `json:"run_id"` RawSeverity string `json:"raw_severity"` Detail string `json:"detail"` } // AddReports records a batch of findings: each upserts its (content-addressed) // finding row and adds this model's report of it. Returns the finding id per // input (same order). A model re-reporting the same finding in the same run is a // no-op (UNIQUE finding_id,run_id). func (s *Store) AddReports(in []ReportIn) ([]string, error) { tx, err := s.db.Begin() if err != nil { return nil, err } defer tx.Rollback() ts := now() ids := make([]string, len(in)) for i, r := range in { if strings.TrimSpace(r.Title) == "" || strings.TrimSpace(r.Lens) == "" { return nil, fmt.Errorf("report %d: lens and title are required", i) } id := findingID(r.Repo, r.PR, r.Lens, r.File, r.Line) ids[i] = id if _, err := tx.Exec(` INSERT INTO findings (id, repo, pr, lens, file, line, title, first_seen) VALUES (?,?,?,?,?,?,?,?) ON CONFLICT(id) DO NOTHING`, id, r.Repo, r.PR, strings.ToLower(strings.TrimSpace(r.Lens)), r.File, r.Line, r.Title, ts); err != nil { return nil, err } if _, err := tx.Exec(` INSERT INTO reports (finding_id, run_id, model, provider, raw_severity, detail, created_at) VALUES (?,?,?,?,?,?,?) ON CONFLICT(finding_id, run_id) DO NOTHING`, id, r.RunID, r.Model, r.Provider, r.RawSeverity, r.Detail, ts); err != nil { return nil, err } } return ids, tx.Commit() } // Grade is a triage verdict on a finding. Severity is required when is_real and // must be one of validSeverities; it is cleared when !is_real. No points here — // the client maps severity -> points. type Grade struct { FindingID string `json:"finding_id"` IsReal bool `json:"is_real"` Severity string `json:"severity,omitempty"` Usefulness *int `json:"usefulness,omitempty"` Notes string `json:"notes,omitempty"` Grader string `json:"grader,omitempty"` } // AddGrade appends a grade (history is kept; latest wins). func (s *Store) AddGrade(g Grade) error { if strings.TrimSpace(g.FindingID) == "" { return fmt.Errorf("finding_id is required") } var exists bool if err := s.db.QueryRow(`SELECT EXISTS(SELECT 1 FROM findings WHERE id=?)`, g.FindingID).Scan(&exists); err != nil { return err } if !exists { return fmt.Errorf("unknown finding_id %q", g.FindingID) } sev := strings.ToLower(strings.TrimSpace(g.Severity)) if g.IsReal { if !validSeverities[sev] { return fmt.Errorf("severity %q invalid for a real finding (want one of: %s)", g.Severity, strings.Join(sortedSeverities(), ", ")) } } else { sev = "" // a false positive carries no severity } if g.Usefulness != nil && (*g.Usefulness < 1 || *g.Usefulness > 5) { return fmt.Errorf("usefulness must be 1..5, got %d", *g.Usefulness) } _, err := s.db.Exec(` INSERT INTO grades (finding_id, is_real, severity, usefulness, notes, grader, created_at) VALUES (?,?,?,?,?,?,?)`, g.FindingID, g.IsReal, nullStr(sev), g.Usefulness, nullStr(g.Notes), nullStr(g.Grader), now()) return err } // ExportRow is one report joined with its finding, run timing, and latest grade // — the flat shape a dashboard consumes. Grade fields are nil/empty until graded. type ExportRow struct { FindingID string `json:"finding_id"` Repo string `json:"repo"` PR int `json:"pr"` Lens string `json:"lens"` File string `json:"file,omitempty"` Line int `json:"line,omitempty"` Title string `json:"title"` Model string `json:"model"` Provider string `json:"provider,omitempty"` RunID string `json:"run_id"` RawSeverity string `json:"raw_severity,omitempty"` ReportedAt string `json:"reported_at"` DurationSecs float64 `json:"duration_secs"` InputTokens *int64 `json:"input_tokens,omitempty"` OutputTokens *int64 `json:"output_tokens,omitempty"` Graded bool `json:"graded"` IsReal *bool `json:"is_real,omitempty"` Severity string `json:"severity,omitempty"` Usefulness *int `json:"usefulness,omitempty"` Notes string `json:"notes,omitempty"` Grader string `json:"grader,omitempty"` GradedAt string `json:"graded_at,omitempty"` } // Export returns every report joined with finding, run timing, and latest grade, // oldest first. The dashboard does all weighting from these raw rows. func (s *Store) Export() ([]ExportRow, error) { rows, err := s.db.Query(` SELECT r.finding_id, f.repo, f.pr, f.lens, f.file, f.line, f.title, r.model, r.provider, r.run_id, r.raw_severity, r.created_at, COALESCE(ru.duration_secs, 0), ru.input_tokens, ru.output_tokens, lg.is_real, lg.severity, lg.usefulness, lg.notes, lg.grader, lg.created_at FROM reports r JOIN findings f ON f.id = r.finding_id LEFT JOIN runs ru ON ru.run_id = r.run_id LEFT JOIN latest_grades lg ON lg.finding_id = r.finding_id ORDER BY r.created_at, r.id`) if err != nil { return nil, err } defer rows.Close() var out []ExportRow for rows.Next() { var e ExportRow var file, rawSev, sev, notes, grader, gradedAt sql.NullString var line sql.NullInt64 var isReal sql.NullBool var useful sql.NullInt64 if err := rows.Scan(&e.FindingID, &e.Repo, &e.PR, &e.Lens, &file, &line, &e.Title, &e.Model, &e.Provider, &e.RunID, &rawSev, &e.ReportedAt, &e.DurationSecs, &e.InputTokens, &e.OutputTokens, &isReal, &sev, &useful, ¬es, &grader, &gradedAt); err != nil { return nil, err } e.File, e.Line = file.String, int(line.Int64) e.RawSeverity = rawSev.String if isReal.Valid { e.Graded = true v := isReal.Bool e.IsReal = &v e.Severity, e.Notes, e.Grader, e.GradedAt = sev.String, notes.String, grader.String, gradedAt.String if useful.Valid { u := int(useful.Int64) e.Usefulness = &u } } out = append(out, e) } return out, rows.Err() } // ModelStat is the per-model rollup the scoreboard returns. It is intentionally // POINTS-FREE: raw minutes/tokens and a confirmed-by-severity histogram, so the // client applies its own weights for points and value-per-minute/token. type ModelStat struct { Model string `json:"model"` Provider string `json:"provider,omitempty"` Runs int `json:"runs"` Minutes float64 `json:"minutes"` InputTokens int64 `json:"input_tokens"` OutputTokens int64 `json:"output_tokens"` Findings int `json:"findings"` Confirmed int `json:"confirmed"` FalsePositive int `json:"false_positive"` Ungraded int `json:"ungraded"` BySeverity map[string]int `json:"by_severity"` // confirmed findings per severity } // Scoreboard rolls runs + reports + latest grades up per model. All counts of // findings are DISTINCT by finding (a model re-reporting across runs counts once). func (s *Store) Scoreboard() ([]ModelStat, error) { stats := map[string]*ModelStat{} get := func(model, provider string) *ModelStat { m, ok := stats[model] if !ok { m = &ModelStat{Model: model, Provider: provider, BySeverity: map[string]int{}} stats[model] = m } return m } // Runs: minutes + tokens + run counts. rrows, err := s.db.Query(` SELECT model, provider, COUNT(*), COALESCE(SUM(duration_secs),0), COALESCE(SUM(input_tokens),0), COALESCE(SUM(output_tokens),0) FROM runs GROUP BY model, provider`) if err != nil { return nil, err } for rrows.Next() { var model, provider string var runs int var dur float64 var in, out int64 if err := rrows.Scan(&model, &provider, &runs, &dur, &in, &out); err != nil { rrows.Close() return nil, err } m := get(model, provider) m.Runs += runs m.Minutes += dur / 60 m.InputTokens += in m.OutputTokens += out } rrows.Close() // Findings: distinct per model, split by latest-grade state. frows, err := s.db.Query(` SELECT r.model, COUNT(DISTINCT r.finding_id), COUNT(DISTINCT CASE WHEN lg.is_real=1 THEN r.finding_id END), COUNT(DISTINCT CASE WHEN lg.is_real=0 THEN r.finding_id END), COUNT(DISTINCT CASE WHEN lg.is_real IS NULL THEN r.finding_id END) FROM reports r LEFT JOIN latest_grades lg ON lg.finding_id = r.finding_id GROUP BY r.model`) if err != nil { return nil, err } for frows.Next() { var model string var total, confirmed, fp, ungraded int if err := frows.Scan(&model, &total, &confirmed, &fp, &ungraded); err != nil { frows.Close() return nil, err } m := get(model, "") m.Findings, m.Confirmed, m.FalsePositive, m.Ungraded = total, confirmed, fp, ungraded } frows.Close() // Confirmed-by-severity histogram (distinct findings). srows, err := s.db.Query(` SELECT r.model, lg.severity, COUNT(DISTINCT r.finding_id) FROM reports r JOIN latest_grades lg ON lg.finding_id = r.finding_id WHERE lg.is_real=1 AND lg.severity IS NOT NULL GROUP BY r.model, lg.severity`) if err != nil { return nil, err } for srows.Next() { var model, sev string var n int if err := srows.Scan(&model, &sev, &n); err != nil { srows.Close() return nil, err } get(model, "").BySeverity[sev] = n } srows.Close() out := make([]ModelStat, 0, len(stats)) for _, m := range stats { out = append(out, *m) } sort.Slice(out, func(i, j int) bool { return out[i].Model < out[j].Model }) return out, nil } func sortedSeverities() []string { out := make([]string, 0, len(validSeverities)) for s := range validSeverities { out = append(out, s) } sort.Strings(out) return out } func nullStr(s string) any { if s == "" { return nil } return s }