feat: gadfly-reports — findings store + scoreboard daemon

SQLite-backed HTTP store for Gadfly review findings, per-review run timings, and human/Claude grades, with a points-free per-model scoreboard. Pure fact store: it computes no points or rankings (the dashboard maps severity->points client-side and retunes without re-scoring). Findings are content-addressed by location so cross-model reports collapse for consensus; one grade per finding, latest wins. Pure-Go SQLite (CGO-free) + Docker image CI + tests. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-26 23:55:24 -04:00
parent 52dce5eb2f
commit ddcf42a3ce
16 changed files with 1269 additions and 27 deletions
@@ -0,0 +1,10 @@
+.git
+.gitea
+*.db
+*.db-wal
+*.db-shm
+/data
+gadfly-reports
+README.md
+CLAUDE.md
+.env*
@@ -0,0 +1,11 @@
+# === gadfly-reports daemon configuration ===
+
+# Listen address (default: :8090)
+GADFLY_REPORTS_ADDR=:8090
+
+# SQLite database path (default: gadfly-reports.db; /data/gadfly-reports.db in Docker)
+GADFLY_REPORTS_DB=/data/gadfly-reports.db
+
+# Bearer token callers must present on every route except /healthz (empty = open).
+# gadfly (emit) and gadfly-mcp must present the same token.
+GADFLY_REPORTS_TOKEN=change-me-to-a-secret
@@ -0,0 +1,69 @@
+name: Build & push image
+
+# Builds the gadfly-reports daemon image and pushes it to the Gitea container
+# registry so it's easy to self-host.
+#
+#   push to main   -> :latest + :sha-<short>
+#   push tag v*    -> :<tag> + :latest
+#
+# Required repo secrets: REGISTRY_USER / REGISTRY_PASSWORD (registry push). The
+# Go build uses only PUBLIC modules, so no private-module creds are needed.
+
+on:
+  push:
+    branches: [main]
+    tags: ["v*"]
+    paths-ignore:
+      - "**.md"
+      - "LICENSE"
+      - ".gitignore"
+      - ".env.example"
+  workflow_dispatch: {}
+
+concurrency:
+  group: gadfly-reports-image-${{ github.ref }}
+  cancel-in-progress: true
+
+env:
+  IMAGE_NAME: gitea.stevedudenhoeffer.com/steve/gadfly-reports
+
+jobs:
+  build-and-push:
+    runs-on: ubuntu-latest
+    timeout-minutes: 20
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        run: docker buildx create --use --name gr-builder --driver docker-container 2>/dev/null || docker buildx use gr-builder
+
+      - name: Log in to the registry
+        env:
+          REGISTRY_USER: ${{ secrets.REGISTRY_USER }}
+          REGISTRY_PASSWORD: ${{ secrets.REGISTRY_PASSWORD }}
+        run: echo "${REGISTRY_PASSWORD}" | docker login gitea.stevedudenhoeffer.com -u "${REGISTRY_USER}" --password-stdin
+
+      - name: Compute tags
+        id: meta
+        run: |
+          SHA_SHORT=$(echo "${GITHUB_SHA}" | cut -c1-7)
+          if [ "${{ github.ref_type }}" = "tag" ]; then
+            TAGS="${IMAGE_NAME}:${GITHUB_REF_NAME},${IMAGE_NAME}:latest"
+          else
+            TAGS="${IMAGE_NAME}:latest,${IMAGE_NAME}:sha-${SHA_SHORT}"
+          fi
+          echo "tags=${TAGS}" >> "$GITHUB_OUTPUT"
+          echo "Tags: ${TAGS}"
+
+      - name: Build and push
+        run: |
+          TAG_FLAGS=""
+          IFS=',' read -ra TAG_ARRAY <<< "${{ steps.meta.outputs.tags }}"
+          for t in "${TAG_ARRAY[@]}"; do TAG_FLAGS="$TAG_FLAGS --tag $t"; done
+          docker buildx build \
+            --push \
+            --platform linux/amd64 \
+            $TAG_FLAGS \
+            --add-host gitea.stevedudenhoeffer.com:192.168.0.134 \
+            --file ./Dockerfile \
+            .
@@ -0,0 +1,26 @@
+name: CI
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    types: [opened, synchronize, reopened]
+  workflow_dispatch: {}
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-go@v5
+        with:
+          go-version: "1.26"
+      - name: Build
+        run: go build ./...
+      - name: Vet
+        run: go vet ./...
+      - name: gofmt
+        run: test -z "$(gofmt -l .)" || { gofmt -l .; echo "gofmt needed"; exit 1; }
+      - name: Test (race)
+        run: go test -race ./...
@@ -1,27 +1,9 @@
-# ---> Go
-# If you prefer the allow list template instead of the deny list, see community template:
-# https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore
-#
-# Binaries for programs and plugins
-*.exe
-*.exe~
-*.dll
-*.so
-*.dylib
-
-# Test binary, built with `go test -c`
-*.test
-
-# Output of the go coverage tool, specifically when used with LiteIDE
-*.out
-
-# Dependency directories (remove the comment below to include it)
-# vendor/
-
-# Go workspace file
-go.work
-go.work.sum
-
-# env file
+# build output
+/gadfly-reports
+# local SQLite databases
+*.db
+*.db-wal
+*.db-shm
+/data/
+# local env
 .env
-
@@ -0,0 +1,68 @@
+# gadfly-reports — Developer Guide
+
+A small Go + SQLite HTTP daemon that stores [Gadfly](https://gitea.stevedudenhoeffer.com/steve/gadfly)
+review findings, the per-review run timings, and human/Claude grades — and serves a points-free
+per-model scoreboard. The companion MCP client is
+[gadfly-mcp](https://gitea.stevedudenhoeffer.com/steve/gadfly-mcp).
+
+> This is a public, **vibe-coded** project (built largely by an AI agent). Keep that framing honest
+> in the README; don't oversell it — it's a homelab-grade store, not a hardened product.
+
+## Core principle: store raw facts, score on the client
+
+gadfly-reports records **only facts**: runs (timing/tokens), findings (content-addressed by
+location), reports (which model raised which finding), and grades (`is_real` + `severity` +
+`usefulness`). It **never stores points or computes rankings**. Mapping `severity → points` and any
+"value per minute / per token" ranking is the dashboard's job. This is deliberate — keep it that way:
+do not add a points column or a weighting config to the store. Retuning the curve must never require
+a migration or a re-score.
+
+The severity vocabulary (`trivial|small|medium|high|critical`) in `store.go` is the **only**
+scoring-adjacent contract, and it's a closed set validated on write.
+
+## Architecture
+
+```
+main.go     subcommand dispatch (serve) + flags/env
+store.go    SQLite schema + types + queries (runs/findings/reports/grades + latest_grades view)
+server.go   net/http API (ServeMux method+path routes) + optional bearer auth
+*_test.go   store + server end-to-end tests (consensus, latest-grade-wins, validation, auth)
+Dockerfile  CGO-free build (pure-Go modernc sqlite) -> small alpine image
+.gitea/workflows/  ci.yml (build/vet/gofmt/race tests) + build-image.yml (publish :latest + :sha-<short>; v* tags publish :<tag> + :latest)
+```
+
+**Data model.** A **finding** is identified by `sha256(repo|pr|lens|file|line)[:16]` — *not* by
+wording — so the same issue from different models (or a re-review) collapses to one finding with many
+**reports**. Each finding has one *effective* **grade**: grade history is kept, and the latest grade wins via the `latest_grades` view.
+
+## Dependencies
+
+- **modernc.org/sqlite** (pure Go) — chosen so the binary is CGO-free and `go run …@latest`/the
+  Docker build need no C toolchain. Don't swap in a cgo driver.
+- Otherwise stdlib only. The MCP SDK lives in gadfly-mcp, **not** here — keep this daemon lean.
+
+## Build / test
+
+```sh
+go build ./...
+go vet ./...
+gofmt -l .        # must be empty
+go test -race ./...
+```
+
+## Release / deploy
+
+- **Push to `main`** → CI builds and publishes `:latest` (+ `:sha-<short>`) to
+  `gitea.stevedudenhoeffer.com/steve/gadfly-reports`.
+- **Tag `v*`** → publishes `:<tag>` (+ `:latest`).
+- CI needs repo secrets `REGISTRY_USER` / `REGISTRY_PASSWORD` to push the image (the Go build itself
+  uses only public modules — no private-module creds needed).
+
+## When making changes
+
+- Keep the **README API table** in sync with `server.go` routes and `store.go` JSON tags — it is the
+  contract gadfly (emit) and gadfly-mcp rely on. Stale docs are a bug.
+- Preserve the **store-no-points** principle (see above).
+- Add a test when you add logic. Keep `gofmt` clean and `go vet` quiet.
+- The schema uses `CREATE TABLE IF NOT EXISTS` migrations applied on `Open`; additive changes are
+  fine, destructive ones need a real migration story (there isn't one yet — it's a homelab store).
@@ -0,0 +1,19 @@
+# gadfly-reports daemon image. modernc.org/sqlite is pure Go, so the binary is
+# CGO-free and the final image needs no libc / no C toolchain at build time.
+FROM golang:1.26 AS build
+WORKDIR /src
+COPY go.mod go.sum ./
+RUN go mod download
+COPY . .
+RUN CGO_ENABLED=0 go build -trimpath -ldflags="-s -w" -o /out/gadfly-reports .
+
+FROM alpine:3.20
+RUN adduser -D -u 10001 app && mkdir -p /data && chown app /data
+COPY --from=build /out/gadfly-reports /usr/local/bin/gadfly-reports
+USER app
+ENV GADFLY_REPORTS_ADDR=:8090 \
+    GADFLY_REPORTS_DB=/data/gadfly-reports.db
+EXPOSE 8090
+VOLUME ["/data"]
+ENTRYPOINT ["/usr/local/bin/gadfly-reports"]
+CMD ["serve"]
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2026 Steve Dudenhoeffer
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
@@ -1,2 +1,96 @@
-# gadfly-reports
+# 🪰📋 gadfly-reports

+A small **durable store + scoreboard** for [Gadfly](https://gitea.stevedudenhoeffer.com/steve/gadfly)
+review findings. Gadfly (and any CI) POST each model's findings and per-review timing here; a human
+or Claude — via [gadfly-mcp](https://gitea.stevedudenhoeffer.com/steve/gadfly-mcp) — later grades
+each finding. It's a single Go binary backed by SQLite, speaking a tiny HTTP API.
+
+> ### 🤖 Heads up: this is a vibe-coded project
+> gadfly-reports was built almost entirely by an AI agent (Claude Code) — the design, the code, and
+> these docs. It's small and it's tested, but treat it accordingly: it's a homelab-grade service,
+> not a hardened product, and there may be the occasional AI-flavored rough edge. Issues and PRs
+> welcome.
+
+## What it stores — and what it deliberately doesn't
+
+gadfly-reports is a **pure fact store**:
+
+- **runs** — one per model's review of a PR: wall-clock duration, lens count, optional token/cost.
+- **findings** — **content-addressed by location** (`repo + pr + lens + file + line`), so the *same*
+  issue raised by several models collapses to one finding with many **reports**. That collapse is
+  what makes cross-model **consensus** and per-model **precision** measurable.
+- **grades** — a triage verdict per finding: `is_real`, `severity`
+  (`trivial|small|medium|high|critical`), optional `usefulness` (1–5), notes, grader. Grade history
+  is kept; the latest wins.
+
+It stores **no points and computes no rankings.** Mapping severity → points and ranking models by
+"value per minute" (or per token) is a **client/dashboard concern**, so you can retune the curve any
+time without migrating or re-scoring stored data.
+
+## Run it
+
+```sh
+# from source
+go run gitea.stevedudenhoeffer.com/steve/gadfly-reports@latest serve
+
+# or Docker (image published by CI on every push to main)
+docker run -d --name gadfly-reports -p 8090:8090 -v gadfly-reports-data:/data \
+  -e GADFLY_REPORTS_TOKEN=change-me \
+  gitea.stevedudenhoeffer.com/steve/gadfly-reports:latest
+```
+
+## HTTP API (the canonical contract)
+
+| Method & path | Body / query | Purpose |
+|---|---|---|
+| `GET /healthz` | — | liveness (open even when a token is set) |
+| `POST /runs` | one run object | upsert a model's review of a PR (timing/tokens) |
+| `POST /reports` | JSON **array** of report objects | record findings + which model reported each |
+| `POST /findings/{id}/grade` | `{is_real, severity?, usefulness?, notes?, grader?}` | record a triage grade |
+| `GET /export` | — | flat report×finding×run×latest-grade rows — the dashboard feed |
+| `GET /scoreboard` | — | points-free per-model rollup |
+
+`POST /runs` body: `{run_id, repo, pr, model, provider, lenses, duration_secs, input_tokens?, output_tokens?, cost_usd?}`
+(re-posting the same `run_id` updates it).
+
+`POST /reports` array element: `{repo, pr, lens, file, line, title, model, provider, run_id, raw_severity, detail}`.
+
+`GET /scoreboard` element: `{model, provider, runs, minutes, input_tokens, output_tokens, findings, confirmed, false_positive, ungraded, by_severity:{severity:count}}`.
+
+If `GADFLY_REPORTS_TOKEN` is set, every route except `/healthz` requires `Authorization: Bearer <token>`.
+
+## Configuration
+
+| Env | Default | Meaning |
+|-----|---------|---------|
+| `GADFLY_REPORTS_ADDR` | `:8090` | listen address |
+| `GADFLY_REPORTS_DB` | `gadfly-reports.db` (`/data/gadfly-reports.db` in Docker) | SQLite path |
+| `GADFLY_REPORTS_TOKEN` | *(empty)* | bearer token callers must present (empty = open) |
+
+CLI flags `--addr` / `--db` / `--token` override the env.
+
+## Dashboards
+
+Point anything at the JSON endpoints (or the SQLite file read-only). `GET /export` is the flat feed;
+`GET /scoreboard` is the per-model rollup. Compute points and value-per-minute **in the dashboard**,
+e.g. with a curve like `trivial=1, small=3, medium=5, high=8, critical=20` →
+`points = Σ weight[severity]·by_severity[severity]`, `value/min = points / minutes`.
+
+## How it fits together
+
+- **[gadfly](https://gitea.stevedudenhoeffer.com/steve/gadfly)** POSTs findings here after each
+  review when `GADFLY_FINDINGS_URL` points at this store (advisory; off by default).
+- **[gadfly-mcp](https://gitea.stevedudenhoeffer.com/steve/gadfly-mcp)** is the MCP server Claude
+  uses to list findings and record grades against this store.
+
+## Build / test
+
+```sh
+go build ./...
+go test ./...
+gofmt -l .   # must be clean
+```
+
+## License
+
+MIT © 2026 Steve Dudenhoeffer.
@@ -0,0 +1,17 @@
+module gitea.stevedudenhoeffer.com/steve/gadfly-reports
+
+go 1.26
+
+require modernc.org/sqlite v1.53.0
+
+require (
+	github.com/dustin/go-humanize v1.0.1 // indirect
+	github.com/google/uuid v1.6.0 // indirect
+	github.com/mattn/go-isatty v0.0.20 // indirect
+	github.com/ncruces/go-strftime v1.0.0 // indirect
+	github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
+	golang.org/x/sys v0.44.0 // indirect
+	modernc.org/libc v1.73.4 // indirect
+	modernc.org/mathutil v1.7.1 // indirect
+	modernc.org/memory v1.11.0 // indirect
+)
@@ -0,0 +1,51 @@
+github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
+github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
+github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs=
+github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
+github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
+github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
+github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
+github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
+github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
+github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
+github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
+github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
+github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
+golang.org/x/mod v0.36.0 h1:JJjpVx6myfUsUdAzZuOSTTmRE0PfZeNWzzvKrP7amb4=
+golang.org/x/mod v0.36.0/go.mod h1:moc6ELqsWcOw5Ef3xVprK5ul/MvtVvkIXLziUOICjUQ=
+golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4=
+golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
+golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.44.0 h1:ildZl3J4uzeKP07r2F++Op7E9B29JRUy+a27EibtBTQ=
+golang.org/x/sys v0.44.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
+golang.org/x/tools v0.45.0 h1:18qN3FAooORvApf5XjCXgsuayZOEtXf6JK18I3+ONa8=
+golang.org/x/tools v0.45.0/go.mod h1:LuUGqqaXcXMEFEruIVJVm5mgDD8vww/z/SR1gQ4uE/0=
+modernc.org/cc/v4 v4.28.4 h1:Hd/4Es+MBj+/7hSdZaisNyu6bv3V0Dp2MdllyfqaH+c=
+modernc.org/cc/v4 v4.28.4/go.mod h1:OnovgIhbbMXMu1aISnJ0wvVD1KnW+cAUJkIrAWh+kVI=
+modernc.org/ccgo/v4 v4.34.4 h1:OVnSOWQjVKOYkFxoHYB+qQmSHK5gqMqARM+K9DpR/Ws=
+modernc.org/ccgo/v4 v4.34.4/go.mod h1:qdKqE8FNIYyysougB1RX9MxCzp5oJOcQXSobANJ4TuE=
+modernc.org/fileutil v1.4.0 h1:j6ZzNTftVS054gi281TyLjHPp6CPHr2KCxEXjEbD6SM=
+modernc.org/fileutil v1.4.0/go.mod h1:EqdKFDxiByqxLk8ozOxObDSfcVOv/54xDs/DUHdvCUU=
+modernc.org/gc/v2 v2.6.5 h1:nyqdV8q46KvTpZlsw66kWqwXRHdjIlJOhG6kxiV/9xI=
+modernc.org/gc/v2 v2.6.5/go.mod h1:YgIahr1ypgfe7chRuJi2gD7DBQiKSLMPgBQe9oIiito=
+modernc.org/gc/v3 v3.1.3 h1:6QAplYyVO+KdPW3pGnqmJDUxtkec8ooEWvks/hhU3lc=
+modernc.org/gc/v3 v3.1.3/go.mod h1:HFK/6AGESC7Ex+EZJhJ2Gni6cTaYpSMmU/cT9RmlfYY=
+modernc.org/goabi0 v0.2.0 h1:HvEowk7LxcPd0eq6mVOAEMai46V+i7Jrj13t4AzuNks=
+modernc.org/goabi0 v0.2.0/go.mod h1:CEFRnnJhKvWT1c1JTI3Avm+tgOWbkOu5oPA8eH8LnMI=
+modernc.org/libc v1.73.4 h1:+ra4Ui8ngyt8HDcO1FTDPWlkAh6yOdaO2yAoh8MddQA=
+modernc.org/libc v1.73.4/go.mod h1:DXZ3eO8qMCNn2SnmTNCiC71nJ9Rcq3PsnpU6Vc4rWK8=
+modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU=
+modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg=
+modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI=
+modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw=
+modernc.org/opt v0.2.0 h1:tGyef5ApycA7FSEOMraay9SaTk5zmbx7Tu+cJs4QKZg=
+modernc.org/opt v0.2.0/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns=
+modernc.org/sortutil v1.2.1 h1:+xyoGf15mM3NMlPDnFqrteY07klSFxLElE2PVuWIJ7w=
+modernc.org/sortutil v1.2.1/go.mod h1:7ZI3a3REbai7gzCLcotuw9AC4VZVpYMjDzETGsSMqJE=
+modernc.org/sqlite v1.53.0 h1:20WG8N9q4ji/dEqGk4uiI0c6OPjSeLTNYGFCc3+7c1M=
+modernc.org/sqlite v1.53.0/go.mod h1:xoEpOIpGrgT48H5iiyt/YXPCZPEzlfmfFwtk8Lklw8s=
+modernc.org/strutil v1.2.1 h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0=
+modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A=
+modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y=
+modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM=
@@ -0,0 +1,74 @@
+// Command gadfly-reports is a small, durable store + scoreboard for Gadfly's review
+// findings. Gadfly (and CI) report each model's findings and per-review timing
+// here; a human or Claude later grades each finding (is_real + severity +
+// usefulness). gadfly-reports stores only those RAW facts — it deliberately does NOT
+// compute points or rankings, so the dashboard/client owns the scoring curve
+// (severity -> points, value-per-minute, value-per-token) and can retune it
+// without migrating or re-scoring stored data.
+//
+// Subcommands:
+//
+//	gadfly-reports serve [flags]   run the HTTP + SQLite store (the long-running daemon)
+//
+// The MCP server Claude calls to record grades lives in the separate gadfly-mcp
+// project, so the daemon stays lean; each is launchable with `go run <its module>@latest`.
+package main
+
+import (
+	"flag"
+	"fmt"
+	"log"
+	"net/http"
+	"os"
+)
+
+// main dispatches on the first command-line argument. "serve" is the only
+// subcommand; a missing or unrecognized one prints usage and exits with status 2.
+func main() {
+	if len(os.Args) >= 2 && os.Args[1] == "serve" {
+		serveCmd(os.Args[2:])
+		return
+	}
+	usage()
+	os.Exit(2)
+}
+
+func usage() {
+	fmt.Fprint(os.Stderr, `gadfly-reports — durable store + scoreboard for Gadfly review findings
+
+Usage:
+  gadfly-reports serve [flags]     run the HTTP + SQLite store
+
+Run "gadfly-reports serve -h" for flags.
+`)
+}
+
+// serveCmd runs the long-lived daemon: it parses the serve flags (each one
+// defaulting to its GADFLY_REPORTS_* environment variable), opens the SQLite
+// store, and serves the HTTP API until the listener fails.
+//
+// args are the command-line arguments that follow the "serve" subcommand.
+// Failing to open the store, or any error from the HTTP server, is fatal.
+func serveCmd(args []string) {
+	fs := flag.NewFlagSet("serve", flag.ExitOnError)
+	addr := fs.String("addr", envOr("GADFLY_REPORTS_ADDR", ":8090"), "listen address")
+	dbPath := fs.String("db", envOr("GADFLY_REPORTS_DB", "gadfly-reports.db"), "SQLite database path")
+	token := fs.String("token", os.Getenv("GADFLY_REPORTS_TOKEN"), "bearer token callers must present (empty = open)")
+	// With flag.ExitOnError, Parse exits the process itself on a bad flag, so
+	// there is no error left to handle here.
+	_ = fs.Parse(args)
+
+	store, err := Open(*dbPath)
+	if err != nil {
+		log.Fatalf("gadfly-reports: %v", err)
+	}
+
+	// Only whether a token is set is logged, never the token itself.
+	log.Printf("gadfly-reports: serving %s (db=%s, auth=%v)", *addr, *dbPath, *token != "")
+	// NOTE(review): no read/write timeouts are configured on this server;
+	// consider setting ReadHeaderTimeout if it is exposed beyond a trusted network.
+	srv := &http.Server{Addr: *addr, Handler: newServer(store, *token)}
+	serveErr := srv.ListenAndServe()
+
+	// Close the store explicitly rather than with defer: log.Fatalf exits via
+	// os.Exit, which skips deferred calls, so a deferred Close would never run.
+	store.Close()
+	if serveErr != nil {
+		log.Fatalf("gadfly-reports: %v", serveErr)
+	}
+}
+
+// envOr returns the value of the environment variable key, or def when that
+// variable is unset or empty.
+func envOr(key, def string) string {
+	value := os.Getenv(key)
+	if value == "" {
+		return def
+	}
+	return value
+}
@@ -0,0 +1,121 @@
+package main
+
+import (
+	"encoding/json"
+	"errors"
+	"log"
+	"net/http"
+	"strings"
+)
+
+// newServer builds the HTTP API on top of store and returns it wrapped in the
+// bearer-token gate. When token is non-empty, every route except /healthz
+// requires "Authorization: Bearer <token>".
+//
+// Routes:
+//
+//	GET  /healthz                  liveness
+//	POST /runs                     upsert one run (model review of a PR; timing/tokens)
+//	POST /reports                  record a batch of findings + this model's reports
+//	POST /findings/{id}/grade      record a triage grade (is_real, severity, …)
+//	GET  /export                   flat report×finding×grade rows (the dashboard feed)
+//	GET  /scoreboard               points-free per-model rollup
+//
+// Write routes answer 400 when the body cannot be decoded or the store rejects
+// it; read routes answer 500 when the store query fails.
+func newServer(store *Store, token string) http.Handler {
+	routes := http.NewServeMux()
+
+	routes.HandleFunc("GET /healthz", func(w http.ResponseWriter, _ *http.Request) {
+		status := map[string]string{"status": "ok"}
+		writeJSON(w, http.StatusOK, status)
+	})
+
+	routes.HandleFunc("POST /runs", func(w http.ResponseWriter, r *http.Request) {
+		var in Run
+		if ok := decode(w, r, &in); !ok {
+			return // decode has already written the 400
+		}
+		err := store.AddRun(in)
+		if err != nil {
+			writeErr(w, http.StatusBadRequest, err)
+			return
+		}
+		writeJSON(w, http.StatusOK, map[string]string{"run_id": in.RunID})
+	})
+
+	routes.HandleFunc("POST /reports", func(w http.ResponseWriter, r *http.Request) {
+		var batch []ReportIn
+		if ok := decode(w, r, &batch); !ok {
+			return
+		}
+		findingIDs, err := store.AddReports(batch)
+		if err != nil {
+			writeErr(w, http.StatusBadRequest, err)
+			return
+		}
+		writeJSON(w, http.StatusOK, map[string]any{"finding_ids": findingIDs})
+	})
+
+	routes.HandleFunc("POST /findings/{id}/grade", func(w http.ResponseWriter, r *http.Request) {
+		var grade Grade
+		if ok := decode(w, r, &grade); !ok {
+			return
+		}
+		// The finding id always comes from the URL path; it is assigned after
+		// decoding the body.
+		grade.FindingID = r.PathValue("id")
+		err := store.AddGrade(grade)
+		if err != nil {
+			writeErr(w, http.StatusBadRequest, err)
+			return
+		}
+		writeJSON(w, http.StatusOK, map[string]string{"finding_id": grade.FindingID})
+	})
+
+	routes.HandleFunc("GET /export", func(w http.ResponseWriter, _ *http.Request) {
+		rows, err := store.Export()
+		if err == nil {
+			writeJSON(w, http.StatusOK, rows)
+			return
+		}
+		writeErr(w, http.StatusInternalServerError, err)
+	})
+
+	routes.HandleFunc("GET /scoreboard", func(w http.ResponseWriter, _ *http.Request) {
+		board, err := store.Scoreboard()
+		if err == nil {
+			writeJSON(w, http.StatusOK, board)
+			return
+		}
+		writeErr(w, http.StatusInternalServerError, err)
+	})
+
+	return auth(token, routes)
+}
+
+// auth gates everything but /healthz behind a bearer token, when one is set.
+//
+// With an empty token the wrapper is a pass-through. Otherwise a request must
+// carry "Authorization: Bearer <token>"; anything else receives a 401 JSON
+// error and never reaches next. The scheme name is matched case-insensitively,
+// and a header holding only the token (no scheme) is rejected.
+func auth(token string, next http.Handler) http.Handler {
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if token != "" && r.URL.Path != "/healthz" {
+			// Split "<scheme> <credentials>" at the first space and require an
+			// explicit Bearer scheme, so a bare token does not authenticate.
+			scheme, cred, ok := strings.Cut(r.Header.Get("Authorization"), " ")
+			if !ok || !strings.EqualFold(scheme, "Bearer") || strings.TrimSpace(cred) != token {
+				writeErr(w, http.StatusUnauthorized, errors.New("missing or invalid bearer token"))
+				return
+			}
+		}
+		next.ServeHTTP(w, r)
+	})
+}
+
+// decode parses the request body as JSON into v. On success it returns true and
+// writes nothing; on failure it writes a 400 JSON error describing the decode
+// problem and returns false, so the caller should simply return.
+func decode(w http.ResponseWriter, r *http.Request, v any) bool {
+	err := json.NewDecoder(r.Body).Decode(v)
+	if err == nil {
+		return true
+	}
+	writeErr(w, http.StatusBadRequest, errors.New("invalid JSON body: "+err.Error()))
+	return false
+}
+
+// writeJSON sends v as a JSON response with the given status code. The header
+// and status are written before encoding, so an encoding failure can only be
+// logged, not reported to the client.
+func writeJSON(w http.ResponseWriter, code int, v any) {
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(code)
+	enc := json.NewEncoder(w)
+	err := enc.Encode(v)
+	if err != nil {
+		log.Printf("gadfly-reports: write response: %v", err)
+	}
+}
+
+// writeErr sends err's message to the client as the JSON object
+// {"error": "<message>"} with the given status code.
+func writeErr(w http.ResponseWriter, code int, err error) {
+	payload := map[string]string{"error": err.Error()}
+	writeJSON(w, code, payload)
+}
@@ -0,0 +1,100 @@
+package main
+
+import (
+	"bytes"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"path/filepath"
+	"testing"
+)
+
+// testServer starts an httptest server backed by a fresh store in the test's
+// temp directory, gated by token (empty = open). The server and the store are
+// both torn down through t.Cleanup.
+func testServer(t *testing.T, token string) *httptest.Server {
+	t.Helper()
+	dbFile := filepath.Join(t.TempDir(), "gadfly-reports.db")
+	st, err := Open(dbFile)
+	if err != nil {
+		t.Fatal(err)
+	}
+	t.Cleanup(func() { st.Close() })
+	ts := httptest.NewServer(newServer(st, token))
+	t.Cleanup(ts.Close)
+	return ts
+}
+
+// post JSON-encodes body and POSTs it to path on srv, adding a bearer token
+// when token is non-empty, and returns the response. Failing to encode the
+// body, build the request, or send it fails the test immediately. The response
+// body is closed when the test finishes, so callers may read it but need not
+// close it.
+func post(t *testing.T, srv *httptest.Server, token, path string, body any) *http.Response {
+	t.Helper()
+	b, err := json.Marshal(body)
+	if err != nil {
+		t.Fatalf("marshal body for POST %s: %v", path, err)
+	}
+	req, err := http.NewRequest(http.MethodPost, srv.URL+path, bytes.NewReader(b))
+	if err != nil {
+		t.Fatalf("build POST %s: %v", path, err)
+	}
+	req.Header.Set("Content-Type", "application/json")
+	if token != "" {
+		req.Header.Set("Authorization", "Bearer "+token)
+	}
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		t.Fatalf("POST %s: %v", path, err)
+	}
+	// Close on cleanup rather than here: callers still read resp.Body.
+	t.Cleanup(func() { resp.Body.Close() })
+	return resp
+}
+
+// TestServerEndToEnd: run -> reports -> grade -> scoreboard over HTTP.
+//
+// It posts one 120-second run, one report against that run, and a "medium"
+// real grade for the resulting finding, then checks the scoreboard shows a
+// single model with one confirmed finding, one medium, and two minutes.
+func TestServerEndToEnd(t *testing.T) {
+	srv := testServer(t, "")
+
+	if resp := post(t, srv, "", "/runs", Run{RunID: "r1", Repo: "r", PR: 1, Model: "m", Provider: "p", DurationSecs: 120}); resp.StatusCode != 200 {
+		t.Fatalf("POST /runs = %d", resp.StatusCode)
+	}
+
+	resp := post(t, srv, "", "/reports", []ReportIn{
+		{Repo: "r", PR: 1, Lens: "security", File: "a.go", Line: 7, Title: "leak", Model: "m", Provider: "p", RunID: "r1"},
+	})
+	if resp.StatusCode != 200 {
+		t.Fatalf("POST /reports = %d", resp.StatusCode)
+	}
+	var rep struct {
+		FindingIDs []string `json:"finding_ids"`
+	}
+	// Surface a malformed response as a decode failure, not as a confusing
+	// "want 1 finding id" assertion on a zero value.
+	if err := json.NewDecoder(resp.Body).Decode(&rep); err != nil {
+		t.Fatalf("decode /reports response: %v", err)
+	}
+	if len(rep.FindingIDs) != 1 {
+		t.Fatalf("want 1 finding id, got %v", rep.FindingIDs)
+	}
+	id := rep.FindingIDs[0]
+
+	if resp := post(t, srv, "", "/findings/"+id+"/grade", Grade{IsReal: true, Severity: "medium", Grader: "claude"}); resp.StatusCode != 200 {
+		t.Fatalf("POST grade = %d", resp.StatusCode)
+	}
+
+	resp = mustGet(t, srv, "", "/scoreboard")
+	var board []ModelStat
+	if err := json.NewDecoder(resp.Body).Decode(&board); err != nil {
+		t.Fatalf("decode /scoreboard response: %v", err)
+	}
+	if len(board) != 1 || board[0].Confirmed != 1 || board[0].BySeverity["medium"] != 1 || board[0].Minutes != 2 {
+		t.Fatalf("unexpected scoreboard: %+v", board)
+	}
+}
+
+// TestServerAuth: with a token configured, a write without it is rejected with
+// 401, the same write with it succeeds, and /healthz stays reachable without it.
+func TestServerAuth(t *testing.T) {
+	srv := testServer(t, "secret")
+	run := Run{RunID: "r1", Model: "m"}
+
+	denied := post(t, srv, "", "/runs", run)
+	if denied.StatusCode != http.StatusUnauthorized {
+		t.Errorf("unauthenticated POST = %d, want 401", denied.StatusCode)
+	}
+
+	allowed := post(t, srv, "secret", "/runs", run)
+	if allowed.StatusCode != 200 {
+		t.Errorf("authenticated POST = %d, want 200", allowed.StatusCode)
+	}
+
+	health := mustGet(t, srv, "", "/healthz")
+	if health.StatusCode != 200 {
+		t.Errorf("healthz should be open, got %d", health.StatusCode)
+	}
+}
+
+// mustGet issues a GET for path on srv, adding a bearer token when token is
+// non-empty, and returns the response. Failing to build or send the request
+// fails the test immediately. The response body is closed when the test
+// finishes, so callers may read it but need not close it.
+func mustGet(t *testing.T, srv *httptest.Server, token, path string) *http.Response {
+	t.Helper()
+	req, err := http.NewRequest(http.MethodGet, srv.URL+path, nil)
+	if err != nil {
+		t.Fatalf("build GET %s: %v", path, err)
+	}
+	if token != "" {
+		req.Header.Set("Authorization", "Bearer "+token)
+	}
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		t.Fatalf("GET %s: %v", path, err)
+	}
+	// Close on cleanup rather than here: callers still read resp.Body.
+	t.Cleanup(func() { resp.Body.Close() })
+	return resp
+}
@@ -0,0 +1,447 @@
+package main
+
+import (
+	"crypto/sha256"
+	"database/sql"
+	"encoding/hex"
+	"fmt"
+	"sort"
+	"strings"
+	"time"
+
+	_ "modernc.org/sqlite"
+)
+
+// gadfly-reports stores only RAW review facts: which model reported which finding, how
+// long each model's review took, and a human/Claude grade (is_real + severity +
+// usefulness). It deliberately does NOT compute points or rankings — the
+// dashboard owns the scoring curve (severity -> points, value-per-minute), so it
+// can be retuned without re-scoring or migrating stored data. The severity
+// vocabulary below is the only scoring-related contract.
+
+// validSeverities is the closed set of severities a grade may assign to a REAL
+// finding; AddGrade rejects any other value after trimming and lower-casing
+// the input. The client maps these to points however it likes (e.g.
+// trivial=1 … critical=20).
+var validSeverities = map[string]bool{
+	"trivial":  true,
+	"small":    true,
+	"medium":   true,
+	"high":     true,
+	"critical": true,
+}
+
+// schema is the DDL that Open executes against the database. Every statement
+// uses IF NOT EXISTS, so applying it to an already-initialised database
+// changes nothing.
+//
+//   - runs: one row per review run, keyed by run_id.
+//   - findings: one row per content-addressed finding id.
+//   - reports: one row per (finding_id, run_id) pair, enforced by UNIQUE.
+//   - grades: grade history; the autoincrement id orders it.
+//   - latest_grades: view selecting the highest-id grade for each finding.
+//
+// No table declares a foreign key, so consistency between these tables is left
+// to the application code.
+const schema = `
+CREATE TABLE IF NOT EXISTS runs (
+  run_id        TEXT PRIMARY KEY,
+  repo          TEXT NOT NULL,
+  pr            INTEGER NOT NULL,
+  model         TEXT NOT NULL,
+  provider      TEXT NOT NULL,
+  lenses        INTEGER NOT NULL DEFAULT 0,
+  duration_secs REAL    NOT NULL DEFAULT 0,
+  input_tokens  INTEGER,
+  output_tokens INTEGER,
+  cost_usd      REAL,
+  created_at    TEXT    NOT NULL
+);
+
+CREATE TABLE IF NOT EXISTS findings (
+  id         TEXT PRIMARY KEY,
+  repo       TEXT NOT NULL,
+  pr         INTEGER NOT NULL,
+  lens       TEXT NOT NULL,
+  file       TEXT,
+  line       INTEGER,
+  title      TEXT NOT NULL,
+  first_seen TEXT NOT NULL
+);
+
+CREATE TABLE IF NOT EXISTS reports (
+  id           INTEGER PRIMARY KEY AUTOINCREMENT,
+  finding_id   TEXT NOT NULL,
+  run_id       TEXT NOT NULL,
+  model        TEXT NOT NULL,
+  provider     TEXT NOT NULL,
+  raw_severity TEXT,
+  detail       TEXT,
+  created_at   TEXT NOT NULL,
+  UNIQUE(finding_id, run_id)
+);
+CREATE INDEX IF NOT EXISTS idx_reports_finding ON reports(finding_id);
+CREATE INDEX IF NOT EXISTS idx_reports_model   ON reports(model);
+
+CREATE TABLE IF NOT EXISTS grades (
+  id         INTEGER PRIMARY KEY AUTOINCREMENT,
+  finding_id TEXT NOT NULL,
+  is_real    INTEGER NOT NULL,
+  severity   TEXT,
+  usefulness INTEGER,
+  notes      TEXT,
+  grader     TEXT,
+  created_at TEXT NOT NULL
+);
+CREATE INDEX IF NOT EXISTS idx_grades_finding ON grades(finding_id);
+
+-- latest_grades: the most recent grade per finding (grade history is kept; the
+-- latest wins). Used by every read path so a re-grade supersedes the old one.
+CREATE VIEW IF NOT EXISTS latest_grades AS
+SELECT g.* FROM grades g
+JOIN (SELECT finding_id, MAX(id) AS max_id FROM grades GROUP BY finding_id) m
+  ON g.id = m.max_id;
+`
+
+// Store is the SQLite-backed fact store. It wraps a single *sql.DB handle;
+// obtain one with Open and release it with Close.
+type Store struct{ db *sql.DB }
+
+// Open opens (creating if needed) the SQLite database at path and applies the
+// schema. WAL + a busy timeout keep the single-writer daemon honest under the
+// occasional concurrent reader.
+func Open(path string) (*Store, error) {
+	db, err := sql.Open("sqlite", "file:"+path+"?_pragma=busy_timeout(5000)&_pragma=journal_mode(WAL)&_pragma=foreign_keys(on)")
+	if err != nil {
+		return nil, fmt.Errorf("open %s: %w", path, err)
+	}
+	// modernc's pure-Go driver is happiest with a single writer connection.
+	db.SetMaxOpenConns(1)
+	if _, err := db.Exec(schema); err != nil {
+		db.Close()
+		return nil, fmt.Errorf("migrate: %w", err)
+	}
+	return &Store{db: db}, nil
+}
+
+// Close closes the underlying database handle and returns whatever error that
+// produces.
+func (s *Store) Close() error {
+	return s.db.Close()
+}
+
+// now returns the current time in UTC formatted as an RFC 3339 string; it is
+// the timestamp format written to every *_at / first_seen column.
+func now() string {
+	utc := time.Now().UTC()
+	return utc.Format(time.RFC3339)
+}
+
+// findingID derives a finding's id from WHERE it is (repo, PR, lens, file,
+// line) and never from how it is worded. Two models flagging the same spot —
+// or one model re-flagging it on a re-review — therefore land on the same
+// finding with several reports, which is what makes cross-model consensus and
+// per-model precision measurable.
+//
+// repo, lens and file are whitespace-trimmed and lens is additionally
+// lower-cased before hashing. The result is the first 16 hex characters of the
+// SHA-256 of the "|"-joined key.
+func findingID(repo string, pr int, lens, file string, line int) string {
+	repo = strings.TrimSpace(repo)
+	lens = strings.ToLower(strings.TrimSpace(lens))
+	file = strings.TrimSpace(file)
+
+	digest := sha256.Sum256([]byte(fmt.Sprintf("%s|%d|%s|%s|%d", repo, pr, lens, file, line)))
+	// 8 bytes of digest encode to exactly 16 hex characters.
+	return hex.EncodeToString(digest[:8])
+}
+
+// Run is one model's review of one PR — the unit run.sh times. RunID is the
+// key AddRun upserts on, and AddRun requires both RunID and Model to be
+// non-blank. InputTokens, OutputTokens and CostUSD are pointers so that an
+// unknown value stays nil (and is omitted from JSON) instead of reading as
+// zero.
+type Run struct {
+	RunID        string   `json:"run_id"`
+	Repo         string   `json:"repo"`
+	PR           int      `json:"pr"`
+	Model        string   `json:"model"`
+	Provider     string   `json:"provider"`
+	Lenses       int      `json:"lenses"`
+	DurationSecs float64  `json:"duration_secs"`
+	InputTokens  *int64   `json:"input_tokens,omitempty"`
+	OutputTokens *int64   `json:"output_tokens,omitempty"`
+	CostUSD      *float64 `json:"cost_usd,omitempty"`
+}
+
+// AddRun upserts a run by run_id (a re-posted run overwrites timing/tokens).
+func (s *Store) AddRun(r Run) error {
+	if strings.TrimSpace(r.RunID) == "" || strings.TrimSpace(r.Model) == "" {
+		return fmt.Errorf("run_id and model are required")
+	}
+	_, err := s.db.Exec(`
+INSERT INTO runs (run_id, repo, pr, model, provider, lenses, duration_secs, input_tokens, output_tokens, cost_usd, created_at)
+VALUES (?,?,?,?,?,?,?,?,?,?,?)
+ON CONFLICT(run_id) DO UPDATE SET
+  repo=excluded.repo, pr=excluded.pr, model=excluded.model, provider=excluded.provider,
+  lenses=excluded.lenses, duration_secs=excluded.duration_secs,
+  input_tokens=excluded.input_tokens, output_tokens=excluded.output_tokens, cost_usd=excluded.cost_usd`,
+		r.RunID, r.Repo, r.PR, r.Model, r.Provider, r.Lenses, r.DurationSecs,
+		r.InputTokens, r.OutputTokens, r.CostUSD, now())
+	return err
+}
+
+// ReportIn is one finding as a single model reported it. Repo, PR, Lens, File
+// and Line locate the finding and are what AddReports hashes into its id;
+// Title is stored only when the finding row is first created. Model, Provider
+// and RunID identify the reporter, and RawSeverity / Detail are stored on the
+// report row as given. AddReports requires Lens and Title to be non-blank.
+type ReportIn struct {
+	Repo        string `json:"repo"`
+	PR          int    `json:"pr"`
+	Lens        string `json:"lens"`
+	File        string `json:"file"`
+	Line        int    `json:"line"`
+	Title       string `json:"title"`
+	Model       string `json:"model"`
+	Provider    string `json:"provider"`
+	RunID       string `json:"run_id"`
+	RawSeverity string `json:"raw_severity"`
+	Detail      string `json:"detail"`
+}
+
+// AddReports records a batch of findings: each upserts its (content-addressed)
+// finding row and adds this model's report of it. Returns the finding id per
+// input (same order). A model re-reporting the same finding in the same run is a
+// no-op (UNIQUE finding_id,run_id).
+func (s *Store) AddReports(in []ReportIn) ([]string, error) {
+	tx, err := s.db.Begin()
+	if err != nil {
+		return nil, err
+	}
+	defer tx.Rollback()
+
+	ts := now()
+	ids := make([]string, len(in))
+	for i, r := range in {
+		if strings.TrimSpace(r.Title) == "" || strings.TrimSpace(r.Lens) == "" {
+			return nil, fmt.Errorf("report %d: lens and title are required", i)
+		}
+		id := findingID(r.Repo, r.PR, r.Lens, r.File, r.Line)
+		ids[i] = id
+		if _, err := tx.Exec(`
+INSERT INTO findings (id, repo, pr, lens, file, line, title, first_seen)
+VALUES (?,?,?,?,?,?,?,?) ON CONFLICT(id) DO NOTHING`,
+			id, r.Repo, r.PR, strings.ToLower(strings.TrimSpace(r.Lens)), r.File, r.Line, r.Title, ts); err != nil {
+			return nil, err
+		}
+		if _, err := tx.Exec(`
+INSERT INTO reports (finding_id, run_id, model, provider, raw_severity, detail, created_at)
+VALUES (?,?,?,?,?,?,?) ON CONFLICT(finding_id, run_id) DO NOTHING`,
+			id, r.RunID, r.Model, r.Provider, r.RawSeverity, r.Detail, ts); err != nil {
+			return nil, err
+		}
+	}
+	return ids, tx.Commit()
+}
+
+// Grade is a triage verdict on a finding. FindingID must name an existing
+// finding. Severity is required when is_real and must be one of
+// validSeverities; it is cleared when !is_real. Usefulness is optional and,
+// when present, must be in 1..5. No points here — the client maps
+// severity -> points.
+type Grade struct {
+	FindingID  string `json:"finding_id"`
+	IsReal     bool   `json:"is_real"`
+	Severity   string `json:"severity,omitempty"`
+	Usefulness *int   `json:"usefulness,omitempty"`
+	Notes      string `json:"notes,omitempty"`
+	Grader     string `json:"grader,omitempty"`
+}
+
+// AddGrade validates g and appends it to the grade history (older grades are
+// kept; the newest one wins on read). It returns an error when:
+//   - FindingID is blank or does not match a stored finding,
+//   - the grade is real but Severity is not one of validSeverities,
+//   - Usefulness is set but outside 1..5,
+//   - the database read or write fails.
+//
+// Severity is trimmed and lower-cased before storage and dropped entirely for
+// a false positive; empty severity, notes and grader are stored as NULL.
+func (s *Store) AddGrade(g Grade) error {
+	if strings.TrimSpace(g.FindingID) == "" {
+		return fmt.Errorf("finding_id is required")
+	}
+
+	var known bool
+	lookup := s.db.QueryRow(`SELECT EXISTS(SELECT 1 FROM findings WHERE id=?)`, g.FindingID)
+	if err := lookup.Scan(&known); err != nil {
+		return err
+	}
+	if !known {
+		return fmt.Errorf("unknown finding_id %q", g.FindingID)
+	}
+
+	// A false positive carries no severity, so it stays empty unless real.
+	severity := ""
+	if g.IsReal {
+		severity = strings.ToLower(strings.TrimSpace(g.Severity))
+		if !validSeverities[severity] {
+			return fmt.Errorf("severity %q invalid for a real finding (want one of: %s)", g.Severity, strings.Join(sortedSeverities(), ", "))
+		}
+	}
+
+	if u := g.Usefulness; u != nil && !(1 <= *u && *u <= 5) {
+		return fmt.Errorf("usefulness must be 1..5, got %d", *u)
+	}
+
+	const insert = `
+INSERT INTO grades (finding_id, is_real, severity, usefulness, notes, grader, created_at)
+VALUES (?,?,?,?,?,?,?)`
+	_, err := s.db.Exec(insert,
+		g.FindingID, g.IsReal, nullStr(severity), g.Usefulness, nullStr(g.Notes), nullStr(g.Grader), now())
+	return err
+}
+
+// ExportRow is one report joined with its finding, run timing, and latest grade
+// — the flat shape a dashboard consumes. Graded is true exactly when a latest
+// grade exists for the finding; until then IsReal and Usefulness are nil and
+// the other grade fields are empty. DurationSecs is 0 and the token fields are
+// nil when no run row matches the report's run_id.
+type ExportRow struct {
+	FindingID    string  `json:"finding_id"`
+	Repo         string  `json:"repo"`
+	PR           int     `json:"pr"`
+	Lens         string  `json:"lens"`
+	File         string  `json:"file,omitempty"`
+	Line         int     `json:"line,omitempty"`
+	Title        string  `json:"title"`
+	Model        string  `json:"model"`
+	Provider     string  `json:"provider,omitempty"`
+	RunID        string  `json:"run_id"`
+	RawSeverity  string  `json:"raw_severity,omitempty"`
+	ReportedAt   string  `json:"reported_at"`
+	DurationSecs float64 `json:"duration_secs"`
+	InputTokens  *int64  `json:"input_tokens,omitempty"`
+	OutputTokens *int64  `json:"output_tokens,omitempty"`
+	Graded       bool    `json:"graded"`
+	IsReal       *bool   `json:"is_real,omitempty"`
+	Severity     string  `json:"severity,omitempty"`
+	Usefulness   *int    `json:"usefulness,omitempty"`
+	Notes        string  `json:"notes,omitempty"`
+	Grader       string  `json:"grader,omitempty"`
+	GradedAt     string  `json:"graded_at,omitempty"`
+}
+
+// Export returns every report joined with its finding, its run's timing (when
+// a matching run row exists) and the finding's latest grade (when one exists),
+// oldest report first. The dashboard does all weighting from these raw rows.
+//
+// The result is never nil: an empty store yields an empty slice so that it
+// JSON-encodes as [] rather than null. On any query, scan or iteration error
+// the rows read so far are discarded and (nil, err) is returned.
+func (s *Store) Export() ([]ExportRow, error) {
+	rows, err := s.db.Query(`
+SELECT r.finding_id, f.repo, f.pr, f.lens, f.file, f.line, f.title,
+       r.model, r.provider, r.run_id, r.raw_severity, r.created_at,
+       COALESCE(ru.duration_secs, 0), ru.input_tokens, ru.output_tokens,
+       lg.is_real, lg.severity, lg.usefulness, lg.notes, lg.grader, lg.created_at
+FROM reports r
+JOIN findings f ON f.id = r.finding_id
+LEFT JOIN runs ru ON ru.run_id = r.run_id
+LEFT JOIN latest_grades lg ON lg.finding_id = r.finding_id
+ORDER BY r.created_at, r.id`)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	// Deliberately non-nil: encoding/json renders a nil slice as null.
+	out := []ExportRow{}
+	for rows.Next() {
+		var e ExportRow
+		// Nullable columns are scanned through sql.Null* wrappers; the grade
+		// columns are NULL whenever the LEFT JOIN found no grade.
+		var file, rawSev, sev, notes, grader, gradedAt sql.NullString
+		var line sql.NullInt64
+		var isReal sql.NullBool
+		var useful sql.NullInt64
+		if err := rows.Scan(&e.FindingID, &e.Repo, &e.PR, &e.Lens, &file, &line, &e.Title,
+			&e.Model, &e.Provider, &e.RunID, &rawSev, &e.ReportedAt,
+			&e.DurationSecs, &e.InputTokens, &e.OutputTokens,
+			&isReal, &sev, &useful, &notes, &grader, &gradedAt); err != nil {
+			return nil, err
+		}
+		e.File, e.Line = file.String, int(line.Int64)
+		e.RawSeverity = rawSev.String
+		// grades.is_real is NOT NULL, so a valid value means a grade exists.
+		if isReal.Valid {
+			e.Graded = true
+			v := isReal.Bool
+			e.IsReal = &v
+			e.Severity, e.Notes, e.Grader, e.GradedAt = sev.String, notes.String, grader.String, gradedAt.String
+			if useful.Valid {
+				u := int(useful.Int64)
+				e.Usefulness = &u
+			}
+		}
+		out = append(out, e)
+	}
+	if err := rows.Err(); err != nil {
+		return nil, err
+	}
+	return out, nil
+}
+
+// ModelStat is the per-model rollup the scoreboard returns. It is intentionally
+// POINTS-FREE: raw minutes/tokens and a confirmed-by-severity histogram, so the
+// client applies its own weights for points and value-per-minute/token.
+//
+// Runs, Minutes (summed duration_secs / 60) and the token totals come from the
+// runs table; Findings, Confirmed, FalsePositive and Ungraded count DISTINCT
+// findings the model reported, split by each finding's latest grade.
+type ModelStat struct {
+	Model         string         `json:"model"`
+	Provider      string         `json:"provider,omitempty"`
+	Runs          int            `json:"runs"`
+	Minutes       float64        `json:"minutes"`
+	InputTokens   int64          `json:"input_tokens"`
+	OutputTokens  int64          `json:"output_tokens"`
+	Findings      int            `json:"findings"`
+	Confirmed     int            `json:"confirmed"`
+	FalsePositive int            `json:"false_positive"`
+	Ungraded      int            `json:"ungraded"`
+	BySeverity    map[string]int `json:"by_severity"` // confirmed findings per severity
+}
+
+// Scoreboard rolls runs + reports + latest grades up per model and returns one
+// ModelStat per model, sorted by model name. All counts of findings are
+// DISTINCT by finding (a model re-reporting across runs counts once).
+//
+// It runs three queries in sequence — run totals, finding counts by grade
+// state, and the confirmed-by-severity histogram — and merges them by model
+// name. A model's Provider is taken from the first run row seen for it; a
+// model that appears only in reports has an empty Provider. Any query, scan or
+// row-iteration error aborts the rollup and is returned.
+func (s *Store) Scoreboard() ([]ModelStat, error) {
+	stats := map[string]*ModelStat{}
+	get := func(model, provider string) *ModelStat {
+		m, ok := stats[model]
+		if !ok {
+			m = &ModelStat{Model: model, Provider: provider, BySeverity: map[string]int{}}
+			stats[model] = m
+		}
+		return m
+	}
+
+	// each runs query, hands every row to scan, and reports the first error —
+	// including one raised while advancing the cursor (rows.Err). The rows are
+	// closed before it returns, so the next query never runs while an earlier
+	// result set is still open.
+	each := func(query string, scan func(*sql.Rows) error) error {
+		rows, err := s.db.Query(query)
+		if err != nil {
+			return err
+		}
+		defer rows.Close()
+		for rows.Next() {
+			if err := scan(rows); err != nil {
+				return err
+			}
+		}
+		return rows.Err()
+	}
+
+	// Runs: minutes + tokens + run counts.
+	err := each(`
+SELECT model, provider, COUNT(*), COALESCE(SUM(duration_secs),0),
+       COALESCE(SUM(input_tokens),0), COALESCE(SUM(output_tokens),0)
+FROM runs GROUP BY model, provider`, func(rows *sql.Rows) error {
+		var model, provider string
+		var runs int
+		var dur float64
+		var in, out int64
+		if err := rows.Scan(&model, &provider, &runs, &dur, &in, &out); err != nil {
+			return err
+		}
+		// Accumulate: one model may span several provider groups.
+		m := get(model, provider)
+		m.Runs += runs
+		m.Minutes += dur / 60
+		m.InputTokens += in
+		m.OutputTokens += out
+		return nil
+	})
+	if err != nil {
+		return nil, err
+	}
+
+	// Findings: distinct per model, split by latest-grade state.
+	err = each(`
+SELECT r.model,
+       COUNT(DISTINCT r.finding_id),
+       COUNT(DISTINCT CASE WHEN lg.is_real=1 THEN r.finding_id END),
+       COUNT(DISTINCT CASE WHEN lg.is_real=0 THEN r.finding_id END),
+       COUNT(DISTINCT CASE WHEN lg.is_real IS NULL THEN r.finding_id END)
+FROM reports r LEFT JOIN latest_grades lg ON lg.finding_id = r.finding_id
+GROUP BY r.model`, func(rows *sql.Rows) error {
+		var model string
+		var total, confirmed, fp, ungraded int
+		if err := rows.Scan(&model, &total, &confirmed, &fp, &ungraded); err != nil {
+			return err
+		}
+		m := get(model, "")
+		m.Findings, m.Confirmed, m.FalsePositive, m.Ungraded = total, confirmed, fp, ungraded
+		return nil
+	})
+	if err != nil {
+		return nil, err
+	}
+
+	// Confirmed-by-severity histogram (distinct findings).
+	err = each(`
+SELECT r.model, lg.severity, COUNT(DISTINCT r.finding_id)
+FROM reports r JOIN latest_grades lg ON lg.finding_id = r.finding_id
+WHERE lg.is_real=1 AND lg.severity IS NOT NULL
+GROUP BY r.model, lg.severity`, func(rows *sql.Rows) error {
+		var model, sev string
+		var n int
+		if err := rows.Scan(&model, &sev, &n); err != nil {
+			return err
+		}
+		get(model, "").BySeverity[sev] = n
+		return nil
+	})
+	if err != nil {
+		return nil, err
+	}
+
+	// Map iteration order is random; sort for a stable response.
+	out := make([]ModelStat, 0, len(stats))
+	for _, m := range stats {
+		out = append(out, *m)
+	}
+	sort.Slice(out, func(i, j int) bool { return out[i].Model < out[j].Model })
+	return out, nil
+}
+
+// sortedSeverities returns the keys of validSeverities in ascending
+// alphabetical order, giving error messages a stable listing despite random
+// map iteration order.
+func sortedSeverities() []string {
+	names := make([]string, len(validSeverities))
+	i := 0
+	for name := range validSeverities {
+		names[i] = name
+		i++
+	}
+	sort.Strings(names)
+	return names
+}
+
+// nullStr maps the empty string to nil and returns any other string
+// unchanged, so that an empty value is bound as SQL NULL rather than ''.
+func nullStr(s string) any {
+	if len(s) > 0 {
+		return s
+	}
+	return nil
+}
@@ -0,0 +1,132 @@
+package main
+
+import (
+	"path/filepath"
+	"testing"
+)
+
+// testStore opens a fresh Store on a database file inside the test's
+// temporary directory and registers a cleanup that closes it. The test fails
+// immediately if the store cannot be opened.
+func testStore(t *testing.T) *Store {
+	t.Helper()
+
+	dbPath := filepath.Join(t.TempDir(), "gadfly-reports.db")
+	store, err := Open(dbPath)
+	if err != nil {
+		t.Fatalf("open: %v", err)
+	}
+	t.Cleanup(func() { store.Close() })
+	return store
+}
+
+// i64 returns a pointer to a fresh int64 holding v, for filling optional
+// *int64 fields in test fixtures.
+func i64(v int64) *int64 {
+	p := new(int64)
+	*p = v
+	return p
+}
+// intp returns a pointer to a fresh int holding v, for filling optional *int
+// fields in test fixtures.
+func intp(v int) *int {
+	p := new(int)
+	*p = v
+	return p
+}
+
+// TestConsensusAndGrade: when two models report the SAME location the reports
+// collapse onto one finding, and grading that finding once is reflected on
+// both models' scoreboard rows. Run timing and tokens stay per-model.
+func TestConsensusAndGrade(t *testing.T) {
+	s := testStore(t)
+
+	runs := []Run{
+		{RunID: "r-cloud", Repo: "steve/x", PR: 2, Model: "minimax", Provider: "ollama-cloud", Lenses: 3, DurationSecs: 300, InputTokens: i64(1000), OutputTokens: i64(500)},
+		{RunID: "r-m1", Repo: "steve/x", PR: 2, Model: "qwen3", Provider: "m1", Lenses: 3, DurationSecs: 1740},
+	}
+	for _, r := range runs {
+		if err := s.AddRun(r); err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	// Same lens, same file:line, different wording and different model.
+	reports := []ReportIn{
+		{Repo: "steve/x", PR: 2, Lens: "correctness", File: "run/executor.go", Line: 166, Title: "SetIteration never called", Model: "minimax", Provider: "ollama-cloud", RunID: "r-cloud", RawSeverity: "Blocking"},
+		{Repo: "steve/x", PR: 2, Lens: "correctness", File: "run/executor.go", Line: 166, Title: "iteration counter dead", Model: "qwen3", Provider: "m1", RunID: "r-m1", RawSeverity: "Blocking"},
+	}
+	ids, err := s.AddReports(reports)
+	if err != nil {
+		t.Fatal(err)
+	}
+	first, second := ids[0], ids[1]
+	if first != second {
+		t.Fatalf("same location should collapse to one finding id, got %q and %q", first, second)
+	}
+
+	grade := Grade{FindingID: first, IsReal: true, Severity: "high", Usefulness: intp(4), Grader: "claude"}
+	if err := s.AddGrade(grade); err != nil {
+		t.Fatal(err)
+	}
+
+	board, err := s.Scoreboard()
+	if err != nil {
+		t.Fatal(err)
+	}
+	byModel := make(map[string]ModelStat, len(board))
+	for i := range board {
+		byModel[board[i].Model] = board[i]
+	}
+
+	// The single grade must show up on both models.
+	for _, name := range []string{"minimax", "qwen3"} {
+		m := byModel[name]
+		ok := m.Findings == 1 && m.Confirmed == 1 && m.BySeverity["high"] == 1
+		if !ok {
+			t.Errorf("%s: findings=%d confirmed=%d high=%d, want 1/1/1", name, m.Findings, m.Confirmed, m.BySeverity["high"])
+		}
+	}
+
+	// 300s and 1740s of run time are 5 and 29 minutes respectively.
+	minimax, qwen := byModel["minimax"], byModel["qwen3"]
+	if got := minimax.Minutes; got != 5 {
+		t.Errorf("minimax minutes = %v, want 5", got)
+	}
+	if got := qwen.Minutes; got != 29 {
+		t.Errorf("qwen3 minutes = %v, want 29", got)
+	}
+	if got := minimax.InputTokens; got != 1000 {
+		t.Errorf("minimax input_tokens = %d, want 1000", got)
+	}
+}
+
+// TestLatestGradeWins: a re-grade supersedes the prior one everywhere. A
+// finding first graded real/critical and then re-graded as a false positive
+// must count as one false positive, zero confirmed, and contribute nothing to
+// the severity histogram.
+func TestLatestGradeWins(t *testing.T) {
+	s := testStore(t)
+	if err := s.AddRun(Run{RunID: "r1", Repo: "r", PR: 1, Model: "m", Provider: "p", DurationSecs: 60}); err != nil {
+		t.Fatal(err)
+	}
+	ids, err := s.AddReports([]ReportIn{{Repo: "r", PR: 1, Lens: "security", File: "a.go", Line: 5, Title: "x", Model: "m", Provider: "p", RunID: "r1"}})
+	if err != nil {
+		t.Fatal(err)
+	}
+	id := ids[0]
+	if err := s.AddGrade(Grade{FindingID: id, IsReal: true, Severity: "critical"}); err != nil {
+		t.Fatal(err)
+	}
+	if err := s.AddGrade(Grade{FindingID: id, IsReal: false}); err != nil { // re-graded as a false positive
+		t.Fatal(err)
+	}
+
+	// Fail with a message instead of panicking on board[0] when the rollup
+	// errors or comes back with an unexpected number of rows.
+	board, err := s.Scoreboard()
+	if err != nil {
+		t.Fatalf("scoreboard: %v", err)
+	}
+	if len(board) != 1 {
+		t.Fatalf("want 1 scoreboard row, got %d: %+v", len(board), board)
+	}
+	m := board[0]
+	if m.Confirmed != 0 || m.FalsePositive != 1 || m.BySeverity["critical"] != 0 {
+		t.Errorf("after re-grade: confirmed=%d fp=%d critical=%d, want 0/1/0", m.Confirmed, m.FalsePositive, m.BySeverity["critical"])
+	}
+}
+
+// TestGradeValidation rejects bad severity / usefulness / unknown finding, and
+// confirms that a false positive is accepted without a severity.
+func TestGradeValidation(t *testing.T) {
+	s := testStore(t)
+	// Fail with a message instead of panicking on ids[0] if the fixture
+	// report cannot be stored.
+	ids, err := s.AddReports([]ReportIn{{Repo: "r", PR: 1, Lens: "perf", File: "a.go", Line: 1, Title: "t", Model: "m", Provider: "p", RunID: "r1"}})
+	if err != nil {
+		t.Fatalf("add reports: %v", err)
+	}
+	if len(ids) != 1 {
+		t.Fatalf("want 1 finding id, got %v", ids)
+	}
+	id := ids[0]
+
+	if err := s.AddGrade(Grade{FindingID: id, IsReal: true, Severity: "huge"}); err == nil {
+		t.Error("expected error for invalid severity")
+	}
+	if err := s.AddGrade(Grade{FindingID: id, IsReal: true, Severity: "high", Usefulness: intp(9)}); err == nil {
+		t.Error("expected error for out-of-range usefulness")
+	}
+	if err := s.AddGrade(Grade{FindingID: "nope", IsReal: true, Severity: "high"}); err == nil {
+		t.Error("expected error for unknown finding")
+	}
+	// A false positive needs no severity.
+	if err := s.AddGrade(Grade{FindingID: id, IsReal: false}); err != nil {
+		t.Errorf("false positive without severity should be valid: %v", err)
+	}
+}
+
+// TestFindingIDLocationKeyed: the id is a function of location only — the
+// same inputs always give the same id, while changing the line or the lens
+// gives a different one.
+func TestFindingIDLocationKeyed(t *testing.T) {
+	base := findingID("r", 1, "security", "a.go", 10)
+
+	// findingID takes no title, so any wording maps to the same id.
+	if again := findingID("r", 1, "security", "a.go", 10); again != base {
+		t.Error("same location must yield same id regardless of wording")
+	}
+
+	differing := []struct {
+		id  string
+		msg string
+	}{
+		{findingID("r", 1, "security", "a.go", 11), "different line must yield different id"},
+		{findingID("r", 1, "correctness", "a.go", 10), "different lens must yield different id"},
+	}
+	for _, d := range differing {
+		if d.id == base {
+			t.Error(d.msg)
+		}
+	}
+}