feat: gadfly-reports — findings store + scoreboard daemon

SQLite-backed HTTP store for Gadfly review findings, per-review run timings, and human/Claude grades, with a points-free per-model scoreboard. Pure fact store: it computes no points or rankings (the dashboard maps severity->points client-side and retunes without re-scoring). Findings are content-addressed by location so cross-model reports collapse for consensus; one grade per finding, latest wins. Pure-Go SQLite (CGO-free) + Docker image CI + tests. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-26 23:55:24 -04:00
parent 52dce5eb2f
commit ddcf42a3ce
16 changed files with 1269 additions and 27 deletions
@@ -0,0 +1,10 @@
 .git
 .gitea
 *.db
 *.db-wal
 *.db-shm
 /data
 gadfly-reports
 README.md
 CLAUDE.md
 .env*
@@ -0,0 +1,11 @@
 # === gadfly-reports daemon configuration ===
 # Listen address (default: :8090)
 GADFLY_REPORTS_ADDR=:8090
 # SQLite database path (default: gadfly-reports.db; /data/gadfly-reports.db in Docker)
 GADFLY_REPORTS_DB=/data/gadfly-reports.db
 # Bearer token callers must present on every route except /healthz (empty = open).
 # gadfly (emit) and gadfly-mcp must present the same token.
 GADFLY_REPORTS_TOKEN=change-me-to-a-secret
@@ -0,0 +1,69 @@
 name: Build & push image
 # Builds the gadfly-reports daemon image and pushes it to the Gitea container
 # registry so it's easy to self-host.
 #
 #   push to main   -> :latest + :sha-<short>
 #   push tag v*    -> :<tag> + :latest
 #
 # Required repo secrets: REGISTRY_USER / REGISTRY_PASSWORD (registry push). The
 # Go build uses only PUBLIC modules, so no private-module creds are needed.
 on:
  push:
    branches: [main]
    tags: ["v*"]
    paths-ignore:
      - "**.md"
      - "LICENSE"
      - ".gitignore"
      - ".env.example"
  workflow_dispatch: {}
 concurrency:
  group: gadfly-reports-image-${{ github.ref }}
  cancel-in-progress: true
 env:
  IMAGE_NAME: gitea.stevedudenhoeffer.com/steve/gadfly-reports
 jobs:
  build-and-push:
    runs-on: ubuntu-latest
    timeout-minutes: 20
    steps:
      - uses: actions/checkout@v4
      - name: Set up Docker Buildx
        run: docker buildx create --use --name gr-builder --driver docker-container 2>/dev/null || docker buildx use gr-builder
      - name: Log in to the registry
        env:
          REGISTRY_USER: ${{ secrets.REGISTRY_USER }}
          REGISTRY_PASSWORD: ${{ secrets.REGISTRY_PASSWORD }}
        run: echo "${REGISTRY_PASSWORD}" | docker login gitea.stevedudenhoeffer.com -u "${REGISTRY_USER}" --password-stdin
      - name: Compute tags
        id: meta
        run: |
          SHA_SHORT=$(echo "${GITHUB_SHA}" | cut -c1-7)
          if [ "${{ github.ref_type }}" = "tag" ]; then
            TAGS="${IMAGE_NAME}:${GITHUB_REF_NAME},${IMAGE_NAME}:latest"
          else
            TAGS="${IMAGE_NAME}:latest,${IMAGE_NAME}:sha-${SHA_SHORT}"
          fi
          echo "tags=${TAGS}" >> "$GITHUB_OUTPUT"
          echo "Tags: ${TAGS}"
      - name: Build and push
        run: |
          TAG_FLAGS=""
          IFS=',' read -ra TAG_ARRAY <<< "${{ steps.meta.outputs.tags }}"
          for t in "${TAG_ARRAY[@]}"; do TAG_FLAGS="$TAG_FLAGS --tag $t"; done
          docker buildx build \
            --push \
            --platform linux/amd64 \
            $TAG_FLAGS \
            --add-host gitea.stevedudenhoeffer.com:192.168.0.134 \
            --file ./Dockerfile \
            .
@@ -0,0 +1,26 @@
 name: CI
 on:
  push:
    branches: [main]
  pull_request:
    types: [opened, synchronize, reopened]
  workflow_dispatch: {}
 jobs:
  test:
    runs-on: ubuntu-latest
    timeout-minutes: 10
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
        with:
          go-version: "1.26"
      - name: Build
        run: go build ./...
      - name: Vet
        run: go vet ./...
      - name: gofmt
        run: test -z "$(gofmt -l .)" || { gofmt -l .; echo "gofmt needed"; exit 1; }
      - name: Test (race)
        run: go test -race ./...
@@ -1,27 +1,9 @@
-# ---> Go
+# build output
-# If you prefer the allow list template instead of the deny list, see community template:
+/gadfly-reports
-# https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore
+# local SQLite databases
-#
+*.db
-# Binaries for programs and plugins
+*.db-wal
-*.exe
+*.db-shm
-*.exe~
+/data/
-*.dll
+# local env
 *.so
 *.dylib
 # Test binary, built with `go test -c`
 *.test
 # Output of the go coverage tool, specifically when used with LiteIDE
 *.out
 # Dependency directories (remove the comment below to include it)
 # vendor/
 # Go workspace file
 go.work
 go.work.sum
 # env file
 .env
@@ -0,0 +1,68 @@
 # gadfly-reports — Developer Guide
 A small Go + SQLite HTTP daemon that stores [Gadfly](https://gitea.stevedudenhoeffer.com/steve/gadfly)
 review findings, the per-review run timings, and human/Claude grades — and serves a points-free
 per-model scoreboard. The companion MCP client is
 [gadfly-mcp](https://gitea.stevedudenhoeffer.com/steve/gadfly-mcp).
 > This is a public, **vibe-coded** project (built largely by an AI agent). Keep that framing honest
 > in the README; don't oversell it — it's a homelab-grade store, not a hardened product.
 ## Core principle: store raw facts, score on the client
 gadfly-reports records **only facts**: runs (timing/tokens), findings (content-addressed by
 location), reports (which model raised which finding), and grades (`is_real` + `severity` +
 `usefulness`). It **never stores points or computes rankings**. Mapping `severity → points` and any
 "value per minute / per token" ranking is the dashboard's job. This is deliberate — keep it that way:
 do not add a points column or a weighting config to the store. Retuning the curve must never require
 a migration or a re-score.
 The severity vocabulary (`trivial|small|medium|high|critical`) in `store.go` is the **only**
 scoring-adjacent contract, and it's a closed set validated on write.
 ## Architecture
 ```
 main.go     subcommand dispatch (serve) + flags/env
 store.go    SQLite schema + types + queries (runs/findings/reports/grades + latest_grades view)
 server.go   net/http API (ServeMux method+path routes) + optional bearer auth
 *_test.go   store + server end-to-end tests (consensus, latest-grade-wins, validation, auth)
 Dockerfile  CGO-free build (pure-Go modernc sqlite) -> small alpine image
 .gitea/workflows/  ci.yml (build/vet/test) + build-image.yml (publish :latest + :sha-<short>)
 ```
 **Data model.** A **finding** is identified by `sha256(repo|pr|lens|file|line)[:16]` — *not* by
 wording — so the same issue from different models (or a re-review) collapses to one finding with many
 **reports**. One *effective* **grade** per finding: every grade is kept as history, and the latest wins via the `latest_grades` view.
 ## Dependencies
 - **modernc.org/sqlite** (pure Go) — chosen so the binary is CGO-free and `go run …@latest`/the
  Docker build need no C toolchain. Don't swap in a cgo driver.
 - Otherwise stdlib only. The MCP SDK lives in gadfly-mcp, **not** here — keep this daemon lean.
 ## Build / test
 ```sh
 go build ./...
 go vet ./...
 gofmt -l .        # must be empty
 go test -race ./...
 ```
 ## Release / deploy
 - **Push to `main`** → CI builds and publishes `:latest` (+ `:sha-<short>`) to
  `gitea.stevedudenhoeffer.com/steve/gadfly-reports`.
 - **Tag `v*`** → publishes `:<tag>` (+ `:latest`).
 - CI needs repo secrets `REGISTRY_USER` / `REGISTRY_PASSWORD` to push the image (the Go build itself
  uses only public modules — no private-module creds needed).
 ## When making changes
 - Keep the **README API table** in sync with `server.go` routes and `store.go` JSON tags — it is the
  contract gadfly (emit) and gadfly-mcp rely on. Stale docs are a bug.
 - Preserve the **store-no-points** principle (see above).
 - Add a test when you add logic. Keep `gofmt` clean and `go vet` quiet.
 - The schema uses `CREATE TABLE IF NOT EXISTS` migrations applied on `Open`; additive changes are
  fine, destructive ones need a real migration story (there isn't one yet — it's a homelab store).
@@ -0,0 +1,19 @@
 # gadfly-reports daemon image. modernc.org/sqlite is pure Go, so the binary is
 # CGO-free and the final image needs no libc / no C toolchain at build time.
 FROM golang:1.26 AS build
 WORKDIR /src
 COPY go.mod go.sum ./
 RUN go mod download
 COPY . .
 RUN CGO_ENABLED=0 go build -trimpath -ldflags="-s -w" -o /out/gadfly-reports .
 FROM alpine:3.20
 RUN adduser -D -u 10001 app && mkdir -p /data && chown app /data
 COPY --from=build /out/gadfly-reports /usr/local/bin/gadfly-reports
 USER app
 ENV GADFLY_REPORTS_ADDR=:8090 \
    GADFLY_REPORTS_DB=/data/gadfly-reports.db
 EXPOSE 8090
 VOLUME ["/data"]
 ENTRYPOINT ["/usr/local/bin/gadfly-reports"]
 CMD ["serve"]
@@ -0,0 +1,21 @@
 MIT License
 Copyright (c) 2026 Steve Dudenhoeffer
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
@@ -1,2 +1,96 @@
-# gadfly-reports
+# 🪰📋 gadfly-reports
 A small **durable store + scoreboard** for [Gadfly](https://gitea.stevedudenhoeffer.com/steve/gadfly)
 review findings. Gadfly (and any CI) POST each model's findings and per-review timing here; a human
 or Claude — via [gadfly-mcp](https://gitea.stevedudenhoeffer.com/steve/gadfly-mcp) — later grades
 each finding. It's a single Go binary backed by SQLite, speaking a tiny HTTP API.
 > ### 🤖 Heads up: this is a vibe-coded project
 > gadfly-reports was built almost entirely by an AI agent (Claude Code) — the design, the code, and
 > these docs. It's small and it's tested, but treat it accordingly: it's a homelab-grade service,
 > not a hardened product, and there may be the occasional AI-flavored rough edge. Issues and PRs
 > welcome.
 ## What it stores — and what it deliberately doesn't
 gadfly-reports is a **pure fact store**:
 - **runs** — one per model's review of a PR: wall-clock duration, lens count, optional token/cost.
 - **findings** — **content-addressed by location** (`repo + pr + lens + file + line`), so the *same*
  issue raised by several models collapses to one finding with many **reports**. That collapse is
  what makes cross-model **consensus** and per-model **precision** measurable.
 - **grades** — a triage verdict per finding: `is_real`, `severity`
  (`trivial|small|medium|high|critical`), optional `usefulness` (1–5), notes, grader. Grade history
  is kept; the latest wins.
 It stores **no points and computes no rankings.** Mapping severity → points and ranking models by
 "value per minute" (or per token) is a **client/dashboard concern**, so you can retune the curve any
 time without migrating or re-scoring stored data.
 ## Run it
 ```sh
 # from source
 go run gitea.stevedudenhoeffer.com/steve/gadfly-reports@latest serve
 # or Docker (image published by CI on every push to main)
 docker run -d --name gadfly-reports -p 8090:8090 -v gadfly-reports-data:/data \
  -e GADFLY_REPORTS_TOKEN=change-me \
  gitea.stevedudenhoeffer.com/steve/gadfly-reports:latest
 ```
 ## HTTP API (the canonical contract)
 | Method & path | Body / query | Purpose |
 |---|---|---|
 | `GET /healthz` | — | liveness (open even when a token is set) |
 | `POST /runs` | one run object | upsert a model's review of a PR (timing/tokens) |
 | `POST /reports` | JSON **array** of report objects | record findings + which model reported each |
 | `POST /findings/{id}/grade` | `{is_real, severity?, usefulness?, notes?, grader?}` | record a triage grade |
 | `GET /export` | — | flat report×finding×run×latest-grade rows — the dashboard feed |
 | `GET /scoreboard` | — | points-free per-model rollup |
 `POST /runs` body: `{run_id, repo, pr, model, provider, lenses, duration_secs, input_tokens?, output_tokens?, cost_usd?}`
 (re-posting the same `run_id` updates it).
 `POST /reports` array element: `{repo, pr, lens, file, line, title, model, provider, run_id, raw_severity, detail}`.
 `GET /scoreboard` element: `{model, provider, runs, minutes, input_tokens, output_tokens, findings, confirmed, false_positive, ungraded, by_severity:{severity:count}}`.
 If `GADFLY_REPORTS_TOKEN` is set, every route except `/healthz` requires `Authorization: Bearer <token>`.
 ## Configuration
 | Env | Default | Meaning |
 |-----|---------|---------|
 | `GADFLY_REPORTS_ADDR` | `:8090` | listen address |
 | `GADFLY_REPORTS_DB` | `gadfly-reports.db` (`/data/gadfly-reports.db` in Docker) | SQLite path |
 | `GADFLY_REPORTS_TOKEN` | *(empty)* | bearer token callers must present (empty = open) |
 CLI flags `--addr` / `--db` / `--token` override the env.
 ## Dashboards
 Point anything at the JSON endpoints (or the SQLite file read-only). `GET /export` is the flat feed;
 `GET /scoreboard` is the per-model rollup. Compute points and value-per-minute **in the dashboard**,
 e.g. with a curve like `trivial=1, small=3, medium=5, high=8, critical=20` →
 `points = Σ weight[severity]·by_severity[severity]`, `value/min = points / minutes`.
 ## How it fits together
 - **[gadfly](https://gitea.stevedudenhoeffer.com/steve/gadfly)** POSTs findings here after each
  review when `GADFLY_FINDINGS_URL` points at this store (advisory; off by default).
 - **[gadfly-mcp](https://gitea.stevedudenhoeffer.com/steve/gadfly-mcp)** is the MCP server Claude
  uses to list findings and record grades against this store.
 ## Build / test
 ```sh
 go build ./...
 go test ./...
 gofmt -l .   # must be clean
 ```
 ## License
 MIT © 2026 Steve Dudenhoeffer.
@@ -0,0 +1,17 @@
 module gitea.stevedudenhoeffer.com/steve/gadfly-reports
 go 1.26
 require modernc.org/sqlite v1.53.0
 require (
 	github.com/dustin/go-humanize v1.0.1 // indirect
 	github.com/google/uuid v1.6.0 // indirect
 	github.com/mattn/go-isatty v0.0.20 // indirect
 	github.com/ncruces/go-strftime v1.0.0 // indirect
 	github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
 	golang.org/x/sys v0.44.0 // indirect
 	modernc.org/libc v1.73.4 // indirect
 	modernc.org/mathutil v1.7.1 // indirect
 	modernc.org/memory v1.11.0 // indirect
 )
@@ -0,0 +1,51 @@
 github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
 github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
 github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs=
 github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
 github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
 github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
 github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
 github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
 github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
 github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
 github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
 github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
 github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
 golang.org/x/mod v0.36.0 h1:JJjpVx6myfUsUdAzZuOSTTmRE0PfZeNWzzvKrP7amb4=
 golang.org/x/mod v0.36.0/go.mod h1:moc6ELqsWcOw5Ef3xVprK5ul/MvtVvkIXLziUOICjUQ=
 golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4=
 golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.44.0 h1:ildZl3J4uzeKP07r2F++Op7E9B29JRUy+a27EibtBTQ=
 golang.org/x/sys v0.44.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
 golang.org/x/tools v0.45.0 h1:18qN3FAooORvApf5XjCXgsuayZOEtXf6JK18I3+ONa8=
 golang.org/x/tools v0.45.0/go.mod h1:LuUGqqaXcXMEFEruIVJVm5mgDD8vww/z/SR1gQ4uE/0=
 modernc.org/cc/v4 v4.28.4 h1:Hd/4Es+MBj+/7hSdZaisNyu6bv3V0Dp2MdllyfqaH+c=
 modernc.org/cc/v4 v4.28.4/go.mod h1:OnovgIhbbMXMu1aISnJ0wvVD1KnW+cAUJkIrAWh+kVI=
 modernc.org/ccgo/v4 v4.34.4 h1:OVnSOWQjVKOYkFxoHYB+qQmSHK5gqMqARM+K9DpR/Ws=
 modernc.org/ccgo/v4 v4.34.4/go.mod h1:qdKqE8FNIYyysougB1RX9MxCzp5oJOcQXSobANJ4TuE=
 modernc.org/fileutil v1.4.0 h1:j6ZzNTftVS054gi281TyLjHPp6CPHr2KCxEXjEbD6SM=
 modernc.org/fileutil v1.4.0/go.mod h1:EqdKFDxiByqxLk8ozOxObDSfcVOv/54xDs/DUHdvCUU=
 modernc.org/gc/v2 v2.6.5 h1:nyqdV8q46KvTpZlsw66kWqwXRHdjIlJOhG6kxiV/9xI=
 modernc.org/gc/v2 v2.6.5/go.mod h1:YgIahr1ypgfe7chRuJi2gD7DBQiKSLMPgBQe9oIiito=
 modernc.org/gc/v3 v3.1.3 h1:6QAplYyVO+KdPW3pGnqmJDUxtkec8ooEWvks/hhU3lc=
 modernc.org/gc/v3 v3.1.3/go.mod h1:HFK/6AGESC7Ex+EZJhJ2Gni6cTaYpSMmU/cT9RmlfYY=
 modernc.org/goabi0 v0.2.0 h1:HvEowk7LxcPd0eq6mVOAEMai46V+i7Jrj13t4AzuNks=
 modernc.org/goabi0 v0.2.0/go.mod h1:CEFRnnJhKvWT1c1JTI3Avm+tgOWbkOu5oPA8eH8LnMI=
 modernc.org/libc v1.73.4 h1:+ra4Ui8ngyt8HDcO1FTDPWlkAh6yOdaO2yAoh8MddQA=
 modernc.org/libc v1.73.4/go.mod h1:DXZ3eO8qMCNn2SnmTNCiC71nJ9Rcq3PsnpU6Vc4rWK8=
 modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU=
 modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg=
 modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI=
 modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw=
 modernc.org/opt v0.2.0 h1:tGyef5ApycA7FSEOMraay9SaTk5zmbx7Tu+cJs4QKZg=
 modernc.org/opt v0.2.0/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns=
 modernc.org/sortutil v1.2.1 h1:+xyoGf15mM3NMlPDnFqrteY07klSFxLElE2PVuWIJ7w=
 modernc.org/sortutil v1.2.1/go.mod h1:7ZI3a3REbai7gzCLcotuw9AC4VZVpYMjDzETGsSMqJE=
 modernc.org/sqlite v1.53.0 h1:20WG8N9q4ji/dEqGk4uiI0c6OPjSeLTNYGFCc3+7c1M=
 modernc.org/sqlite v1.53.0/go.mod h1:xoEpOIpGrgT48H5iiyt/YXPCZPEzlfmfFwtk8Lklw8s=
 modernc.org/strutil v1.2.1 h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0=
 modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A=
 modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y=
 modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM=
@@ -0,0 +1,74 @@
 // Command gadfly-reports is a small, durable store + scoreboard for Gadfly's review
 // findings. Gadfly (and CI) report each model's findings and per-review timing
 // here; a human or Claude later grades each finding (is_real + severity +
 // usefulness). gadfly-reports stores only those RAW facts — it deliberately does NOT
 // compute points or rankings, so the dashboard/client owns the scoring curve
 // (severity -> points, value-per-minute, value-per-token) and can retune it
 // without migrating or re-scoring stored data.
 //
 // Subcommands:
 //
 //	gadfly-reports serve [flags]   run the HTTP + SQLite store (the long-running daemon)
 //
 // The MCP server Claude calls to record grades lives in the separate gadfly-mcp
 // repository, so this daemon stays lean; run it with `go run <module>@latest serve`.
 package main
 import (
 	"flag"
 	"fmt"
 	"log"
 	"net/http"
 	"os"
 )
 func main() {
 	if len(os.Args) < 2 {
 		usage()
 		os.Exit(2)
 	}
 	switch os.Args[1] {
 	case "serve":
 		serveCmd(os.Args[2:])
 	default:
 		usage()
 		os.Exit(2)
 	}
 }
 // usage writes the top-level help text to standard error.
 func usage() {
 	const help = `gadfly-reports — durable store + scoreboard for Gadfly review findings
 Usage:
  gadfly-reports serve [flags]     run the HTTP + SQLite store
 Run "gadfly-reports serve -h" for flags.
 `
 	fmt.Fprint(os.Stderr, help)
 }
 // serveCmd runs the "serve" subcommand: it parses flags (each defaulting to its
 // GADFLY_REPORTS_* environment variable), opens the SQLite store at the chosen
 // path, and serves the HTTP API on the chosen address. It only returns if the
 // listener stops without an error; any failure terminates the process.
 func serveCmd(args []string) {
 	fs := flag.NewFlagSet("serve", flag.ExitOnError)
 	addr := fs.String("addr", envOr("GADFLY_REPORTS_ADDR", ":8090"), "listen address")
 	dbPath := fs.String("db", envOr("GADFLY_REPORTS_DB", "gadfly-reports.db"), "SQLite database path")
 	token := fs.String("token", os.Getenv("GADFLY_REPORTS_TOKEN"), "bearer token callers must present (empty = open)")
 	// With flag.ExitOnError, Parse exits the process itself on a bad flag, so
 	// there is no error left to handle here.
 	_ = fs.Parse(args)
 	store, err := Open(*dbPath)
 	if err != nil {
 		log.Fatalf("gadfly-reports: %v", err)
 	}
 	log.Printf("gadfly-reports: serving %s (db=%s, auth=%v)", *addr, *dbPath, *token != "")
 	srv := &http.Server{Addr: *addr, Handler: newServer(store, *token)}
 	err = srv.ListenAndServe()
 	// log.Fatalf exits via os.Exit, which skips deferred calls, so the store is
 	// closed explicitly before the error is reported instead of in a defer.
 	store.Close()
 	if err != nil {
 		log.Fatalf("gadfly-reports: %v", err)
 	}
 }
 // envOr returns the value of the environment variable key, or def when that
 // variable is unset or empty.
 func envOr(key, def string) string {
 	val := os.Getenv(key)
 	if val == "" {
 		return def
 	}
 	return val
 }
@@ -0,0 +1,121 @@
 package main
 import (
 	"crypto/subtle"
 	"encoding/json"
 	"errors"
 	"log"
 	"net/http"
 	"strings"
 )
 // newServer builds the HTTP API on top of store and wraps it in auth, so that a
 // non-empty token is required ("Authorization: Bearer <token>") on every route
 // except /healthz.
 //
 // Routes:
 //
 //	GET  /healthz                  liveness
 //	POST /runs                     upsert one run (model review of a PR; timing/tokens)
 //	POST /reports                  record a batch of findings + this model's reports
 //	POST /findings/{id}/grade      record a triage grade (is_real, severity, …)
 //	GET  /export                   flat report×finding×grade rows (the dashboard feed)
 //	GET  /scoreboard               points-free per-model rollup
 //
 // Store errors on the write routes are answered with 400; store errors on the
 // read routes are answered with 500.
 func newServer(store *Store, token string) http.Handler {
 	healthz := func(w http.ResponseWriter, _ *http.Request) {
 		writeJSON(w, http.StatusOK, map[string]string{"status": "ok"})
 	}
 	postRun := func(w http.ResponseWriter, r *http.Request) {
 		var in Run
 		if ok := decode(w, r, &in); !ok {
 			return
 		}
 		err := store.AddRun(in)
 		if err != nil {
 			writeErr(w, http.StatusBadRequest, err)
 			return
 		}
 		writeJSON(w, http.StatusOK, map[string]string{"run_id": in.RunID})
 	}
 	postReports := func(w http.ResponseWriter, r *http.Request) {
 		var batch []ReportIn
 		if ok := decode(w, r, &batch); !ok {
 			return
 		}
 		findingIDs, err := store.AddReports(batch)
 		if err != nil {
 			writeErr(w, http.StatusBadRequest, err)
 			return
 		}
 		writeJSON(w, http.StatusOK, map[string]any{"finding_ids": findingIDs})
 	}
 	postGrade := func(w http.ResponseWriter, r *http.Request) {
 		var grade Grade
 		if ok := decode(w, r, &grade); !ok {
 			return
 		}
 		// The finding id always comes from the URL, overriding anything in the body.
 		grade.FindingID = r.PathValue("id")
 		err := store.AddGrade(grade)
 		if err != nil {
 			writeErr(w, http.StatusBadRequest, err)
 			return
 		}
 		writeJSON(w, http.StatusOK, map[string]string{"finding_id": grade.FindingID})
 	}
 	getExport := func(w http.ResponseWriter, _ *http.Request) {
 		rows, err := store.Export()
 		if err != nil {
 			writeErr(w, http.StatusInternalServerError, err)
 			return
 		}
 		writeJSON(w, http.StatusOK, rows)
 	}
 	getScoreboard := func(w http.ResponseWriter, _ *http.Request) {
 		stats, err := store.Scoreboard()
 		if err != nil {
 			writeErr(w, http.StatusInternalServerError, err)
 			return
 		}
 		writeJSON(w, http.StatusOK, stats)
 	}
 	mux := http.NewServeMux()
 	mux.HandleFunc("GET /healthz", healthz)
 	mux.HandleFunc("POST /runs", postRun)
 	mux.HandleFunc("POST /reports", postReports)
 	mux.HandleFunc("POST /findings/{id}/grade", postGrade)
 	mux.HandleFunc("GET /export", getExport)
 	mux.HandleFunc("GET /scoreboard", getScoreboard)
 	return auth(token, mux)
 }
 // auth gates everything but /healthz behind a bearer token, when one is set.
 //
 // With an empty token the API is open and next is returned unwrapped. Otherwise
 // a request must carry "Authorization: Bearer <token>": the "Bearer " scheme
 // prefix is required (a bare token in the header is rejected), surrounding
 // whitespace around the token is tolerated, and the comparison is done in
 // constant time so response timing does not leak how much of the token matched.
 // Rejected requests get a 401 JSON error and never reach next.
 func auth(token string, next http.Handler) http.Handler {
 	if token == "" {
 		return next
 	}
 	want := []byte(token)
 	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		if r.URL.Path != "/healthz" {
 			presented, hasScheme := strings.CutPrefix(r.Header.Get("Authorization"), "Bearer ")
 			got := []byte(strings.TrimSpace(presented))
 			if !hasScheme || subtle.ConstantTimeCompare(got, want) != 1 {
 				writeErr(w, http.StatusUnauthorized, errors.New("missing or invalid bearer token"))
 				return
 			}
 		}
 		next.ServeHTTP(w, r)
 	})
 }
 // decode reads a JSON request body into v. On failure it writes the error
 // response itself and returns false, so callers just return.
 //
 // The body is capped at 8 MiB so a client cannot make the daemon buffer an
 // arbitrarily large payload; exceeding the cap yields 413, any other decode
 // failure yields 400.
 func decode(w http.ResponseWriter, r *http.Request, v any) bool {
 	const maxBodyBytes = 8 << 20
 	body := http.MaxBytesReader(w, r.Body, maxBodyBytes)
 	err := json.NewDecoder(body).Decode(v)
 	if err == nil {
 		return true
 	}
 	var tooBig *http.MaxBytesError
 	if errors.As(err, &tooBig) {
 		writeErr(w, http.StatusRequestEntityTooLarge, errors.New("request body too large"))
 		return false
 	}
 	writeErr(w, http.StatusBadRequest, errors.New("invalid JSON body: "+err.Error()))
 	return false
 }
 // writeJSON sends v as a JSON response with the given status code. The status
 // line is written before encoding, so an encoding failure can only be logged,
 // not reported to the client.
 func writeJSON(w http.ResponseWriter, code int, v any) {
 	w.Header().Set("Content-Type", "application/json")
 	w.WriteHeader(code)
 	enc := json.NewEncoder(w)
 	err := enc.Encode(v)
 	if err == nil {
 		return
 	}
 	log.Printf("gadfly-reports: write response: %v", err)
 }
 // writeErr sends err as a JSON object of the form {"error": "<message>"} with
 // the given status code.
 func writeErr(w http.ResponseWriter, code int, err error) {
 	payload := map[string]string{"error": err.Error()}
 	writeJSON(w, code, payload)
 }
@@ -0,0 +1,100 @@
 package main
 import (
 	"bytes"
 	"encoding/json"
 	"net/http"
 	"net/http/httptest"
 	"path/filepath"
 	"testing"
 )
 // testServer starts an httptest server for the API, backed by a fresh store in
 // the test's temp dir and guarded by token (empty = open). Both the server and
 // the store are torn down through t.Cleanup.
 func testServer(t *testing.T, token string) *httptest.Server {
 	t.Helper()
 	dbFile := filepath.Join(t.TempDir(), "gadfly-reports.db")
 	store, err := Open(dbFile)
 	if err != nil {
 		t.Fatal(err)
 	}
 	t.Cleanup(func() { store.Close() })
 	handler := newServer(store, token)
 	ts := httptest.NewServer(handler)
 	t.Cleanup(ts.Close)
 	return ts
 }
 func post(t *testing.T, srv *httptest.Server, token, path string, body any) *http.Response {
 	t.Helper()
 	b, _ := json.Marshal(body)
 	req, _ := http.NewRequest("POST", srv.URL+path, bytes.NewReader(b))
 	req.Header.Set("Content-Type", "application/json")
 	if token != "" {
 		req.Header.Set("Authorization", "Bearer "+token)
 	}
 	resp, err := http.DefaultClient.Do(req)
 	if err != nil {
 		t.Fatalf("POST %s: %v", path, err)
 	}
 	return resp
 }
 // TestServerEndToEnd: run -> reports -> grade -> scoreboard over HTTP.
 //
 // It posts one 120-second run and one report for it, grades the resulting
 // finding as a real "medium" issue, and expects the scoreboard to show a single
 // model with one confirmed finding, one medium, and 2 minutes of run time.
 func TestServerEndToEnd(t *testing.T) {
 	srv := testServer(t, "")
 	if resp := post(t, srv, "", "/runs", Run{RunID: "r1", Repo: "r", PR: 1, Model: "m", Provider: "p", DurationSecs: 120}); resp.StatusCode != 200 {
 		t.Fatalf("POST /runs = %d", resp.StatusCode)
 	}
 	resp := post(t, srv, "", "/reports", []ReportIn{
 		{Repo: "r", PR: 1, Lens: "security", File: "a.go", Line: 7, Title: "leak", Model: "m", Provider: "p", RunID: "r1"},
 	})
 	defer resp.Body.Close()
 	if resp.StatusCode != 200 {
 		t.Fatalf("POST /reports = %d", resp.StatusCode)
 	}
 	var rep struct {
 		FindingIDs []string `json:"finding_ids"`
 	}
 	// A decode failure is reported as such instead of surfacing later as a
 	// confusing "want 1 finding id" mismatch.
 	if err := json.NewDecoder(resp.Body).Decode(&rep); err != nil {
 		t.Fatalf("decode POST /reports response: %v", err)
 	}
 	if len(rep.FindingIDs) != 1 {
 		t.Fatalf("want 1 finding id, got %v", rep.FindingIDs)
 	}
 	id := rep.FindingIDs[0]
 	if resp := post(t, srv, "", "/findings/"+id+"/grade", Grade{IsReal: true, Severity: "medium", Grader: "claude"}); resp.StatusCode != 200 {
 		t.Fatalf("POST grade = %d", resp.StatusCode)
 	}
 	resp = mustGet(t, srv, "", "/scoreboard")
 	defer resp.Body.Close()
 	if resp.StatusCode != http.StatusOK {
 		t.Fatalf("GET /scoreboard = %d", resp.StatusCode)
 	}
 	var board []ModelStat
 	if err := json.NewDecoder(resp.Body).Decode(&board); err != nil {
 		t.Fatalf("decode GET /scoreboard response: %v", err)
 	}
 	if len(board) != 1 || board[0].Confirmed != 1 || board[0].BySeverity["medium"] != 1 || board[0].Minutes != 2 {
 		t.Fatalf("unexpected scoreboard: %+v", board)
 	}
 }
 // TestServerAuth: with a token configured, a POST without it is rejected with
 // 401, the same POST with it succeeds, and /healthz stays reachable without it.
 func TestServerAuth(t *testing.T) {
 	srv := testServer(t, "secret")
 	run := Run{RunID: "r1", Model: "m"}
 	anonymous := post(t, srv, "", "/runs", run)
 	if got := anonymous.StatusCode; got != http.StatusUnauthorized {
 		t.Errorf("unauthenticated POST = %d, want 401", got)
 	}
 	authorized := post(t, srv, "secret", "/runs", run)
 	if got := authorized.StatusCode; got != 200 {
 		t.Errorf("authenticated POST = %d, want 200", got)
 	}
 	health := mustGet(t, srv, "", "/healthz")
 	if got := health.StatusCode; got != 200 {
 		t.Errorf("healthz should be open, got %d", got)
 	}
 }
 func mustGet(t *testing.T, srv *httptest.Server, token, path string) *http.Response {
 	t.Helper()
 	req, _ := http.NewRequest("GET", srv.URL+path, nil)
 	if token != "" {
 		req.Header.Set("Authorization", "Bearer "+token)
 	}
 	resp, err := http.DefaultClient.Do(req)
 	if err != nil {
 		t.Fatalf("GET %s: %v", path, err)
 	}
 	return resp
 }
@@ -0,0 +1,447 @@
 package main
 import (
 	"crypto/sha256"
 	"database/sql"
 	"encoding/hex"
 	"fmt"
 	"sort"
 	"strings"
 	"time"
 	_ "modernc.org/sqlite"
 )
 // gadfly-reports stores only RAW review facts: which model reported which finding, how
 // long each model's review took, and a human/Claude grade (is_real + severity +
 // usefulness). It deliberately does NOT compute points or rankings — the
 // dashboard owns the scoring curve (severity -> points, value-per-minute), so it
 // can be retuned without re-scoring or migrating stored data. The severity
 // vocabulary below is the only scoring-related contract.
 // validSeverities is the closed set a grade may assign to a REAL finding. The
 // client maps these to points however it likes (e.g. trivial=1 … critical=20).
 // Keys are lowercase; AddGrade trims and lowercases the submitted severity
 // before looking it up here, and sortedSeverities renders the keys for error
 // messages.
 var validSeverities = map[string]bool{
 	"trivial":  true,
 	"small":    true,
 	"medium":   true,
 	"high":     true,
 	"critical": true,
 }
 // schema is the DDL that Open executes against the database on every start.
 // Every statement is IF NOT EXISTS, so it only creates what is missing. It
 // declares four tables (runs, findings, reports, grades), their lookup indexes,
 // and the latest_grades view, which picks each finding's grade row with the
 // highest id.
 // NOTE(review): no FOREIGN KEY clauses are declared here, so the
 // foreign_keys(on) pragma set in Open currently has nothing to enforce.
 const schema = `
 CREATE TABLE IF NOT EXISTS runs (
  run_id        TEXT PRIMARY KEY,
  repo          TEXT NOT NULL,
  pr            INTEGER NOT NULL,
  model         TEXT NOT NULL,
  provider      TEXT NOT NULL,
  lenses        INTEGER NOT NULL DEFAULT 0,
  duration_secs REAL    NOT NULL DEFAULT 0,
  input_tokens  INTEGER,
  output_tokens INTEGER,
  cost_usd      REAL,
  created_at    TEXT    NOT NULL
 );
 CREATE TABLE IF NOT EXISTS findings (
  id         TEXT PRIMARY KEY,
  repo       TEXT NOT NULL,
  pr         INTEGER NOT NULL,
  lens       TEXT NOT NULL,
  file       TEXT,
  line       INTEGER,
  title      TEXT NOT NULL,
  first_seen TEXT NOT NULL
 );
 CREATE TABLE IF NOT EXISTS reports (
  id           INTEGER PRIMARY KEY AUTOINCREMENT,
  finding_id   TEXT NOT NULL,
  run_id       TEXT NOT NULL,
  model        TEXT NOT NULL,
  provider     TEXT NOT NULL,
  raw_severity TEXT,
  detail       TEXT,
  created_at   TEXT NOT NULL,
  UNIQUE(finding_id, run_id)
 );
 CREATE INDEX IF NOT EXISTS idx_reports_finding ON reports(finding_id);
 CREATE INDEX IF NOT EXISTS idx_reports_model   ON reports(model);
 CREATE TABLE IF NOT EXISTS grades (
  id         INTEGER PRIMARY KEY AUTOINCREMENT,
  finding_id TEXT NOT NULL,
  is_real    INTEGER NOT NULL,
  severity   TEXT,
  usefulness INTEGER,
  notes      TEXT,
  grader     TEXT,
  created_at TEXT NOT NULL
 );
 CREATE INDEX IF NOT EXISTS idx_grades_finding ON grades(finding_id);
 -- latest_grades: the most recent grade per finding (grade history is kept; the
 -- latest wins). Used by every read path so a re-grade supersedes the old one.
 CREATE VIEW IF NOT EXISTS latest_grades AS
 SELECT g.* FROM grades g
 JOIN (SELECT finding_id, MAX(id) AS max_id FROM grades GROUP BY finding_id) m
  ON g.id = m.max_id;
 `
 // Store is the fact store: a thin wrapper around the SQLite handle that holds
 // every run, finding, report, and grade.
 type Store struct {
 	db *sql.DB
 }
 // Open returns a Store backed by the SQLite database at path (creating it if
 // needed) with the schema applied. The connection string enables a 5000 ms busy
 // timeout, WAL journaling, and foreign-key enforcement, and the pool is capped
 // at one connection.
 func Open(path string) (*Store, error) {
 	const pragmas = "?_pragma=busy_timeout(5000)&_pragma=journal_mode(WAL)&_pragma=foreign_keys(on)"
 	handle, err := sql.Open("sqlite", "file:"+path+pragmas)
 	if err != nil {
 		return nil, fmt.Errorf("open %s: %w", path, err)
 	}
 	// modernc's pure-Go driver is happiest with a single writer connection.
 	handle.SetMaxOpenConns(1)
 	_, err = handle.Exec(schema)
 	if err == nil {
 		return &Store{db: handle}, nil
 	}
 	// Schema failed: release the handle before reporting.
 	handle.Close()
 	return nil, fmt.Errorf("migrate: %w", err)
 }
 // Close closes the underlying database handle and returns its error, if any.
 func (s *Store) Close() error {
 	return s.db.Close()
 }
 // now returns the current time in UTC, formatted as an RFC 3339 string.
 func now() string {
 	stamp := time.Now().UTC()
 	return stamp.Format(time.RFC3339)
 }
 // findingID derives a finding's id from WHERE it was reported (repo, PR, lens,
 // file, line) and never from its title. Identical locations therefore share
 // one id no matter which model raised them or how they were worded, which is
 // what lets many reports collapse onto one finding for consensus and precision
 // measurement. Repo, lens, and file are whitespace-trimmed and the lens is
 // lowercased before hashing; the id is the first 16 hex characters of the
 // SHA-256 of the resulting key.
 func findingID(repo string, pr int, lens, file string, line int) string {
 	normRepo := strings.TrimSpace(repo)
 	normLens := strings.ToLower(strings.TrimSpace(lens))
 	normFile := strings.TrimSpace(file)
 	digest := sha256.Sum256([]byte(fmt.Sprintf("%s|%d|%s|%s|%d", normRepo, pr, normLens, normFile, line)))
 	// The first 8 digest bytes encode to exactly 16 hex characters.
 	return hex.EncodeToString(digest[:8])
 }
 // Run is one model's review of one PR — the unit run.sh times. AddRun requires
 // RunID and Model to be non-blank. DurationSecs is in seconds (Scoreboard
 // divides it by 60 to report minutes). The token counts and cost are optional:
 // they are pointers, and a nil value is left out of the JSON encoding.
 type Run struct {
 	RunID        string   `json:"run_id"`
 	Repo         string   `json:"repo"`
 	PR           int      `json:"pr"`
 	Model        string   `json:"model"`
 	Provider     string   `json:"provider"`
 	Lenses       int      `json:"lenses"`
 	DurationSecs float64  `json:"duration_secs"`
 	InputTokens  *int64   `json:"input_tokens,omitempty"`
 	OutputTokens *int64   `json:"output_tokens,omitempty"`
 	CostUSD      *float64 `json:"cost_usd,omitempty"`
 }
 // AddRun stores r keyed by run_id. Posting a run_id that already exists
 // overwrites every stored column except created_at, so a re-posted run
 // refreshes its timing, tokens, and cost. RunID and Model must be non-blank.
 func (s *Store) AddRun(r Run) error {
 	for _, required := range []string{r.RunID, r.Model} {
 		if strings.TrimSpace(required) == "" {
 			return fmt.Errorf("run_id and model are required")
 		}
 	}
 	const upsert = `
 INSERT INTO runs (run_id, repo, pr, model, provider, lenses, duration_secs, input_tokens, output_tokens, cost_usd, created_at)
 VALUES (?,?,?,?,?,?,?,?,?,?,?)
 ON CONFLICT(run_id) DO UPDATE SET
  repo=excluded.repo, pr=excluded.pr, model=excluded.model, provider=excluded.provider,
  lenses=excluded.lenses, duration_secs=excluded.duration_secs,
  input_tokens=excluded.input_tokens, output_tokens=excluded.output_tokens, cost_usd=excluded.cost_usd`
 	_, err := s.db.Exec(upsert,
 		r.RunID, r.Repo, r.PR, r.Model, r.Provider, r.Lenses, r.DurationSecs,
 		r.InputTokens, r.OutputTokens, r.CostUSD, now())
 	return err
 }
 // ReportIn is one finding as a single model reported it. AddReports requires
 // Lens and Title to be non-blank. Repo, PR, Lens, File, and Line determine the
 // finding id (see findingID); Title, Model, Provider, RunID, RawSeverity, and
 // Detail do not affect it.
 type ReportIn struct {
 	Repo        string `json:"repo"`
 	PR          int    `json:"pr"`
 	Lens        string `json:"lens"`
 	File        string `json:"file"`
 	Line        int    `json:"line"`
 	Title       string `json:"title"`
 	Model       string `json:"model"`
 	Provider    string `json:"provider"`
 	RunID       string `json:"run_id"`
 	RawSeverity string `json:"raw_severity"`
 	Detail      string `json:"detail"`
 }
 // AddReports records a batch of findings in one transaction: each input upserts
 // its (content-addressed) finding row and adds this model's report of it. It
 // returns the finding id per input, in input order. A finding that already
 // exists keeps its original row (title and first_seen are not overwritten), and
 // a model re-reporting the same finding in the same run is a no-op
 // (UNIQUE finding_id,run_id).
 //
 // Every report must have a non-blank lens and title. On any error, including a
 // failed commit, nothing is stored and the returned ids are nil.
 func (s *Store) AddReports(in []ReportIn) ([]string, error) {
 	tx, err := s.db.Begin()
 	if err != nil {
 		return nil, err
 	}
 	// Only matters on the error paths; after a successful Commit this just
 	// returns sql.ErrTxDone, which is deliberately ignored.
 	defer tx.Rollback()
 	ts := now()
 	ids := make([]string, len(in))
 	for i, r := range in {
 		if strings.TrimSpace(r.Title) == "" || strings.TrimSpace(r.Lens) == "" {
 			return nil, fmt.Errorf("report %d: lens and title are required", i)
 		}
 		id := findingID(r.Repo, r.PR, r.Lens, r.File, r.Line)
 		ids[i] = id
 		if _, err := tx.Exec(`
 INSERT INTO findings (id, repo, pr, lens, file, line, title, first_seen)
 VALUES (?,?,?,?,?,?,?,?) ON CONFLICT(id) DO NOTHING`,
 			id, r.Repo, r.PR, strings.ToLower(strings.TrimSpace(r.Lens)), r.File, r.Line, r.Title, ts); err != nil {
 			return nil, fmt.Errorf("report %d: insert finding: %w", i, err)
 		}
 		if _, err := tx.Exec(`
 INSERT INTO reports (finding_id, run_id, model, provider, raw_severity, detail, created_at)
 VALUES (?,?,?,?,?,?,?) ON CONFLICT(finding_id, run_id) DO NOTHING`,
 			id, r.RunID, r.Model, r.Provider, r.RawSeverity, r.Detail, ts); err != nil {
 			return nil, fmt.Errorf("report %d: insert report: %w", i, err)
 		}
 	}
 	if err := tx.Commit(); err != nil {
 		// The batch was not persisted, so do not hand back ids that look stored.
 		return nil, fmt.Errorf("commit reports: %w", err)
 	}
 	return ids, nil
 }
 // Grade is a triage verdict on a finding. Severity is required when is_real and
 // must be one of validSeverities; it is cleared when !is_real. No points here —
 // the client maps severity -> points. AddGrade additionally requires FindingID
 // to name an existing finding and, when Usefulness is set, that it lie in 1..5.
 // Empty Notes and Grader are stored as NULL.
 type Grade struct {
 	FindingID  string `json:"finding_id"`
 	IsReal     bool   `json:"is_real"`
 	Severity   string `json:"severity,omitempty"`
 	Usefulness *int   `json:"usefulness,omitempty"`
 	Notes      string `json:"notes,omitempty"`
 	Grader     string `json:"grader,omitempty"`
 }
 // AddGrade validates g and appends it to the grade history; earlier grades are
 // kept and the newest one wins on every read path. It rejects a blank or
 // unknown finding_id, a real finding whose severity is not in validSeverities,
 // and a usefulness outside 1..5. A false positive is stored with no severity.
 func (s *Store) AddGrade(g Grade) error {
 	if strings.TrimSpace(g.FindingID) == "" {
 		return fmt.Errorf("finding_id is required")
 	}
 	var known bool
 	lookup := s.db.QueryRow(`SELECT EXISTS(SELECT 1 FROM findings WHERE id=?)`, g.FindingID)
 	if err := lookup.Scan(&known); err != nil {
 		return err
 	}
 	if !known {
 		return fmt.Errorf("unknown finding_id %q", g.FindingID)
 	}
 	// Only a real finding carries a severity; a false positive stays empty.
 	severity := ""
 	if g.IsReal {
 		severity = strings.ToLower(strings.TrimSpace(g.Severity))
 		if !validSeverities[severity] {
 			return fmt.Errorf("severity %q invalid for a real finding (want one of: %s)", g.Severity, strings.Join(sortedSeverities(), ", "))
 		}
 	}
 	if u := g.Usefulness; u != nil && !(1 <= *u && *u <= 5) {
 		return fmt.Errorf("usefulness must be 1..5, got %d", *u)
 	}
 	const insert = `
 INSERT INTO grades (finding_id, is_real, severity, usefulness, notes, grader, created_at)
 VALUES (?,?,?,?,?,?,?)`
 	_, err := s.db.Exec(insert,
 		g.FindingID, g.IsReal, nullStr(severity), g.Usefulness, nullStr(g.Notes), nullStr(g.Grader), now())
 	return err
 }
 // ExportRow is one report joined with its finding, run timing, and latest grade
 // — the flat shape a dashboard consumes. Grade fields are nil/empty until graded.
 // Export fills DurationSecs with 0 and leaves the token counts nil when the
 // report's run_id has no matching run row. Graded is true exactly when a latest
 // grade exists for the finding.
 type ExportRow struct {
 	FindingID    string  `json:"finding_id"`
 	Repo         string  `json:"repo"`
 	PR           int     `json:"pr"`
 	Lens         string  `json:"lens"`
 	File         string  `json:"file,omitempty"`
 	Line         int     `json:"line,omitempty"`
 	Title        string  `json:"title"`
 	Model        string  `json:"model"`
 	Provider     string  `json:"provider,omitempty"`
 	RunID        string  `json:"run_id"`
 	RawSeverity  string  `json:"raw_severity,omitempty"`
 	ReportedAt   string  `json:"reported_at"`
 	DurationSecs float64 `json:"duration_secs"`
 	InputTokens  *int64  `json:"input_tokens,omitempty"`
 	OutputTokens *int64  `json:"output_tokens,omitempty"`
 	Graded       bool    `json:"graded"`
 	IsReal       *bool   `json:"is_real,omitempty"`
 	Severity     string  `json:"severity,omitempty"`
 	Usefulness   *int    `json:"usefulness,omitempty"`
 	Notes        string  `json:"notes,omitempty"`
 	Grader       string  `json:"grader,omitempty"`
 	GradedAt     string  `json:"graded_at,omitempty"`
 }
 // Export returns every report joined with finding, run timing, and latest grade,
 // oldest first. The dashboard does all weighting from these raw rows.
 //
 // The result is never nil: an empty store yields an empty slice, so it encodes
 // as JSON [] (matching Scoreboard) rather than null. On any error the returned
 // slice is nil; partial results are never handed back alongside an error.
 func (s *Store) Export() ([]ExportRow, error) {
 	rows, err := s.db.Query(`
 SELECT r.finding_id, f.repo, f.pr, f.lens, f.file, f.line, f.title,
        r.model, r.provider, r.run_id, r.raw_severity, r.created_at,
        COALESCE(ru.duration_secs, 0), ru.input_tokens, ru.output_tokens,
        lg.is_real, lg.severity, lg.usefulness, lg.notes, lg.grader, lg.created_at
 FROM reports r
 JOIN findings f ON f.id = r.finding_id
 LEFT JOIN runs ru ON ru.run_id = r.run_id
 LEFT JOIN latest_grades lg ON lg.finding_id = r.finding_id
 ORDER BY r.created_at, r.id`)
 	if err != nil {
 		return nil, err
 	}
 	defer rows.Close()
 	// Start non-nil so "no reports yet" serialises as [] instead of null.
 	out := []ExportRow{}
 	for rows.Next() {
 		var e ExportRow
 		// Nullable columns (and every grade column of the LEFT JOIN) go through
 		// sql.Null* so an absent value scans cleanly.
 		var file, rawSev, sev, notes, grader, gradedAt sql.NullString
 		var line sql.NullInt64
 		var isReal sql.NullBool
 		var useful sql.NullInt64
 		if err := rows.Scan(&e.FindingID, &e.Repo, &e.PR, &e.Lens, &file, &line, &e.Title,
 			&e.Model, &e.Provider, &e.RunID, &rawSev, &e.ReportedAt,
 			&e.DurationSecs, &e.InputTokens, &e.OutputTokens,
 			&isReal, &sev, &useful, &notes, &grader, &gradedAt); err != nil {
 			return nil, err
 		}
 		e.File, e.Line = file.String, int(line.Int64)
 		e.RawSeverity = rawSev.String
 		// is_real is NOT NULL in grades, so a valid value means a grade row joined.
 		if isReal.Valid {
 			e.Graded = true
 			v := isReal.Bool
 			e.IsReal = &v
 			e.Severity, e.Notes, e.Grader, e.GradedAt = sev.String, notes.String, grader.String, gradedAt.String
 			if useful.Valid {
 				u := int(useful.Int64)
 				e.Usefulness = &u
 			}
 		}
 		out = append(out, e)
 	}
 	if err := rows.Err(); err != nil {
 		return nil, err
 	}
 	return out, nil
 }
 // ModelStat is the per-model rollup the scoreboard returns. It is intentionally
 // POINTS-FREE: raw minutes/tokens and a confirmed-by-severity histogram, so the
 // client applies its own weights for points and value-per-minute/token.
 // Scoreboard fills Runs, Minutes (summed duration_secs / 60), and the token
 // totals from the runs table, and the finding counts from reports joined with
 // the latest grade. Provider is the one recorded when the model was first
 // seen, and is empty for a model that has reports but no runs.
 type ModelStat struct {
 	Model         string         `json:"model"`
 	Provider      string         `json:"provider,omitempty"`
 	Runs          int            `json:"runs"`
 	Minutes       float64        `json:"minutes"`
 	InputTokens   int64          `json:"input_tokens"`
 	OutputTokens  int64          `json:"output_tokens"`
 	Findings      int            `json:"findings"`
 	Confirmed     int            `json:"confirmed"`
 	FalsePositive int            `json:"false_positive"`
 	Ungraded      int            `json:"ungraded"`
 	BySeverity    map[string]int `json:"by_severity"` // confirmed findings per severity
 }
 // Scoreboard rolls runs + reports + latest grades up per model and returns one
 // ModelStat per model, sorted by model name. All counts of findings are
 // DISTINCT by finding (a model re-reporting across runs counts once).
 //
 // It runs three queries (run totals, finding counts split by grade state, and
 // the confirmed-by-severity histogram). An error from any of them, including
 // one surfaced only by rows.Err after iteration stops, fails the whole call
 // rather than yielding a silently truncated scoreboard.
 func (s *Store) Scoreboard() ([]ModelStat, error) {
 	stats := map[string]*ModelStat{}
 	// get returns the stat for model, creating it on first sight. The provider is
 	// only recorded at creation, so the first provider seen for a model sticks.
 	get := func(model, provider string) *ModelStat {
 		m, ok := stats[model]
 		if !ok {
 			m = &ModelStat{Model: model, Provider: provider, BySeverity: map[string]int{}}
 			stats[model] = m
 		}
 		return m
 	}
 	// each runs query, hands every row to scan, always closes the result set,
 	// and reports any iteration error via rows.Err.
 	each := func(query string, scan func(*sql.Rows) error) error {
 		rows, err := s.db.Query(query)
 		if err != nil {
 			return err
 		}
 		defer rows.Close()
 		for rows.Next() {
 			if err := scan(rows); err != nil {
 				return err
 			}
 		}
 		return rows.Err()
 	}
 	// Runs: minutes + tokens + run counts. Grouped by (model, provider), so a
 	// model seen under several providers accumulates across those rows.
 	err := each(`
 SELECT model, provider, COUNT(*), COALESCE(SUM(duration_secs),0),
        COALESCE(SUM(input_tokens),0), COALESCE(SUM(output_tokens),0)
 FROM runs GROUP BY model, provider`, func(rows *sql.Rows) error {
 		var model, provider string
 		var runs int
 		var dur float64
 		var in, out int64
 		if err := rows.Scan(&model, &provider, &runs, &dur, &in, &out); err != nil {
 			return err
 		}
 		m := get(model, provider)
 		m.Runs += runs
 		m.Minutes += dur / 60
 		m.InputTokens += in
 		m.OutputTokens += out
 		return nil
 	})
 	if err != nil {
 		return nil, err
 	}
 	// Findings: distinct per model, split by latest-grade state.
 	err = each(`
 SELECT r.model,
        COUNT(DISTINCT r.finding_id),
        COUNT(DISTINCT CASE WHEN lg.is_real=1 THEN r.finding_id END),
        COUNT(DISTINCT CASE WHEN lg.is_real=0 THEN r.finding_id END),
        COUNT(DISTINCT CASE WHEN lg.is_real IS NULL THEN r.finding_id END)
 FROM reports r LEFT JOIN latest_grades lg ON lg.finding_id = r.finding_id
 GROUP BY r.model`, func(rows *sql.Rows) error {
 		var model string
 		var total, confirmed, fp, ungraded int
 		if err := rows.Scan(&model, &total, &confirmed, &fp, &ungraded); err != nil {
 			return err
 		}
 		m := get(model, "")
 		m.Findings, m.Confirmed, m.FalsePositive, m.Ungraded = total, confirmed, fp, ungraded
 		return nil
 	})
 	if err != nil {
 		return nil, err
 	}
 	// Confirmed-by-severity histogram (distinct findings).
 	err = each(`
 SELECT r.model, lg.severity, COUNT(DISTINCT r.finding_id)
 FROM reports r JOIN latest_grades lg ON lg.finding_id = r.finding_id
 WHERE lg.is_real=1 AND lg.severity IS NOT NULL
 GROUP BY r.model, lg.severity`, func(rows *sql.Rows) error {
 		var model, sev string
 		var n int
 		if err := rows.Scan(&model, &sev, &n); err != nil {
 			return err
 		}
 		get(model, "").BySeverity[sev] = n
 		return nil
 	})
 	if err != nil {
 		return nil, err
 	}
 	// Map iteration order is random, so sort for a stable response.
 	out := make([]ModelStat, 0, len(stats))
 	for _, m := range stats {
 		out = append(out, *m)
 	}
 	sort.Slice(out, func(i, j int) bool { return out[i].Model < out[j].Model })
 	return out, nil
 }
 // sortedSeverities returns the keys of validSeverities in ascending
 // alphabetical order, giving error messages a stable rendering of the set.
 func sortedSeverities() []string {
 	names := make([]string, len(validSeverities))
 	i := 0
 	for name := range validSeverities {
 		names[i] = name
 		i++
 	}
 	sort.Strings(names)
 	return names
 }
 // nullStr maps the empty string to nil, so it is bound as SQL NULL, and passes
 // any other string through unchanged (no trimming).
 func nullStr(s string) any {
 	if len(s) > 0 {
 		return s
 	}
 	return nil
 }
@@ -0,0 +1,132 @@
 package main
 import (
 	"path/filepath"
 	"testing"
 )
 // testStore opens a fresh Store in a per-test temporary directory and registers
 // a cleanup that closes it when the test finishes.
 func testStore(t *testing.T) *Store {
 	t.Helper()
 	dbPath := filepath.Join(t.TempDir(), "gadfly-reports.db")
 	store, err := Open(dbPath)
 	if err != nil {
 		t.Fatalf("open: %v", err)
 	}
 	t.Cleanup(func() {
 		store.Close()
 	})
 	return store
 }
 // i64 returns a pointer to a copy of v, for filling optional *int64 fields inline.
 func i64(v int64) *int64 {
 	return &v
 }
 // intp returns a pointer to a copy of v, for filling optional *int fields inline.
 func intp(v int) *int {
 	return &v
 }
 // TestConsensusAndGrade: when two models flag the SAME location, the store keeps
 // one finding carrying two reports, and grading it once shows up on both
 // models' scoreboard rows.
 func TestConsensusAndGrade(t *testing.T) {
 	s := testStore(t)
 	seedRuns := []Run{
 		{RunID: "r-cloud", Repo: "steve/x", PR: 2, Model: "minimax", Provider: "ollama-cloud", Lenses: 3, DurationSecs: 300, InputTokens: i64(1000), OutputTokens: i64(500)},
 		{RunID: "r-m1", Repo: "steve/x", PR: 2, Model: "qwen3", Provider: "m1", Lenses: 3, DurationSecs: 1740},
 	}
 	for _, run := range seedRuns {
 		if err := s.AddRun(run); err != nil {
 			t.Fatal(err)
 		}
 	}
 	// Same file:line and lens, two differently-worded titles.
 	reports := []ReportIn{
 		{Repo: "steve/x", PR: 2, Lens: "correctness", File: "run/executor.go", Line: 166, Title: "SetIteration never called", Model: "minimax", Provider: "ollama-cloud", RunID: "r-cloud", RawSeverity: "Blocking"},
 		{Repo: "steve/x", PR: 2, Lens: "correctness", File: "run/executor.go", Line: 166, Title: "iteration counter dead", Model: "qwen3", Provider: "m1", RunID: "r-m1", RawSeverity: "Blocking"},
 	}
 	ids, err := s.AddReports(reports)
 	if err != nil {
 		t.Fatal(err)
 	}
 	first, second := ids[0], ids[1]
 	if first != second {
 		t.Fatalf("same location should collapse to one finding id, got %q and %q", first, second)
 	}
 	verdict := Grade{FindingID: first, IsReal: true, Severity: "high", Usefulness: intp(4), Grader: "claude"}
 	if err := s.AddGrade(verdict); err != nil {
 		t.Fatal(err)
 	}
 	board, err := s.Scoreboard()
 	if err != nil {
 		t.Fatal(err)
 	}
 	byModel := make(map[string]ModelStat, len(board))
 	for i := range board {
 		byModel[board[i].Model] = board[i]
 	}
 	// The single grade must be visible on each model's row.
 	for _, name := range []string{"minimax", "qwen3"} {
 		stat := byModel[name]
 		credited := stat.Findings == 1 && stat.Confirmed == 1 && stat.BySeverity["high"] == 1
 		if !credited {
 			t.Errorf("%s: findings=%d confirmed=%d high=%d, want 1/1/1", name, stat.Findings, stat.Confirmed, stat.BySeverity["high"])
 		}
 	}
 	minimax, qwen := byModel["minimax"], byModel["qwen3"]
 	if minimax.Minutes != 5 {
 		t.Errorf("minimax minutes = %v, want 5", minimax.Minutes)
 	}
 	if qwen.Minutes != 29 {
 		t.Errorf("qwen3 minutes = %v, want 29", qwen.Minutes)
 	}
 	if minimax.InputTokens != 1000 {
 		t.Errorf("minimax input_tokens = %d, want 1000", minimax.InputTokens)
 	}
 }
 // TestLatestGradeWins: a re-grade supersedes the prior one everywhere. A
 // finding is first graded real/critical, then re-graded as a false positive;
 // the scoreboard must then show it only as a false positive.
 func TestLatestGradeWins(t *testing.T) {
 	s := testStore(t)
 	if err := s.AddRun(Run{RunID: "r1", Repo: "r", PR: 1, Model: "m", Provider: "p", DurationSecs: 60}); err != nil {
 		t.Fatal(err)
 	}
 	ids, err := s.AddReports([]ReportIn{{Repo: "r", PR: 1, Lens: "security", File: "a.go", Line: 5, Title: "x", Model: "m", Provider: "p", RunID: "r1"}})
 	if err != nil {
 		t.Fatal(err)
 	}
 	id := ids[0]
 	if err := s.AddGrade(Grade{FindingID: id, IsReal: true, Severity: "critical"}); err != nil {
 		t.Fatal(err)
 	}
 	if err := s.AddGrade(Grade{FindingID: id, IsReal: false}); err != nil { // re-graded as a false positive
 		t.Fatal(err)
 	}
 	board, err := s.Scoreboard()
 	if err != nil {
 		t.Fatal(err)
 	}
 	// Guard the index below: fail with a message instead of panicking.
 	if len(board) != 1 {
 		t.Fatalf("want 1 model on the scoreboard, got %d: %+v", len(board), board)
 	}
 	m := board[0]
 	if m.Confirmed != 0 || m.FalsePositive != 1 || m.BySeverity["critical"] != 0 {
 		t.Errorf("after re-grade: confirmed=%d fp=%d critical=%d, want 0/1/0", m.Confirmed, m.FalsePositive, m.BySeverity["critical"])
 	}
 }
 // TestGradeValidation rejects bad severity / usefulness / unknown finding, and
 // accepts a false positive that carries no severity.
 func TestGradeValidation(t *testing.T) {
 	s := testStore(t)
 	ids, err := s.AddReports([]ReportIn{{Repo: "r", PR: 1, Lens: "perf", File: "a.go", Line: 1, Title: "t", Model: "m", Provider: "p", RunID: "r1"}})
 	if err != nil {
 		// Without this check a failed seed would panic on ids[0] below.
 		t.Fatal(err)
 	}
 	id := ids[0]
 	if err := s.AddGrade(Grade{FindingID: id, IsReal: true, Severity: "huge"}); err == nil {
 		t.Error("expected error for invalid severity")
 	}
 	if err := s.AddGrade(Grade{FindingID: id, IsReal: true, Severity: "high", Usefulness: intp(9)}); err == nil {
 		t.Error("expected error for out-of-range usefulness")
 	}
 	if err := s.AddGrade(Grade{FindingID: "nope", IsReal: true, Severity: "high"}); err == nil {
 		t.Error("expected error for unknown finding")
 	}
 	// A false positive needs no severity.
 	if err := s.AddGrade(Grade{FindingID: id, IsReal: false}); err != nil {
 		t.Errorf("false positive without severity should be valid: %v", err)
 	}
 }
 // TestFindingIDLocationKeyed: the id is a function of location only. findingID
 // takes no title, so wording cannot influence it; what is checked here is that
 // the id is deterministic, that surrounding whitespace and lens case are
 // normalised away, and that every component of the location key matters.
 func TestFindingIDLocationKeyed(t *testing.T) {
 	a := findingID("r", 1, "security", "a.go", 10)
 	if a != findingID("r", 1, "security", "a.go", 10) {
 		t.Error("same location must yield same id regardless of wording")
 	}
 	// Two models spelling the same location slightly differently must still collapse.
 	if a != findingID(" r ", 1, " Security ", " a.go ", 10) {
 		t.Error("surrounding whitespace and lens case must not change the id")
 	}
 	if a == findingID("r", 1, "security", "a.go", 11) {
 		t.Error("different line must yield different id")
 	}
 	if a == findingID("r", 1, "correctness", "a.go", 10) {
 		t.Error("different lens must yield different id")
 	}
 	if a == findingID("r", 1, "security", "b.go", 10) {
 		t.Error("different file must yield different id")
 	}
 	if a == findingID("r", 2, "security", "a.go", 10) {
 		t.Error("different pr must yield different id")
 	}
 	if a == findingID("other", 1, "security", "a.go", 10) {
 		t.Error("different repo must yield different id")
 	}
 }