From ddcf42a3ce96d2c221d2a55da802872f02495b3a Mon Sep 17 00:00:00 2001 From: Steve Dudenhoeffer Date: Fri, 26 Jun 2026 23:55:24 -0400 Subject: [PATCH] =?UTF-8?q?feat:=20gadfly-reports=20=E2=80=94=20findings?= =?UTF-8?q?=20store=20+=20scoreboard=20daemon?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SQLite-backed HTTP store for Gadfly review findings, per-review run timings, and human/Claude grades, with a points-free per-model scoreboard. Pure fact store: it computes no points or rankings (the dashboard maps severity->points client-side and retunes without re-scoring). Findings are content-addressed by location so cross-model reports collapse for consensus; one grade per finding, latest wins. Pure-Go SQLite (CGO-free) + Docker image CI + tests. Co-Authored-By: Claude Opus 4.8 (1M context) --- .dockerignore | 10 + .env.example | 11 + .gitea/workflows/build-image.yml | 69 +++++ .gitea/workflows/ci.yml | 26 ++ .gitignore | 34 +-- CLAUDE.md | 68 +++++ Dockerfile | 19 ++ LICENSE | 21 ++ README.md | 96 ++++++- go.mod | 17 ++ go.sum | 51 ++++ main.go | 74 +++++ server.go | 121 +++++++++ server_test.go | 100 +++++++ store.go | 447 +++++++++++++++++++++++++++++++ store_test.go | 132 +++++++++ 16 files changed, 1269 insertions(+), 27 deletions(-) create mode 100644 .dockerignore create mode 100644 .env.example create mode 100644 .gitea/workflows/build-image.yml create mode 100644 .gitea/workflows/ci.yml create mode 100644 CLAUDE.md create mode 100644 Dockerfile create mode 100644 LICENSE create mode 100644 go.mod create mode 100644 go.sum create mode 100644 main.go create mode 100644 server.go create mode 100644 server_test.go create mode 100644 store.go create mode 100644 store_test.go diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..14a6e05 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,10 @@ +.git +.gitea +*.db +*.db-wal +*.db-shm +/data +gadfly-reports +README.md +CLAUDE.md +.env* diff --git 
a/.env.example b/.env.example new file mode 100644 index 0000000..4dc1921 --- /dev/null +++ b/.env.example @@ -0,0 +1,11 @@ +# === gadfly-reports daemon configuration === + +# Listen address (default: :8090) +GADFLY_REPORTS_ADDR=:8090 + +# SQLite database path (default: gadfly-reports.db; /data/gadfly-reports.db in Docker) +GADFLY_REPORTS_DB=/data/gadfly-reports.db + +# Bearer token callers must present on every route except /healthz (empty = open). +# gadfly (emit) and gadfly-mcp must present the same token. +GADFLY_REPORTS_TOKEN=change-me-to-a-secret diff --git a/.gitea/workflows/build-image.yml b/.gitea/workflows/build-image.yml new file mode 100644 index 0000000..f2745c0 --- /dev/null +++ b/.gitea/workflows/build-image.yml @@ -0,0 +1,69 @@ +name: Build & push image + +# Builds the gadfly-reports daemon image and pushes it to the Gitea container +# registry so it's easy to self-host. +# +# push to main -> :latest + :sha-<short> +# push tag v* -> :<tag> + :latest +# +# Required repo secrets: REGISTRY_USER / REGISTRY_PASSWORD (registry push). The +# Go build uses only PUBLIC modules, so no private-module creds are needed. 
+ +on: + push: + branches: [main] + tags: ["v*"] + paths-ignore: + - "**.md" + - "LICENSE" + - ".gitignore" + - ".env.example" + workflow_dispatch: {} + +concurrency: + group: gadfly-reports-image-${{ github.ref }} + cancel-in-progress: true + +env: + IMAGE_NAME: gitea.stevedudenhoeffer.com/steve/gadfly-reports + +jobs: + build-and-push: + runs-on: ubuntu-latest + timeout-minutes: 20 + steps: + - uses: actions/checkout@v4 + + - name: Set up Docker Buildx + run: docker buildx create --use --name gr-builder --driver docker-container 2>/dev/null || docker buildx use gr-builder + + - name: Log in to the registry + env: + REGISTRY_USER: ${{ secrets.REGISTRY_USER }} + REGISTRY_PASSWORD: ${{ secrets.REGISTRY_PASSWORD }} + run: echo "${REGISTRY_PASSWORD}" | docker login gitea.stevedudenhoeffer.com -u "${REGISTRY_USER}" --password-stdin + + - name: Compute tags + id: meta + run: | + SHA_SHORT=$(echo "${GITHUB_SHA}" | cut -c1-7) + if [ "${{ github.ref_type }}" = "tag" ]; then + TAGS="${IMAGE_NAME}:${GITHUB_REF_NAME},${IMAGE_NAME}:latest" + else + TAGS="${IMAGE_NAME}:latest,${IMAGE_NAME}:sha-${SHA_SHORT}" + fi + echo "tags=${TAGS}" >> "$GITHUB_OUTPUT" + echo "Tags: ${TAGS}" + + - name: Build and push + run: | + TAG_FLAGS="" + IFS=',' read -ra TAG_ARRAY <<< "${{ steps.meta.outputs.tags }}" + for t in "${TAG_ARRAY[@]}"; do TAG_FLAGS="$TAG_FLAGS --tag $t"; done + docker buildx build \ + --push \ + --platform linux/amd64 \ + $TAG_FLAGS \ + --add-host gitea.stevedudenhoeffer.com:192.168.0.134 \ + --file ./Dockerfile \ + . 
diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml new file mode 100644 index 0000000..acafab3 --- /dev/null +++ b/.gitea/workflows/ci.yml @@ -0,0 +1,26 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + types: [opened, synchronize, reopened] + workflow_dispatch: {} + +jobs: + test: + runs-on: ubuntu-latest + timeout-minutes: 10 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: + go-version: "1.26" + - name: Build + run: go build ./... + - name: Vet + run: go vet ./... + - name: gofmt + run: test -z "$(gofmt -l .)" || { gofmt -l .; echo "gofmt needed"; exit 1; } + - name: Test (race) + run: go test -race ./... diff --git a/.gitignore b/.gitignore index 5b90e79..6bec015 100644 --- a/.gitignore +++ b/.gitignore @@ -1,27 +1,9 @@ -# ---> Go -# If you prefer the allow list template instead of the deny list, see community template: -# https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore -# -# Binaries for programs and plugins -*.exe -*.exe~ -*.dll -*.so -*.dylib - -# Test binary, built with `go test -c` -*.test - -# Output of the go coverage tool, specifically when used with LiteIDE -*.out - -# Dependency directories (remove the comment below to include it) -# vendor/ - -# Go workspace file -go.work -go.work.sum - -# env file +# build output +/gadfly-reports +# local SQLite databases +*.db +*.db-wal +*.db-shm +/data/ +# local env .env - diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..f7662cd --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,68 @@ +# gadfly-reports — Developer Guide + +A small Go + SQLite HTTP daemon that stores [Gadfly](https://gitea.stevedudenhoeffer.com/steve/gadfly) +review findings, the per-review run timings, and human/Claude grades — and serves a points-free +per-model scoreboard. The companion MCP client is +[gadfly-mcp](https://gitea.stevedudenhoeffer.com/steve/gadfly-mcp). 
+ +> This is a public, **vibe-coded** project (built largely by an AI agent). Keep that framing honest +> in the README; don't oversell it — it's a homelab-grade store, not a hardened product. + +## Core principle: store raw facts, score on the client + +gadfly-reports records **only facts**: runs (timing/tokens), findings (content-addressed by +location), reports (which model raised which finding), and grades (`is_real` + `severity` + +`usefulness`). It **never stores points or computes rankings**. Mapping `severity → points` and any +"value per minute / per token" ranking is the dashboard's job. This is deliberate — keep it that way: +do not add a points column or a weighting config to the store. Retuning the curve must never require +a migration or a re-score. + +The severity vocabulary (`trivial|small|medium|high|critical`) in `store.go` is the **only** +scoring-adjacent contract, and it's a closed set validated on write. + +## Architecture + +``` +main.go subcommand dispatch (serve) + flags/env +store.go SQLite schema + types + queries (runs/findings/reports/grades + latest_grades view) +server.go net/http API (ServeMux method+path routes) + optional bearer auth +*_test.go store + server end-to-end tests (consensus, latest-grade-wins, validation, auth) +Dockerfile CGO-free build (pure-Go modernc sqlite) -> small alpine image +.gitea/workflows/ ci.yml (build/vet/test) + build-image.yml (publish :latest + :sha-<short>) +``` + +**Data model.** A **finding** is identified by `sha256(repo|pr|lens|file|line)[:16]` — *not* by +wording — so the same issue from different models (or a re-review) collapses to one finding with many +**reports**. One **grade** per finding (history kept, latest wins via the `latest_grades` view). + +## Dependencies + +- **modernc.org/sqlite** (pure Go) — chosen so the binary is CGO-free and `go run …@latest`/the + Docker build need no C toolchain. Don't swap in a cgo driver. +- Otherwise stdlib only. 
The MCP SDK lives in gadfly-mcp, **not** here — keep this daemon lean. + +## Build / test + +```sh +go build ./... +go vet ./... +gofmt -l . # must be empty +go test -race ./... +``` + +## Release / deploy + +- **Push to `main`** → CI builds and publishes `:latest` (+ `:sha-<short>`) to + `gitea.stevedudenhoeffer.com/steve/gadfly-reports`. +- **Tag `v*`** → publishes `:<tag>` (+ `:latest`). +- CI needs repo secrets `REGISTRY_USER` / `REGISTRY_PASSWORD` to push the image (the Go build itself + uses only public modules — no private-module creds needed). + +## When making changes + +- Keep the **README API table** in sync with `server.go` routes and `store.go` JSON tags — it is the + contract gadfly (emit) and gadfly-mcp rely on. Stale docs are a bug. +- Preserve the **store-no-points** principle (see above). +- Add a test when you add logic. Keep `gofmt` clean and `go vet` quiet. +- The schema uses `CREATE TABLE IF NOT EXISTS` migrations applied on `Open`; additive changes are + fine, destructive ones need a real migration story (there isn't one yet — it's a homelab store). diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..19974ec --- /dev/null +++ b/Dockerfile @@ -0,0 +1,19 @@ +# gadfly-reports daemon image. modernc.org/sqlite is pure Go, so the binary is +# CGO-free and the final image needs no libc / no C toolchain at build time. +FROM golang:1.26 AS build +WORKDIR /src +COPY go.mod go.sum ./ +RUN go mod download +COPY . . +RUN CGO_ENABLED=0 go build -trimpath -ldflags="-s -w" -o /out/gadfly-reports . 
+ +FROM alpine:3.20 +RUN adduser -D -u 10001 app && mkdir -p /data && chown app /data +COPY --from=build /out/gadfly-reports /usr/local/bin/gadfly-reports +USER app +ENV GADFLY_REPORTS_ADDR=:8090 \ + GADFLY_REPORTS_DB=/data/gadfly-reports.db +EXPOSE 8090 +VOLUME ["/data"] +ENTRYPOINT ["/usr/local/bin/gadfly-reports"] +CMD ["serve"] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..6ab7069 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 Steve Dudenhoeffer + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 3798e14..b387567 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,96 @@ -# gadfly-reports +# 🪰📋 gadfly-reports +A small **durable store + scoreboard** for [Gadfly](https://gitea.stevedudenhoeffer.com/steve/gadfly) +review findings. 
Gadfly (and any CI) POST each model's findings and per-review timing here; a human +or Claude — via [gadfly-mcp](https://gitea.stevedudenhoeffer.com/steve/gadfly-mcp) — later grades +each finding. It's a single Go binary backed by SQLite, speaking a tiny HTTP API. + +> ### 🤖 Heads up: this is a vibe-coded project +> gadfly-reports was built almost entirely by an AI agent (Claude Code) — the design, the code, and +> these docs. It's small and it's tested, but treat it accordingly: it's a homelab-grade service, +> not a hardened product, and there may be the occasional AI-flavored rough edge. Issues and PRs +> welcome. + +## What it stores — and what it deliberately doesn't + +gadfly-reports is a **pure fact store**: + +- **runs** — one per model's review of a PR: wall-clock duration, lens count, optional token/cost. +- **findings** — **content-addressed by location** (`repo + pr + lens + file + line`), so the *same* + issue raised by several models collapses to one finding with many **reports**. That collapse is + what makes cross-model **consensus** and per-model **precision** measurable. +- **grades** — a triage verdict per finding: `is_real`, `severity` + (`trivial|small|medium|high|critical`), optional `usefulness` (1–5), notes, grader. Grade history + is kept; the latest wins. + +It stores **no points and computes no rankings.** Mapping severity → points and ranking models by +"value per minute" (or per token) is a **client/dashboard concern**, so you can retune the curve any +time without migrating or re-scoring stored data. 
+ +## Run it + +```sh +# from source +go run gitea.stevedudenhoeffer.com/steve/gadfly-reports@latest serve + +# or Docker (image published by CI on every push to main) +docker run -d --name gadfly-reports -p 8090:8090 -v gadfly-reports-data:/data \ + -e GADFLY_REPORTS_TOKEN=change-me \ + gitea.stevedudenhoeffer.com/steve/gadfly-reports:latest +``` + +## HTTP API (the canonical contract) + +| Method & path | Body / query | Purpose | +|---|---|---| +| `GET /healthz` | — | liveness (open even when a token is set) | +| `POST /runs` | one run object | upsert a model's review of a PR (timing/tokens) | +| `POST /reports` | JSON **array** of report objects | record findings + which model reported each | +| `POST /findings/{id}/grade` | `{is_real, severity?, usefulness?, notes?, grader?}` | record a triage grade | +| `GET /export` | — | flat report×finding×run×latest-grade rows — the dashboard feed | +| `GET /scoreboard` | — | points-free per-model rollup | + +`POST /runs` body: `{run_id, repo, pr, model, provider, lenses, duration_secs, input_tokens?, output_tokens?, cost_usd?}` +(re-posting the same `run_id` updates it). + +`POST /reports` array element: `{repo, pr, lens, file, line, title, model, provider, run_id, raw_severity, detail}`. + +`GET /scoreboard` element: `{model, provider, runs, minutes, input_tokens, output_tokens, findings, confirmed, false_positive, ungraded, by_severity:{severity:count}}`. + +If `GADFLY_REPORTS_TOKEN` is set, every route except `/healthz` requires `Authorization: Bearer <token>`. + +## Configuration + +| Env | Default | Meaning | +|-----|---------|---------| +| `GADFLY_REPORTS_ADDR` | `:8090` | listen address | +| `GADFLY_REPORTS_DB` | `gadfly-reports.db` (`/data/gadfly-reports.db` in Docker) | SQLite path | +| `GADFLY_REPORTS_TOKEN` | *(empty)* | bearer token callers must present (empty = open) | + +CLI flags `--addr` / `--db` / `--token` override the env. + +## Dashboards + +Point anything at the JSON endpoints (or the SQLite file read-only). 
`GET /export` is the flat feed; +`GET /scoreboard` is the per-model rollup. Compute points and value-per-minute **in the dashboard**, +e.g. with a curve like `trivial=1, small=3, medium=5, high=8, critical=20` → +`points = Σ weight[severity]·by_severity[severity]`, `value/min = points / minutes`. + +## How it fits together + +- **[gadfly](https://gitea.stevedudenhoeffer.com/steve/gadfly)** POSTs findings here after each + review when `GADFLY_FINDINGS_URL` points at this store (advisory; off by default). +- **[gadfly-mcp](https://gitea.stevedudenhoeffer.com/steve/gadfly-mcp)** is the MCP server Claude + uses to list findings and record grades against this store. + +## Build / test + +```sh +go build ./... +go test ./... +gofmt -l . # must be clean +``` + +## License + +MIT © 2026 Steve Dudenhoeffer. diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..df1eb38 --- /dev/null +++ b/go.mod @@ -0,0 +1,17 @@ +module gitea.stevedudenhoeffer.com/steve/gadfly-reports + +go 1.26 + +require modernc.org/sqlite v1.53.0 + +require ( + github.com/dustin/go-humanize v1.0.1 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect + github.com/ncruces/go-strftime v1.0.0 // indirect + github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect + golang.org/x/sys v0.44.0 // indirect + modernc.org/libc v1.73.4 // indirect + modernc.org/mathutil v1.7.1 // indirect + modernc.org/memory v1.11.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..b054032 --- /dev/null +++ b/go.sum @@ -0,0 +1,51 @@ +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= +github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs= +github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod 
h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= +github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w= +github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= +golang.org/x/mod v0.36.0 h1:JJjpVx6myfUsUdAzZuOSTTmRE0PfZeNWzzvKrP7amb4= +golang.org/x/mod v0.36.0/go.mod h1:moc6ELqsWcOw5Ef3xVprK5ul/MvtVvkIXLziUOICjUQ= +golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= +golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.44.0 h1:ildZl3J4uzeKP07r2F++Op7E9B29JRUy+a27EibtBTQ= +golang.org/x/sys v0.44.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= +golang.org/x/tools v0.45.0 h1:18qN3FAooORvApf5XjCXgsuayZOEtXf6JK18I3+ONa8= +golang.org/x/tools v0.45.0/go.mod h1:LuUGqqaXcXMEFEruIVJVm5mgDD8vww/z/SR1gQ4uE/0= +modernc.org/cc/v4 v4.28.4 h1:Hd/4Es+MBj+/7hSdZaisNyu6bv3V0Dp2MdllyfqaH+c= +modernc.org/cc/v4 v4.28.4/go.mod h1:OnovgIhbbMXMu1aISnJ0wvVD1KnW+cAUJkIrAWh+kVI= +modernc.org/ccgo/v4 v4.34.4 h1:OVnSOWQjVKOYkFxoHYB+qQmSHK5gqMqARM+K9DpR/Ws= +modernc.org/ccgo/v4 v4.34.4/go.mod 
h1:qdKqE8FNIYyysougB1RX9MxCzp5oJOcQXSobANJ4TuE= +modernc.org/fileutil v1.4.0 h1:j6ZzNTftVS054gi281TyLjHPp6CPHr2KCxEXjEbD6SM= +modernc.org/fileutil v1.4.0/go.mod h1:EqdKFDxiByqxLk8ozOxObDSfcVOv/54xDs/DUHdvCUU= +modernc.org/gc/v2 v2.6.5 h1:nyqdV8q46KvTpZlsw66kWqwXRHdjIlJOhG6kxiV/9xI= +modernc.org/gc/v2 v2.6.5/go.mod h1:YgIahr1ypgfe7chRuJi2gD7DBQiKSLMPgBQe9oIiito= +modernc.org/gc/v3 v3.1.3 h1:6QAplYyVO+KdPW3pGnqmJDUxtkec8ooEWvks/hhU3lc= +modernc.org/gc/v3 v3.1.3/go.mod h1:HFK/6AGESC7Ex+EZJhJ2Gni6cTaYpSMmU/cT9RmlfYY= +modernc.org/goabi0 v0.2.0 h1:HvEowk7LxcPd0eq6mVOAEMai46V+i7Jrj13t4AzuNks= +modernc.org/goabi0 v0.2.0/go.mod h1:CEFRnnJhKvWT1c1JTI3Avm+tgOWbkOu5oPA8eH8LnMI= +modernc.org/libc v1.73.4 h1:+ra4Ui8ngyt8HDcO1FTDPWlkAh6yOdaO2yAoh8MddQA= +modernc.org/libc v1.73.4/go.mod h1:DXZ3eO8qMCNn2SnmTNCiC71nJ9Rcq3PsnpU6Vc4rWK8= +modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU= +modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg= +modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI= +modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw= +modernc.org/opt v0.2.0 h1:tGyef5ApycA7FSEOMraay9SaTk5zmbx7Tu+cJs4QKZg= +modernc.org/opt v0.2.0/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns= +modernc.org/sortutil v1.2.1 h1:+xyoGf15mM3NMlPDnFqrteY07klSFxLElE2PVuWIJ7w= +modernc.org/sortutil v1.2.1/go.mod h1:7ZI3a3REbai7gzCLcotuw9AC4VZVpYMjDzETGsSMqJE= +modernc.org/sqlite v1.53.0 h1:20WG8N9q4ji/dEqGk4uiI0c6OPjSeLTNYGFCc3+7c1M= +modernc.org/sqlite v1.53.0/go.mod h1:xoEpOIpGrgT48H5iiyt/YXPCZPEzlfmfFwtk8Lklw8s= +modernc.org/strutil v1.2.1 h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0= +modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A= +modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y= +modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM= diff --git a/main.go b/main.go new file mode 100644 index 
0000000..2f64561 --- /dev/null +++ b/main.go @@ -0,0 +1,74 @@ +// Command gadfly-reports is a small, durable store + scoreboard for Gadfly's review +// findings. Gadfly (and CI) report each model's findings and per-review timing +// here; a human or Claude later grades each finding (is_real + severity + +// usefulness). gadfly-reports stores only those RAW facts — it deliberately does NOT +// compute points or rankings, so the dashboard/client owns the scoring curve +// (severity -> points, value-per-minute, value-per-token) and can retune it +// without migrating or re-scoring stored data. +// +// Subcommands: +// +// gadfly-reports serve [flags] run the HTTP + SQLite store (the long-running daemon) +// +// The MCP server Claude calls to record grades lives in the separate gadfly-mcp +// repository, so the daemon stays lean; each is launchable with `go run <module>@latest`. +package main + +import ( + "flag" + "fmt" + "log" + "net/http" + "os" +) + +func main() { + if len(os.Args) < 2 { + usage() + os.Exit(2) + } + switch os.Args[1] { + case "serve": + serveCmd(os.Args[2:]) + default: + usage() + os.Exit(2) + } +} + +func usage() { + fmt.Fprint(os.Stderr, `gadfly-reports — durable store + scoreboard for Gadfly review findings + +Usage: + gadfly-reports serve [flags] run the HTTP + SQLite store + +Run "gadfly-reports serve -h" for flags. 
+`) +} + +func serveCmd(args []string) { + fs := flag.NewFlagSet("serve", flag.ExitOnError) + addr := fs.String("addr", envOr("GADFLY_REPORTS_ADDR", ":8090"), "listen address") + dbPath := fs.String("db", envOr("GADFLY_REPORTS_DB", "gadfly-reports.db"), "SQLite database path") + token := fs.String("token", os.Getenv("GADFLY_REPORTS_TOKEN"), "bearer token callers must present (empty = open)") + _ = fs.Parse(args) + + store, err := Open(*dbPath) + if err != nil { + log.Fatalf("gadfly-reports: %v", err) + } + defer store.Close() + + log.Printf("gadfly-reports: serving %s (db=%s, auth=%v)", *addr, *dbPath, *token != "") + srv := &http.Server{Addr: *addr, Handler: newServer(store, *token)} + if err := srv.ListenAndServe(); err != nil { + log.Fatalf("gadfly-reports: %v", err) + } +} + +func envOr(key, def string) string { + if v := os.Getenv(key); v != "" { + return v + } + return def +} diff --git a/server.go b/server.go new file mode 100644 index 0000000..4f33135 --- /dev/null +++ b/server.go @@ -0,0 +1,121 @@ +package main + +import ( + "encoding/json" + "errors" + "log" + "net/http" + "strings" +) + +// newServer wires the store to the HTTP API. If token is non-empty, every route +// except /healthz requires "Authorization: Bearer <token>". 
+// +// Routes: +// +// GET /healthz liveness +// POST /runs upsert one run (model review of a PR; timing/tokens) +// POST /reports record a batch of findings + this model's reports +// POST /findings/{id}/grade record a triage grade (is_real, severity, …) +// GET /export flat report×finding×grade rows (the dashboard feed) +// GET /scoreboard points-free per-model rollup +func newServer(store *Store, token string) http.Handler { + mux := http.NewServeMux() + + mux.HandleFunc("GET /healthz", func(w http.ResponseWriter, _ *http.Request) { + writeJSON(w, http.StatusOK, map[string]string{"status": "ok"}) + }) + + mux.HandleFunc("POST /runs", func(w http.ResponseWriter, r *http.Request) { + var run Run + if !decode(w, r, &run) { + return + } + if err := store.AddRun(run); err != nil { + writeErr(w, http.StatusBadRequest, err) + return + } + writeJSON(w, http.StatusOK, map[string]string{"run_id": run.RunID}) + }) + + mux.HandleFunc("POST /reports", func(w http.ResponseWriter, r *http.Request) { + var reps []ReportIn + if !decode(w, r, &reps) { + return + } + ids, err := store.AddReports(reps) + if err != nil { + writeErr(w, http.StatusBadRequest, err) + return + } + writeJSON(w, http.StatusOK, map[string]any{"finding_ids": ids}) + }) + + mux.HandleFunc("POST /findings/{id}/grade", func(w http.ResponseWriter, r *http.Request) { + var g Grade + if !decode(w, r, &g) { + return + } + g.FindingID = r.PathValue("id") + if err := store.AddGrade(g); err != nil { + writeErr(w, http.StatusBadRequest, err) + return + } + writeJSON(w, http.StatusOK, map[string]string{"finding_id": g.FindingID}) + }) + + mux.HandleFunc("GET /export", func(w http.ResponseWriter, _ *http.Request) { + rows, err := store.Export() + if err != nil { + writeErr(w, http.StatusInternalServerError, err) + return + } + writeJSON(w, http.StatusOK, rows) + }) + + mux.HandleFunc("GET /scoreboard", func(w http.ResponseWriter, _ *http.Request) { + stats, err := store.Scoreboard() + if err != nil { + writeErr(w, 
http.StatusInternalServerError, err) + return + } + writeJSON(w, http.StatusOK, stats) + }) + + return auth(token, mux) +} + +// auth gates everything but /healthz behind a bearer token, when one is set. +func auth(token string, next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if token != "" && r.URL.Path != "/healthz" { + got := strings.TrimPrefix(r.Header.Get("Authorization"), "Bearer ") + if strings.TrimSpace(got) != token { + writeErr(w, http.StatusUnauthorized, errors.New("missing or invalid bearer token")) + return + } + } + next.ServeHTTP(w, r) + }) +} + +// decode reads a JSON body into v, writing a 400 and returning false on failure. +func decode(w http.ResponseWriter, r *http.Request, v any) bool { + if err := json.NewDecoder(r.Body).Decode(v); err != nil { + writeErr(w, http.StatusBadRequest, errors.New("invalid JSON body: "+err.Error())) + return false + } + return true +} + +func writeJSON(w http.ResponseWriter, code int, v any) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(code) + if err := json.NewEncoder(w).Encode(v); err != nil { + log.Printf("gadfly-reports: write response: %v", err) + } +} + +func writeErr(w http.ResponseWriter, code int, err error) { + writeJSON(w, code, map[string]string{"error": err.Error()}) +} diff --git a/server_test.go b/server_test.go new file mode 100644 index 0000000..719391f --- /dev/null +++ b/server_test.go @@ -0,0 +1,100 @@ +package main + +import ( + "bytes" + "encoding/json" + "net/http" + "net/http/httptest" + "path/filepath" + "testing" +) + +func testServer(t *testing.T, token string) *httptest.Server { + t.Helper() + store, err := Open(filepath.Join(t.TempDir(), "gadfly-reports.db")) + if err != nil { + t.Fatal(err) + } + t.Cleanup(func() { store.Close() }) + srv := httptest.NewServer(newServer(store, token)) + t.Cleanup(srv.Close) + return srv +} + +func post(t *testing.T, srv *httptest.Server, token, path string, body any) 
*http.Response { + t.Helper() + b, _ := json.Marshal(body) + req, _ := http.NewRequest("POST", srv.URL+path, bytes.NewReader(b)) + req.Header.Set("Content-Type", "application/json") + if token != "" { + req.Header.Set("Authorization", "Bearer "+token) + } + resp, err := http.DefaultClient.Do(req) + if err != nil { + t.Fatalf("POST %s: %v", path, err) + } + return resp +} + +// TestServerEndToEnd: run -> reports -> grade -> scoreboard over HTTP. +func TestServerEndToEnd(t *testing.T) { + srv := testServer(t, "") + + if resp := post(t, srv, "", "/runs", Run{RunID: "r1", Repo: "r", PR: 1, Model: "m", Provider: "p", DurationSecs: 120}); resp.StatusCode != 200 { + t.Fatalf("POST /runs = %d", resp.StatusCode) + } + + resp := post(t, srv, "", "/reports", []ReportIn{ + {Repo: "r", PR: 1, Lens: "security", File: "a.go", Line: 7, Title: "leak", Model: "m", Provider: "p", RunID: "r1"}, + }) + if resp.StatusCode != 200 { + t.Fatalf("POST /reports = %d", resp.StatusCode) + } + var rep struct { + FindingIDs []string `json:"finding_ids"` + } + json.NewDecoder(resp.Body).Decode(&rep) + if len(rep.FindingIDs) != 1 { + t.Fatalf("want 1 finding id, got %v", rep.FindingIDs) + } + id := rep.FindingIDs[0] + + if resp := post(t, srv, "", "/findings/"+id+"/grade", Grade{IsReal: true, Severity: "medium", Grader: "claude"}); resp.StatusCode != 200 { + t.Fatalf("POST grade = %d", resp.StatusCode) + } + + resp = mustGet(t, srv, "", "/scoreboard") + var board []ModelStat + json.NewDecoder(resp.Body).Decode(&board) + if len(board) != 1 || board[0].Confirmed != 1 || board[0].BySeverity["medium"] != 1 || board[0].Minutes != 2 { + t.Fatalf("unexpected scoreboard: %+v", board) + } +} + +// TestServerAuth: a set token gates writes but leaves /healthz open. 
+func TestServerAuth(t *testing.T) { + srv := testServer(t, "secret") + + if resp := post(t, srv, "", "/runs", Run{RunID: "r1", Model: "m"}); resp.StatusCode != http.StatusUnauthorized { + t.Errorf("unauthenticated POST = %d, want 401", resp.StatusCode) + } + if resp := post(t, srv, "secret", "/runs", Run{RunID: "r1", Model: "m"}); resp.StatusCode != 200 { + t.Errorf("authenticated POST = %d, want 200", resp.StatusCode) + } + if resp := mustGet(t, srv, "", "/healthz"); resp.StatusCode != 200 { + t.Errorf("healthz should be open, got %d", resp.StatusCode) + } +} + +func mustGet(t *testing.T, srv *httptest.Server, token, path string) *http.Response { + t.Helper() + req, _ := http.NewRequest("GET", srv.URL+path, nil) + if token != "" { + req.Header.Set("Authorization", "Bearer "+token) + } + resp, err := http.DefaultClient.Do(req) + if err != nil { + t.Fatalf("GET %s: %v", path, err) + } + return resp +} diff --git a/store.go b/store.go new file mode 100644 index 0000000..6925dfa --- /dev/null +++ b/store.go @@ -0,0 +1,447 @@ +package main + +import ( + "crypto/sha256" + "database/sql" + "encoding/hex" + "fmt" + "sort" + "strings" + "time" + + _ "modernc.org/sqlite" +) + +// gadfly-reports stores only RAW review facts: which model reported which finding, how +// long each model's review took, and a human/Claude grade (is_real + severity + +// usefulness). It deliberately does NOT compute points or rankings — the +// dashboard owns the scoring curve (severity -> points, value-per-minute), so it +// can be retuned without re-scoring or migrating stored data. The severity +// vocabulary below is the only scoring-related contract. + +// validSeverities is the closed set a grade may assign to a REAL finding. The +// client maps these to points however it likes (e.g. trivial=1 … critical=20). 
var validSeverities = map[string]bool{
	"trivial":  true,
	"small":    true,
	"medium":   true,
	"high":     true,
	"critical": true,
}

// schema is applied on every Open; each statement is idempotent
// (CREATE ... IF NOT EXISTS), so re-running it against an existing database
// is harmless.
const schema = `
CREATE TABLE IF NOT EXISTS runs (
  run_id        TEXT PRIMARY KEY,
  repo          TEXT NOT NULL,
  pr            INTEGER NOT NULL,
  model         TEXT NOT NULL,
  provider      TEXT NOT NULL,
  lenses        INTEGER NOT NULL DEFAULT 0,
  duration_secs REAL NOT NULL DEFAULT 0,
  input_tokens  INTEGER,
  output_tokens INTEGER,
  cost_usd      REAL,
  created_at    TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS findings (
  id         TEXT PRIMARY KEY,
  repo       TEXT NOT NULL,
  pr         INTEGER NOT NULL,
  lens       TEXT NOT NULL,
  file       TEXT,
  line       INTEGER,
  title      TEXT NOT NULL,
  first_seen TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS reports (
  id           INTEGER PRIMARY KEY AUTOINCREMENT,
  finding_id   TEXT NOT NULL,
  run_id       TEXT NOT NULL,
  model        TEXT NOT NULL,
  provider     TEXT NOT NULL,
  raw_severity TEXT,
  detail       TEXT,
  created_at   TEXT NOT NULL,
  UNIQUE(finding_id, run_id)
);
CREATE INDEX IF NOT EXISTS idx_reports_finding ON reports(finding_id);
CREATE INDEX IF NOT EXISTS idx_reports_model ON reports(model);

CREATE TABLE IF NOT EXISTS grades (
  id         INTEGER PRIMARY KEY AUTOINCREMENT,
  finding_id TEXT NOT NULL,
  is_real    INTEGER NOT NULL,
  severity   TEXT,
  usefulness INTEGER,
  notes      TEXT,
  grader     TEXT,
  created_at TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_grades_finding ON grades(finding_id);

-- latest_grades: the most recent grade per finding (grade history is kept; the
-- latest wins). Used by every read path so a re-grade supersedes the old one.
CREATE VIEW IF NOT EXISTS latest_grades AS
SELECT g.* FROM grades g
JOIN (SELECT finding_id, MAX(id) AS max_id FROM grades GROUP BY finding_id) m
  ON g.id = m.max_id;
`

// Store wraps the SQLite handle that holds every run, finding, report and grade.
type Store struct{ db *sql.DB }

// Open returns a Store backed by the SQLite file at path, creating the file and
// the schema when they do not exist yet. The connection is configured with a
// 5 s busy timeout, WAL journaling and foreign-key enforcement, and the pool is
// capped at a single connection.
func Open(path string) (*Store, error) {
	dsn := "file:" + path +
		"?_pragma=busy_timeout(5000)" +
		"&_pragma=journal_mode(WAL)" +
		"&_pragma=foreign_keys(on)"

	handle, err := sql.Open("sqlite", dsn)
	if err != nil {
		return nil, fmt.Errorf("open %s: %w", path, err)
	}

	// One connection only: the pure-Go driver behaves best with a single writer.
	handle.SetMaxOpenConns(1)

	if _, migrateErr := handle.Exec(schema); migrateErr != nil {
		handle.Close()
		return nil, fmt.Errorf("migrate: %w", migrateErr)
	}
	return &Store{db: handle}, nil
}

// Close releases the underlying database handle.
func (s *Store) Close() error {
	return s.db.Close()
}

// now returns the current UTC time formatted as RFC 3339, the textual form used
// for every timestamp column.
func now() string {
	return time.Now().UTC().Format(time.RFC3339)
}

// findingID derives a finding's id from WHERE it is (repo, PR, lens, file,
// line) rather than from how it is worded. Two models flagging the same spot,
// or one model flagging it again on a re-review, therefore land on the same
// finding with several reports, which is what makes consensus and per-model
// precision countable. The title deliberately plays no part.
//
// Repo and file are whitespace-trimmed and the lens is trimmed and lowercased
// before hashing; the id is the first 16 hex characters of the SHA-256 digest.
func findingID(repo string, pr int, lens, file string, line int) string {
	normRepo := strings.TrimSpace(repo)
	normLens := strings.ToLower(strings.TrimSpace(lens))
	normFile := strings.TrimSpace(file)

	digest := sha256.Sum256([]byte(fmt.Sprintf("%s|%d|%s|%s|%d", normRepo, pr, normLens, normFile, line)))
	// 8 bytes encode to exactly 16 hex characters.
	return hex.EncodeToString(digest[:8])
}

// Run describes a single model's review of a single PR — the unit run.sh times.
// The token counts and cost are optional and are omitted from JSON when nil.
type Run struct {
	RunID        string   `json:"run_id"`
	Repo         string   `json:"repo"`
	PR           int      `json:"pr"`
	Model        string   `json:"model"`
	Provider     string   `json:"provider"`
	Lenses       int      `json:"lenses"`
	DurationSecs float64  `json:"duration_secs"`
	InputTokens  *int64   `json:"input_tokens,omitempty"`
	OutputTokens *int64   `json:"output_tokens,omitempty"`
	CostUSD      *float64 `json:"cost_usd,omitempty"`
}

// AddRun upserts a run by run_id (a re-posted run overwrites timing/tokens).
+func (s *Store) AddRun(r Run) error { + if strings.TrimSpace(r.RunID) == "" || strings.TrimSpace(r.Model) == "" { + return fmt.Errorf("run_id and model are required") + } + _, err := s.db.Exec(` +INSERT INTO runs (run_id, repo, pr, model, provider, lenses, duration_secs, input_tokens, output_tokens, cost_usd, created_at) +VALUES (?,?,?,?,?,?,?,?,?,?,?) +ON CONFLICT(run_id) DO UPDATE SET + repo=excluded.repo, pr=excluded.pr, model=excluded.model, provider=excluded.provider, + lenses=excluded.lenses, duration_secs=excluded.duration_secs, + input_tokens=excluded.input_tokens, output_tokens=excluded.output_tokens, cost_usd=excluded.cost_usd`, + r.RunID, r.Repo, r.PR, r.Model, r.Provider, r.Lenses, r.DurationSecs, + r.InputTokens, r.OutputTokens, r.CostUSD, now()) + return err +} + +// ReportIn is one finding as a single model reported it. +type ReportIn struct { + Repo string `json:"repo"` + PR int `json:"pr"` + Lens string `json:"lens"` + File string `json:"file"` + Line int `json:"line"` + Title string `json:"title"` + Model string `json:"model"` + Provider string `json:"provider"` + RunID string `json:"run_id"` + RawSeverity string `json:"raw_severity"` + Detail string `json:"detail"` +} + +// AddReports records a batch of findings: each upserts its (content-addressed) +// finding row and adds this model's report of it. Returns the finding id per +// input (same order). A model re-reporting the same finding in the same run is a +// no-op (UNIQUE finding_id,run_id). 
+func (s *Store) AddReports(in []ReportIn) ([]string, error) { + tx, err := s.db.Begin() + if err != nil { + return nil, err + } + defer tx.Rollback() + + ts := now() + ids := make([]string, len(in)) + for i, r := range in { + if strings.TrimSpace(r.Title) == "" || strings.TrimSpace(r.Lens) == "" { + return nil, fmt.Errorf("report %d: lens and title are required", i) + } + id := findingID(r.Repo, r.PR, r.Lens, r.File, r.Line) + ids[i] = id + if _, err := tx.Exec(` +INSERT INTO findings (id, repo, pr, lens, file, line, title, first_seen) +VALUES (?,?,?,?,?,?,?,?) ON CONFLICT(id) DO NOTHING`, + id, r.Repo, r.PR, strings.ToLower(strings.TrimSpace(r.Lens)), r.File, r.Line, r.Title, ts); err != nil { + return nil, err + } + if _, err := tx.Exec(` +INSERT INTO reports (finding_id, run_id, model, provider, raw_severity, detail, created_at) +VALUES (?,?,?,?,?,?,?) ON CONFLICT(finding_id, run_id) DO NOTHING`, + id, r.RunID, r.Model, r.Provider, r.RawSeverity, r.Detail, ts); err != nil { + return nil, err + } + } + return ids, tx.Commit() +} + +// Grade is a triage verdict on a finding. Severity is required when is_real and +// must be one of validSeverities; it is cleared when !is_real. No points here — +// the client maps severity -> points. +type Grade struct { + FindingID string `json:"finding_id"` + IsReal bool `json:"is_real"` + Severity string `json:"severity,omitempty"` + Usefulness *int `json:"usefulness,omitempty"` + Notes string `json:"notes,omitempty"` + Grader string `json:"grader,omitempty"` +} + +// AddGrade appends a grade (history is kept; latest wins). 
+func (s *Store) AddGrade(g Grade) error { + if strings.TrimSpace(g.FindingID) == "" { + return fmt.Errorf("finding_id is required") + } + var exists bool + if err := s.db.QueryRow(`SELECT EXISTS(SELECT 1 FROM findings WHERE id=?)`, g.FindingID).Scan(&exists); err != nil { + return err + } + if !exists { + return fmt.Errorf("unknown finding_id %q", g.FindingID) + } + sev := strings.ToLower(strings.TrimSpace(g.Severity)) + if g.IsReal { + if !validSeverities[sev] { + return fmt.Errorf("severity %q invalid for a real finding (want one of: %s)", g.Severity, strings.Join(sortedSeverities(), ", ")) + } + } else { + sev = "" // a false positive carries no severity + } + if g.Usefulness != nil && (*g.Usefulness < 1 || *g.Usefulness > 5) { + return fmt.Errorf("usefulness must be 1..5, got %d", *g.Usefulness) + } + _, err := s.db.Exec(` +INSERT INTO grades (finding_id, is_real, severity, usefulness, notes, grader, created_at) +VALUES (?,?,?,?,?,?,?)`, + g.FindingID, g.IsReal, nullStr(sev), g.Usefulness, nullStr(g.Notes), nullStr(g.Grader), now()) + return err +} + +// ExportRow is one report joined with its finding, run timing, and latest grade +// — the flat shape a dashboard consumes. Grade fields are nil/empty until graded. 
type ExportRow struct {
	// The finding this report belongs to, and where it points.
	FindingID string `json:"finding_id"`
	Repo      string `json:"repo"`
	PR        int    `json:"pr"`
	Lens      string `json:"lens"`
	File      string `json:"file,omitempty"`
	Line      int    `json:"line,omitempty"`
	Title     string `json:"title"`

	// The individual report: who raised it, in which run, and when.
	Model       string `json:"model"`
	Provider    string `json:"provider,omitempty"`
	RunID       string `json:"run_id"`
	RawSeverity string `json:"raw_severity,omitempty"`
	ReportedAt  string `json:"reported_at"`

	// Timing and token usage of the run identified by RunID.
	DurationSecs float64 `json:"duration_secs"`
	InputTokens  *int64  `json:"input_tokens,omitempty"`
	OutputTokens *int64  `json:"output_tokens,omitempty"`

	// Latest grade of the finding. Graded is always emitted; the remaining
	// fields are omitted from JSON while they are nil/empty.
	Graded     bool   `json:"graded"`
	IsReal     *bool  `json:"is_real,omitempty"`
	Severity   string `json:"severity,omitempty"`
	Usefulness *int   `json:"usefulness,omitempty"`
	Notes      string `json:"notes,omitempty"`
	Grader     string `json:"grader,omitempty"`
	GradedAt   string `json:"graded_at,omitempty"`
}

// Export returns every report joined with finding, run timing, and latest grade,
// oldest first. The dashboard does all weighting from these raw rows.
+func (s *Store) Export() ([]ExportRow, error) { + rows, err := s.db.Query(` +SELECT r.finding_id, f.repo, f.pr, f.lens, f.file, f.line, f.title, + r.model, r.provider, r.run_id, r.raw_severity, r.created_at, + COALESCE(ru.duration_secs, 0), ru.input_tokens, ru.output_tokens, + lg.is_real, lg.severity, lg.usefulness, lg.notes, lg.grader, lg.created_at +FROM reports r +JOIN findings f ON f.id = r.finding_id +LEFT JOIN runs ru ON ru.run_id = r.run_id +LEFT JOIN latest_grades lg ON lg.finding_id = r.finding_id +ORDER BY r.created_at, r.id`) + if err != nil { + return nil, err + } + defer rows.Close() + + var out []ExportRow + for rows.Next() { + var e ExportRow + var file, rawSev, sev, notes, grader, gradedAt sql.NullString + var line sql.NullInt64 + var isReal sql.NullBool + var useful sql.NullInt64 + if err := rows.Scan(&e.FindingID, &e.Repo, &e.PR, &e.Lens, &file, &line, &e.Title, + &e.Model, &e.Provider, &e.RunID, &rawSev, &e.ReportedAt, + &e.DurationSecs, &e.InputTokens, &e.OutputTokens, + &isReal, &sev, &useful, ¬es, &grader, &gradedAt); err != nil { + return nil, err + } + e.File, e.Line = file.String, int(line.Int64) + e.RawSeverity = rawSev.String + if isReal.Valid { + e.Graded = true + v := isReal.Bool + e.IsReal = &v + e.Severity, e.Notes, e.Grader, e.GradedAt = sev.String, notes.String, grader.String, gradedAt.String + if useful.Valid { + u := int(useful.Int64) + e.Usefulness = &u + } + } + out = append(out, e) + } + return out, rows.Err() +} + +// ModelStat is the per-model rollup the scoreboard returns. It is intentionally +// POINTS-FREE: raw minutes/tokens and a confirmed-by-severity histogram, so the +// client applies its own weights for points and value-per-minute/token. 
+type ModelStat struct { + Model string `json:"model"` + Provider string `json:"provider,omitempty"` + Runs int `json:"runs"` + Minutes float64 `json:"minutes"` + InputTokens int64 `json:"input_tokens"` + OutputTokens int64 `json:"output_tokens"` + Findings int `json:"findings"` + Confirmed int `json:"confirmed"` + FalsePositive int `json:"false_positive"` + Ungraded int `json:"ungraded"` + BySeverity map[string]int `json:"by_severity"` // confirmed findings per severity +} + +// Scoreboard rolls runs + reports + latest grades up per model. All counts of +// findings are DISTINCT by finding (a model re-reporting across runs counts once). +func (s *Store) Scoreboard() ([]ModelStat, error) { + stats := map[string]*ModelStat{} + get := func(model, provider string) *ModelStat { + m, ok := stats[model] + if !ok { + m = &ModelStat{Model: model, Provider: provider, BySeverity: map[string]int{}} + stats[model] = m + } + return m + } + + // Runs: minutes + tokens + run counts. + rrows, err := s.db.Query(` +SELECT model, provider, COUNT(*), COALESCE(SUM(duration_secs),0), + COALESCE(SUM(input_tokens),0), COALESCE(SUM(output_tokens),0) +FROM runs GROUP BY model, provider`) + if err != nil { + return nil, err + } + for rrows.Next() { + var model, provider string + var runs int + var dur float64 + var in, out int64 + if err := rrows.Scan(&model, &provider, &runs, &dur, &in, &out); err != nil { + rrows.Close() + return nil, err + } + m := get(model, provider) + m.Runs += runs + m.Minutes += dur / 60 + m.InputTokens += in + m.OutputTokens += out + } + rrows.Close() + + // Findings: distinct per model, split by latest-grade state. 
+ frows, err := s.db.Query(` +SELECT r.model, + COUNT(DISTINCT r.finding_id), + COUNT(DISTINCT CASE WHEN lg.is_real=1 THEN r.finding_id END), + COUNT(DISTINCT CASE WHEN lg.is_real=0 THEN r.finding_id END), + COUNT(DISTINCT CASE WHEN lg.is_real IS NULL THEN r.finding_id END) +FROM reports r LEFT JOIN latest_grades lg ON lg.finding_id = r.finding_id +GROUP BY r.model`) + if err != nil { + return nil, err + } + for frows.Next() { + var model string + var total, confirmed, fp, ungraded int + if err := frows.Scan(&model, &total, &confirmed, &fp, &ungraded); err != nil { + frows.Close() + return nil, err + } + m := get(model, "") + m.Findings, m.Confirmed, m.FalsePositive, m.Ungraded = total, confirmed, fp, ungraded + } + frows.Close() + + // Confirmed-by-severity histogram (distinct findings). + srows, err := s.db.Query(` +SELECT r.model, lg.severity, COUNT(DISTINCT r.finding_id) +FROM reports r JOIN latest_grades lg ON lg.finding_id = r.finding_id +WHERE lg.is_real=1 AND lg.severity IS NOT NULL +GROUP BY r.model, lg.severity`) + if err != nil { + return nil, err + } + for srows.Next() { + var model, sev string + var n int + if err := srows.Scan(&model, &sev, &n); err != nil { + srows.Close() + return nil, err + } + get(model, "").BySeverity[sev] = n + } + srows.Close() + + out := make([]ModelStat, 0, len(stats)) + for _, m := range stats { + out = append(out, *m) + } + sort.Slice(out, func(i, j int) bool { return out[i].Model < out[j].Model }) + return out, nil +} + +func sortedSeverities() []string { + out := make([]string, 0, len(validSeverities)) + for s := range validSeverities { + out = append(out, s) + } + sort.Strings(out) + return out +} + +func nullStr(s string) any { + if s == "" { + return nil + } + return s +} diff --git a/store_test.go b/store_test.go new file mode 100644 index 0000000..81529be --- /dev/null +++ b/store_test.go @@ -0,0 +1,132 @@ +package main + +import ( + "path/filepath" + "testing" +) + +func testStore(t *testing.T) *Store { + t.Helper() 
+ s, err := Open(filepath.Join(t.TempDir(), "gadfly-reports.db")) + if err != nil { + t.Fatalf("open: %v", err) + } + t.Cleanup(func() { s.Close() }) + return s +} + +func i64(v int64) *int64 { return &v } +func intp(v int) *int { return &v } + +// TestConsensusAndGrade: two models reporting the SAME location collapse to one +// finding with two reports; a single grade applies to both models' scoreboards. +func TestConsensusAndGrade(t *testing.T) { + s := testStore(t) + + if err := s.AddRun(Run{RunID: "r-cloud", Repo: "steve/x", PR: 2, Model: "minimax", Provider: "ollama-cloud", Lenses: 3, DurationSecs: 300, InputTokens: i64(1000), OutputTokens: i64(500)}); err != nil { + t.Fatal(err) + } + if err := s.AddRun(Run{RunID: "r-m1", Repo: "steve/x", PR: 2, Model: "qwen3", Provider: "m1", Lenses: 3, DurationSecs: 1740}); err != nil { + t.Fatal(err) + } + + // Both models flag the same file:line under the same lens. + ids, err := s.AddReports([]ReportIn{ + {Repo: "steve/x", PR: 2, Lens: "correctness", File: "run/executor.go", Line: 166, Title: "SetIteration never called", Model: "minimax", Provider: "ollama-cloud", RunID: "r-cloud", RawSeverity: "Blocking"}, + {Repo: "steve/x", PR: 2, Lens: "correctness", File: "run/executor.go", Line: 166, Title: "iteration counter dead", Model: "qwen3", Provider: "m1", RunID: "r-m1", RawSeverity: "Blocking"}, + }) + if err != nil { + t.Fatal(err) + } + if ids[0] != ids[1] { + t.Fatalf("same location should collapse to one finding id, got %q and %q", ids[0], ids[1]) + } + + if err := s.AddGrade(Grade{FindingID: ids[0], IsReal: true, Severity: "high", Usefulness: intp(4), Grader: "claude"}); err != nil { + t.Fatal(err) + } + + board, err := s.Scoreboard() + if err != nil { + t.Fatal(err) + } + byModel := map[string]ModelStat{} + for _, m := range board { + byModel[m.Model] = m + } + for _, name := range []string{"minimax", "qwen3"} { + m := byModel[name] + if m.Findings != 1 || m.Confirmed != 1 || m.BySeverity["high"] != 1 { + 
t.Errorf("%s: findings=%d confirmed=%d high=%d, want 1/1/1", name, m.Findings, m.Confirmed, m.BySeverity["high"]) + } + } + if got := byModel["minimax"].Minutes; got != 5 { + t.Errorf("minimax minutes = %v, want 5", got) + } + if got := byModel["qwen3"].Minutes; got != 29 { + t.Errorf("qwen3 minutes = %v, want 29", got) + } + if got := byModel["minimax"].InputTokens; got != 1000 { + t.Errorf("minimax input_tokens = %d, want 1000", got) + } +} + +// TestLatestGradeWins: a re-grade supersedes the prior one everywhere. +func TestLatestGradeWins(t *testing.T) { + s := testStore(t) + if err := s.AddRun(Run{RunID: "r1", Repo: "r", PR: 1, Model: "m", Provider: "p", DurationSecs: 60}); err != nil { + t.Fatal(err) + } + ids, err := s.AddReports([]ReportIn{{Repo: "r", PR: 1, Lens: "security", File: "a.go", Line: 5, Title: "x", Model: "m", Provider: "p", RunID: "r1"}}) + if err != nil { + t.Fatal(err) + } + id := ids[0] + if err := s.AddGrade(Grade{FindingID: id, IsReal: true, Severity: "critical"}); err != nil { + t.Fatal(err) + } + if err := s.AddGrade(Grade{FindingID: id, IsReal: false}); err != nil { // re-graded as a false positive + t.Fatal(err) + } + board, _ := s.Scoreboard() + m := board[0] + if m.Confirmed != 0 || m.FalsePositive != 1 || m.BySeverity["critical"] != 0 { + t.Errorf("after re-grade: confirmed=%d fp=%d critical=%d, want 0/1/0", m.Confirmed, m.FalsePositive, m.BySeverity["critical"]) + } +} + +// TestGradeValidation rejects bad severity / usefulness / unknown finding. 
+func TestGradeValidation(t *testing.T) { + s := testStore(t) + ids, _ := s.AddReports([]ReportIn{{Repo: "r", PR: 1, Lens: "perf", File: "a.go", Line: 1, Title: "t", Model: "m", Provider: "p", RunID: "r1"}}) + id := ids[0] + + if err := s.AddGrade(Grade{FindingID: id, IsReal: true, Severity: "huge"}); err == nil { + t.Error("expected error for invalid severity") + } + if err := s.AddGrade(Grade{FindingID: id, IsReal: true, Severity: "high", Usefulness: intp(9)}); err == nil { + t.Error("expected error for out-of-range usefulness") + } + if err := s.AddGrade(Grade{FindingID: "nope", IsReal: true, Severity: "high"}); err == nil { + t.Error("expected error for unknown finding") + } + // A false positive needs no severity. + if err := s.AddGrade(Grade{FindingID: id, IsReal: false}); err != nil { + t.Errorf("false positive without severity should be valid: %v", err) + } +} + +// TestFindingIDLocationKeyed: id depends on location, not wording; line matters. +func TestFindingIDLocationKeyed(t *testing.T) { + a := findingID("r", 1, "security", "a.go", 10) + sameWordingDiff := findingID("r", 1, "security", "a.go", 10) // any title — id ignores it + if a != sameWordingDiff { + t.Error("same location must yield same id regardless of wording") + } + if a == findingID("r", 1, "security", "a.go", 11) { + t.Error("different line must yield different id") + } + if a == findingID("r", 1, "correctness", "a.go", 10) { + t.Error("different lens must yield different id") + } +}