diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..14a6e05 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,10 @@ +.git +.gitea +*.db +*.db-wal +*.db-shm +/data +gadfly-reports +README.md +CLAUDE.md +.env* diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..4dc1921 --- /dev/null +++ b/.env.example @@ -0,0 +1,11 @@ +# === gadfly-reports daemon configuration === + +# Listen address (default: :8090) +GADFLY_REPORTS_ADDR=:8090 + +# SQLite database path (default: gadfly-reports.db; /data/gadfly-reports.db in Docker) +GADFLY_REPORTS_DB=/data/gadfly-reports.db + +# Bearer token callers must present on every route except /healthz (empty = open). +# gadfly (emit) and gadfly-mcp must present the same token. +GADFLY_REPORTS_TOKEN=change-me-to-a-secret diff --git a/.gitea/workflows/build-image.yml b/.gitea/workflows/build-image.yml new file mode 100644 index 0000000..f2745c0 --- /dev/null +++ b/.gitea/workflows/build-image.yml @@ -0,0 +1,69 @@ +name: Build & push image + +# Builds the gadfly-reports daemon image and pushes it to the Gitea container +# registry so it's easy to self-host. +# +# push to main -> :latest + :sha-<short> +# push tag v* -> :<tag> + :latest +# +# Required repo secrets: REGISTRY_USER / REGISTRY_PASSWORD (registry push). The +# Go build uses only PUBLIC modules, so no private-module creds are needed.
+ +on: + push: + branches: [main] + tags: ["v*"] + paths-ignore: + - "**.md" + - "LICENSE" + - ".gitignore" + - ".env.example" + workflow_dispatch: {} + +concurrency: + group: gadfly-reports-image-${{ github.ref }} + cancel-in-progress: true + +env: + IMAGE_NAME: gitea.stevedudenhoeffer.com/steve/gadfly-reports + +jobs: + build-and-push: + runs-on: ubuntu-latest + timeout-minutes: 20 + steps: + - uses: actions/checkout@v4 + + - name: Set up Docker Buildx + run: docker buildx create --use --name gr-builder --driver docker-container 2>/dev/null || docker buildx use gr-builder + + - name: Log in to the registry + env: + REGISTRY_USER: ${{ secrets.REGISTRY_USER }} + REGISTRY_PASSWORD: ${{ secrets.REGISTRY_PASSWORD }} + run: echo "${REGISTRY_PASSWORD}" | docker login gitea.stevedudenhoeffer.com -u "${REGISTRY_USER}" --password-stdin + + - name: Compute tags + id: meta + run: | + SHA_SHORT=$(echo "${GITHUB_SHA}" | cut -c1-7) + if [ "${{ github.ref_type }}" = "tag" ]; then + TAGS="${IMAGE_NAME}:${GITHUB_REF_NAME},${IMAGE_NAME}:latest" + else + TAGS="${IMAGE_NAME}:latest,${IMAGE_NAME}:sha-${SHA_SHORT}" + fi + echo "tags=${TAGS}" >> "$GITHUB_OUTPUT" + echo "Tags: ${TAGS}" + + - name: Build and push + run: | + TAG_FLAGS="" + IFS=',' read -ra TAG_ARRAY <<< "${{ steps.meta.outputs.tags }}" + for t in "${TAG_ARRAY[@]}"; do TAG_FLAGS="$TAG_FLAGS --tag $t"; done + docker buildx build \ + --push \ + --platform linux/amd64 \ + $TAG_FLAGS \ + --add-host gitea.stevedudenhoeffer.com:192.168.0.134 \ + --file ./Dockerfile \ + . 
diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml new file mode 100644 index 0000000..acafab3 --- /dev/null +++ b/.gitea/workflows/ci.yml @@ -0,0 +1,26 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + types: [opened, synchronize, reopened] + workflow_dispatch: {} + +jobs: + test: + runs-on: ubuntu-latest + timeout-minutes: 10 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: + go-version: "1.26" + - name: Build + run: go build ./... + - name: Vet + run: go vet ./... + - name: gofmt + run: test -z "$(gofmt -l .)" || { gofmt -l .; echo "gofmt needed"; exit 1; } + - name: Test (race) + run: go test -race ./... diff --git a/.gitignore b/.gitignore index 5b90e79..6bec015 100644 --- a/.gitignore +++ b/.gitignore @@ -1,27 +1,9 @@ -# ---> Go -# If you prefer the allow list template instead of the deny list, see community template: -# https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore -# -# Binaries for programs and plugins -*.exe -*.exe~ -*.dll -*.so -*.dylib - -# Test binary, built with `go test -c` -*.test - -# Output of the go coverage tool, specifically when used with LiteIDE -*.out - -# Dependency directories (remove the comment below to include it) -# vendor/ - -# Go workspace file -go.work -go.work.sum - -# env file +# build output +/gadfly-reports +# local SQLite databases +*.db +*.db-wal +*.db-shm +/data/ +# local env .env - diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..f7662cd --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,68 @@ +# gadfly-reports — Developer Guide + +A small Go + SQLite HTTP daemon that stores [Gadfly](https://gitea.stevedudenhoeffer.com/steve/gadfly) +review findings, the per-review run timings, and human/Claude grades — and serves a points-free +per-model scoreboard. The companion MCP client is +[gadfly-mcp](https://gitea.stevedudenhoeffer.com/steve/gadfly-mcp). 
+ +> This is a public, **vibe-coded** project (built largely by an AI agent). Keep that framing honest +> in the README; don't oversell it — it's a homelab-grade store, not a hardened product. + +## Core principle: store raw facts, score on the client + +gadfly-reports records **only facts**: runs (timing/tokens), findings (content-addressed by +location), reports (which model raised which finding), and grades (`is_real` + `severity` + +`usefulness`). It **never stores points or computes rankings**. Mapping `severity → points` and any +"value per minute / per token" ranking is the dashboard's job. This is deliberate — keep it that way: +do not add a points column or a weighting config to the store. Retuning the curve must never require +a migration or a re-score. + +The severity vocabulary (`trivial|small|medium|high|critical`) in `store.go` is the **only** +scoring-adjacent contract, and it's a closed set validated on write. + +## Architecture + +``` +main.go subcommand dispatch (serve) + flags/env +store.go SQLite schema + types + queries (runs/findings/reports/grades + latest_grades view) +server.go net/http API (ServeMux method+path routes) + optional bearer auth +*_test.go store + server end-to-end tests (consensus, latest-grade-wins, validation, auth) +Dockerfile CGO-free build (pure-Go modernc sqlite) -> small alpine image +.gitea/workflows/ ci.yml (build/vet/test) + build-image.yml (publish :latest + :sha-<short>) +``` + +**Data model.** A **finding** is identified by `sha256(repo|pr|lens|file|line)[:16]` — *not* by +wording — so the same issue from different models (or a re-review) collapses to one finding with many +**reports**. One **grade** per finding (history kept, latest wins via the `latest_grades` view). + +## Dependencies + +- **modernc.org/sqlite** (pure Go) — chosen so the binary is CGO-free and `go run …@latest`/the + Docker build need no C toolchain. Don't swap in a cgo driver. +- Otherwise stdlib only.
The MCP SDK lives in gadfly-mcp, **not** here — keep this daemon lean. + +## Build / test + +```sh +go build ./... +go vet ./... +gofmt -l . # must be empty +go test -race ./... +``` + +## Release / deploy + +- **Push to `main`** → CI builds and publishes `:latest` (+ `:sha-<short>`) to + `gitea.stevedudenhoeffer.com/steve/gadfly-reports`. +- **Tag `v*`** → publishes `:<tag>` (+ `:latest`). +- CI needs repo secrets `REGISTRY_USER` / `REGISTRY_PASSWORD` to push the image (the Go build itself + uses only public modules — no private-module creds needed). + +## When making changes + +- Keep the **README API table** in sync with `server.go` routes and `store.go` JSON tags — it is the + contract gadfly (emit) and gadfly-mcp rely on. Stale docs are a bug. +- Preserve the **store-no-points** principle (see above). +- Add a test when you add logic. Keep `gofmt` clean and `go vet` quiet. +- The schema uses `CREATE TABLE IF NOT EXISTS` migrations applied on `Open`; additive changes are + fine, destructive ones need a real migration story (there isn't one yet — it's a homelab store). diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..19974ec --- /dev/null +++ b/Dockerfile @@ -0,0 +1,19 @@ +# gadfly-reports daemon image. modernc.org/sqlite is pure Go, so the binary is +# CGO-free and the final image needs no libc / no C toolchain at build time. +FROM golang:1.26 AS build +WORKDIR /src +COPY go.mod go.sum ./ +RUN go mod download +COPY . . +RUN CGO_ENABLED=0 go build -trimpath -ldflags="-s -w" -o /out/gadfly-reports .
+ +FROM alpine:3.20 +RUN adduser -D -u 10001 app && mkdir -p /data && chown app /data +COPY --from=build /out/gadfly-reports /usr/local/bin/gadfly-reports +USER app +ENV GADFLY_REPORTS_ADDR=:8090 \ + GADFLY_REPORTS_DB=/data/gadfly-reports.db +EXPOSE 8090 +VOLUME ["/data"] +ENTRYPOINT ["/usr/local/bin/gadfly-reports"] +CMD ["serve"] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..6ab7069 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 Steve Dudenhoeffer + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 3798e14..b387567 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,96 @@ -# gadfly-reports +# 🪰📋 gadfly-reports +A small **durable store + scoreboard** for [Gadfly](https://gitea.stevedudenhoeffer.com/steve/gadfly) +review findings. 
Gadfly (and any CI) POST each model's findings and per-review timing here; a human +or Claude — via [gadfly-mcp](https://gitea.stevedudenhoeffer.com/steve/gadfly-mcp) — later grades +each finding. It's a single Go binary backed by SQLite, speaking a tiny HTTP API. + +> ### 🤖 Heads up: this is a vibe-coded project +> gadfly-reports was built almost entirely by an AI agent (Claude Code) — the design, the code, and +> these docs. It's small and it's tested, but treat it accordingly: it's a homelab-grade service, +> not a hardened product, and there may be the occasional AI-flavored rough edge. Issues and PRs +> welcome. + +## What it stores — and what it deliberately doesn't + +gadfly-reports is a **pure fact store**: + +- **runs** — one per model's review of a PR: wall-clock duration, lens count, optional token/cost. +- **findings** — **content-addressed by location** (`repo + pr + lens + file + line`), so the *same* + issue raised by several models collapses to one finding with many **reports**. That collapse is + what makes cross-model **consensus** and per-model **precision** measurable. +- **grades** — a triage verdict per finding: `is_real`, `severity` + (`trivial|small|medium|high|critical`), optional `usefulness` (1–5), notes, grader. Grade history + is kept; the latest wins. + +It stores **no points and computes no rankings.** Mapping severity → points and ranking models by +"value per minute" (or per token) is a **client/dashboard concern**, so you can retune the curve any +time without migrating or re-scoring stored data. 
+ +## Run it + +```sh +# from source +go run gitea.stevedudenhoeffer.com/steve/gadfly-reports@latest serve + +# or Docker (image published by CI on every push to main) +docker run -d --name gadfly-reports -p 8090:8090 -v gadfly-reports-data:/data \ + -e GADFLY_REPORTS_TOKEN=change-me \ + gitea.stevedudenhoeffer.com/steve/gadfly-reports:latest +``` + +## HTTP API (the canonical contract) + +| Method & path | Body / query | Purpose | +|---|---|---| +| `GET /healthz` | — | liveness (open even when a token is set) | +| `POST /runs` | one run object | upsert a model's review of a PR (timing/tokens) | +| `POST /reports` | JSON **array** of report objects | record findings + which model reported each | +| `POST /findings/{id}/grade` | `{is_real, severity?, usefulness?, notes?, grader?}` | record a triage grade | +| `GET /export` | — | flat report×finding×run×latest-grade rows — the dashboard feed | +| `GET /scoreboard` | — | points-free per-model rollup | + +`POST /runs` body: `{run_id, repo, pr, model, provider, lenses, duration_secs, input_tokens?, output_tokens?, cost_usd?}` +(re-posting the same `run_id` updates it). + +`POST /reports` array element: `{repo, pr, lens, file, line, title, model, provider, run_id, raw_severity, detail}`. + +`GET /scoreboard` element: `{model, provider, runs, minutes, input_tokens, output_tokens, findings, confirmed, false_positive, ungraded, by_severity:{severity:count}}`. + +If `GADFLY_REPORTS_TOKEN` is set, every route except `/healthz` requires `Authorization: Bearer <token>`. + +## Configuration + +| Env | Default | Meaning | +|-----|---------|---------| +| `GADFLY_REPORTS_ADDR` | `:8090` | listen address | +| `GADFLY_REPORTS_DB` | `gadfly-reports.db` (`/data/gadfly-reports.db` in Docker) | SQLite path | +| `GADFLY_REPORTS_TOKEN` | *(empty)* | bearer token callers must present (empty = open) | + +CLI flags `--addr` / `--db` / `--token` override the env. + +## Dashboards + +Point anything at the JSON endpoints (or the SQLite file read-only).
`GET /export` is the flat feed; +`GET /scoreboard` is the per-model rollup. Compute points and value-per-minute **in the dashboard**, +e.g. with a curve like `trivial=1, small=3, medium=5, high=8, critical=20` → +`points = Σ weight[severity]·by_severity[severity]`, `value/min = points / minutes`. + +## How it fits together + +- **[gadfly](https://gitea.stevedudenhoeffer.com/steve/gadfly)** POSTs findings here after each + review when `GADFLY_FINDINGS_URL` points at this store (advisory; off by default). +- **[gadfly-mcp](https://gitea.stevedudenhoeffer.com/steve/gadfly-mcp)** is the MCP server Claude + uses to list findings and record grades against this store. + +## Build / test + +```sh +go build ./... +go test ./... +gofmt -l . # must be clean +``` + +## License + +MIT © 2026 Steve Dudenhoeffer. diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..df1eb38 --- /dev/null +++ b/go.mod @@ -0,0 +1,17 @@ +module gitea.stevedudenhoeffer.com/steve/gadfly-reports + +go 1.26 + +require modernc.org/sqlite v1.53.0 + +require ( + github.com/dustin/go-humanize v1.0.1 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect + github.com/ncruces/go-strftime v1.0.0 // indirect + github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect + golang.org/x/sys v0.44.0 // indirect + modernc.org/libc v1.73.4 // indirect + modernc.org/mathutil v1.7.1 // indirect + modernc.org/memory v1.11.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..b054032 --- /dev/null +++ b/go.sum @@ -0,0 +1,51 @@ +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= +github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs= +github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod 
h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= +github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w= +github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= +golang.org/x/mod v0.36.0 h1:JJjpVx6myfUsUdAzZuOSTTmRE0PfZeNWzzvKrP7amb4= +golang.org/x/mod v0.36.0/go.mod h1:moc6ELqsWcOw5Ef3xVprK5ul/MvtVvkIXLziUOICjUQ= +golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= +golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.44.0 h1:ildZl3J4uzeKP07r2F++Op7E9B29JRUy+a27EibtBTQ= +golang.org/x/sys v0.44.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= +golang.org/x/tools v0.45.0 h1:18qN3FAooORvApf5XjCXgsuayZOEtXf6JK18I3+ONa8= +golang.org/x/tools v0.45.0/go.mod h1:LuUGqqaXcXMEFEruIVJVm5mgDD8vww/z/SR1gQ4uE/0= +modernc.org/cc/v4 v4.28.4 h1:Hd/4Es+MBj+/7hSdZaisNyu6bv3V0Dp2MdllyfqaH+c= +modernc.org/cc/v4 v4.28.4/go.mod h1:OnovgIhbbMXMu1aISnJ0wvVD1KnW+cAUJkIrAWh+kVI= +modernc.org/ccgo/v4 v4.34.4 h1:OVnSOWQjVKOYkFxoHYB+qQmSHK5gqMqARM+K9DpR/Ws= +modernc.org/ccgo/v4 v4.34.4/go.mod 
h1:qdKqE8FNIYyysougB1RX9MxCzp5oJOcQXSobANJ4TuE= +modernc.org/fileutil v1.4.0 h1:j6ZzNTftVS054gi281TyLjHPp6CPHr2KCxEXjEbD6SM= +modernc.org/fileutil v1.4.0/go.mod h1:EqdKFDxiByqxLk8ozOxObDSfcVOv/54xDs/DUHdvCUU= +modernc.org/gc/v2 v2.6.5 h1:nyqdV8q46KvTpZlsw66kWqwXRHdjIlJOhG6kxiV/9xI= +modernc.org/gc/v2 v2.6.5/go.mod h1:YgIahr1ypgfe7chRuJi2gD7DBQiKSLMPgBQe9oIiito= +modernc.org/gc/v3 v3.1.3 h1:6QAplYyVO+KdPW3pGnqmJDUxtkec8ooEWvks/hhU3lc= +modernc.org/gc/v3 v3.1.3/go.mod h1:HFK/6AGESC7Ex+EZJhJ2Gni6cTaYpSMmU/cT9RmlfYY= +modernc.org/goabi0 v0.2.0 h1:HvEowk7LxcPd0eq6mVOAEMai46V+i7Jrj13t4AzuNks= +modernc.org/goabi0 v0.2.0/go.mod h1:CEFRnnJhKvWT1c1JTI3Avm+tgOWbkOu5oPA8eH8LnMI= +modernc.org/libc v1.73.4 h1:+ra4Ui8ngyt8HDcO1FTDPWlkAh6yOdaO2yAoh8MddQA= +modernc.org/libc v1.73.4/go.mod h1:DXZ3eO8qMCNn2SnmTNCiC71nJ9Rcq3PsnpU6Vc4rWK8= +modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU= +modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg= +modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI= +modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw= +modernc.org/opt v0.2.0 h1:tGyef5ApycA7FSEOMraay9SaTk5zmbx7Tu+cJs4QKZg= +modernc.org/opt v0.2.0/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns= +modernc.org/sortutil v1.2.1 h1:+xyoGf15mM3NMlPDnFqrteY07klSFxLElE2PVuWIJ7w= +modernc.org/sortutil v1.2.1/go.mod h1:7ZI3a3REbai7gzCLcotuw9AC4VZVpYMjDzETGsSMqJE= +modernc.org/sqlite v1.53.0 h1:20WG8N9q4ji/dEqGk4uiI0c6OPjSeLTNYGFCc3+7c1M= +modernc.org/sqlite v1.53.0/go.mod h1:xoEpOIpGrgT48H5iiyt/YXPCZPEzlfmfFwtk8Lklw8s= +modernc.org/strutil v1.2.1 h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0= +modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A= +modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y= +modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM= diff --git a/main.go b/main.go new file mode 100644 index 
0000000..2f64561 --- /dev/null +++ b/main.go @@ -0,0 +1,89 @@ +// Command gadfly-reports is a small, durable store + scoreboard for Gadfly's review +// findings. Gadfly (and CI) report each model's findings and per-review timing +// here; a human or Claude later grades each finding (is_real + severity + +// usefulness). gadfly-reports stores only those RAW facts — it deliberately does NOT +// compute points or rankings, so the dashboard/client owns the scoring curve +// (severity -> points, value-per-minute, value-per-token) and can retune it +// without migrating or re-scoring stored data. +// +// Subcommands: +// +// gadfly-reports serve [flags] run the HTTP + SQLite store (the long-running daemon) +// +// The MCP server Claude calls to record grades lives in the separate gadfly-mcp +// repository (see README), so this daemon stays lean: stdlib plus SQLite only. +package main + +import ( + "flag" + "fmt" + "log" + "net/http" + "os" + "time" +) + +// main dispatches on the first CLI argument; anything but "serve" prints usage and exits with status 2. +func main() { + if len(os.Args) < 2 { + usage() + os.Exit(2) + } + switch os.Args[1] { + case "serve": + serveCmd(os.Args[2:]) + default: + usage() + os.Exit(2) + } +} + +// usage prints the top-level help text to stderr. +func usage() { + fmt.Fprint(os.Stderr, `gadfly-reports — durable store + scoreboard for Gadfly review findings + +Usage: + gadfly-reports serve [flags] run the HTTP + SQLite store + +Run "gadfly-reports serve -h" for flags.
+`) +} + +// serveCmd parses the serve flags (defaults come from the GADFLY_REPORTS_* env vars), +// opens the SQLite store and serves the HTTP API until the listener fails. It never +// returns normally: the process ends via log.Fatalf. +func serveCmd(args []string) { + fs := flag.NewFlagSet("serve", flag.ExitOnError) + addr := fs.String("addr", envOr("GADFLY_REPORTS_ADDR", ":8090"), "listen address") + dbPath := fs.String("db", envOr("GADFLY_REPORTS_DB", "gadfly-reports.db"), "SQLite database path") + token := fs.String("token", os.Getenv("GADFLY_REPORTS_TOKEN"), "bearer token callers must present (empty = open)") + _ = fs.Parse(args) // flag.ExitOnError: Parse exits the process itself on a bad flag + + store, err := Open(*dbPath) + if err != nil { + log.Fatalf("gadfly-reports: %v", err) + } + + log.Printf("gadfly-reports: serving %s (db=%s, auth=%v)", *addr, *dbPath, *token != "") + srv := &http.Server{ + Addr: *addr, + Handler: newServer(store, *token), + // Bound how long a client may take to send its request headers (slowloris guard). + ReadHeaderTimeout: 10 * time.Second, + } + // ListenAndServe always returns a non-nil error. log.Fatalf exits without + // running deferred calls, so close the store explicitly before exiting. + err = srv.ListenAndServe() + if cerr := store.Close(); cerr != nil { + log.Printf("gadfly-reports: close store: %v", cerr) + } + log.Fatalf("gadfly-reports: %v", err) +} + +// envOr returns the value of environment variable key, or def when it is unset or empty. +func envOr(key, def string) string { + if v := os.Getenv(key); v != "" { + return v + } + return def +} diff --git a/server.go b/server.go new file mode 100644 index 0000000..4f33135 --- /dev/null +++ b/server.go @@ -0,0 +1,121 @@ +package main + +import ( + "encoding/json" + "errors" + "log" + "net/http" + "strings" +) + +// newServer wires the store to the HTTP API. If token is non-empty, every route +// except /healthz requires "Authorization: Bearer <token>".
+// +// Routes: +// +// GET /healthz liveness +// POST /runs upsert one run (model review of a PR; timing/tokens) +// POST /reports record a batch of findings + this model's reports +// POST /findings/{id}/grade record a triage grade (is_real, severity, …) +// GET /export flat report×finding×grade rows (the dashboard feed) +// GET /scoreboard points-free per-model rollup +func newServer(store *Store, token string) http.Handler { + mux := http.NewServeMux() + + mux.HandleFunc("GET /healthz", func(w http.ResponseWriter, _ *http.Request) { + writeJSON(w, http.StatusOK, map[string]string{"status": "ok"}) + }) + + mux.HandleFunc("POST /runs", func(w http.ResponseWriter, r *http.Request) { + var run Run + if !decode(w, r, &run) { + return + } + if err := store.AddRun(run); err != nil { + writeErr(w, http.StatusBadRequest, err) + return + } + writeJSON(w, http.StatusOK, map[string]string{"run_id": run.RunID}) + }) + + mux.HandleFunc("POST /reports", func(w http.ResponseWriter, r *http.Request) { + var reps []ReportIn + if !decode(w, r, &reps) { + return + } + ids, err := store.AddReports(reps) + if err != nil { + writeErr(w, http.StatusBadRequest, err) + return + } + writeJSON(w, http.StatusOK, map[string]any{"finding_ids": ids}) + }) + + mux.HandleFunc("POST /findings/{id}/grade", func(w http.ResponseWriter, r *http.Request) { + var g Grade + if !decode(w, r, &g) { + return + } + g.FindingID = r.PathValue("id") + if err := store.AddGrade(g); err != nil { + writeErr(w, http.StatusBadRequest, err) + return + } + writeJSON(w, http.StatusOK, map[string]string{"finding_id": g.FindingID}) + }) + + mux.HandleFunc("GET /export", func(w http.ResponseWriter, _ *http.Request) { + rows, err := store.Export() + if err != nil { + writeErr(w, http.StatusInternalServerError, err) + return + } + writeJSON(w, http.StatusOK, rows) + }) + + mux.HandleFunc("GET /scoreboard", func(w http.ResponseWriter, _ *http.Request) { + stats, err := store.Scoreboard() + if err != nil { + writeErr(w, 
http.StatusInternalServerError, err) + return + } + writeJSON(w, http.StatusOK, stats) + }) + + return auth(token, mux) +} + +// auth gates everything but /healthz behind a bearer token, when one is set; the "Bearer " scheme prefix is mandatory. +func auth(token string, next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if token != "" && r.URL.Path != "/healthz" { + got, ok := strings.CutPrefix(r.Header.Get("Authorization"), "Bearer ") + if !ok || strings.TrimSpace(got) != token { + writeErr(w, http.StatusUnauthorized, errors.New("missing or invalid bearer token")) + return + } + } + next.ServeHTTP(w, r) + }) +} + +// decode reads a JSON body (capped at 16 MiB) into v, writing a 400 and returning false on failure. +func decode(w http.ResponseWriter, r *http.Request, v any) bool { + if err := json.NewDecoder(http.MaxBytesReader(w, r.Body, 16<<20)).Decode(v); err != nil { + writeErr(w, http.StatusBadRequest, errors.New("invalid JSON body: "+err.Error())) + return false + } + return true +} + +func writeJSON(w http.ResponseWriter, code int, v any) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(code) + if err := json.NewEncoder(w).Encode(v); err != nil { + log.Printf("gadfly-reports: write response: %v", err) + } +} + +func writeErr(w http.ResponseWriter, code int, err error) { + writeJSON(w, code, map[string]string{"error": err.Error()}) +} diff --git a/server_test.go b/server_test.go new file mode 100644 index 0000000..719391f --- /dev/null +++ b/server_test.go @@ -0,0 +1,100 @@ +package main + +import ( + "bytes" + "encoding/json" + "net/http" + "net/http/httptest" + "path/filepath" + "testing" +) + +func testServer(t *testing.T, token string) *httptest.Server { + t.Helper() + store, err := Open(filepath.Join(t.TempDir(), "gadfly-reports.db")) + if err != nil { + t.Fatal(err) + } + t.Cleanup(func() { store.Close() }) + srv := httptest.NewServer(newServer(store, token)) + t.Cleanup(srv.Close) + return srv +} + +func post(t *testing.T, srv *httptest.Server, token, path string, body any)
*http.Response { + t.Helper() + b, _ := json.Marshal(body) + req, _ := http.NewRequest("POST", srv.URL+path, bytes.NewReader(b)) + req.Header.Set("Content-Type", "application/json") + if token != "" { + req.Header.Set("Authorization", "Bearer "+token) + } + resp, err := http.DefaultClient.Do(req) + if err != nil { + t.Fatalf("POST %s: %v", path, err) + } + return resp +} + +// TestServerEndToEnd: run -> reports -> grade -> scoreboard over HTTP. +func TestServerEndToEnd(t *testing.T) { + srv := testServer(t, "") + + if resp := post(t, srv, "", "/runs", Run{RunID: "r1", Repo: "r", PR: 1, Model: "m", Provider: "p", DurationSecs: 120}); resp.StatusCode != 200 { + t.Fatalf("POST /runs = %d", resp.StatusCode) + } + + resp := post(t, srv, "", "/reports", []ReportIn{ + {Repo: "r", PR: 1, Lens: "security", File: "a.go", Line: 7, Title: "leak", Model: "m", Provider: "p", RunID: "r1"}, + }) + if resp.StatusCode != 200 { + t.Fatalf("POST /reports = %d", resp.StatusCode) + } + var rep struct { + FindingIDs []string `json:"finding_ids"` + } + json.NewDecoder(resp.Body).Decode(&rep) + if len(rep.FindingIDs) != 1 { + t.Fatalf("want 1 finding id, got %v", rep.FindingIDs) + } + id := rep.FindingIDs[0] + + if resp := post(t, srv, "", "/findings/"+id+"/grade", Grade{IsReal: true, Severity: "medium", Grader: "claude"}); resp.StatusCode != 200 { + t.Fatalf("POST grade = %d", resp.StatusCode) + } + + resp = mustGet(t, srv, "", "/scoreboard") + var board []ModelStat + json.NewDecoder(resp.Body).Decode(&board) + if len(board) != 1 || board[0].Confirmed != 1 || board[0].BySeverity["medium"] != 1 || board[0].Minutes != 2 { + t.Fatalf("unexpected scoreboard: %+v", board) + } +} + +// TestServerAuth: a set token gates writes but leaves /healthz open. 
+func TestServerAuth(t *testing.T) { + srv := testServer(t, "secret") + + if resp := post(t, srv, "", "/runs", Run{RunID: "r1", Model: "m"}); resp.StatusCode != http.StatusUnauthorized { + t.Errorf("unauthenticated POST = %d, want 401", resp.StatusCode) + } + if resp := post(t, srv, "secret", "/runs", Run{RunID: "r1", Model: "m"}); resp.StatusCode != 200 { + t.Errorf("authenticated POST = %d, want 200", resp.StatusCode) + } + if resp := mustGet(t, srv, "", "/healthz"); resp.StatusCode != 200 { + t.Errorf("healthz should be open, got %d", resp.StatusCode) + } +} + +func mustGet(t *testing.T, srv *httptest.Server, token, path string) *http.Response { + t.Helper() + req, _ := http.NewRequest("GET", srv.URL+path, nil) + if token != "" { + req.Header.Set("Authorization", "Bearer "+token) + } + resp, err := http.DefaultClient.Do(req) + if err != nil { + t.Fatalf("GET %s: %v", path, err) + } + return resp +} diff --git a/store.go b/store.go new file mode 100644 index 0000000..6925dfa --- /dev/null +++ b/store.go @@ -0,0 +1,447 @@ +package main + +import ( + "crypto/sha256" + "database/sql" + "encoding/hex" + "fmt" + "sort" + "strings" + "time" + + _ "modernc.org/sqlite" +) + +// gadfly-reports stores only RAW review facts: which model reported which finding, how +// long each model's review took, and a human/Claude grade (is_real + severity + +// usefulness). It deliberately does NOT compute points or rankings — the +// dashboard owns the scoring curve (severity -> points, value-per-minute), so it +// can be retuned without re-scoring or migrating stored data. The severity +// vocabulary below is the only scoring-related contract. + +// validSeverities is the closed set a grade may assign to a REAL finding. The +// client maps these to points however it likes (e.g. trivial=1 … critical=20). 
+var validSeverities = map[string]bool{ + "trivial": true, + "small": true, + "medium": true, + "high": true, + "critical": true, +} + +const schema = ` +CREATE TABLE IF NOT EXISTS runs ( + run_id TEXT PRIMARY KEY, + repo TEXT NOT NULL, + pr INTEGER NOT NULL, + model TEXT NOT NULL, + provider TEXT NOT NULL, + lenses INTEGER NOT NULL DEFAULT 0, + duration_secs REAL NOT NULL DEFAULT 0, + input_tokens INTEGER, + output_tokens INTEGER, + cost_usd REAL, + created_at TEXT NOT NULL +); + +CREATE TABLE IF NOT EXISTS findings ( + id TEXT PRIMARY KEY, + repo TEXT NOT NULL, + pr INTEGER NOT NULL, + lens TEXT NOT NULL, + file TEXT, + line INTEGER, + title TEXT NOT NULL, + first_seen TEXT NOT NULL +); + +CREATE TABLE IF NOT EXISTS reports ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + finding_id TEXT NOT NULL, + run_id TEXT NOT NULL, + model TEXT NOT NULL, + provider TEXT NOT NULL, + raw_severity TEXT, + detail TEXT, + created_at TEXT NOT NULL, + UNIQUE(finding_id, run_id) +); +CREATE INDEX IF NOT EXISTS idx_reports_finding ON reports(finding_id); +CREATE INDEX IF NOT EXISTS idx_reports_model ON reports(model); + +CREATE TABLE IF NOT EXISTS grades ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + finding_id TEXT NOT NULL, + is_real INTEGER NOT NULL, + severity TEXT, + usefulness INTEGER, + notes TEXT, + grader TEXT, + created_at TEXT NOT NULL +); +CREATE INDEX IF NOT EXISTS idx_grades_finding ON grades(finding_id); + +-- latest_grades: the most recent grade per finding (grade history is kept; the +-- latest wins). Used by every read path so a re-grade supersedes the old one. +CREATE VIEW IF NOT EXISTS latest_grades AS +SELECT g.* FROM grades g +JOIN (SELECT finding_id, MAX(id) AS max_id FROM grades GROUP BY finding_id) m + ON g.id = m.max_id; +` + +// Store is the SQLite-backed fact store. +type Store struct{ db *sql.DB } + +// Open opens (creating if needed) the SQLite database at path and applies the +// schema. 
WAL + a busy timeout keep the single-writer daemon honest under the +// occasional concurrent reader. +func Open(path string) (*Store, error) { + db, err := sql.Open("sqlite", "file:"+path+"?_pragma=busy_timeout(5000)&_pragma=journal_mode(WAL)&_pragma=foreign_keys(on)") + if err != nil { + return nil, fmt.Errorf("open %s: %w", path, err) + } + // modernc's pure-Go driver is happiest with a single writer connection. + db.SetMaxOpenConns(1) + if _, err := db.Exec(schema); err != nil { + db.Close() + return nil, fmt.Errorf("migrate: %w", err) + } + return &Store{db: db}, nil +} + +func (s *Store) Close() error { return s.db.Close() } + +func now() string { return time.Now().UTC().Format(time.RFC3339) } + +// findingID content-addresses a finding by location, NOT by wording, so the same +// issue raised by different models (or re-raised on a re-review) collapses to one +// finding with many reports — that collapse is what makes cross-model consensus +// and per-model precision measurable. Title is intentionally excluded. +func findingID(repo string, pr int, lens, file string, line int) string { + key := fmt.Sprintf("%s|%d|%s|%s|%d", + strings.TrimSpace(repo), pr, strings.ToLower(strings.TrimSpace(lens)), + strings.TrimSpace(file), line) + sum := sha256.Sum256([]byte(key)) + return hex.EncodeToString(sum[:])[:16] +} + +// Run is one model's review of one PR — the unit run.sh times. +type Run struct { + RunID string `json:"run_id"` + Repo string `json:"repo"` + PR int `json:"pr"` + Model string `json:"model"` + Provider string `json:"provider"` + Lenses int `json:"lenses"` + DurationSecs float64 `json:"duration_secs"` + InputTokens *int64 `json:"input_tokens,omitempty"` + OutputTokens *int64 `json:"output_tokens,omitempty"` + CostUSD *float64 `json:"cost_usd,omitempty"` +} + +// AddRun upserts a run by run_id (a re-posted run overwrites timing/tokens). 
+func (s *Store) AddRun(r Run) error { + if strings.TrimSpace(r.RunID) == "" || strings.TrimSpace(r.Model) == "" { + return fmt.Errorf("run_id and model are required") + } + _, err := s.db.Exec(` +INSERT INTO runs (run_id, repo, pr, model, provider, lenses, duration_secs, input_tokens, output_tokens, cost_usd, created_at) +VALUES (?,?,?,?,?,?,?,?,?,?,?) +ON CONFLICT(run_id) DO UPDATE SET + repo=excluded.repo, pr=excluded.pr, model=excluded.model, provider=excluded.provider, + lenses=excluded.lenses, duration_secs=excluded.duration_secs, + input_tokens=excluded.input_tokens, output_tokens=excluded.output_tokens, cost_usd=excluded.cost_usd`, + r.RunID, r.Repo, r.PR, r.Model, r.Provider, r.Lenses, r.DurationSecs, + r.InputTokens, r.OutputTokens, r.CostUSD, now()) + return err +} + +// ReportIn is one finding as a single model reported it. +type ReportIn struct { + Repo string `json:"repo"` + PR int `json:"pr"` + Lens string `json:"lens"` + File string `json:"file"` + Line int `json:"line"` + Title string `json:"title"` + Model string `json:"model"` + Provider string `json:"provider"` + RunID string `json:"run_id"` + RawSeverity string `json:"raw_severity"` + Detail string `json:"detail"` +} + +// AddReports records a batch of findings: each upserts its (content-addressed) +// finding row and adds this model's report of it. Returns the finding id per +// input (same order). A model re-reporting the same finding in the same run is a +// no-op (UNIQUE finding_id,run_id). 
+func (s *Store) AddReports(in []ReportIn) ([]string, error) { + tx, err := s.db.Begin() + if err != nil { + return nil, err + } + defer tx.Rollback() + + ts := now() + ids := make([]string, len(in)) + for i, r := range in { + if strings.TrimSpace(r.Title) == "" || strings.TrimSpace(r.Lens) == "" { + return nil, fmt.Errorf("report %d: lens and title are required", i) + } + id := findingID(r.Repo, r.PR, r.Lens, r.File, r.Line) + ids[i] = id + if _, err := tx.Exec(` +INSERT INTO findings (id, repo, pr, lens, file, line, title, first_seen) +VALUES (?,?,?,?,?,?,?,?) ON CONFLICT(id) DO NOTHING`, + id, r.Repo, r.PR, strings.ToLower(strings.TrimSpace(r.Lens)), r.File, r.Line, r.Title, ts); err != nil { + return nil, err + } + if _, err := tx.Exec(` +INSERT INTO reports (finding_id, run_id, model, provider, raw_severity, detail, created_at) +VALUES (?,?,?,?,?,?,?) ON CONFLICT(finding_id, run_id) DO NOTHING`, + id, r.RunID, r.Model, r.Provider, r.RawSeverity, r.Detail, ts); err != nil { + return nil, err + } + } + return ids, tx.Commit() +} + +// Grade is a triage verdict on a finding. Severity is required when is_real and +// must be one of validSeverities; it is cleared when !is_real. No points here — +// the client maps severity -> points. +type Grade struct { + FindingID string `json:"finding_id"` + IsReal bool `json:"is_real"` + Severity string `json:"severity,omitempty"` + Usefulness *int `json:"usefulness,omitempty"` + Notes string `json:"notes,omitempty"` + Grader string `json:"grader,omitempty"` +} + +// AddGrade appends a grade (history is kept; latest wins). 
+func (s *Store) AddGrade(g Grade) error { + if strings.TrimSpace(g.FindingID) == "" { + return fmt.Errorf("finding_id is required") + } + var exists bool + if err := s.db.QueryRow(`SELECT EXISTS(SELECT 1 FROM findings WHERE id=?)`, g.FindingID).Scan(&exists); err != nil { + return err + } + if !exists { + return fmt.Errorf("unknown finding_id %q", g.FindingID) + } + sev := strings.ToLower(strings.TrimSpace(g.Severity)) + if g.IsReal { + if !validSeverities[sev] { + return fmt.Errorf("severity %q invalid for a real finding (want one of: %s)", g.Severity, strings.Join(sortedSeverities(), ", ")) + } + } else { + sev = "" // a false positive carries no severity + } + if g.Usefulness != nil && (*g.Usefulness < 1 || *g.Usefulness > 5) { + return fmt.Errorf("usefulness must be 1..5, got %d", *g.Usefulness) + } + _, err := s.db.Exec(` +INSERT INTO grades (finding_id, is_real, severity, usefulness, notes, grader, created_at) +VALUES (?,?,?,?,?,?,?)`, + g.FindingID, g.IsReal, nullStr(sev), g.Usefulness, nullStr(g.Notes), nullStr(g.Grader), now()) + return err +} + +// ExportRow is one report joined with its finding, run timing, and latest grade +// — the flat shape a dashboard consumes. Grade fields are nil/empty until graded. 
type ExportRow struct {
	// The finding the report points at.
	FindingID string `json:"finding_id"`
	Repo      string `json:"repo"`
	PR        int    `json:"pr"`
	Lens      string `json:"lens"`
	File      string `json:"file,omitempty"`
	Line      int    `json:"line,omitempty"`
	Title     string `json:"title"`

	// The report itself: which model raised it, in which run, and when.
	Model       string `json:"model"`
	Provider    string `json:"provider,omitempty"`
	RunID       string `json:"run_id"`
	RawSeverity string `json:"raw_severity,omitempty"`
	ReportedAt  string `json:"reported_at"`

	// Timing and token figures copied from the run the report belongs to.
	DurationSecs float64 `json:"duration_secs"`
	InputTokens  *int64  `json:"input_tokens,omitempty"`
	OutputTokens *int64  `json:"output_tokens,omitempty"`

	// Latest grade. Graded is false, and everything below is left unset,
	// while the finding has no grade.
	Graded     bool   `json:"graded"`
	IsReal     *bool  `json:"is_real,omitempty"`
	Severity   string `json:"severity,omitempty"`
	Usefulness *int   `json:"usefulness,omitempty"`
	Notes      string `json:"notes,omitempty"`
	Grader     string `json:"grader,omitempty"`
	GradedAt   string `json:"graded_at,omitempty"`
}

// Export returns every report joined with finding, run timing, and latest grade,
// oldest first. The dashboard does all weighting from these raw rows.
+func (s *Store) Export() ([]ExportRow, error) { + rows, err := s.db.Query(` +SELECT r.finding_id, f.repo, f.pr, f.lens, f.file, f.line, f.title, + r.model, r.provider, r.run_id, r.raw_severity, r.created_at, + COALESCE(ru.duration_secs, 0), ru.input_tokens, ru.output_tokens, + lg.is_real, lg.severity, lg.usefulness, lg.notes, lg.grader, lg.created_at +FROM reports r +JOIN findings f ON f.id = r.finding_id +LEFT JOIN runs ru ON ru.run_id = r.run_id +LEFT JOIN latest_grades lg ON lg.finding_id = r.finding_id +ORDER BY r.created_at, r.id`) + if err != nil { + return nil, err + } + defer rows.Close() + + var out []ExportRow + for rows.Next() { + var e ExportRow + var file, rawSev, sev, notes, grader, gradedAt sql.NullString + var line sql.NullInt64 + var isReal sql.NullBool + var useful sql.NullInt64 + if err := rows.Scan(&e.FindingID, &e.Repo, &e.PR, &e.Lens, &file, &line, &e.Title, + &e.Model, &e.Provider, &e.RunID, &rawSev, &e.ReportedAt, + &e.DurationSecs, &e.InputTokens, &e.OutputTokens, + &isReal, &sev, &useful, ¬es, &grader, &gradedAt); err != nil { + return nil, err + } + e.File, e.Line = file.String, int(line.Int64) + e.RawSeverity = rawSev.String + if isReal.Valid { + e.Graded = true + v := isReal.Bool + e.IsReal = &v + e.Severity, e.Notes, e.Grader, e.GradedAt = sev.String, notes.String, grader.String, gradedAt.String + if useful.Valid { + u := int(useful.Int64) + e.Usefulness = &u + } + } + out = append(out, e) + } + return out, rows.Err() +} + +// ModelStat is the per-model rollup the scoreboard returns. It is intentionally +// POINTS-FREE: raw minutes/tokens and a confirmed-by-severity histogram, so the +// client applies its own weights for points and value-per-minute/token. 
+type ModelStat struct { + Model string `json:"model"` + Provider string `json:"provider,omitempty"` + Runs int `json:"runs"` + Minutes float64 `json:"minutes"` + InputTokens int64 `json:"input_tokens"` + OutputTokens int64 `json:"output_tokens"` + Findings int `json:"findings"` + Confirmed int `json:"confirmed"` + FalsePositive int `json:"false_positive"` + Ungraded int `json:"ungraded"` + BySeverity map[string]int `json:"by_severity"` // confirmed findings per severity +} + +// Scoreboard rolls runs + reports + latest grades up per model. All counts of +// findings are DISTINCT by finding (a model re-reporting across runs counts once). +func (s *Store) Scoreboard() ([]ModelStat, error) { + stats := map[string]*ModelStat{} + get := func(model, provider string) *ModelStat { + m, ok := stats[model] + if !ok { + m = &ModelStat{Model: model, Provider: provider, BySeverity: map[string]int{}} + stats[model] = m + } + return m + } + + // Runs: minutes + tokens + run counts. + rrows, err := s.db.Query(` +SELECT model, provider, COUNT(*), COALESCE(SUM(duration_secs),0), + COALESCE(SUM(input_tokens),0), COALESCE(SUM(output_tokens),0) +FROM runs GROUP BY model, provider`) + if err != nil { + return nil, err + } + for rrows.Next() { + var model, provider string + var runs int + var dur float64 + var in, out int64 + if err := rrows.Scan(&model, &provider, &runs, &dur, &in, &out); err != nil { + rrows.Close() + return nil, err + } + m := get(model, provider) + m.Runs += runs + m.Minutes += dur / 60 + m.InputTokens += in + m.OutputTokens += out + } + rrows.Close() + + // Findings: distinct per model, split by latest-grade state. 
+ frows, err := s.db.Query(` +SELECT r.model, + COUNT(DISTINCT r.finding_id), + COUNT(DISTINCT CASE WHEN lg.is_real=1 THEN r.finding_id END), + COUNT(DISTINCT CASE WHEN lg.is_real=0 THEN r.finding_id END), + COUNT(DISTINCT CASE WHEN lg.is_real IS NULL THEN r.finding_id END) +FROM reports r LEFT JOIN latest_grades lg ON lg.finding_id = r.finding_id +GROUP BY r.model`) + if err != nil { + return nil, err + } + for frows.Next() { + var model string + var total, confirmed, fp, ungraded int + if err := frows.Scan(&model, &total, &confirmed, &fp, &ungraded); err != nil { + frows.Close() + return nil, err + } + m := get(model, "") + m.Findings, m.Confirmed, m.FalsePositive, m.Ungraded = total, confirmed, fp, ungraded + } + frows.Close() + + // Confirmed-by-severity histogram (distinct findings). + srows, err := s.db.Query(` +SELECT r.model, lg.severity, COUNT(DISTINCT r.finding_id) +FROM reports r JOIN latest_grades lg ON lg.finding_id = r.finding_id +WHERE lg.is_real=1 AND lg.severity IS NOT NULL +GROUP BY r.model, lg.severity`) + if err != nil { + return nil, err + } + for srows.Next() { + var model, sev string + var n int + if err := srows.Scan(&model, &sev, &n); err != nil { + srows.Close() + return nil, err + } + get(model, "").BySeverity[sev] = n + } + srows.Close() + + out := make([]ModelStat, 0, len(stats)) + for _, m := range stats { + out = append(out, *m) + } + sort.Slice(out, func(i, j int) bool { return out[i].Model < out[j].Model }) + return out, nil +} + +func sortedSeverities() []string { + out := make([]string, 0, len(validSeverities)) + for s := range validSeverities { + out = append(out, s) + } + sort.Strings(out) + return out +} + +func nullStr(s string) any { + if s == "" { + return nil + } + return s +} diff --git a/store_test.go b/store_test.go new file mode 100644 index 0000000..81529be --- /dev/null +++ b/store_test.go @@ -0,0 +1,132 @@ +package main + +import ( + "path/filepath" + "testing" +) + +func testStore(t *testing.T) *Store { + t.Helper() 
+ s, err := Open(filepath.Join(t.TempDir(), "gadfly-reports.db")) + if err != nil { + t.Fatalf("open: %v", err) + } + t.Cleanup(func() { s.Close() }) + return s +} + +func i64(v int64) *int64 { return &v } +func intp(v int) *int { return &v } + +// TestConsensusAndGrade: two models reporting the SAME location collapse to one +// finding with two reports; a single grade applies to both models' scoreboards. +func TestConsensusAndGrade(t *testing.T) { + s := testStore(t) + + if err := s.AddRun(Run{RunID: "r-cloud", Repo: "steve/x", PR: 2, Model: "minimax", Provider: "ollama-cloud", Lenses: 3, DurationSecs: 300, InputTokens: i64(1000), OutputTokens: i64(500)}); err != nil { + t.Fatal(err) + } + if err := s.AddRun(Run{RunID: "r-m1", Repo: "steve/x", PR: 2, Model: "qwen3", Provider: "m1", Lenses: 3, DurationSecs: 1740}); err != nil { + t.Fatal(err) + } + + // Both models flag the same file:line under the same lens. + ids, err := s.AddReports([]ReportIn{ + {Repo: "steve/x", PR: 2, Lens: "correctness", File: "run/executor.go", Line: 166, Title: "SetIteration never called", Model: "minimax", Provider: "ollama-cloud", RunID: "r-cloud", RawSeverity: "Blocking"}, + {Repo: "steve/x", PR: 2, Lens: "correctness", File: "run/executor.go", Line: 166, Title: "iteration counter dead", Model: "qwen3", Provider: "m1", RunID: "r-m1", RawSeverity: "Blocking"}, + }) + if err != nil { + t.Fatal(err) + } + if ids[0] != ids[1] { + t.Fatalf("same location should collapse to one finding id, got %q and %q", ids[0], ids[1]) + } + + if err := s.AddGrade(Grade{FindingID: ids[0], IsReal: true, Severity: "high", Usefulness: intp(4), Grader: "claude"}); err != nil { + t.Fatal(err) + } + + board, err := s.Scoreboard() + if err != nil { + t.Fatal(err) + } + byModel := map[string]ModelStat{} + for _, m := range board { + byModel[m.Model] = m + } + for _, name := range []string{"minimax", "qwen3"} { + m := byModel[name] + if m.Findings != 1 || m.Confirmed != 1 || m.BySeverity["high"] != 1 { + 
t.Errorf("%s: findings=%d confirmed=%d high=%d, want 1/1/1", name, m.Findings, m.Confirmed, m.BySeverity["high"]) + } + } + if got := byModel["minimax"].Minutes; got != 5 { + t.Errorf("minimax minutes = %v, want 5", got) + } + if got := byModel["qwen3"].Minutes; got != 29 { + t.Errorf("qwen3 minutes = %v, want 29", got) + } + if got := byModel["minimax"].InputTokens; got != 1000 { + t.Errorf("minimax input_tokens = %d, want 1000", got) + } +} + +// TestLatestGradeWins: a re-grade supersedes the prior one everywhere. +func TestLatestGradeWins(t *testing.T) { + s := testStore(t) + if err := s.AddRun(Run{RunID: "r1", Repo: "r", PR: 1, Model: "m", Provider: "p", DurationSecs: 60}); err != nil { + t.Fatal(err) + } + ids, err := s.AddReports([]ReportIn{{Repo: "r", PR: 1, Lens: "security", File: "a.go", Line: 5, Title: "x", Model: "m", Provider: "p", RunID: "r1"}}) + if err != nil { + t.Fatal(err) + } + id := ids[0] + if err := s.AddGrade(Grade{FindingID: id, IsReal: true, Severity: "critical"}); err != nil { + t.Fatal(err) + } + if err := s.AddGrade(Grade{FindingID: id, IsReal: false}); err != nil { // re-graded as a false positive + t.Fatal(err) + } + board, _ := s.Scoreboard() + m := board[0] + if m.Confirmed != 0 || m.FalsePositive != 1 || m.BySeverity["critical"] != 0 { + t.Errorf("after re-grade: confirmed=%d fp=%d critical=%d, want 0/1/0", m.Confirmed, m.FalsePositive, m.BySeverity["critical"]) + } +} + +// TestGradeValidation rejects bad severity / usefulness / unknown finding. 
+func TestGradeValidation(t *testing.T) { + s := testStore(t) + ids, _ := s.AddReports([]ReportIn{{Repo: "r", PR: 1, Lens: "perf", File: "a.go", Line: 1, Title: "t", Model: "m", Provider: "p", RunID: "r1"}}) + id := ids[0] + + if err := s.AddGrade(Grade{FindingID: id, IsReal: true, Severity: "huge"}); err == nil { + t.Error("expected error for invalid severity") + } + if err := s.AddGrade(Grade{FindingID: id, IsReal: true, Severity: "high", Usefulness: intp(9)}); err == nil { + t.Error("expected error for out-of-range usefulness") + } + if err := s.AddGrade(Grade{FindingID: "nope", IsReal: true, Severity: "high"}); err == nil { + t.Error("expected error for unknown finding") + } + // A false positive needs no severity. + if err := s.AddGrade(Grade{FindingID: id, IsReal: false}); err != nil { + t.Errorf("false positive without severity should be valid: %v", err) + } +} + +// TestFindingIDLocationKeyed: id depends on location, not wording; line matters. +func TestFindingIDLocationKeyed(t *testing.T) { + a := findingID("r", 1, "security", "a.go", 10) + sameWordingDiff := findingID("r", 1, "security", "a.go", 10) // any title — id ignores it + if a != sameWordingDiff { + t.Error("same location must yield same id regardless of wording") + } + if a == findingID("r", 1, "security", "a.go", 11) { + t.Error("different line must yield different id") + } + if a == findingID("r", 1, "correctness", "a.go", 10) { + t.Error("different lens must yield different id") + } +}