feat: gadfly-reports — findings store + scoreboard daemon
SQLite-backed HTTP store for Gadfly review findings, per-review run timings, and human/Claude grades, with a points-free per-model scoreboard. Pure fact store: it computes no points or rankings (the dashboard maps severity->points client-side and retunes without re-scoring). Findings are content-addressed by location so cross-model reports collapse for consensus; one grade per finding, latest wins. Pure-Go SQLite (CGO-free) + Docker image CI + tests. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,10 @@
|
|||||||
|
.git
|
||||||
|
.gitea
|
||||||
|
*.db
|
||||||
|
*.db-wal
|
||||||
|
*.db-shm
|
||||||
|
/data
|
||||||
|
gadfly-reports
|
||||||
|
README.md
|
||||||
|
CLAUDE.md
|
||||||
|
.env*
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
# === gadfly-reports daemon configuration ===
|
||||||
|
|
||||||
|
# Listen address (default: :8090)
|
||||||
|
GADFLY_REPORTS_ADDR=:8090
|
||||||
|
|
||||||
|
# SQLite database path (default: gadfly-reports.db; /data/gadfly-reports.db in Docker)
|
||||||
|
GADFLY_REPORTS_DB=/data/gadfly-reports.db
|
||||||
|
|
||||||
|
# Bearer token callers must present on every route except /healthz (empty = open).
|
||||||
|
# gadfly (emit) and gadfly-mcp must present the same token.
|
||||||
|
GADFLY_REPORTS_TOKEN=change-me-to-a-secret
|
||||||
@@ -0,0 +1,69 @@
|
|||||||
|
name: Build & push image
|
||||||
|
|
||||||
|
# Builds the gadfly-reports daemon image and pushes it to the Gitea container
|
||||||
|
# registry so it's easy to self-host.
|
||||||
|
#
|
||||||
|
# push to main -> :latest + :sha-<short>
|
||||||
|
# push tag v* -> :<tag> + :latest
|
||||||
|
#
|
||||||
|
# Required repo secrets: REGISTRY_USER / REGISTRY_PASSWORD (registry push). The
|
||||||
|
# Go build uses only PUBLIC modules, so no private-module creds are needed.
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [main]
|
||||||
|
tags: ["v*"]
|
||||||
|
paths-ignore:
|
||||||
|
- "**.md"
|
||||||
|
- "LICENSE"
|
||||||
|
- ".gitignore"
|
||||||
|
- ".env.example"
|
||||||
|
workflow_dispatch: {}
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: gadfly-reports-image-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
env:
|
||||||
|
IMAGE_NAME: gitea.stevedudenhoeffer.com/steve/gadfly-reports
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build-and-push:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
timeout-minutes: 20
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Set up Docker Buildx
|
||||||
|
run: docker buildx create --use --name gr-builder --driver docker-container 2>/dev/null || docker buildx use gr-builder
|
||||||
|
|
||||||
|
- name: Log in to the registry
|
||||||
|
env:
|
||||||
|
REGISTRY_USER: ${{ secrets.REGISTRY_USER }}
|
||||||
|
REGISTRY_PASSWORD: ${{ secrets.REGISTRY_PASSWORD }}
|
||||||
|
run: echo "${REGISTRY_PASSWORD}" | docker login gitea.stevedudenhoeffer.com -u "${REGISTRY_USER}" --password-stdin
|
||||||
|
|
||||||
|
- name: Compute tags
|
||||||
|
id: meta
|
||||||
|
run: |
|
||||||
|
SHA_SHORT=$(echo "${GITHUB_SHA}" | cut -c1-7)
|
||||||
|
if [ "${{ github.ref_type }}" = "tag" ]; then
|
||||||
|
TAGS="${IMAGE_NAME}:${GITHUB_REF_NAME},${IMAGE_NAME}:latest"
|
||||||
|
else
|
||||||
|
TAGS="${IMAGE_NAME}:latest,${IMAGE_NAME}:sha-${SHA_SHORT}"
|
||||||
|
fi
|
||||||
|
echo "tags=${TAGS}" >> "$GITHUB_OUTPUT"
|
||||||
|
echo "Tags: ${TAGS}"
|
||||||
|
|
||||||
|
- name: Build and push
|
||||||
|
run: |
|
||||||
|
TAG_FLAGS=""
|
||||||
|
IFS=',' read -ra TAG_ARRAY <<< "${{ steps.meta.outputs.tags }}"
|
||||||
|
for t in "${TAG_ARRAY[@]}"; do TAG_FLAGS="$TAG_FLAGS --tag $t"; done
|
||||||
|
docker buildx build \
|
||||||
|
--push \
|
||||||
|
--platform linux/amd64 \
|
||||||
|
$TAG_FLAGS \
|
||||||
|
--add-host gitea.stevedudenhoeffer.com:192.168.0.134 \
|
||||||
|
--file ./Dockerfile \
|
||||||
|
.
|
||||||
@@ -0,0 +1,26 @@
|
|||||||
|
name: CI
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [main]
|
||||||
|
pull_request:
|
||||||
|
types: [opened, synchronize, reopened]
|
||||||
|
workflow_dispatch: {}
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
test:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
timeout-minutes: 10
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
- uses: actions/setup-go@v5
|
||||||
|
with:
|
||||||
|
go-version: "1.26"
|
||||||
|
- name: Build
|
||||||
|
run: go build ./...
|
||||||
|
- name: Vet
|
||||||
|
run: go vet ./...
|
||||||
|
- name: gofmt
|
||||||
|
run: test -z "$(gofmt -l .)" || { gofmt -l .; echo "gofmt needed"; exit 1; }
|
||||||
|
- name: Test (race)
|
||||||
|
run: go test -race ./...
|
||||||
+8
-26
@@ -1,27 +1,9 @@
|
|||||||
# ---> Go
|
# build output
|
||||||
# If you prefer the allow list template instead of the deny list, see community template:
|
/gadfly-reports
|
||||||
# https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore
|
# local SQLite databases
|
||||||
#
|
*.db
|
||||||
# Binaries for programs and plugins
|
*.db-wal
|
||||||
*.exe
|
*.db-shm
|
||||||
*.exe~
|
/data/
|
||||||
*.dll
|
# local env
|
||||||
*.so
|
|
||||||
*.dylib
|
|
||||||
|
|
||||||
# Test binary, built with `go test -c`
|
|
||||||
*.test
|
|
||||||
|
|
||||||
# Output of the go coverage tool, specifically when used with LiteIDE
|
|
||||||
*.out
|
|
||||||
|
|
||||||
# Dependency directories (remove the comment below to include it)
|
|
||||||
# vendor/
|
|
||||||
|
|
||||||
# Go workspace file
|
|
||||||
go.work
|
|
||||||
go.work.sum
|
|
||||||
|
|
||||||
# env file
|
|
||||||
.env
|
.env
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,68 @@
|
|||||||
|
# gadfly-reports — Developer Guide
|
||||||
|
|
||||||
|
A small Go + SQLite HTTP daemon that stores [Gadfly](https://gitea.stevedudenhoeffer.com/steve/gadfly)
|
||||||
|
review findings, the per-review run timings, and human/Claude grades — and serves a points-free
|
||||||
|
per-model scoreboard. The companion MCP client is
|
||||||
|
[gadfly-mcp](https://gitea.stevedudenhoeffer.com/steve/gadfly-mcp).
|
||||||
|
|
||||||
|
> This is a public, **vibe-coded** project (built largely by an AI agent). Keep that framing honest
|
||||||
|
> in the README; don't oversell it — it's a homelab-grade store, not a hardened product.
|
||||||
|
|
||||||
|
## Core principle: store raw facts, score on the client
|
||||||
|
|
||||||
|
gadfly-reports records **only facts**: runs (timing/tokens), findings (content-addressed by
|
||||||
|
location), reports (which model raised which finding), and grades (`is_real` + `severity` +
|
||||||
|
`usefulness`). It **never stores points or computes rankings**. Mapping `severity → points` and any
|
||||||
|
"value per minute / per token" ranking is the dashboard's job. This is deliberate — keep it that way:
|
||||||
|
do not add a points column or a weighting config to the store. Retuning the curve must never require
|
||||||
|
a migration or a re-score.
|
||||||
|
|
||||||
|
The severity vocabulary (`trivial|small|medium|high|critical`) in `store.go` is the **only**
|
||||||
|
scoring-adjacent contract, and it's a closed set validated on write.
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
```
|
||||||
|
main.go subcommand dispatch (serve) + flags/env
|
||||||
|
store.go SQLite schema + types + queries (runs/findings/reports/grades + latest_grades view)
|
||||||
|
server.go net/http API (ServeMux method+path routes) + optional bearer auth
|
||||||
|
*_test.go store + server end-to-end tests (consensus, latest-grade-wins, validation, auth)
|
||||||
|
Dockerfile CGO-free build (pure-Go modernc sqlite) -> small alpine image
|
||||||
|
.gitea/workflows/ ci.yml (build/vet/test) + build-image.yml (publish :latest + :sha-<short>)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Data model.** A **finding** is identified by `sha256(repo|pr|lens|file|line)[:16]` — *not* by
|
||||||
|
wording — so the same issue from different models (or a re-review) collapses to one finding with many
|
||||||
|
**reports**. One **grade** per finding (history kept, latest wins via the `latest_grades` view).
|
||||||
|
|
||||||
|
## Dependencies
|
||||||
|
|
||||||
|
- **modernc.org/sqlite** (pure Go) — chosen so the binary is CGO-free and `go run …@latest`/the
|
||||||
|
Docker build need no C toolchain. Don't swap in a cgo driver.
|
||||||
|
- Otherwise stdlib only. The MCP SDK lives in gadfly-mcp, **not** here — keep this daemon lean.
|
||||||
|
|
||||||
|
## Build / test
|
||||||
|
|
||||||
|
```sh
|
||||||
|
go build ./...
|
||||||
|
go vet ./...
|
||||||
|
gofmt -l . # must be empty
|
||||||
|
go test -race ./...
|
||||||
|
```
|
||||||
|
|
||||||
|
## Release / deploy
|
||||||
|
|
||||||
|
- **Push to `main`** → CI builds and publishes `:latest` (+ `:sha-<short>`) to
|
||||||
|
`gitea.stevedudenhoeffer.com/steve/gadfly-reports`.
|
||||||
|
- **Tag `v*`** → publishes `:<tag>` (+ `:latest`).
|
||||||
|
- CI needs repo secrets `REGISTRY_USER` / `REGISTRY_PASSWORD` to push the image (the Go build itself
|
||||||
|
uses only public modules — no private-module creds needed).
|
||||||
|
|
||||||
|
## When making changes
|
||||||
|
|
||||||
|
- Keep the **README API table** in sync with `server.go` routes and `store.go` JSON tags — it is the
|
||||||
|
contract gadfly (emit) and gadfly-mcp rely on. Stale docs are a bug.
|
||||||
|
- Preserve the **store-no-points** principle (see above).
|
||||||
|
- Add a test when you add logic. Keep `gofmt` clean and `go vet` quiet.
|
||||||
|
- The schema uses `CREATE TABLE IF NOT EXISTS` migrations applied on `Open`; additive changes are
|
||||||
|
fine, destructive ones need a real migration story (there isn't one yet — it's a homelab store).
|
||||||
+19
@@ -0,0 +1,19 @@
|
|||||||
|
# gadfly-reports daemon image. modernc.org/sqlite is pure Go, so the binary is
|
||||||
|
# CGO-free and the final image needs no libc / no C toolchain at build time.
|
||||||
|
FROM golang:1.26 AS build
|
||||||
|
WORKDIR /src
|
||||||
|
COPY go.mod go.sum ./
|
||||||
|
RUN go mod download
|
||||||
|
COPY . .
|
||||||
|
RUN CGO_ENABLED=0 go build -trimpath -ldflags="-s -w" -o /out/gadfly-reports .
|
||||||
|
|
||||||
|
FROM alpine:3.20
|
||||||
|
RUN adduser -D -u 10001 app && mkdir -p /data && chown app /data
|
||||||
|
COPY --from=build /out/gadfly-reports /usr/local/bin/gadfly-reports
|
||||||
|
USER app
|
||||||
|
ENV GADFLY_REPORTS_ADDR=:8090 \
|
||||||
|
GADFLY_REPORTS_DB=/data/gadfly-reports.db
|
||||||
|
EXPOSE 8090
|
||||||
|
VOLUME ["/data"]
|
||||||
|
ENTRYPOINT ["/usr/local/bin/gadfly-reports"]
|
||||||
|
CMD ["serve"]
|
||||||
@@ -0,0 +1,21 @@
|
|||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2026 Steve Dudenhoeffer
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
@@ -1,2 +1,96 @@
|
|||||||
# gadfly-reports
|
# 🪰📋 gadfly-reports
|
||||||
|
|
||||||
|
A small **durable store + scoreboard** for [Gadfly](https://gitea.stevedudenhoeffer.com/steve/gadfly)
|
||||||
|
review findings. Gadfly (and any CI) POST each model's findings and per-review timing here; a human
|
||||||
|
or Claude — via [gadfly-mcp](https://gitea.stevedudenhoeffer.com/steve/gadfly-mcp) — later grades
|
||||||
|
each finding. It's a single Go binary backed by SQLite, speaking a tiny HTTP API.
|
||||||
|
|
||||||
|
> ### 🤖 Heads up: this is a vibe-coded project
|
||||||
|
> gadfly-reports was built almost entirely by an AI agent (Claude Code) — the design, the code, and
|
||||||
|
> these docs. It's small and it's tested, but treat it accordingly: it's a homelab-grade service,
|
||||||
|
> not a hardened product, and there may be the occasional AI-flavored rough edge. Issues and PRs
|
||||||
|
> welcome.
|
||||||
|
|
||||||
|
## What it stores — and what it deliberately doesn't
|
||||||
|
|
||||||
|
gadfly-reports is a **pure fact store**:
|
||||||
|
|
||||||
|
- **runs** — one per model's review of a PR: wall-clock duration, lens count, optional token/cost.
|
||||||
|
- **findings** — **content-addressed by location** (`repo + pr + lens + file + line`), so the *same*
|
||||||
|
issue raised by several models collapses to one finding with many **reports**. That collapse is
|
||||||
|
what makes cross-model **consensus** and per-model **precision** measurable.
|
||||||
|
- **grades** — a triage verdict per finding: `is_real`, `severity`
|
||||||
|
(`trivial|small|medium|high|critical`), optional `usefulness` (1–5), notes, grader. Grade history
|
||||||
|
is kept; the latest wins.
|
||||||
|
|
||||||
|
It stores **no points and computes no rankings.** Mapping severity → points and ranking models by
|
||||||
|
"value per minute" (or per token) is a **client/dashboard concern**, so you can retune the curve any
|
||||||
|
time without migrating or re-scoring stored data.
|
||||||
|
|
||||||
|
## Run it
|
||||||
|
|
||||||
|
```sh
|
||||||
|
# from source
|
||||||
|
go run gitea.stevedudenhoeffer.com/steve/gadfly-reports@latest serve
|
||||||
|
|
||||||
|
# or Docker (image published by CI on every push to main)
|
||||||
|
docker run -d --name gadfly-reports -p 8090:8090 -v gadfly-reports-data:/data \
|
||||||
|
-e GADFLY_REPORTS_TOKEN=change-me \
|
||||||
|
gitea.stevedudenhoeffer.com/steve/gadfly-reports:latest
|
||||||
|
```
|
||||||
|
|
||||||
|
## HTTP API (the canonical contract)
|
||||||
|
|
||||||
|
| Method & path | Body / query | Purpose |
|
||||||
|
|---|---|---|
|
||||||
|
| `GET /healthz` | — | liveness (open even when a token is set) |
|
||||||
|
| `POST /runs` | one run object | upsert a model's review of a PR (timing/tokens) |
|
||||||
|
| `POST /reports` | JSON **array** of report objects | record findings + which model reported each |
|
||||||
|
| `POST /findings/{id}/grade` | `{is_real, severity?, usefulness?, notes?, grader?}` | record a triage grade |
|
||||||
|
| `GET /export` | — | flat report×finding×run×latest-grade rows — the dashboard feed |
|
||||||
|
| `GET /scoreboard` | — | points-free per-model rollup |
|
||||||
|
|
||||||
|
`POST /runs` body: `{run_id, repo, pr, model, provider, lenses, duration_secs, input_tokens?, output_tokens?, cost_usd?}`
|
||||||
|
(re-posting the same `run_id` updates it).
|
||||||
|
|
||||||
|
`POST /reports` array element: `{repo, pr, lens, file, line, title, model, provider, run_id, raw_severity, detail}`.
|
||||||
|
|
||||||
|
`GET /scoreboard` element: `{model, provider, runs, minutes, input_tokens, output_tokens, findings, confirmed, false_positive, ungraded, by_severity:{severity:count}}`.
|
||||||
|
|
||||||
|
If `GADFLY_REPORTS_TOKEN` is set, every route except `/healthz` requires `Authorization: Bearer <token>`.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Env | Default | Meaning |
|
||||||
|
|-----|---------|---------|
|
||||||
|
| `GADFLY_REPORTS_ADDR` | `:8090` | listen address |
|
||||||
|
| `GADFLY_REPORTS_DB` | `gadfly-reports.db` (`/data/gadfly-reports.db` in Docker) | SQLite path |
|
||||||
|
| `GADFLY_REPORTS_TOKEN` | *(empty)* | bearer token callers must present (empty = open) |
|
||||||
|
|
||||||
|
CLI flags `--addr` / `--db` / `--token` override the env.
|
||||||
|
|
||||||
|
## Dashboards
|
||||||
|
|
||||||
|
Point anything at the JSON endpoints (or the SQLite file read-only). `GET /export` is the flat feed;
|
||||||
|
`GET /scoreboard` is the per-model rollup. Compute points and value-per-minute **in the dashboard**,
|
||||||
|
e.g. with a curve like `trivial=1, small=3, medium=5, high=8, critical=20` →
|
||||||
|
`points = Σ weight[severity]·by_severity[severity]`, `value/min = points / minutes`.
|
||||||
|
|
||||||
|
## How it fits together
|
||||||
|
|
||||||
|
- **[gadfly](https://gitea.stevedudenhoeffer.com/steve/gadfly)** POSTs findings here after each
|
||||||
|
review when `GADFLY_FINDINGS_URL` points at this store (advisory; off by default).
|
||||||
|
- **[gadfly-mcp](https://gitea.stevedudenhoeffer.com/steve/gadfly-mcp)** is the MCP server Claude
|
||||||
|
uses to list findings and record grades against this store.
|
||||||
|
|
||||||
|
## Build / test
|
||||||
|
|
||||||
|
```sh
|
||||||
|
go build ./...
|
||||||
|
go test ./...
|
||||||
|
gofmt -l . # must be clean
|
||||||
|
```
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
MIT © 2026 Steve Dudenhoeffer.
|
||||||
|
|||||||
@@ -0,0 +1,17 @@
|
|||||||
|
module gitea.stevedudenhoeffer.com/steve/gadfly-reports
|
||||||
|
|
||||||
|
go 1.26
|
||||||
|
|
||||||
|
require modernc.org/sqlite v1.53.0
|
||||||
|
|
||||||
|
require (
|
||||||
|
github.com/dustin/go-humanize v1.0.1 // indirect
|
||||||
|
github.com/google/uuid v1.6.0 // indirect
|
||||||
|
github.com/mattn/go-isatty v0.0.20 // indirect
|
||||||
|
github.com/ncruces/go-strftime v1.0.0 // indirect
|
||||||
|
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
|
||||||
|
golang.org/x/sys v0.44.0 // indirect
|
||||||
|
modernc.org/libc v1.73.4 // indirect
|
||||||
|
modernc.org/mathutil v1.7.1 // indirect
|
||||||
|
modernc.org/memory v1.11.0 // indirect
|
||||||
|
)
|
||||||
@@ -0,0 +1,51 @@
|
|||||||
|
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
|
||||||
|
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
|
||||||
|
github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs=
|
||||||
|
github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
|
||||||
|
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||||
|
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||||
|
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
|
||||||
|
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
|
||||||
|
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
|
||||||
|
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
||||||
|
github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
|
||||||
|
github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
|
||||||
|
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
|
||||||
|
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
|
||||||
|
golang.org/x/mod v0.36.0 h1:JJjpVx6myfUsUdAzZuOSTTmRE0PfZeNWzzvKrP7amb4=
|
||||||
|
golang.org/x/mod v0.36.0/go.mod h1:moc6ELqsWcOw5Ef3xVprK5ul/MvtVvkIXLziUOICjUQ=
|
||||||
|
golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4=
|
||||||
|
golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
|
||||||
|
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/sys v0.44.0 h1:ildZl3J4uzeKP07r2F++Op7E9B29JRUy+a27EibtBTQ=
|
||||||
|
golang.org/x/sys v0.44.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
|
||||||
|
golang.org/x/tools v0.45.0 h1:18qN3FAooORvApf5XjCXgsuayZOEtXf6JK18I3+ONa8=
|
||||||
|
golang.org/x/tools v0.45.0/go.mod h1:LuUGqqaXcXMEFEruIVJVm5mgDD8vww/z/SR1gQ4uE/0=
|
||||||
|
modernc.org/cc/v4 v4.28.4 h1:Hd/4Es+MBj+/7hSdZaisNyu6bv3V0Dp2MdllyfqaH+c=
|
||||||
|
modernc.org/cc/v4 v4.28.4/go.mod h1:OnovgIhbbMXMu1aISnJ0wvVD1KnW+cAUJkIrAWh+kVI=
|
||||||
|
modernc.org/ccgo/v4 v4.34.4 h1:OVnSOWQjVKOYkFxoHYB+qQmSHK5gqMqARM+K9DpR/Ws=
|
||||||
|
modernc.org/ccgo/v4 v4.34.4/go.mod h1:qdKqE8FNIYyysougB1RX9MxCzp5oJOcQXSobANJ4TuE=
|
||||||
|
modernc.org/fileutil v1.4.0 h1:j6ZzNTftVS054gi281TyLjHPp6CPHr2KCxEXjEbD6SM=
|
||||||
|
modernc.org/fileutil v1.4.0/go.mod h1:EqdKFDxiByqxLk8ozOxObDSfcVOv/54xDs/DUHdvCUU=
|
||||||
|
modernc.org/gc/v2 v2.6.5 h1:nyqdV8q46KvTpZlsw66kWqwXRHdjIlJOhG6kxiV/9xI=
|
||||||
|
modernc.org/gc/v2 v2.6.5/go.mod h1:YgIahr1ypgfe7chRuJi2gD7DBQiKSLMPgBQe9oIiito=
|
||||||
|
modernc.org/gc/v3 v3.1.3 h1:6QAplYyVO+KdPW3pGnqmJDUxtkec8ooEWvks/hhU3lc=
|
||||||
|
modernc.org/gc/v3 v3.1.3/go.mod h1:HFK/6AGESC7Ex+EZJhJ2Gni6cTaYpSMmU/cT9RmlfYY=
|
||||||
|
modernc.org/goabi0 v0.2.0 h1:HvEowk7LxcPd0eq6mVOAEMai46V+i7Jrj13t4AzuNks=
|
||||||
|
modernc.org/goabi0 v0.2.0/go.mod h1:CEFRnnJhKvWT1c1JTI3Avm+tgOWbkOu5oPA8eH8LnMI=
|
||||||
|
modernc.org/libc v1.73.4 h1:+ra4Ui8ngyt8HDcO1FTDPWlkAh6yOdaO2yAoh8MddQA=
|
||||||
|
modernc.org/libc v1.73.4/go.mod h1:DXZ3eO8qMCNn2SnmTNCiC71nJ9Rcq3PsnpU6Vc4rWK8=
|
||||||
|
modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU=
|
||||||
|
modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg=
|
||||||
|
modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI=
|
||||||
|
modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw=
|
||||||
|
modernc.org/opt v0.2.0 h1:tGyef5ApycA7FSEOMraay9SaTk5zmbx7Tu+cJs4QKZg=
|
||||||
|
modernc.org/opt v0.2.0/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns=
|
||||||
|
modernc.org/sortutil v1.2.1 h1:+xyoGf15mM3NMlPDnFqrteY07klSFxLElE2PVuWIJ7w=
|
||||||
|
modernc.org/sortutil v1.2.1/go.mod h1:7ZI3a3REbai7gzCLcotuw9AC4VZVpYMjDzETGsSMqJE=
|
||||||
|
modernc.org/sqlite v1.53.0 h1:20WG8N9q4ji/dEqGk4uiI0c6OPjSeLTNYGFCc3+7c1M=
|
||||||
|
modernc.org/sqlite v1.53.0/go.mod h1:xoEpOIpGrgT48H5iiyt/YXPCZPEzlfmfFwtk8Lklw8s=
|
||||||
|
modernc.org/strutil v1.2.1 h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0=
|
||||||
|
modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A=
|
||||||
|
modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y=
|
||||||
|
modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM=
|
||||||
@@ -0,0 +1,74 @@
|
|||||||
|
// Command gadfly-reports is a small, durable store + scoreboard for Gadfly's review
|
||||||
|
// findings. Gadfly (and CI) report each model's findings and per-review timing
|
||||||
|
// here; a human or Claude later grades each finding (is_real + severity +
|
||||||
|
// usefulness). gadfly-reports stores only those RAW facts — it deliberately does NOT
|
||||||
|
// compute points or rankings, so the dashboard/client owns the scoring curve
|
||||||
|
// (severity -> points, value-per-minute, value-per-token) and can retune it
|
||||||
|
// without migrating or re-scoring stored data.
|
||||||
|
//
|
||||||
|
// Subcommands:
|
||||||
|
//
|
||||||
|
// gadfly-reports serve [flags] run the HTTP + SQLite store (the long-running daemon)
|
||||||
|
//
|
||||||
|
// The MCP server Claude calls to record grades lives in the separate gadfly-mcp
// repository, so this daemon stays lean; run it with `go run <module>@latest serve`.
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"log"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
if len(os.Args) < 2 {
|
||||||
|
usage()
|
||||||
|
os.Exit(2)
|
||||||
|
}
|
||||||
|
switch os.Args[1] {
|
||||||
|
case "serve":
|
||||||
|
serveCmd(os.Args[2:])
|
||||||
|
default:
|
||||||
|
usage()
|
||||||
|
os.Exit(2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// usage writes the top-level help text (the available subcommand and a pointer
// to "serve -h") to standard error. It does not exit; callers decide the status.
func usage() {
|
||||||
|
fmt.Fprint(os.Stderr, `gadfly-reports — durable store + scoreboard for Gadfly review findings
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
gadfly-reports serve [flags] run the HTTP + SQLite store
|
||||||
|
|
||||||
|
Run "gadfly-reports serve -h" for flags.
|
||||||
|
`)
|
||||||
|
}
|
||||||
|
|
||||||
|
func serveCmd(args []string) {
|
||||||
|
fs := flag.NewFlagSet("serve", flag.ExitOnError)
|
||||||
|
addr := fs.String("addr", envOr("GADFLY_REPORTS_ADDR", ":8090"), "listen address")
|
||||||
|
dbPath := fs.String("db", envOr("GADFLY_REPORTS_DB", "gadfly-reports.db"), "SQLite database path")
|
||||||
|
token := fs.String("token", os.Getenv("GADFLY_REPORTS_TOKEN"), "bearer token callers must present (empty = open)")
|
||||||
|
_ = fs.Parse(args)
|
||||||
|
|
||||||
|
store, err := Open(*dbPath)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("gadfly-reports: %v", err)
|
||||||
|
}
|
||||||
|
defer store.Close()
|
||||||
|
|
||||||
|
log.Printf("gadfly-reports: serving %s (db=%s, auth=%v)", *addr, *dbPath, *token != "")
|
||||||
|
srv := &http.Server{Addr: *addr, Handler: newServer(store, *token)}
|
||||||
|
if err := srv.ListenAndServe(); err != nil {
|
||||||
|
log.Fatalf("gadfly-reports: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// envOr returns the value of the environment variable key, or def when the
// variable is unset or set to the empty string.
func envOr(key, def string) string {
	val := os.Getenv(key)
	if val == "" {
		return def
	}
	return val
}
|
||||||
@@ -0,0 +1,121 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
	"crypto/subtle"
	"encoding/json"
	"errors"
	"log"
	"net/http"
	"strings"
)
|
||||||
|
|
||||||
|
// newServer wires the store to the HTTP API. If token is non-empty, every route
|
||||||
|
// except /healthz requires "Authorization: Bearer <token>".
|
||||||
|
//
|
||||||
|
// Routes:
|
||||||
|
//
|
||||||
|
// GET /healthz liveness
|
||||||
|
// POST /runs upsert one run (model review of a PR; timing/tokens)
|
||||||
|
// POST /reports record a batch of findings + this model's reports
|
||||||
|
// POST /findings/{id}/grade record a triage grade (is_real, severity, …)
|
||||||
|
// GET /export flat report×finding×grade rows (the dashboard feed)
|
||||||
|
// GET /scoreboard points-free per-model rollup
|
||||||
|
func newServer(store *Store, token string) http.Handler {
|
||||||
|
mux := http.NewServeMux()
|
||||||
|
|
||||||
|
mux.HandleFunc("GET /healthz", func(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
writeJSON(w, http.StatusOK, map[string]string{"status": "ok"})
|
||||||
|
})
|
||||||
|
|
||||||
|
mux.HandleFunc("POST /runs", func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
var run Run
|
||||||
|
if !decode(w, r, &run) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err := store.AddRun(run); err != nil {
|
||||||
|
writeErr(w, http.StatusBadRequest, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
writeJSON(w, http.StatusOK, map[string]string{"run_id": run.RunID})
|
||||||
|
})
|
||||||
|
|
||||||
|
mux.HandleFunc("POST /reports", func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
var reps []ReportIn
|
||||||
|
if !decode(w, r, &reps) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ids, err := store.AddReports(reps)
|
||||||
|
if err != nil {
|
||||||
|
writeErr(w, http.StatusBadRequest, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
writeJSON(w, http.StatusOK, map[string]any{"finding_ids": ids})
|
||||||
|
})
|
||||||
|
|
||||||
|
mux.HandleFunc("POST /findings/{id}/grade", func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
var g Grade
|
||||||
|
if !decode(w, r, &g) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
g.FindingID = r.PathValue("id")
|
||||||
|
if err := store.AddGrade(g); err != nil {
|
||||||
|
writeErr(w, http.StatusBadRequest, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
writeJSON(w, http.StatusOK, map[string]string{"finding_id": g.FindingID})
|
||||||
|
})
|
||||||
|
|
||||||
|
mux.HandleFunc("GET /export", func(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
rows, err := store.Export()
|
||||||
|
if err != nil {
|
||||||
|
writeErr(w, http.StatusInternalServerError, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
writeJSON(w, http.StatusOK, rows)
|
||||||
|
})
|
||||||
|
|
||||||
|
mux.HandleFunc("GET /scoreboard", func(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
stats, err := store.Scoreboard()
|
||||||
|
if err != nil {
|
||||||
|
writeErr(w, http.StatusInternalServerError, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
writeJSON(w, http.StatusOK, stats)
|
||||||
|
})
|
||||||
|
|
||||||
|
return auth(token, mux)
|
||||||
|
}
|
||||||
|
|
||||||
|
// auth gates everything but /healthz behind a bearer token, when one is set.
|
||||||
|
func auth(token string, next http.Handler) http.Handler {
|
||||||
|
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if token != "" && r.URL.Path != "/healthz" {
|
||||||
|
got := strings.TrimPrefix(r.Header.Get("Authorization"), "Bearer ")
|
||||||
|
if strings.TrimSpace(got) != token {
|
||||||
|
writeErr(w, http.StatusUnauthorized, errors.New("missing or invalid bearer token"))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
next.ServeHTTP(w, r)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// decode reads a JSON body into v, writing a 400 and returning false on failure.
|
||||||
|
func decode(w http.ResponseWriter, r *http.Request, v any) bool {
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(v); err != nil {
|
||||||
|
writeErr(w, http.StatusBadRequest, errors.New("invalid JSON body: "+err.Error()))
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// writeJSON sends v as a JSON response with the given status code. The status
// line is written before encoding, so an encoding failure can only be logged,
// not turned into a different status.
func writeJSON(w http.ResponseWriter, code int, v any) {
	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(code)

	enc := json.NewEncoder(w)
	err := enc.Encode(v)
	if err == nil {
		return
	}
	log.Printf("gadfly-reports: write response: %v", err)
}
|
||||||
|
|
||||||
|
func writeErr(w http.ResponseWriter, code int, err error) {
|
||||||
|
writeJSON(w, code, map[string]string{"error": err.Error()})
|
||||||
|
}
|
||||||
+100
@@ -0,0 +1,100 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/json"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func testServer(t *testing.T, token string) *httptest.Server {
|
||||||
|
t.Helper()
|
||||||
|
store, err := Open(filepath.Join(t.TempDir(), "gadfly-reports.db"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { store.Close() })
|
||||||
|
srv := httptest.NewServer(newServer(store, token))
|
||||||
|
t.Cleanup(srv.Close)
|
||||||
|
return srv
|
||||||
|
}
|
||||||
|
|
||||||
|
// post JSON-encodes body and POSTs it to path on srv, adding a bearer
// Authorization header when token is non-empty. Any failure to encode the
// body, build the request, or send it fails the test immediately.
//
// The response body is closed automatically when the test finishes, so
// callers may read it but do not have to close it themselves.
func post(t *testing.T, srv *httptest.Server, token, path string, body any) *http.Response {
	t.Helper()

	payload, err := json.Marshal(body)
	if err != nil {
		t.Fatalf("POST %s: encode body: %v", path, err)
	}
	req, err := http.NewRequest(http.MethodPost, srv.URL+path, bytes.NewReader(payload))
	if err != nil {
		t.Fatalf("POST %s: build request: %v", path, err)
	}
	req.Header.Set("Content-Type", "application/json")
	if token != "" {
		req.Header.Set("Authorization", "Bearer "+token)
	}

	// Use the client tied to the test server rather than the shared default
	// client, so its connections are cleaned up with the server.
	resp, err := srv.Client().Do(req)
	if err != nil {
		t.Fatalf("POST %s: %v", path, err)
	}
	t.Cleanup(func() { resp.Body.Close() })
	return resp
}
|
||||||
|
|
||||||
|
// TestServerEndToEnd: run -> reports -> grade -> scoreboard over HTTP.
|
||||||
|
func TestServerEndToEnd(t *testing.T) {
|
||||||
|
srv := testServer(t, "")
|
||||||
|
|
||||||
|
if resp := post(t, srv, "", "/runs", Run{RunID: "r1", Repo: "r", PR: 1, Model: "m", Provider: "p", DurationSecs: 120}); resp.StatusCode != 200 {
|
||||||
|
t.Fatalf("POST /runs = %d", resp.StatusCode)
|
||||||
|
}
|
||||||
|
|
||||||
|
resp := post(t, srv, "", "/reports", []ReportIn{
|
||||||
|
{Repo: "r", PR: 1, Lens: "security", File: "a.go", Line: 7, Title: "leak", Model: "m", Provider: "p", RunID: "r1"},
|
||||||
|
})
|
||||||
|
if resp.StatusCode != 200 {
|
||||||
|
t.Fatalf("POST /reports = %d", resp.StatusCode)
|
||||||
|
}
|
||||||
|
var rep struct {
|
||||||
|
FindingIDs []string `json:"finding_ids"`
|
||||||
|
}
|
||||||
|
json.NewDecoder(resp.Body).Decode(&rep)
|
||||||
|
if len(rep.FindingIDs) != 1 {
|
||||||
|
t.Fatalf("want 1 finding id, got %v", rep.FindingIDs)
|
||||||
|
}
|
||||||
|
id := rep.FindingIDs[0]
|
||||||
|
|
||||||
|
if resp := post(t, srv, "", "/findings/"+id+"/grade", Grade{IsReal: true, Severity: "medium", Grader: "claude"}); resp.StatusCode != 200 {
|
||||||
|
t.Fatalf("POST grade = %d", resp.StatusCode)
|
||||||
|
}
|
||||||
|
|
||||||
|
resp = mustGet(t, srv, "", "/scoreboard")
|
||||||
|
var board []ModelStat
|
||||||
|
json.NewDecoder(resp.Body).Decode(&board)
|
||||||
|
if len(board) != 1 || board[0].Confirmed != 1 || board[0].BySeverity["medium"] != 1 || board[0].Minutes != 2 {
|
||||||
|
t.Fatalf("unexpected scoreboard: %+v", board)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestServerAuth: a set token gates writes but leaves /healthz open.
|
||||||
|
func TestServerAuth(t *testing.T) {
|
||||||
|
srv := testServer(t, "secret")
|
||||||
|
|
||||||
|
if resp := post(t, srv, "", "/runs", Run{RunID: "r1", Model: "m"}); resp.StatusCode != http.StatusUnauthorized {
|
||||||
|
t.Errorf("unauthenticated POST = %d, want 401", resp.StatusCode)
|
||||||
|
}
|
||||||
|
if resp := post(t, srv, "secret", "/runs", Run{RunID: "r1", Model: "m"}); resp.StatusCode != 200 {
|
||||||
|
t.Errorf("authenticated POST = %d, want 200", resp.StatusCode)
|
||||||
|
}
|
||||||
|
if resp := mustGet(t, srv, "", "/healthz"); resp.StatusCode != 200 {
|
||||||
|
t.Errorf("healthz should be open, got %d", resp.StatusCode)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// mustGet issues a GET for path on srv and returns the response. A non-empty
// token is presented as a Bearer credential. Failure to build or perform the
// request fails the test at once. The response body is closed when the test
// finishes, so callers may read it but do not have to close it themselves.
func mustGet(t *testing.T, srv *httptest.Server, token, path string) *http.Response {
	t.Helper()

	req, err := http.NewRequest(http.MethodGet, srv.URL+path, nil)
	if err != nil {
		t.Fatalf("GET %s: build request: %v", path, err)
	}
	if token != "" {
		req.Header.Set("Authorization", "Bearer "+token)
	}

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		t.Fatalf("GET %s: %v", path, err)
	}
	// Closing an already-closed body is harmless, so callers that close it
	// themselves are unaffected.
	t.Cleanup(func() { resp.Body.Close() })
	return resp
}
|
||||||
@@ -0,0 +1,447 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crypto/sha256"
|
||||||
|
"database/sql"
|
||||||
|
"encoding/hex"
|
||||||
|
"fmt"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
_ "modernc.org/sqlite"
|
||||||
|
)
|
||||||
|
|
||||||
|
// gadfly-reports stores only RAW review facts: which model reported which finding, how
|
||||||
|
// long each model's review took, and a human/Claude grade (is_real + severity +
|
||||||
|
// usefulness). It deliberately does NOT compute points or rankings — the
|
||||||
|
// dashboard owns the scoring curve (severity -> points, value-per-minute), so it
|
||||||
|
// can be retuned without re-scoring or migrating stored data. The severity
|
||||||
|
// vocabulary below is the only scoring-related contract.
|
||||||
|
|
||||||
|
// validSeverities is the closed vocabulary a grade may assign to a REAL
// finding. The store attaches no weight to these names; the client maps them
// to points however it likes (e.g. trivial=1 … critical=20).
var validSeverities = map[string]bool{
	"trivial":  true,
	"small":    true,
	"medium":   true,
	"high":     true,
	"critical": true,
}
|
||||||
|
|
||||||
|
// schema is the idempotent DDL applied on every Open: four tables (runs,
// findings, reports, grades), their lookup indexes, and the latest_grades view
// that read paths join against. Every statement uses IF NOT EXISTS, so
// re-applying it to an existing database does nothing.
const schema = `
CREATE TABLE IF NOT EXISTS runs (
	run_id        TEXT PRIMARY KEY,
	repo          TEXT NOT NULL,
	pr            INTEGER NOT NULL,
	model         TEXT NOT NULL,
	provider      TEXT NOT NULL,
	lenses        INTEGER NOT NULL DEFAULT 0,
	duration_secs REAL NOT NULL DEFAULT 0,
	input_tokens  INTEGER,
	output_tokens INTEGER,
	cost_usd      REAL,
	created_at    TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS findings (
	id         TEXT PRIMARY KEY,
	repo       TEXT NOT NULL,
	pr         INTEGER NOT NULL,
	lens       TEXT NOT NULL,
	file       TEXT,
	line       INTEGER,
	title      TEXT NOT NULL,
	first_seen TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS reports (
	id           INTEGER PRIMARY KEY AUTOINCREMENT,
	finding_id   TEXT NOT NULL,
	run_id       TEXT NOT NULL,
	model        TEXT NOT NULL,
	provider     TEXT NOT NULL,
	raw_severity TEXT,
	detail       TEXT,
	created_at   TEXT NOT NULL,
	UNIQUE(finding_id, run_id)
);
CREATE INDEX IF NOT EXISTS idx_reports_finding ON reports(finding_id);
CREATE INDEX IF NOT EXISTS idx_reports_model ON reports(model);

CREATE TABLE IF NOT EXISTS grades (
	id         INTEGER PRIMARY KEY AUTOINCREMENT,
	finding_id TEXT NOT NULL,
	is_real    INTEGER NOT NULL,
	severity   TEXT,
	usefulness INTEGER,
	notes      TEXT,
	grader     TEXT,
	created_at TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_grades_finding ON grades(finding_id);

-- latest_grades: the most recent grade per finding (grade history is kept; the
-- latest wins). Used by every read path so a re-grade supersedes the old one.
CREATE VIEW IF NOT EXISTS latest_grades AS
	SELECT g.* FROM grades g
	JOIN (SELECT finding_id, MAX(id) AS max_id FROM grades GROUP BY finding_id) m
	ON g.id = m.max_id;
`
|
||||||
|
|
||||||
|
// Store is the SQLite-backed fact store. It wraps the *sql.DB handle that all
// of its methods go through.
type Store struct {
	db *sql.DB
}
|
||||||
|
|
||||||
|
// Open opens (creating if needed) the SQLite database at path and applies the
|
||||||
|
// schema. WAL + a busy timeout keep the single-writer daemon honest under the
|
||||||
|
// occasional concurrent reader.
|
||||||
|
func Open(path string) (*Store, error) {
|
||||||
|
db, err := sql.Open("sqlite", "file:"+path+"?_pragma=busy_timeout(5000)&_pragma=journal_mode(WAL)&_pragma=foreign_keys(on)")
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("open %s: %w", path, err)
|
||||||
|
}
|
||||||
|
// modernc's pure-Go driver is happiest with a single writer connection.
|
||||||
|
db.SetMaxOpenConns(1)
|
||||||
|
if _, err := db.Exec(schema); err != nil {
|
||||||
|
db.Close()
|
||||||
|
return nil, fmt.Errorf("migrate: %w", err)
|
||||||
|
}
|
||||||
|
return &Store{db: db}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close closes the underlying database handle and returns its error, if any.
func (s *Store) Close() error {
	return s.db.Close()
}
|
||||||
|
|
||||||
|
// now returns the current instant in UTC, formatted as RFC 3339 with second
// precision.
func now() string {
	utc := time.Now().UTC()
	return utc.Format(time.RFC3339)
}
|
||||||
|
|
||||||
|
// findingID content-addresses a finding by its location rather than its
// wording: repo, PR number, lens, file and line. The same issue raised by
// different models (or re-raised on a re-review) therefore collapses to one
// finding with many reports, which is what makes cross-model consensus and
// per-model precision measurable. The title is deliberately not part of the key.
//
// repo, lens and file are whitespace-trimmed and lens is lower-cased before
// hashing. The result is the first 16 hex characters of the SHA-256 of the key.
func findingID(repo string, pr int, lens, file string, line int) string {
	normRepo := strings.TrimSpace(repo)
	normLens := strings.ToLower(strings.TrimSpace(lens))
	normFile := strings.TrimSpace(file)

	key := fmt.Sprintf("%s|%d|%s|%s|%d", normRepo, pr, normLens, normFile, line)
	digest := sha256.Sum256([]byte(key))

	// Eight digest bytes encode to exactly the first 16 hex characters.
	return hex.EncodeToString(digest[:8])
}
|
||||||
|
|
||||||
|
// Run is one model's review of one PR — the unit run.sh times.
type Run struct {
	// Identity: which review this was and who performed it.
	RunID    string `json:"run_id"`
	Repo     string `json:"repo"`
	PR       int    `json:"pr"`
	Model    string `json:"model"`
	Provider string `json:"provider"`

	// Size and wall-clock cost of the review.
	Lenses       int     `json:"lenses"`
	DurationSecs float64 `json:"duration_secs"`

	// Optional usage figures; a nil pointer is left out of the JSON encoding.
	InputTokens  *int64   `json:"input_tokens,omitempty"`
	OutputTokens *int64   `json:"output_tokens,omitempty"`
	CostUSD      *float64 `json:"cost_usd,omitempty"`
}
|
||||||
|
|
||||||
|
// AddRun upserts a run by run_id (a re-posted run overwrites timing/tokens).
|
||||||
|
func (s *Store) AddRun(r Run) error {
|
||||||
|
if strings.TrimSpace(r.RunID) == "" || strings.TrimSpace(r.Model) == "" {
|
||||||
|
return fmt.Errorf("run_id and model are required")
|
||||||
|
}
|
||||||
|
_, err := s.db.Exec(`
|
||||||
|
INSERT INTO runs (run_id, repo, pr, model, provider, lenses, duration_secs, input_tokens, output_tokens, cost_usd, created_at)
|
||||||
|
VALUES (?,?,?,?,?,?,?,?,?,?,?)
|
||||||
|
ON CONFLICT(run_id) DO UPDATE SET
|
||||||
|
repo=excluded.repo, pr=excluded.pr, model=excluded.model, provider=excluded.provider,
|
||||||
|
lenses=excluded.lenses, duration_secs=excluded.duration_secs,
|
||||||
|
input_tokens=excluded.input_tokens, output_tokens=excluded.output_tokens, cost_usd=excluded.cost_usd`,
|
||||||
|
r.RunID, r.Repo, r.PR, r.Model, r.Provider, r.Lenses, r.DurationSecs,
|
||||||
|
r.InputTokens, r.OutputTokens, r.CostUSD, now())
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// ReportIn is one finding as a single model reported it.
type ReportIn struct {
	// Location of the finding; together with Lens these feed findingID.
	Repo string `json:"repo"`
	PR   int    `json:"pr"`
	Lens string `json:"lens"`
	File string `json:"file"`
	Line int    `json:"line"`

	// Title is stored with the finding but is not part of its identity.
	Title string `json:"title"`

	// Who reported it, and in which run.
	Model    string `json:"model"`
	Provider string `json:"provider"`
	RunID    string `json:"run_id"`

	// The reporting model's own severity label and free-form detail.
	RawSeverity string `json:"raw_severity"`
	Detail      string `json:"detail"`
}
|
||||||
|
|
||||||
|
// AddReports records a batch of findings: each upserts its (content-addressed)
|
||||||
|
// finding row and adds this model's report of it. Returns the finding id per
|
||||||
|
// input (same order). A model re-reporting the same finding in the same run is a
|
||||||
|
// no-op (UNIQUE finding_id,run_id).
|
||||||
|
func (s *Store) AddReports(in []ReportIn) ([]string, error) {
|
||||||
|
tx, err := s.db.Begin()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer tx.Rollback()
|
||||||
|
|
||||||
|
ts := now()
|
||||||
|
ids := make([]string, len(in))
|
||||||
|
for i, r := range in {
|
||||||
|
if strings.TrimSpace(r.Title) == "" || strings.TrimSpace(r.Lens) == "" {
|
||||||
|
return nil, fmt.Errorf("report %d: lens and title are required", i)
|
||||||
|
}
|
||||||
|
id := findingID(r.Repo, r.PR, r.Lens, r.File, r.Line)
|
||||||
|
ids[i] = id
|
||||||
|
if _, err := tx.Exec(`
|
||||||
|
INSERT INTO findings (id, repo, pr, lens, file, line, title, first_seen)
|
||||||
|
VALUES (?,?,?,?,?,?,?,?) ON CONFLICT(id) DO NOTHING`,
|
||||||
|
id, r.Repo, r.PR, strings.ToLower(strings.TrimSpace(r.Lens)), r.File, r.Line, r.Title, ts); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if _, err := tx.Exec(`
|
||||||
|
INSERT INTO reports (finding_id, run_id, model, provider, raw_severity, detail, created_at)
|
||||||
|
VALUES (?,?,?,?,?,?,?) ON CONFLICT(finding_id, run_id) DO NOTHING`,
|
||||||
|
id, r.RunID, r.Model, r.Provider, r.RawSeverity, r.Detail, ts); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ids, tx.Commit()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Grade is a triage verdict on a finding. Severity is required when is_real and
// must be one of validSeverities; it is cleared when !is_real. The store keeps
// no points — the client maps severity -> points.
type Grade struct {
	FindingID string `json:"finding_id"`
	IsReal    bool   `json:"is_real"`

	// Severity only carries meaning for a real finding.
	Severity string `json:"severity,omitempty"`
	// Usefulness is optional; AddGrade accepts 1..5 when it is set.
	Usefulness *int `json:"usefulness,omitempty"`

	Notes  string `json:"notes,omitempty"`
	Grader string `json:"grader,omitempty"`
}
|
||||||
|
|
||||||
|
// AddGrade appends a grade (history is kept; latest wins).
|
||||||
|
func (s *Store) AddGrade(g Grade) error {
|
||||||
|
if strings.TrimSpace(g.FindingID) == "" {
|
||||||
|
return fmt.Errorf("finding_id is required")
|
||||||
|
}
|
||||||
|
var exists bool
|
||||||
|
if err := s.db.QueryRow(`SELECT EXISTS(SELECT 1 FROM findings WHERE id=?)`, g.FindingID).Scan(&exists); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if !exists {
|
||||||
|
return fmt.Errorf("unknown finding_id %q", g.FindingID)
|
||||||
|
}
|
||||||
|
sev := strings.ToLower(strings.TrimSpace(g.Severity))
|
||||||
|
if g.IsReal {
|
||||||
|
if !validSeverities[sev] {
|
||||||
|
return fmt.Errorf("severity %q invalid for a real finding (want one of: %s)", g.Severity, strings.Join(sortedSeverities(), ", "))
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
sev = "" // a false positive carries no severity
|
||||||
|
}
|
||||||
|
if g.Usefulness != nil && (*g.Usefulness < 1 || *g.Usefulness > 5) {
|
||||||
|
return fmt.Errorf("usefulness must be 1..5, got %d", *g.Usefulness)
|
||||||
|
}
|
||||||
|
_, err := s.db.Exec(`
|
||||||
|
INSERT INTO grades (finding_id, is_real, severity, usefulness, notes, grader, created_at)
|
||||||
|
VALUES (?,?,?,?,?,?,?)`,
|
||||||
|
g.FindingID, g.IsReal, nullStr(sev), g.Usefulness, nullStr(g.Notes), nullStr(g.Grader), now())
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// ExportRow is one report joined with its finding, run timing, and latest
// grade — the flat shape a dashboard consumes. The grade fields stay nil/empty
// until the finding has been graded.
type ExportRow struct {
	// The finding this report points at.
	FindingID string `json:"finding_id"`
	Repo      string `json:"repo"`
	PR        int    `json:"pr"`
	Lens      string `json:"lens"`
	File      string `json:"file,omitempty"`
	Line      int    `json:"line,omitempty"`
	Title     string `json:"title"`

	// The report itself.
	Model       string `json:"model"`
	Provider    string `json:"provider,omitempty"`
	RunID       string `json:"run_id"`
	RawSeverity string `json:"raw_severity,omitempty"`
	ReportedAt  string `json:"reported_at"`

	// Timing and usage of the run that produced the report.
	DurationSecs float64 `json:"duration_secs"`
	InputTokens  *int64  `json:"input_tokens,omitempty"`
	OutputTokens *int64  `json:"output_tokens,omitempty"`

	// Latest grade, if there is one.
	Graded     bool   `json:"graded"`
	IsReal     *bool  `json:"is_real,omitempty"`
	Severity   string `json:"severity,omitempty"`
	Usefulness *int   `json:"usefulness,omitempty"`
	Notes      string `json:"notes,omitempty"`
	Grader     string `json:"grader,omitempty"`
	GradedAt   string `json:"graded_at,omitempty"`
}
|
||||||
|
|
||||||
|
// Export returns every report joined with finding, run timing, and latest grade,
|
||||||
|
// oldest first. The dashboard does all weighting from these raw rows.
|
||||||
|
func (s *Store) Export() ([]ExportRow, error) {
|
||||||
|
rows, err := s.db.Query(`
|
||||||
|
SELECT r.finding_id, f.repo, f.pr, f.lens, f.file, f.line, f.title,
|
||||||
|
r.model, r.provider, r.run_id, r.raw_severity, r.created_at,
|
||||||
|
COALESCE(ru.duration_secs, 0), ru.input_tokens, ru.output_tokens,
|
||||||
|
lg.is_real, lg.severity, lg.usefulness, lg.notes, lg.grader, lg.created_at
|
||||||
|
FROM reports r
|
||||||
|
JOIN findings f ON f.id = r.finding_id
|
||||||
|
LEFT JOIN runs ru ON ru.run_id = r.run_id
|
||||||
|
LEFT JOIN latest_grades lg ON lg.finding_id = r.finding_id
|
||||||
|
ORDER BY r.created_at, r.id`)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer rows.Close()
|
||||||
|
|
||||||
|
var out []ExportRow
|
||||||
|
for rows.Next() {
|
||||||
|
var e ExportRow
|
||||||
|
var file, rawSev, sev, notes, grader, gradedAt sql.NullString
|
||||||
|
var line sql.NullInt64
|
||||||
|
var isReal sql.NullBool
|
||||||
|
var useful sql.NullInt64
|
||||||
|
if err := rows.Scan(&e.FindingID, &e.Repo, &e.PR, &e.Lens, &file, &line, &e.Title,
|
||||||
|
&e.Model, &e.Provider, &e.RunID, &rawSev, &e.ReportedAt,
|
||||||
|
&e.DurationSecs, &e.InputTokens, &e.OutputTokens,
|
||||||
|
&isReal, &sev, &useful, ¬es, &grader, &gradedAt); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
e.File, e.Line = file.String, int(line.Int64)
|
||||||
|
e.RawSeverity = rawSev.String
|
||||||
|
if isReal.Valid {
|
||||||
|
e.Graded = true
|
||||||
|
v := isReal.Bool
|
||||||
|
e.IsReal = &v
|
||||||
|
e.Severity, e.Notes, e.Grader, e.GradedAt = sev.String, notes.String, grader.String, gradedAt.String
|
||||||
|
if useful.Valid {
|
||||||
|
u := int(useful.Int64)
|
||||||
|
e.Usefulness = &u
|
||||||
|
}
|
||||||
|
}
|
||||||
|
out = append(out, e)
|
||||||
|
}
|
||||||
|
return out, rows.Err()
|
||||||
|
}
|
||||||
|
|
||||||
|
// ModelStat is the per-model rollup the scoreboard returns. It is intentionally
// POINTS-FREE: raw minutes/tokens plus a confirmed-by-severity histogram, so
// the client applies its own weights for points and value-per-minute/token.
type ModelStat struct {
	Model    string `json:"model"`
	Provider string `json:"provider,omitempty"`

	// Effort, summed over the model's runs.
	Runs         int     `json:"runs"`
	Minutes      float64 `json:"minutes"`
	InputTokens  int64   `json:"input_tokens"`
	OutputTokens int64   `json:"output_tokens"`

	// Finding counts, split by latest-grade state.
	Findings      int `json:"findings"`
	Confirmed     int `json:"confirmed"`
	FalsePositive int `json:"false_positive"`
	Ungraded      int `json:"ungraded"`

	// BySeverity counts confirmed findings per severity.
	BySeverity map[string]int `json:"by_severity"`
}
|
||||||
|
|
||||||
|
// Scoreboard rolls runs + reports + latest grades up per model. All counts of
|
||||||
|
// findings are DISTINCT by finding (a model re-reporting across runs counts once).
|
||||||
|
func (s *Store) Scoreboard() ([]ModelStat, error) {
|
||||||
|
stats := map[string]*ModelStat{}
|
||||||
|
get := func(model, provider string) *ModelStat {
|
||||||
|
m, ok := stats[model]
|
||||||
|
if !ok {
|
||||||
|
m = &ModelStat{Model: model, Provider: provider, BySeverity: map[string]int{}}
|
||||||
|
stats[model] = m
|
||||||
|
}
|
||||||
|
return m
|
||||||
|
}
|
||||||
|
|
||||||
|
// Runs: minutes + tokens + run counts.
|
||||||
|
rrows, err := s.db.Query(`
|
||||||
|
SELECT model, provider, COUNT(*), COALESCE(SUM(duration_secs),0),
|
||||||
|
COALESCE(SUM(input_tokens),0), COALESCE(SUM(output_tokens),0)
|
||||||
|
FROM runs GROUP BY model, provider`)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
for rrows.Next() {
|
||||||
|
var model, provider string
|
||||||
|
var runs int
|
||||||
|
var dur float64
|
||||||
|
var in, out int64
|
||||||
|
if err := rrows.Scan(&model, &provider, &runs, &dur, &in, &out); err != nil {
|
||||||
|
rrows.Close()
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
m := get(model, provider)
|
||||||
|
m.Runs += runs
|
||||||
|
m.Minutes += dur / 60
|
||||||
|
m.InputTokens += in
|
||||||
|
m.OutputTokens += out
|
||||||
|
}
|
||||||
|
rrows.Close()
|
||||||
|
|
||||||
|
// Findings: distinct per model, split by latest-grade state.
|
||||||
|
frows, err := s.db.Query(`
|
||||||
|
SELECT r.model,
|
||||||
|
COUNT(DISTINCT r.finding_id),
|
||||||
|
COUNT(DISTINCT CASE WHEN lg.is_real=1 THEN r.finding_id END),
|
||||||
|
COUNT(DISTINCT CASE WHEN lg.is_real=0 THEN r.finding_id END),
|
||||||
|
COUNT(DISTINCT CASE WHEN lg.is_real IS NULL THEN r.finding_id END)
|
||||||
|
FROM reports r LEFT JOIN latest_grades lg ON lg.finding_id = r.finding_id
|
||||||
|
GROUP BY r.model`)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
for frows.Next() {
|
||||||
|
var model string
|
||||||
|
var total, confirmed, fp, ungraded int
|
||||||
|
if err := frows.Scan(&model, &total, &confirmed, &fp, &ungraded); err != nil {
|
||||||
|
frows.Close()
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
m := get(model, "")
|
||||||
|
m.Findings, m.Confirmed, m.FalsePositive, m.Ungraded = total, confirmed, fp, ungraded
|
||||||
|
}
|
||||||
|
frows.Close()
|
||||||
|
|
||||||
|
// Confirmed-by-severity histogram (distinct findings).
|
||||||
|
srows, err := s.db.Query(`
|
||||||
|
SELECT r.model, lg.severity, COUNT(DISTINCT r.finding_id)
|
||||||
|
FROM reports r JOIN latest_grades lg ON lg.finding_id = r.finding_id
|
||||||
|
WHERE lg.is_real=1 AND lg.severity IS NOT NULL
|
||||||
|
GROUP BY r.model, lg.severity`)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
for srows.Next() {
|
||||||
|
var model, sev string
|
||||||
|
var n int
|
||||||
|
if err := srows.Scan(&model, &sev, &n); err != nil {
|
||||||
|
srows.Close()
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
get(model, "").BySeverity[sev] = n
|
||||||
|
}
|
||||||
|
srows.Close()
|
||||||
|
|
||||||
|
out := make([]ModelStat, 0, len(stats))
|
||||||
|
for _, m := range stats {
|
||||||
|
out = append(out, *m)
|
||||||
|
}
|
||||||
|
sort.Slice(out, func(i, j int) bool { return out[i].Model < out[j].Model })
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func sortedSeverities() []string {
|
||||||
|
out := make([]string, 0, len(validSeverities))
|
||||||
|
for s := range validSeverities {
|
||||||
|
out = append(out, s)
|
||||||
|
}
|
||||||
|
sort.Strings(out)
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// nullStr turns the empty string into nil and returns any other string
// unchanged. AddGrade uses it so that empty optional fields are stored as SQL
// NULL rather than as empty text.
func nullStr(s string) any {
	if s != "" {
		return s
	}
	return nil
}
|
||||||
+132
@@ -0,0 +1,132 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func testStore(t *testing.T) *Store {
|
||||||
|
t.Helper()
|
||||||
|
s, err := Open(filepath.Join(t.TempDir(), "gadfly-reports.db"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("open: %v", err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { s.Close() })
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
// i64 returns a pointer to a copy of v, for filling optional *int64 fields.
func i64(v int64) *int64 {
	p := new(int64)
	*p = v
	return p
}
|
||||||
|
// intp returns a pointer to a copy of v, for filling optional *int fields.
func intp(v int) *int {
	p := new(int)
	*p = v
	return p
}
|
||||||
|
|
||||||
|
// TestConsensusAndGrade: two models reporting the SAME location collapse to one
|
||||||
|
// finding with two reports; a single grade applies to both models' scoreboards.
|
||||||
|
func TestConsensusAndGrade(t *testing.T) {
|
||||||
|
s := testStore(t)
|
||||||
|
|
||||||
|
if err := s.AddRun(Run{RunID: "r-cloud", Repo: "steve/x", PR: 2, Model: "minimax", Provider: "ollama-cloud", Lenses: 3, DurationSecs: 300, InputTokens: i64(1000), OutputTokens: i64(500)}); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if err := s.AddRun(Run{RunID: "r-m1", Repo: "steve/x", PR: 2, Model: "qwen3", Provider: "m1", Lenses: 3, DurationSecs: 1740}); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Both models flag the same file:line under the same lens.
|
||||||
|
ids, err := s.AddReports([]ReportIn{
|
||||||
|
{Repo: "steve/x", PR: 2, Lens: "correctness", File: "run/executor.go", Line: 166, Title: "SetIteration never called", Model: "minimax", Provider: "ollama-cloud", RunID: "r-cloud", RawSeverity: "Blocking"},
|
||||||
|
{Repo: "steve/x", PR: 2, Lens: "correctness", File: "run/executor.go", Line: 166, Title: "iteration counter dead", Model: "qwen3", Provider: "m1", RunID: "r-m1", RawSeverity: "Blocking"},
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if ids[0] != ids[1] {
|
||||||
|
t.Fatalf("same location should collapse to one finding id, got %q and %q", ids[0], ids[1])
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := s.AddGrade(Grade{FindingID: ids[0], IsReal: true, Severity: "high", Usefulness: intp(4), Grader: "claude"}); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
board, err := s.Scoreboard()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
byModel := map[string]ModelStat{}
|
||||||
|
for _, m := range board {
|
||||||
|
byModel[m.Model] = m
|
||||||
|
}
|
||||||
|
for _, name := range []string{"minimax", "qwen3"} {
|
||||||
|
m := byModel[name]
|
||||||
|
if m.Findings != 1 || m.Confirmed != 1 || m.BySeverity["high"] != 1 {
|
||||||
|
t.Errorf("%s: findings=%d confirmed=%d high=%d, want 1/1/1", name, m.Findings, m.Confirmed, m.BySeverity["high"])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if got := byModel["minimax"].Minutes; got != 5 {
|
||||||
|
t.Errorf("minimax minutes = %v, want 5", got)
|
||||||
|
}
|
||||||
|
if got := byModel["qwen3"].Minutes; got != 29 {
|
||||||
|
t.Errorf("qwen3 minutes = %v, want 29", got)
|
||||||
|
}
|
||||||
|
if got := byModel["minimax"].InputTokens; got != 1000 {
|
||||||
|
t.Errorf("minimax input_tokens = %d, want 1000", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestLatestGradeWins: a re-grade supersedes the prior one everywhere.
|
||||||
|
func TestLatestGradeWins(t *testing.T) {
|
||||||
|
s := testStore(t)
|
||||||
|
if err := s.AddRun(Run{RunID: "r1", Repo: "r", PR: 1, Model: "m", Provider: "p", DurationSecs: 60}); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
ids, err := s.AddReports([]ReportIn{{Repo: "r", PR: 1, Lens: "security", File: "a.go", Line: 5, Title: "x", Model: "m", Provider: "p", RunID: "r1"}})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
id := ids[0]
|
||||||
|
if err := s.AddGrade(Grade{FindingID: id, IsReal: true, Severity: "critical"}); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if err := s.AddGrade(Grade{FindingID: id, IsReal: false}); err != nil { // re-graded as a false positive
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
board, _ := s.Scoreboard()
|
||||||
|
m := board[0]
|
||||||
|
if m.Confirmed != 0 || m.FalsePositive != 1 || m.BySeverity["critical"] != 0 {
|
||||||
|
t.Errorf("after re-grade: confirmed=%d fp=%d critical=%d, want 0/1/0", m.Confirmed, m.FalsePositive, m.BySeverity["critical"])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestGradeValidation rejects bad severity / usefulness / unknown finding.
|
||||||
|
func TestGradeValidation(t *testing.T) {
|
||||||
|
s := testStore(t)
|
||||||
|
ids, _ := s.AddReports([]ReportIn{{Repo: "r", PR: 1, Lens: "perf", File: "a.go", Line: 1, Title: "t", Model: "m", Provider: "p", RunID: "r1"}})
|
||||||
|
id := ids[0]
|
||||||
|
|
||||||
|
if err := s.AddGrade(Grade{FindingID: id, IsReal: true, Severity: "huge"}); err == nil {
|
||||||
|
t.Error("expected error for invalid severity")
|
||||||
|
}
|
||||||
|
if err := s.AddGrade(Grade{FindingID: id, IsReal: true, Severity: "high", Usefulness: intp(9)}); err == nil {
|
||||||
|
t.Error("expected error for out-of-range usefulness")
|
||||||
|
}
|
||||||
|
if err := s.AddGrade(Grade{FindingID: "nope", IsReal: true, Severity: "high"}); err == nil {
|
||||||
|
t.Error("expected error for unknown finding")
|
||||||
|
}
|
||||||
|
// A false positive needs no severity.
|
||||||
|
if err := s.AddGrade(Grade{FindingID: id, IsReal: false}); err != nil {
|
||||||
|
t.Errorf("false positive without severity should be valid: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestFindingIDLocationKeyed: id depends on location, not wording; line matters.
|
||||||
|
func TestFindingIDLocationKeyed(t *testing.T) {
|
||||||
|
a := findingID("r", 1, "security", "a.go", 10)
|
||||||
|
sameWordingDiff := findingID("r", 1, "security", "a.go", 10) // any title — id ignores it
|
||||||
|
if a != sameWordingDiff {
|
||||||
|
t.Error("same location must yield same id regardless of wording")
|
||||||
|
}
|
||||||
|
if a == findingID("r", 1, "security", "a.go", 11) {
|
||||||
|
t.Error("different line must yield different id")
|
||||||
|
}
|
||||||
|
if a == findingID("r", 1, "correctness", "a.go", 10) {
|
||||||
|
t.Error("different lens must yield different id")
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user