diff --git a/Dockerfile b/Dockerfile index 19974ec..7e1efa8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,5 +15,10 @@ ENV GADFLY_REPORTS_ADDR=:8090 \ GADFLY_REPORTS_DB=/data/gadfly-reports.db EXPOSE 8090 VOLUME ["/data"] +# Fast probe so an orchestrator (e.g. Traefik) resumes routing within a few seconds +# of a (re)start — the daemon binds the port in milliseconds. First probe at +# --interval (5s); --start-period keeps early failures from flapping the status. +HEALTHCHECK --interval=5s --timeout=3s --start-period=10s --retries=3 \ + CMD wget -q -O - http://localhost:8090/healthz || exit 1 ENTRYPOINT ["/usr/local/bin/gadfly-reports"] CMD ["serve"] diff --git a/README.md b/README.md index d4c934e..1b0c8b3 100644 --- a/README.md +++ b/README.md @@ -57,9 +57,14 @@ services: networks: [traefik] healthcheck: test: ["CMD", "wget", "-q", "-O", "-", "http://localhost:8090/healthz"] - interval: 30s - timeout: 5s + # Fast probe so Traefik resumes routing within ~1s of a restart (the daemon + # binds the port in milliseconds). Without a fast probe Traefik 502s until the + # first check — the usual "why is it down for 30s after restart". + interval: 5s + timeout: 3s retries: 3 + start_period: 10s + start_interval: 1s # probe every 1s during start_period (needs Docker 25+) labels: - "traefik.enable=true" - "traefik.http.routers.gadfly-reports.rule=Host(`reports.example.com`)" @@ -83,6 +88,13 @@ Traefik bits to your setup — the **host** (`reports.example.com`), the **entry to the container's `:8090`. Then point `gadfly`'s `GADFLY_FINDINGS_URL` and `gadfly-mcp`'s `--store` at `https://reports.example.com` (with the same token). +On `docker compose pull && docker compose up -d`, the fast healthcheck lets Traefik resume routing +within ~1s (the daemon starts in milliseconds — Traefik just won't route to a container whose health +probe hasn't passed yet, which is the "down for 30s after restart" gotcha). Your data lives on the +`gadfly-reports-data` volume and survives restarts; the only loss exposure is a review POSTing +findings during that ~1s window, since gadfly's emit is fire-and-forget (no retry) — negligible +against reviews that take minutes. + ## HTTP API (the canonical contract) | Method & path | Body / query | Purpose | @@ -143,6 +155,10 @@ number of models that reported one is known, so a confirmed finding that **only The `solo` column counts those. This is derived from the data (reporter count); the grader never has to flag it. Set the bonus to `1` to disable. +Its mirror, **solo-error penalty ×** (default `1.5`), multiplies the FP penalty when a false positive +was made by **only that model** — a unique wrong claim is noisier than a shared mistake. So a +Blocking-claimed solo FP costs `high(8) × -0.5 × 1.5 = -6` vs `-4` for a shared one. Set to `1` to disable. + Auth: the `/ui` shell is public (it holds no data); paste the store token into its **connect** box, or open `/ui?token=` once (remembered in `localStorage`). Prefer your own dashboard? Point Grafana/Metabase/etc. at the SQLite file or the same `/export` + `/scoreboard` + `/runs` JSON. diff --git a/ui.html b/ui.html index e114f46..b5b2157 100644 --- a/ui.html +++ b/ui.html @@ -81,6 +81,7 @@ critical false-positive penalty × solo-find bonus × + solo-error penalty × @@ -168,6 +169,7 @@ function curve(){ } function fpMult(){ const v = parseFloat(document.getElementById("fp_mult").value); return isNaN(v) ? 0 : v; } function soloBonus(){ const v = parseFloat(document.getElementById("solo_bonus").value); return isNaN(v) ? 1 : v; } +function soloErr(){ const v = parseFloat(document.getElementById("solo_err").value); return isNaN(v) ? 1 : v; } // A false positive has no graded severity, so penalize it by the severity the // MODEL claimed — its lens verdict (raw_severity) — mapped onto the curve. The // louder the wrong cry, the bigger the penalty. @@ -235,7 +237,7 @@ function aggregate(f){ else { m.ungraded.add(r.finding_id); } } - const fpm = fpMult(), sb = soloBonus(); + const fpm = fpMult(), sb = soloBonus(), se = soloErr(); const out = [...M.values()].map(m => { const sevCounts = Object.fromEntries(SEVS.map(s=>[s,0])); let confirmedPoints = 0, solo = 0; @@ -245,7 +247,7 @@ function aggregate(f){ if (isSolo) solo++; confirmedPoints += (c[sevv] || 0) * (isSolo ? sb : 1); } - let fpPen = 0; for (const k of m.fp.values()) fpPen += (c[k]||0) * fpm; // negative when fpm<0 + let fpPen = 0; for (const [fid, k] of m.fp){ const soloE = (reporters.get(fid)?.size || 1) === 1; fpPen += (c[k]||0) * fpm * (soloE ? se : 1); } // solo (unique) errors penalized extra const points = confirmedPoints + fpPen; // NET: solo-boosted confirmed + FP penalty const findings = m.findings.size, confirmed = m.confirmed.size; return { model:m.model, provider:m.provider, runs:m.runs, minutes:m.minutes,