Compare commits
5 Commits
14cbee8e25
..
main
| Author | SHA1 | Date | |
|---|---|---|---|
| e381c0ad41 | |||
| 7fce78a664 | |||
| 1af115fdf1 | |||
| 2f003dd132 | |||
| dd8ada479e |
@@ -106,7 +106,7 @@ against reviews that take minutes.
|
||||
| `POST /findings/{id}/grade` | `{is_real, severity?, usefulness?, notes?, grader?}` | record a triage grade |
|
||||
| `GET /export` | — | flat report×finding×run×latest-grade rows — the dashboard feed |
|
||||
| `GET /runs` | — | list all runs (timing/tokens), oldest first |
|
||||
| `GET /scoreboard` | — | points-free per-model rollup |
|
||||
| `GET /scoreboard` | `?repo=<repo>` `&pr=<n>` (repeatable or comma-list, e.g. `?pr=10,11`) | points-free per-model rollup, optionally narrowed to specific PRs so models are compared on the same work |
|
||||
|
||||
`POST /runs` body: `{run_id, repo, pr, model, provider, lenses, duration_secs, input_tokens?, output_tokens?, cost_usd?}`
|
||||
(re-posting the same `run_id` updates it).
|
||||
@@ -138,6 +138,14 @@ ungraded, points, **points-per-minute**, points-per-run, by-severity — with **
|
||||
(date range, repo, provider, model, lens, grade/severity), free-text search, and a click-to-scope
|
||||
findings detail table.
|
||||
|
||||
Comparisons can be scoped by **excluding PRs**: the **PRs** button opens a searchable checkbox popup
|
||||
listing every `repo#pr` newest-first, each with model coverage and last-review date
|
||||
(`steve/x#12 · 3/5 models · 2026-07-01`) — untick a PR and the entire table (runs, minutes,
|
||||
findings, points) stops counting it. It's an *exclusion* list (not an opt-in), so
|
||||
**new PRs are included automatically** as they arrive; it persists in `localStorage`, and **reset** doesn't touch it.
|
||||
**all**/**none** apply to the current search, so you can filter to a repo and exclude or restore all
|
||||
its PRs in one click.
|
||||
|
||||
True to the store's "no points" rule, **scoring lives in the browser**: the page has an editable
|
||||
points curve (default `trivial=1, small=3, medium=5, high=8, critical=20`) and computes
|
||||
`points = Σ weight[severity]·count` and `value/min = points / minutes` on the fly — retune it without
|
||||
@@ -159,6 +167,12 @@ Its mirror, **solo-error penalty ×** (default `1.5`), multiplies the FP penalty
|
||||
was made by **only that model** — a unique wrong claim is noisier than a shared mistake. So a
|
||||
Blocking-claimed solo FP costs `high(8) × -0.5 × 1.5 = -6` vs `-4` for a shared one. Set to `1` to disable.
|
||||
|
||||
**Hiding models.** The **models shown** button opens the same style of popup with a checkbox per
|
||||
model — untick to hide one (handy for retired ones, e.g. `m1`), re-tick to restore. Hidden models
|
||||
drop out of the table, the totals, and the findings drill-down (but **not** from solo-ness, which
|
||||
stays computed against all models — hiding is a view filter, not a rescoring). The hidden set
|
||||
persists in `localStorage` across reloads; unlike the other filters, **reset** doesn't touch it.
|
||||
|
||||
Auth: the `/ui` shell is public (it holds no data); paste the store token into its **connect** box,
|
||||
or open `/ui?token=<token>` once (remembered in `localStorage`). Prefer your own dashboard? Point
|
||||
Grafana/Metabase/etc. at the SQLite file or the same `/export` + `/scoreboard` + `/runs` JSON.
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"errors"
|
||||
"log"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
@@ -21,7 +22,9 @@ import (
|
||||
// GET /healthz liveness (public)
|
||||
// GET /runs list all runs (timing/tokens), oldest first
|
||||
// GET /export flat report×finding×grade rows (the dashboard feed)
|
||||
// GET /scoreboard points-free per-model rollup
|
||||
// GET /scoreboard points-free per-model rollup; ?repo= and ?pr= (repeatable
|
||||
// or comma-list) narrow it to specific PRs so models are
|
||||
// compared on the same work
|
||||
// POST /runs upsert one run (model review of a PR; timing/tokens)
|
||||
// POST /reports record a batch of findings + this model's reports
|
||||
// POST /findings/{id}/grade record a triage grade (is_real, severity, …)
|
||||
@@ -79,8 +82,24 @@ func newServer(store *Store, token string) http.Handler {
|
||||
writeJSON(w, http.StatusOK, rows)
|
||||
})
|
||||
|
||||
mux.HandleFunc("GET /scoreboard", func(w http.ResponseWriter, _ *http.Request) {
|
||||
stats, err := store.Scoreboard()
|
||||
mux.HandleFunc("GET /scoreboard", func(w http.ResponseWriter, r *http.Request) {
|
||||
f := ScoreboardFilter{Repo: r.URL.Query().Get("repo")}
|
||||
// pr is repeatable and accepts comma lists: ?pr=1&pr=2 or ?pr=1,2
|
||||
for _, v := range r.URL.Query()["pr"] {
|
||||
for part := range strings.SplitSeq(v, ",") {
|
||||
part = strings.TrimSpace(part)
|
||||
if part == "" {
|
||||
continue
|
||||
}
|
||||
n, err := strconv.Atoi(part)
|
||||
if err != nil {
|
||||
writeErr(w, http.StatusBadRequest, errors.New("invalid pr number: "+part))
|
||||
return
|
||||
}
|
||||
f.PRs = append(f.PRs, n)
|
||||
}
|
||||
}
|
||||
stats, err := store.Scoreboard(f)
|
||||
if err != nil {
|
||||
writeErr(w, http.StatusInternalServerError, err)
|
||||
return
|
||||
|
||||
@@ -72,6 +72,33 @@ func TestServerEndToEnd(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestScoreboardQueryFilter: ?repo= and ?pr= narrow the scoreboard; a bad pr is a 400.
|
||||
func TestScoreboardQueryFilter(t *testing.T) {
|
||||
srv := testServer(t, "")
|
||||
|
||||
post(t, srv, "", "/runs", Run{RunID: "r1", Repo: "r", PR: 1, Model: "m", Provider: "p", DurationSecs: 60})
|
||||
post(t, srv, "", "/runs", Run{RunID: "r2", Repo: "r", PR: 2, Model: "m", Provider: "p", DurationSecs: 60})
|
||||
post(t, srv, "", "/runs", Run{RunID: "r3", Repo: "other", PR: 1, Model: "m", Provider: "p", DurationSecs: 60})
|
||||
|
||||
resp := mustGet(t, srv, "", "/scoreboard?repo=r&pr=1,2")
|
||||
var board []ModelStat
|
||||
json.NewDecoder(resp.Body).Decode(&board)
|
||||
if len(board) != 1 || board[0].Runs != 2 {
|
||||
t.Fatalf("filtered scoreboard: %+v, want 2 runs (repo 'other' excluded)", board)
|
||||
}
|
||||
|
||||
resp = mustGet(t, srv, "", "/scoreboard?pr=1")
|
||||
board = nil
|
||||
json.NewDecoder(resp.Body).Decode(&board)
|
||||
if len(board) != 1 || board[0].Runs != 2 {
|
||||
t.Fatalf("pr-only filter: %+v, want 2 runs (both repos' PR 1)", board)
|
||||
}
|
||||
|
||||
if resp := mustGet(t, srv, "", "/scoreboard?pr=abc"); resp.StatusCode != http.StatusBadRequest {
|
||||
t.Errorf("GET /scoreboard?pr=abc = %d, want 400", resp.StatusCode)
|
||||
}
|
||||
}
|
||||
|
||||
// TestServerAuth: a set token gates writes but leaves /healthz open.
|
||||
func TestServerAuth(t *testing.T) {
|
||||
srv := testServer(t, "secret")
|
||||
|
||||
@@ -379,9 +379,42 @@ type ModelStat struct {
|
||||
BySeverity map[string]int `json:"by_severity"` // confirmed findings per severity
|
||||
}
|
||||
|
||||
// ScoreboardFilter restricts the scoreboard to one repo and/or a list of PR
// numbers, so models with very different run counts can be judged on exactly
// the same work (e.g. only the PRs every model reviewed). The zero value
// applies no filtering.
type ScoreboardFilter struct {
	Repo string // exact repo to keep; "" keeps every repo
	PRs  []int  // PR numbers to keep; empty keeps every PR
}

// conds renders the filter as SQL conditions plus their bind arguments, using
// the caller-supplied column names for repo and PR. The columns are parameters
// because runs carry repo/pr directly, while reports get them via findings.
// Both results are nil when the filter is the zero value.
func (f ScoreboardFilter) conds(repoCol, prCol string) ([]string, []any) {
	var (
		clauses []string
		params  []any
	)
	if f.Repo != "" {
		clauses = append(clauses, repoCol+" = ?")
		params = append(params, f.Repo)
	}
	if n := len(f.PRs); n > 0 {
		// One "?" placeholder per PR; values are always bound, never inlined.
		marks := make([]string, n)
		for i, pr := range f.PRs {
			marks[i] = "?"
			params = append(params, pr)
		}
		clauses = append(clauses, prCol+" IN ("+strings.Join(marks, ",")+")")
	}
	return clauses, params
}
|
||||
|
||||
// whereClause joins conds with AND behind a leading " WHERE ", or returns ""
// when there are no conditions so the surrounding query text is unchanged.
func whereClause(conds []string) string {
	if len(conds) > 0 {
		return " WHERE " + strings.Join(conds, " AND ")
	}
	return ""
}
|
||||
|
||||
// Scoreboard rolls runs + reports + latest grades up per model. All counts of
|
||||
// findings are DISTINCT by finding (a model re-reporting across runs counts once).
|
||||
func (s *Store) Scoreboard() ([]ModelStat, error) {
|
||||
func (s *Store) Scoreboard(f ScoreboardFilter) ([]ModelStat, error) {
|
||||
stats := map[string]*ModelStat{}
|
||||
get := func(model, provider string) *ModelStat {
|
||||
m, ok := stats[model]
|
||||
@@ -393,10 +426,11 @@ func (s *Store) Scoreboard() ([]ModelStat, error) {
|
||||
}
|
||||
|
||||
// Runs: minutes + tokens + run counts.
|
||||
runConds, runArgs := f.conds("repo", "pr")
|
||||
rrows, err := s.db.Query(`
|
||||
SELECT model, provider, COUNT(*), COALESCE(SUM(duration_secs),0),
|
||||
COALESCE(SUM(input_tokens),0), COALESCE(SUM(output_tokens),0)
|
||||
FROM runs GROUP BY model, provider`)
|
||||
FROM runs`+whereClause(runConds)+` GROUP BY model, provider`, runArgs...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -418,14 +452,17 @@ FROM runs GROUP BY model, provider`)
|
||||
rrows.Close()
|
||||
|
||||
// Findings: distinct per model, split by latest-grade state.
|
||||
findConds, findArgs := f.conds("fi.repo", "fi.pr")
|
||||
frows, err := s.db.Query(`
|
||||
SELECT r.model,
|
||||
COUNT(DISTINCT r.finding_id),
|
||||
COUNT(DISTINCT CASE WHEN lg.is_real=1 THEN r.finding_id END),
|
||||
COUNT(DISTINCT CASE WHEN lg.is_real=0 THEN r.finding_id END),
|
||||
COUNT(DISTINCT CASE WHEN lg.is_real IS NULL THEN r.finding_id END)
|
||||
FROM reports r LEFT JOIN latest_grades lg ON lg.finding_id = r.finding_id
|
||||
GROUP BY r.model`)
|
||||
FROM reports r
|
||||
JOIN findings fi ON fi.id = r.finding_id
|
||||
LEFT JOIN latest_grades lg ON lg.finding_id = r.finding_id`+whereClause(findConds)+`
|
||||
GROUP BY r.model`, findArgs...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -442,11 +479,14 @@ GROUP BY r.model`)
|
||||
frows.Close()
|
||||
|
||||
// Confirmed-by-severity histogram (distinct findings).
|
||||
sevConds, sevArgs := f.conds("fi.repo", "fi.pr")
|
||||
srows, err := s.db.Query(`
|
||||
SELECT r.model, lg.severity, COUNT(DISTINCT r.finding_id)
|
||||
FROM reports r JOIN latest_grades lg ON lg.finding_id = r.finding_id
|
||||
WHERE lg.is_real=1 AND lg.severity IS NOT NULL
|
||||
GROUP BY r.model, lg.severity`)
|
||||
FROM reports r
|
||||
JOIN findings fi ON fi.id = r.finding_id
|
||||
JOIN latest_grades lg ON lg.finding_id = r.finding_id`+
|
||||
whereClause(append(sevConds, "lg.is_real=1", "lg.severity IS NOT NULL"))+`
|
||||
GROUP BY r.model, lg.severity`, sevArgs...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
+54
-2
@@ -46,7 +46,7 @@ func TestConsensusAndGrade(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
board, err := s.Scoreboard()
|
||||
board, err := s.Scoreboard(ScoreboardFilter{})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@@ -88,7 +88,7 @@ func TestLatestGradeWins(t *testing.T) {
|
||||
if err := s.AddGrade(Grade{FindingID: id, IsReal: false}); err != nil { // re-graded as a false positive
|
||||
t.Fatal(err)
|
||||
}
|
||||
board, _ := s.Scoreboard()
|
||||
board, _ := s.Scoreboard(ScoreboardFilter{})
|
||||
m := board[0]
|
||||
if m.Confirmed != 0 || m.FalsePositive != 1 || m.BySeverity["critical"] != 0 {
|
||||
t.Errorf("after re-grade: confirmed=%d fp=%d critical=%d, want 0/1/0", m.Confirmed, m.FalsePositive, m.BySeverity["critical"])
|
||||
@@ -116,6 +116,58 @@ func TestGradeValidation(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestScoreboardFilter: narrowing to repo/PRs drops runs and findings outside
|
||||
// the selection, so a model with many extra runs is compared on the same work.
|
||||
func TestScoreboardFilter(t *testing.T) {
|
||||
s := testStore(t)
|
||||
|
||||
// fable reviewed only PR 1; veteran reviewed PRs 1 and 2 (and another repo).
|
||||
for _, r := range []Run{
|
||||
{RunID: "f-1", Repo: "steve/x", PR: 1, Model: "fable", Provider: "p", DurationSecs: 60},
|
||||
{RunID: "v-1", Repo: "steve/x", PR: 1, Model: "veteran", Provider: "p", DurationSecs: 120},
|
||||
{RunID: "v-2", Repo: "steve/x", PR: 2, Model: "veteran", Provider: "p", DurationSecs: 120},
|
||||
{RunID: "v-3", Repo: "steve/y", PR: 1, Model: "veteran", Provider: "p", DurationSecs: 120},
|
||||
} {
|
||||
if err := s.AddRun(r); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
ids, err := s.AddReports([]ReportIn{
|
||||
{Repo: "steve/x", PR: 1, Lens: "security", File: "a.go", Line: 1, Title: "shared", Model: "fable", Provider: "p", RunID: "f-1"},
|
||||
{Repo: "steve/x", PR: 1, Lens: "security", File: "a.go", Line: 1, Title: "shared", Model: "veteran", Provider: "p", RunID: "v-1"},
|
||||
{Repo: "steve/x", PR: 2, Lens: "security", File: "b.go", Line: 2, Title: "pr2 only", Model: "veteran", Provider: "p", RunID: "v-2"},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := s.AddGrade(Grade{FindingID: ids[0], IsReal: true, Severity: "high"}); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := s.AddGrade(Grade{FindingID: ids[2], IsReal: true, Severity: "critical"}); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
board, err := s.Scoreboard(ScoreboardFilter{Repo: "steve/x", PRs: []int{1}})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
byModel := map[string]ModelStat{}
|
||||
for _, m := range board {
|
||||
byModel[m.Model] = m
|
||||
}
|
||||
v := byModel["veteran"]
|
||||
if v.Runs != 1 || v.Minutes != 2 {
|
||||
t.Errorf("veteran runs=%d minutes=%v, want 1 run / 2 min (PR 2 and steve/y excluded)", v.Runs, v.Minutes)
|
||||
}
|
||||
if v.Findings != 1 || v.Confirmed != 1 || v.BySeverity["critical"] != 0 || v.BySeverity["high"] != 1 {
|
||||
t.Errorf("veteran findings=%d confirmed=%d by_severity=%v, want only the PR-1 finding", v.Findings, v.Confirmed, v.BySeverity)
|
||||
}
|
||||
fbl := byModel["fable"]
|
||||
if fbl.Runs != 1 || fbl.Findings != 1 || fbl.Confirmed != 1 {
|
||||
t.Errorf("fable runs=%d findings=%d confirmed=%d, want 1/1/1", fbl.Runs, fbl.Findings, fbl.Confirmed)
|
||||
}
|
||||
}
|
||||
|
||||
// TestFindingIDLocationKeyed: id depends on location, not wording; line matters.
|
||||
func TestFindingIDLocationKeyed(t *testing.T) {
|
||||
a := findingID("r", 1, "security", "a.go", 10)
|
||||
|
||||
@@ -22,6 +22,18 @@
|
||||
input[type=number] { width:64px; }
|
||||
input[type=date] { width:140px; }
|
||||
input.search { width:220px; }
|
||||
#modalback { position:fixed; inset:0; background:rgba(0,0,0,.55); display:none; z-index:40; }
|
||||
#modal { position:fixed; top:12vh; left:50%; transform:translateX(-50%); width:min(560px,92vw); max-height:72vh;
|
||||
background:var(--panel); border:1px solid var(--line); border-radius:10px; display:none; flex-direction:column;
|
||||
z-index:41; box-shadow:0 12px 40px rgba(0,0,0,.5); }
|
||||
#modal .mhead { display:flex; gap:8px; align-items:center; padding:10px 12px; border-bottom:1px solid var(--line); }
|
||||
#modal .mhead b { margin-right:auto; white-space:nowrap; }
|
||||
#modal .mhead input { flex:1; min-width:80px; width:auto; }
|
||||
#mlist { flex:1 1 auto; min-height:0; overflow:auto; padding:6px 0; }
|
||||
#mlist label.item { display:flex; gap:10px; align-items:center; padding:6px 14px; cursor:pointer; font-size:13px; }
|
||||
#mlist label.item:hover { background:#1d212b; }
|
||||
#mlist .note { margin-left:auto; color:var(--mut); font-size:12px; }
|
||||
#modal .mfoot { padding:8px 12px; border-top:1px solid var(--line); color:var(--mut); font-size:12px; }
|
||||
button { cursor:pointer; }
|
||||
button.primary { background:var(--acc); color:#0c0e12; border-color:var(--acc); font-weight:600; }
|
||||
button.link { background:none; border:none; color:var(--acc); padding:0; text-decoration:underline; }
|
||||
@@ -64,6 +76,8 @@
|
||||
<div class="f"><label>from</label><input type="date" id="from"></div>
|
||||
<div class="f"><label>to</label><input type="date" id="to"></div>
|
||||
<div class="f"><label>repo</label><select id="repo"></select></div>
|
||||
<div class="f"><label>PRs</label><button id="prbtn" onclick="openModal('pr')" title="Exclude PRs from the comparison (persists in this browser). New PRs are included automatically as they arrive.">all PRs ▾</button></div>
|
||||
<div class="f"><label>models shown</label><button id="modelsbtn" onclick="openModal('models')" title="Hide models from the scoreboard entirely (e.g. retired ones). Persists in this browser.">all ▾</button></div>
|
||||
<div class="f"><label>provider</label><select id="provider"></select></div>
|
||||
<div class="f"><label>model</label><select id="model"></select></div>
|
||||
<div class="f"><label>lens</label><select id="lens"></select></div>
|
||||
@@ -109,12 +123,126 @@
|
||||
</div>
|
||||
</main>
|
||||
|
||||
<!-- shared picker modal: PR scope + model visibility (outside <main> so its
|
||||
search box doesn't trigger the global re-render listener) -->
|
||||
<div id="modalback" onclick="closeModal()"></div>
|
||||
<div id="modal">
|
||||
<div class="mhead">
|
||||
<b id="mtitle"></b>
|
||||
<input id="msearch" placeholder="filter…" oninput="fillModal()">
|
||||
<button onclick="modalSetAll(true)">all</button>
|
||||
<button onclick="modalSetAll(false)">none</button>
|
||||
<button class="primary" onclick="closeModal()">done</button>
|
||||
</div>
|
||||
<div id="mlist"></div>
|
||||
<div class="mfoot" id="mfoot"></div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
const SEVS = ["trivial","small","medium","high","critical"];
|
||||
const SEVCOLOR = { trivial:"#3b4252", small:"#2e4d3a", medium:"#4d4a2e", high:"#5a3b2e", critical:"#5a2e3a" };
|
||||
let RUNS = [], ROWS = [];
|
||||
let sortKey = "ptsPerMin", sortAsc = false, selModel = null;
|
||||
|
||||
// Persistently-excluded models (e.g. retired ones like m1). Hidden from the
|
||||
// scoreboard, totals, and drill-down; persisted in localStorage across reloads.
|
||||
// Solo-ness is still computed against ALL models (hiding is a view filter, not a
|
||||
// rescoring), so hiding one model never fakes another's solo finds.
|
||||
/**
 * Read the persisted hidden-model list from localStorage key "grt-hidden".
 * Falls back to an empty Set when storage is unavailable, the stored value is
 * not valid JSON, or the JSON is not an array (a stray JSON string would
 * otherwise be spread into a Set of single characters).
 * @returns {Set<string>} names of the models hidden from the scoreboard
 */
function loadHidden(){
  try {
    const saved = JSON.parse(localStorage.getItem("grt-hidden") || "[]");
    return new Set(Array.isArray(saved) ? saved : []);
  } catch {
    return new Set();
  }
}
let HIDDEN = loadHidden();
|
||||
/**
 * Persist the hidden-model set to localStorage key "grt-hidden" as a sorted
 * JSON array. Best-effort, mirroring loadHidden: if storage is blocked or
 * full, the in-memory selection still applies and the caller can go on to
 * re-render; only persistence across reloads is lost.
 */
function saveHidden(){
  try {
    localStorage.setItem("grt-hidden", JSON.stringify([...HIDDEN].sort()));
  } catch (err) {
    console.warn("could not persist hidden models", err);
  }
}
|
||||
|
||||
// PRs excluded from the comparison (repo#pr keys), persisted like HIDDEN.
|
||||
// Exclusion (not opt-in) so new PRs count automatically as they arrive.
|
||||
/**
 * Read the persisted excluded-PR keys ("repo#pr") from localStorage key
 * "grt-xprs". Falls back to an empty Set when storage is unavailable, the
 * stored value is not valid JSON, or the JSON is not an array (a stray JSON
 * string would otherwise be spread into a Set of single characters).
 * @returns {Set<string>} keys of the PRs excluded from the comparison
 */
function loadXPRs(){
  try {
    const saved = JSON.parse(localStorage.getItem("grt-xprs") || "[]");
    return new Set(Array.isArray(saved) ? saved : []);
  } catch {
    return new Set();
  }
}
let EXPRS = loadXPRs();
|
||||
/**
 * Persist the excluded-PR keys to localStorage key "grt-xprs" as a sorted JSON
 * array. Best-effort, mirroring loadXPRs: if storage is blocked or full, the
 * in-memory exclusions still apply and the caller can go on to re-render; only
 * persistence across reloads is lost.
 */
function saveXPRs(){
  try {
    localStorage.setItem("grt-xprs", JSON.stringify([...EXPRS].sort()));
  } catch (err) {
    console.warn("could not persist excluded PRs", err);
  }
}
|
||||
// Number of excluded PRs that actually occur in the loaded data. EXPRS may hold
// stale keys for PRs that are no longer present, so those are not counted.
function excludedCount(){
  const present = new Set(
    [...RUNS, ...ROWS].map(item => prKey(item)).filter(key => EXPRS.has(key))
  );
  return present.size;
}
|
||||
|
||||
// ---- picker modal (shared by the PR excluder and the model hider) ----
|
||||
// Which picker is open: "pr", "models", or null while the modal is closed.
let modalKind = null;

/**
 * Open the shared picker modal for the given kind: set its title, clear the
 * search box, show the backdrop and dialog, fill the list, and focus the search.
 * @param {string} kind "pr" for the PR excluder; anything else shows the model picker
 */
function openModal(kind){
  const byId = id => document.getElementById(id);
  modalKind = kind;
  let title = "models shown";
  if (kind === "pr") title = "PRs compared";
  byId("mtitle").textContent = title;
  const search = byId("msearch");
  search.value = "";
  byId("modalback").style.display = "block";
  byId("modal").style.display = "flex";
  fillModal();
  search.focus();
}
|
||||
/** Close the picker modal: forget the active kind and hide backdrop and dialog. */
function closeModal(){
  modalKind = null;
  for (const id of ["modalback", "modal"]){
    document.getElementById(id).style.display = "none";
  }
}
|
||||
// Builds the complete picker list from the raw RUNS/ROWS data each time it is
// called, so every PR / model is always offered. checked = counted; an
// unticked item is excluded (PRs) or hidden (models).
function modalItems(){
  const allModels = uniq([...RUNS.map(r=>r.model), ...ROWS.map(r=>r.model)]);

  if (modalKind !== "pr"){
    return allModels.map(name => {
      const hidden = HIDDEN.has(name);
      return { value:name, label:name, note: hidden ? "hidden" : "", checked: !hidden };
    });
  }

  // Per PR key: which models touched it, and the latest timestamp seen for it.
  const modelsByPR = new Map();
  const lastSeen = new Map();
  for (const rec of [...RUNS, ...ROWS]){
    const key = prKey(rec);
    let seen = modelsByPR.get(key);
    if (!seen){ seen = new Set(); modelsByPR.set(key, seen); }
    if (rec.model) seen.add(rec.model);
    const stamp = rec.created_at || rec.reported_at || "";
    if (stamp > (lastSeen.get(key) || "")) lastSeen.set(key, stamp);
  }

  const when = key => lastSeen.get(key) || "";
  // reverse chronological: most recently reviewed PR first
  const newestFirst = [...modelsByPR.keys()].sort((a, b) => when(b).localeCompare(when(a)));
  return newestFirst.map(key => ({
    value: key,
    label: key,
    note: `${modelsByPR.get(key).size}/${allModels.length} models · ${when(key).slice(0,10)}`,
    checked: !EXPRS.has(key),
  }));
}
|
||||
/**
 * The picker items that match the modal's search box (case-insensitive
 * substring match on the label); every item when the box is empty.
 */
function visibleModalItems(){
  const needle = document.getElementById("msearch").value.trim().toLowerCase();
  const everything = modalItems();
  if (!needle) return everything;
  return everything.filter(it => it.label.toLowerCase().includes(needle));
}
|
||||
/**
 * Re-render the picker: one checkbox row per search-visible item, plus a footer
 * summarising how many items are shown and how many are excluded/hidden.
 * The footer's excluded/hidden count is taken over ALL items, not just the
 * ones matching the search.
 */
function fillModal(){
  const items = modalItems();
  const vis = visibleModalItems();

  const list = document.getElementById("mlist");
  list.innerHTML = "";
  const span = (text, cls) => {
    const el = document.createElement("span");
    if (cls) el.className = cls;
    el.textContent = text;
    return el;
  };
  vis.forEach(it => {
    const row = document.createElement("label");
    row.className = "item";
    const box = document.createElement("input");
    box.type = "checkbox";
    box.checked = it.checked;
    box.onchange = () => modalToggle(it.value, box.checked);
    row.append(box, span(it.label), span(it.note, "note"));
    list.appendChild(row);
  });

  const off = items.reduce((n, it) => (it.checked ? n : n + 1), 0);
  let counts;
  if (vis.length === items.length) counts = `all ${items.length}`;
  else counts = `${vis.length} of ${items.length}`;

  let footer;
  if (modalKind === "pr"){
    const status = off
      ? `${off} excluded — stays excluded as new PRs arrive (persists in this browser)`
      : "none excluded — every PR counts, new ones included automatically";
    footer = `showing ${counts} PRs · ${status}`;
  } else {
    const status = off
      ? `${off} hidden — excluded from the scoreboard (persists in this browser)`
      : "all models shown";
    footer = `showing ${counts} models · ${status}`;
  }
  document.getElementById("mfoot").textContent = footer;
}
|
||||
/**
 * Apply one checkbox change from the picker, persist it, and refresh the modal
 * and the page.
 * @param {string} v  item value: a "repo#pr" key or a model name
 * @param {boolean} on  true = ticked (counted/shown), false = excluded/hidden
 */
function modalToggle(v, on){
  const pickingPRs = modalKind === "pr";
  const target = pickingPRs ? EXPRS : HIDDEN;
  if (on) target.delete(v);
  else target.add(v);
  // a model that was just hidden can't remain the drill-down scope
  if (!pickingPRs && !on && selModel === v) selModel = null;
  if (pickingPRs) saveXPRs();
  else saveHidden();
  fillModal();
  render();
}
|
||||
// "all" / "none" act only on the items matching the current search, so typing
// a repo name and clicking once excludes or restores all of that repo's PRs.
function modalSetAll(on){
  const pickingPRs = modalKind === "pr";
  const target = pickingPRs ? EXPRS : HIDDEN;
  for (const { value } of visibleModalItems()){
    if (on){ target.delete(value); continue; }
    target.add(value);
    // a model that was just hidden can't remain the drill-down scope
    if (!pickingPRs && selModel === value) selModel = null;
  }
  if (pickingPRs) saveXPRs();
  else saveHidden();
  fillModal();
  render();
}
|
||||
|
||||
function token(){
|
||||
const q = new URL(location.href).searchParams.get("token");
|
||||
if (q) { localStorage.setItem("grt", q); return q; }
|
||||
@@ -155,6 +283,7 @@ function opt(sel, vals, label){
|
||||
for (const v of vals){ const o = document.createElement("option"); o.value = v; o.textContent = v; sel.appendChild(o); }
|
||||
if (vals.includes(cur)) sel.value = cur;
|
||||
}
|
||||
/** Key identifying a PR across repos: the object's repo and pr joined as "repo#pr". */
function prKey(o){
  const { repo, pr } = o;
  return `${repo}#${pr}`;
}
|
||||
function buildFacets(){
|
||||
opt(document.getElementById("repo"), uniq([...RUNS.map(r=>r.repo), ...ROWS.map(r=>r.repo)]), "all repos");
|
||||
opt(document.getElementById("provider"), uniq([...RUNS.map(r=>r.provider), ...ROWS.map(r=>r.provider)]), "all providers");
|
||||
@@ -193,10 +322,13 @@ function filters(){
|
||||
};
|
||||
}
|
||||
/**
 * True when the date part of ts (its first 10 characters) lies within the
 * inclusive range f.from..f.to; a bound that is empty or unset is not applied.
 */
function dateOK(ts, f){
  const day = (ts || "").slice(0, 10);
  if (f.from && !(day >= f.from)) return false;
  if (f.to && !(day <= f.to)) return false;
  return true;
}
|
||||
// run-level filters only (date/repo/provider/model) — severity/lens/search are finding-level.
|
||||
// prOK drops runs/rows from excluded PRs; everything else (including PRs that
|
||||
// arrive after the exclusions were set) counts.
|
||||
/** True unless the object's PR (keyed by prKey) is in the excluded set EXPRS. */
function prOK(o){
  const key = prKey(o);
  return EXPRS.has(key) === false;
}
|
||||
// run-level filters only (date/repo/provider/model/pr) — severity/lens/search are finding-level.
|
||||
function runMatch(r, f){
|
||||
return dateOK(r.created_at, f) && (!f.repo || r.repo===f.repo) &&
|
||||
(!f.provider || r.provider===f.provider) && (!f.model || r.model===f.model);
|
||||
(!f.provider || r.provider===f.provider) && (!f.model || r.model===f.model) && prOK(r);
|
||||
}
|
||||
function gradeMatch(row, g){
|
||||
if (!g) return true;
|
||||
@@ -213,6 +345,7 @@ function rowMatch(row, f){
|
||||
if (f.lens && row.lens!==f.lens) return false;
|
||||
if (!gradeMatch(row, f.grade)) return false;
|
||||
if (f.q && !((row.title||"")+" "+(row.file||"")+" "+(row.repo||"")).toLowerCase().includes(f.q)) return false;
|
||||
if (!prOK(row)) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -229,7 +362,7 @@ function aggregate(f){
|
||||
for (const r of RUNS){ if(!runMatch(r,f)) continue; const m=get(r.model); m.runs++; m.minutes += (r.duration_secs||0)/60;
|
||||
m.inTok += r.input_tokens||0; m.outTok += r.output_tokens||0; if(r.provider) m.provider=r.provider; }
|
||||
|
||||
const rows = ROWS.filter(r => rowMatch(r, f));
|
||||
const rows = ROWS.filter(r => rowMatch(r, f) && !HIDDEN.has(r.model));
|
||||
for (const r of rows){ const m=get(r.model); if(r.provider) m.provider=m.provider||r.provider;
|
||||
m.findings.add(r.finding_id);
|
||||
if (r.graded && r.is_real === true){ m.confirmed.set(r.finding_id, r.severity || ""); }
|
||||
@@ -256,7 +389,7 @@ function aggregate(f){
|
||||
ptsPerMin: m.minutes>0 ? points/m.minutes : null,
|
||||
ptsPerRun: m.runs>0 ? points/m.runs : null,
|
||||
confirmedPct: findings>0 ? confirmed/findings*100 : null };
|
||||
}).filter(m => m.runs>0 || m.findings>0);
|
||||
}).filter(m => (m.runs>0 || m.findings>0) && !HIDDEN.has(m.model));
|
||||
return { models: out, rows };
|
||||
}
|
||||
|
||||
@@ -306,9 +439,18 @@ function render(){
|
||||
}
|
||||
mb.appendChild(tr);
|
||||
}
|
||||
// picker buttons reflect current scope
|
||||
const exN = excludedCount();
|
||||
document.getElementById("prbtn").textContent =
|
||||
exN ? `${exN} PR${exN===1?"":"s"} excluded ▾` : "all PRs ▾";
|
||||
document.getElementById("modelsbtn").textContent =
|
||||
HIDDEN.size ? `${HIDDEN.size} hidden ▾` : "all ▾";
|
||||
|
||||
const tot = models.reduce((a,m)=>({runs:a.runs+m.runs, min:a.min+m.minutes, find:a.find+m.findings, conf:a.conf+m.confirmed, pts:a.pts+m.points}), {runs:0,min:0,find:0,conf:0,pts:0});
|
||||
const prNote = exN ? ` · <b>${exN} PR${exN===1?"":"s"} excluded</b>` : "";
|
||||
document.getElementById("summary").innerHTML =
|
||||
`${models.length} models · ${tot.runs} runs · ${tot.min.toFixed(0)} min · ${tot.find} findings · ${tot.conf} confirmed · ${tot.pts.toFixed(0)} pts` +
|
||||
prNote +
|
||||
(selModel ? ` · <b>scoped to ${selModel}</b> <span class="pill" onclick="event.stopPropagation();selModel=null;render()">clear</span>` : "");
|
||||
|
||||
// detail
|
||||
@@ -334,12 +476,14 @@ function esc(s){ return (s==null?"":String(s)).replace(/[&<>]/g, m=>({"&":"&
|
||||
/**
 * Clear every filter control and the model drill-down, then re-render.
 * Excluded PRs and hidden models are persistent preferences, not filters, so
 * reset leaves them untouched.
 */
function resetFilters(){
  const clearable = ["from","to","q","repo","provider","model","lens","grade"];
  clearable.forEach(id => { document.getElementById(id).value = ""; });
  selModel = null;
  render();
}
|
||||
|
||||
document.addEventListener("input", e=>{
|
||||
if (e.target.closest("main")) render();
|
||||
});
|
||||
document.addEventListener("keydown", e=>{ if (e.key === "Escape") closeModal(); });
|
||||
load();
|
||||
</script>
|
||||
</body>
|
||||
|
||||
Reference in New Issue
Block a user