feat: PR filter — compare models on the same set of PRs
Build & push image / build-and-push (push) Successful in 13s
CI / test (push) Successful in 9m51s

UI: a repo#pr multi-select (labeled with how many models ran each PR)
scopes the whole table — runs, minutes, findings, points — to the chosen
PRs, so a model with 2 runs can be fairly compared against one with 60.
API: GET /scoreboard accepts ?repo= and ?pr= (each repeatable or given as a comma-separated list).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
2026-07-02 22:55:43 -04:00
parent 2f003dd132
commit 1af115fdf1
6 changed files with 202 additions and 19 deletions
+27
View File
@@ -72,6 +72,33 @@ func TestServerEndToEnd(t *testing.T) {
}
}
// TestScoreboardQueryFilter: ?repo= and ?pr= narrow the scoreboard; a bad pr is a 400.
//
// Three runs of the same model are posted: PR 1 and PR 2 in repo "r", and
// PR 1 in repo "other". Each filter below is expected to match exactly two
// of them, all aggregated into a single ModelStat row.
func TestScoreboardQueryFilter(t *testing.T) {
	srv := testServer(t, "")
	post(t, srv, "", "/runs", Run{RunID: "r1", Repo: "r", PR: 1, Model: "m", Provider: "p", DurationSecs: 60})
	post(t, srv, "", "/runs", Run{RunID: "r2", Repo: "r", PR: 2, Model: "m", Provider: "p", DurationSecs: 60})
	post(t, srv, "", "/runs", Run{RunID: "r3", Repo: "other", PR: 1, Model: "m", Provider: "p", DurationSecs: 60})

	// fetchBoard GETs path and decodes the body as a scoreboard. A decode
	// failure aborts the test immediately so it is not misreported as a
	// wrong run count, and the body is always closed.
	fetchBoard := func(path string) []ModelStat {
		t.Helper()
		resp := mustGet(t, srv, "", path)
		defer resp.Body.Close()
		var board []ModelStat
		if err := json.NewDecoder(resp.Body).Decode(&board); err != nil {
			t.Fatalf("GET %s: decoding scoreboard: %v", path, err)
		}
		return board
	}

	// repo + comma-separated pr list: only repo "r" PRs 1 and 2 match.
	board := fetchBoard("/scoreboard?repo=r&pr=1,2")
	if len(board) != 1 || board[0].Runs != 2 {
		t.Fatalf("filtered scoreboard: %+v, want 2 runs (repo 'other' excluded)", board)
	}

	// pr alone: PR 1 matches in both repos.
	board = fetchBoard("/scoreboard?pr=1")
	if len(board) != 1 || board[0].Runs != 2 {
		t.Fatalf("pr-only filter: %+v, want 2 runs (both repos' PR 1)", board)
	}

	// A non-numeric pr must be rejected rather than silently ignored.
	resp := mustGet(t, srv, "", "/scoreboard?pr=abc")
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusBadRequest {
		t.Errorf("GET /scoreboard?pr=abc = %d, want 400", resp.StatusCode)
	}
}
// TestServerAuth: a set token gates writes but leaves /healthz open.
func TestServerAuth(t *testing.T) {
srv := testServer(t, "secret")