feat: PR filter — compare models on the same set of PRs
UI: a repo#pr multi-select (each option labeled with how many models ran that PR) scopes the whole table — runs, minutes, findings, points — to the chosen PRs, so a model with 2 runs can be compared fairly against one with 60.

API: GET /scoreboard accepts ?repo= and ?pr= (repeatable or a comma-separated list).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
+54
-2
@@ -46,7 +46,7 @@ func TestConsensusAndGrade(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
board, err := s.Scoreboard()
|
||||
board, err := s.Scoreboard(ScoreboardFilter{})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@@ -88,7 +88,7 @@ func TestLatestGradeWins(t *testing.T) {
|
||||
if err := s.AddGrade(Grade{FindingID: id, IsReal: false}); err != nil { // re-graded as a false positive
|
||||
t.Fatal(err)
|
||||
}
|
||||
board, _ := s.Scoreboard()
|
||||
board, _ := s.Scoreboard(ScoreboardFilter{})
|
||||
m := board[0]
|
||||
if m.Confirmed != 0 || m.FalsePositive != 1 || m.BySeverity["critical"] != 0 {
|
||||
t.Errorf("after re-grade: confirmed=%d fp=%d critical=%d, want 0/1/0", m.Confirmed, m.FalsePositive, m.BySeverity["critical"])
|
||||
@@ -116,6 +116,58 @@ func TestGradeValidation(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestScoreboardFilter: narrowing to repo/PRs drops runs and findings outside
|
||||
// the selection, so a model with many extra runs is compared on the same work.
|
||||
func TestScoreboardFilter(t *testing.T) {
|
||||
s := testStore(t)
|
||||
|
||||
// fable reviewed only PR 1; veteran reviewed PRs 1 and 2 (and another repo).
|
||||
for _, r := range []Run{
|
||||
{RunID: "f-1", Repo: "steve/x", PR: 1, Model: "fable", Provider: "p", DurationSecs: 60},
|
||||
{RunID: "v-1", Repo: "steve/x", PR: 1, Model: "veteran", Provider: "p", DurationSecs: 120},
|
||||
{RunID: "v-2", Repo: "steve/x", PR: 2, Model: "veteran", Provider: "p", DurationSecs: 120},
|
||||
{RunID: "v-3", Repo: "steve/y", PR: 1, Model: "veteran", Provider: "p", DurationSecs: 120},
|
||||
} {
|
||||
if err := s.AddRun(r); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
ids, err := s.AddReports([]ReportIn{
|
||||
{Repo: "steve/x", PR: 1, Lens: "security", File: "a.go", Line: 1, Title: "shared", Model: "fable", Provider: "p", RunID: "f-1"},
|
||||
{Repo: "steve/x", PR: 1, Lens: "security", File: "a.go", Line: 1, Title: "shared", Model: "veteran", Provider: "p", RunID: "v-1"},
|
||||
{Repo: "steve/x", PR: 2, Lens: "security", File: "b.go", Line: 2, Title: "pr2 only", Model: "veteran", Provider: "p", RunID: "v-2"},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := s.AddGrade(Grade{FindingID: ids[0], IsReal: true, Severity: "high"}); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := s.AddGrade(Grade{FindingID: ids[2], IsReal: true, Severity: "critical"}); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
board, err := s.Scoreboard(ScoreboardFilter{Repo: "steve/x", PRs: []int{1}})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
byModel := map[string]ModelStat{}
|
||||
for _, m := range board {
|
||||
byModel[m.Model] = m
|
||||
}
|
||||
v := byModel["veteran"]
|
||||
if v.Runs != 1 || v.Minutes != 2 {
|
||||
t.Errorf("veteran runs=%d minutes=%v, want 1 run / 2 min (PR 2 and steve/y excluded)", v.Runs, v.Minutes)
|
||||
}
|
||||
if v.Findings != 1 || v.Confirmed != 1 || v.BySeverity["critical"] != 0 || v.BySeverity["high"] != 1 {
|
||||
t.Errorf("veteran findings=%d confirmed=%d by_severity=%v, want only the PR-1 finding", v.Findings, v.Confirmed, v.BySeverity)
|
||||
}
|
||||
fbl := byModel["fable"]
|
||||
if fbl.Runs != 1 || fbl.Findings != 1 || fbl.Confirmed != 1 {
|
||||
t.Errorf("fable runs=%d findings=%d confirmed=%d, want 1/1/1", fbl.Runs, fbl.Findings, fbl.Confirmed)
|
||||
}
|
||||
}
|
||||
|
||||
// TestFindingIDLocationKeyed: id depends on location, not wording; line matters.
|
||||
func TestFindingIDLocationKeyed(t *testing.T) {
|
||||
a := findingID("r", 1, "security", "a.go", 10)
|
||||
|
||||
Reference in New Issue
Block a user