feat: PR filter — compare models on the same set of PRs
UI: a repo#pr multi-select (labeled with how many models ran each PR) scopes the whole table — runs, minutes, findings, points — to the chosen PRs, so a model with 2 runs can be fairly compared against one with 60. API: GET /scoreboard accepts ?repo= and ?pr= (repeatable or comma-list). Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -379,9 +379,42 @@ type ModelStat struct {
|
||||
BySeverity map[string]int `json:"by_severity"` // confirmed findings per severity
|
||||
}
|
||||
|
||||
// ScoreboardFilter restricts the scoreboard to a single repo and/or a chosen
// list of PRs. Scoping every model to the same PRs lets models with very
// different run counts be compared on identical work (for example, only the
// PRs that every model reviewed). The zero value applies no filtering.
type ScoreboardFilter struct {
	Repo string // exact repo to match; "" matches any repo
	PRs  []int  // PR numbers to match; empty matches any PR
}

// conds builds the SQL predicates for this filter together with their bind
// arguments, in matching order. repoCol and prCol name the columns to test:
// runs carry repo/pr directly, while reports reach them through findings.
// Both results are nil when the filter is the zero value.
//
// The PR predicate is independent of the repo predicate, so with an empty
// Repo a PR number matches in whichever repo it occurs.
func (f ScoreboardFilter) conds(repoCol, prCol string) ([]string, []any) {
	var (
		clauses []string
		binds   []any
	)

	if f.Repo != "" {
		clauses = append(clauses, repoCol+" = ?")
		binds = append(binds, f.Repo)
	}

	if n := len(f.PRs); n > 0 {
		// One placeholder per PR number: "?,?,?".
		marks := "?" + strings.Repeat(",?", n-1)
		clauses = append(clauses, prCol+" IN ("+marks+")")
		for i := 0; i < n; i++ {
			binds = append(binds, f.PRs[i])
		}
	}

	return clauses, binds
}
|
||||
|
||||
// whereClause renders conds as a SQL WHERE clause, AND-ing the conditions
// together. The result starts with a space so it can be appended directly to
// a query; it is "" when there are no conditions.
func whereClause(conds []string) string {
	if len(conds) > 0 {
		return " WHERE " + strings.Join(conds, " AND ")
	}
	return ""
}
|
||||
|
||||
// Scoreboard rolls runs + reports + latest grades up per model. All counts of
|
||||
// findings are DISTINCT by finding (a model re-reporting across runs counts once).
|
||||
func (s *Store) Scoreboard() ([]ModelStat, error) {
|
||||
func (s *Store) Scoreboard(f ScoreboardFilter) ([]ModelStat, error) {
|
||||
stats := map[string]*ModelStat{}
|
||||
get := func(model, provider string) *ModelStat {
|
||||
m, ok := stats[model]
|
||||
@@ -393,10 +426,11 @@ func (s *Store) Scoreboard() ([]ModelStat, error) {
|
||||
}
|
||||
|
||||
// Runs: minutes + tokens + run counts.
|
||||
runConds, runArgs := f.conds("repo", "pr")
|
||||
rrows, err := s.db.Query(`
|
||||
SELECT model, provider, COUNT(*), COALESCE(SUM(duration_secs),0),
|
||||
COALESCE(SUM(input_tokens),0), COALESCE(SUM(output_tokens),0)
|
||||
FROM runs GROUP BY model, provider`)
|
||||
FROM runs`+whereClause(runConds)+` GROUP BY model, provider`, runArgs...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -418,14 +452,17 @@ FROM runs GROUP BY model, provider`)
|
||||
rrows.Close()
|
||||
|
||||
// Findings: distinct per model, split by latest-grade state.
|
||||
findConds, findArgs := f.conds("fi.repo", "fi.pr")
|
||||
frows, err := s.db.Query(`
|
||||
SELECT r.model,
|
||||
COUNT(DISTINCT r.finding_id),
|
||||
COUNT(DISTINCT CASE WHEN lg.is_real=1 THEN r.finding_id END),
|
||||
COUNT(DISTINCT CASE WHEN lg.is_real=0 THEN r.finding_id END),
|
||||
COUNT(DISTINCT CASE WHEN lg.is_real IS NULL THEN r.finding_id END)
|
||||
FROM reports r LEFT JOIN latest_grades lg ON lg.finding_id = r.finding_id
|
||||
GROUP BY r.model`)
|
||||
FROM reports r
|
||||
JOIN findings fi ON fi.id = r.finding_id
|
||||
LEFT JOIN latest_grades lg ON lg.finding_id = r.finding_id`+whereClause(findConds)+`
|
||||
GROUP BY r.model`, findArgs...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -442,11 +479,14 @@ GROUP BY r.model`)
|
||||
frows.Close()
|
||||
|
||||
// Confirmed-by-severity histogram (distinct findings).
|
||||
sevConds, sevArgs := f.conds("fi.repo", "fi.pr")
|
||||
srows, err := s.db.Query(`
|
||||
SELECT r.model, lg.severity, COUNT(DISTINCT r.finding_id)
|
||||
FROM reports r JOIN latest_grades lg ON lg.finding_id = r.finding_id
|
||||
WHERE lg.is_real=1 AND lg.severity IS NOT NULL
|
||||
GROUP BY r.model, lg.severity`)
|
||||
FROM reports r
|
||||
JOIN findings fi ON fi.id = r.finding_id
|
||||
JOIN latest_grades lg ON lg.finding_id = r.finding_id`+
|
||||
whereClause(append(sevConds, "lg.is_real=1", "lg.severity IS NOT NULL"))+`
|
||||
GROUP BY r.model, lg.severity`, sevArgs...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user