package main

import (
	"path/filepath"
	"testing"
)

// testStore opens a Store backed by a database file inside the test's
// temporary directory and registers a cleanup that closes it when the test
// finishes. It fails the test immediately if the store cannot be opened.
func testStore(t *testing.T) *Store {
	t.Helper()
	s, err := Open(filepath.Join(t.TempDir(), "gadfly-reports.db"))
	if err != nil {
		t.Fatalf("open: %v", err)
	}
	t.Cleanup(func() { s.Close() })
	return s
}

// i64 returns a pointer to a copy of v, for populating optional *int64 fields
// such as Run.InputTokens in struct literals.
func i64(v int64) *int64 { return &v }

// intp returns a pointer to a copy of v, for populating optional *int fields
// such as Grade.Usefulness in struct literals.
func intp(v int) *int { return &v }

// TestConsensusAndGrade: two models reporting the SAME location collapse to one
// finding with two reports; a single grade applies to both models' scoreboards.
func TestConsensusAndGrade(t *testing.T) {
	s := testStore(t)

	if err := s.AddRun(Run{RunID: "r-cloud", Repo: "steve/x", PR: 2, Model: "minimax", Provider: "ollama-cloud", Lenses: 3, DurationSecs: 300, InputTokens: i64(1000), OutputTokens: i64(500)}); err != nil {
		t.Fatal(err)
	}
	// The second run deliberately leaves the token counts unset.
	if err := s.AddRun(Run{RunID: "r-m1", Repo: "steve/x", PR: 2, Model: "qwen3", Provider: "m1", Lenses: 3, DurationSecs: 1740}); err != nil {
		t.Fatal(err)
	}

	// Both models flag the same file:line under the same lens.
	ids, err := s.AddReports([]ReportIn{
		{Repo: "steve/x", PR: 2, Lens: "correctness", File: "run/executor.go", Line: 166, Title: "SetIteration never called", Model: "minimax", Provider: "ollama-cloud", RunID: "r-cloud", RawSeverity: "Blocking"},
		{Repo: "steve/x", PR: 2, Lens: "correctness", File: "run/executor.go", Line: 166, Title: "iteration counter dead", Model: "qwen3", Provider: "m1", RunID: "r-m1", RawSeverity: "Blocking"},
	})
	if err != nil {
		t.Fatal(err)
	}
	// Guard the indexing below so a short result fails the test instead of
	// panicking.
	if len(ids) != 2 {
		t.Fatalf("AddReports returned %d ids, want 2", len(ids))
	}
	if ids[0] != ids[1] {
		t.Fatalf("same location should collapse to one finding id, got %q and %q", ids[0], ids[1])
	}

	if err := s.AddGrade(Grade{FindingID: ids[0], IsReal: true, Severity: "high", Usefulness: intp(4), Grader: "claude"}); err != nil {
		t.Fatal(err)
	}

	board, err := s.Scoreboard(ScoreboardFilter{})
	if err != nil {
		t.Fatal(err)
	}
	byModel := map[string]ModelStat{}
	for _, m := range board {
		byModel[m.Model] = m
	}

	// The single grade must be credited to each model that reported the finding.
	for _, name := range []string{"minimax", "qwen3"} {
		m, ok := byModel[name]
		if !ok {
			t.Errorf("%s: missing from scoreboard", name)
			continue
		}
		if m.Findings != 1 || m.Confirmed != 1 || m.BySeverity["high"] != 1 {
			t.Errorf("%s: findings=%d confirmed=%d high=%d, want 1/1/1", name, m.Findings, m.Confirmed, m.BySeverity["high"])
		}
	}

	// 300s and 1740s of run time are expected to surface as 5 and 29 minutes.
	if got := byModel["minimax"].Minutes; got != 5 {
		t.Errorf("minimax minutes = %v, want 5", got)
	}
	if got := byModel["qwen3"].Minutes; got != 29 {
		t.Errorf("qwen3 minutes = %v, want 29", got)
	}
	if got := byModel["minimax"].InputTokens; got != 1000 {
		t.Errorf("minimax input_tokens = %d, want 1000", got)
	}
}

// TestLatestGradeWins: a re-grade supersedes the prior one everywhere.
func TestLatestGradeWins(t *testing.T) {
	s := testStore(t)

	if err := s.AddRun(Run{RunID: "r1", Repo: "r", PR: 1, Model: "m", Provider: "p", DurationSecs: 60}); err != nil {
		t.Fatal(err)
	}
	ids, err := s.AddReports([]ReportIn{{Repo: "r", PR: 1, Lens: "security", File: "a.go", Line: 5, Title: "x", Model: "m", Provider: "p", RunID: "r1"}})
	if err != nil {
		t.Fatal(err)
	}
	if len(ids) != 1 {
		t.Fatalf("AddReports returned %d ids, want 1", len(ids))
	}
	id := ids[0]

	if err := s.AddGrade(Grade{FindingID: id, IsReal: true, Severity: "critical"}); err != nil {
		t.Fatal(err)
	}
	// Re-grade the same finding as a false positive.
	if err := s.AddGrade(Grade{FindingID: id, IsReal: false}); err != nil {
		t.Fatal(err)
	}

	board, err := s.Scoreboard(ScoreboardFilter{})
	if err != nil {
		t.Fatal(err)
	}
	// Fail cleanly rather than panic on board[0] if no model came back.
	if len(board) == 0 {
		t.Fatal("scoreboard is empty, want an entry for model \"m\"")
	}
	m := board[0]
	if m.Confirmed != 0 || m.FalsePositive != 1 || m.BySeverity["critical"] != 0 {
		t.Errorf("after re-grade: confirmed=%d fp=%d critical=%d, want 0/1/0", m.Confirmed, m.FalsePositive, m.BySeverity["critical"])
	}
}

// TestGradeValidation rejects bad severity / usefulness / unknown finding.
func TestGradeValidation(t *testing.T) { s := testStore(t) ids, _ := s.AddReports([]ReportIn{{Repo: "r", PR: 1, Lens: "perf", File: "a.go", Line: 1, Title: "t", Model: "m", Provider: "p", RunID: "r1"}}) id := ids[0] if err := s.AddGrade(Grade{FindingID: id, IsReal: true, Severity: "huge"}); err == nil { t.Error("expected error for invalid severity") } if err := s.AddGrade(Grade{FindingID: id, IsReal: true, Severity: "high", Usefulness: intp(9)}); err == nil { t.Error("expected error for out-of-range usefulness") } if err := s.AddGrade(Grade{FindingID: "nope", IsReal: true, Severity: "high"}); err == nil { t.Error("expected error for unknown finding") } // A false positive needs no severity. if err := s.AddGrade(Grade{FindingID: id, IsReal: false}); err != nil { t.Errorf("false positive without severity should be valid: %v", err) } } // TestScoreboardFilter: narrowing to repo/PRs drops runs and findings outside // the selection, so a model with many extra runs is compared on the same work. func TestScoreboardFilter(t *testing.T) { s := testStore(t) // fable reviewed only PR 1; veteran reviewed PRs 1 and 2 (and another repo). 
for _, r := range []Run{ {RunID: "f-1", Repo: "steve/x", PR: 1, Model: "fable", Provider: "p", DurationSecs: 60}, {RunID: "v-1", Repo: "steve/x", PR: 1, Model: "veteran", Provider: "p", DurationSecs: 120}, {RunID: "v-2", Repo: "steve/x", PR: 2, Model: "veteran", Provider: "p", DurationSecs: 120}, {RunID: "v-3", Repo: "steve/y", PR: 1, Model: "veteran", Provider: "p", DurationSecs: 120}, } { if err := s.AddRun(r); err != nil { t.Fatal(err) } } ids, err := s.AddReports([]ReportIn{ {Repo: "steve/x", PR: 1, Lens: "security", File: "a.go", Line: 1, Title: "shared", Model: "fable", Provider: "p", RunID: "f-1"}, {Repo: "steve/x", PR: 1, Lens: "security", File: "a.go", Line: 1, Title: "shared", Model: "veteran", Provider: "p", RunID: "v-1"}, {Repo: "steve/x", PR: 2, Lens: "security", File: "b.go", Line: 2, Title: "pr2 only", Model: "veteran", Provider: "p", RunID: "v-2"}, }) if err != nil { t.Fatal(err) } if err := s.AddGrade(Grade{FindingID: ids[0], IsReal: true, Severity: "high"}); err != nil { t.Fatal(err) } if err := s.AddGrade(Grade{FindingID: ids[2], IsReal: true, Severity: "critical"}); err != nil { t.Fatal(err) } board, err := s.Scoreboard(ScoreboardFilter{Repo: "steve/x", PRs: []int{1}}) if err != nil { t.Fatal(err) } byModel := map[string]ModelStat{} for _, m := range board { byModel[m.Model] = m } v := byModel["veteran"] if v.Runs != 1 || v.Minutes != 2 { t.Errorf("veteran runs=%d minutes=%v, want 1 run / 2 min (PR 2 and steve/y excluded)", v.Runs, v.Minutes) } if v.Findings != 1 || v.Confirmed != 1 || v.BySeverity["critical"] != 0 || v.BySeverity["high"] != 1 { t.Errorf("veteran findings=%d confirmed=%d by_severity=%v, want only the PR-1 finding", v.Findings, v.Confirmed, v.BySeverity) } fbl := byModel["fable"] if fbl.Runs != 1 || fbl.Findings != 1 || fbl.Confirmed != 1 { t.Errorf("fable runs=%d findings=%d confirmed=%d, want 1/1/1", fbl.Runs, fbl.Findings, fbl.Confirmed) } } // TestFindingIDLocationKeyed: id depends on location, not wording; line matters. 
func TestFindingIDLocationKeyed(t *testing.T) { a := findingID("r", 1, "security", "a.go", 10) sameWordingDiff := findingID("r", 1, "security", "a.go", 10) // any title — id ignores it if a != sameWordingDiff { t.Error("same location must yield same id regardless of wording") } if a == findingID("r", 1, "security", "a.go", 11) { t.Error("different line must yield different id") } if a == findingID("r", 1, "correctness", "a.go", 10) { t.Error("different lens must yield different id") } }