85f3b2546b
A lens whose verdict is "No material issues found" still tends to write
path:line references — but as verification notes ("verified X at
file:line is safe"), not problems. The telemetry was extracting those as
findings, which (a) pollutes the gadfly-reports store with false
positives and (b) unfairly penalizes thorough reviewers that do clean
passes — the FP penalty hit clean security passes from claude-code/sonnet,
deepseek, and minimax even though they correctly found nothing.
emit() now skips findings extraction for a clean-verdict lens (the run is
still recorded). Surfaced by grading the dogfood reviews: a large share
of "false positives" were exactly these clean-verification bullets.
Added TestEmit_SkipsCleanVerdictLens; README telemetry section updated.
gofmt clean, go vet quiet, go test -race green.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
255 lines
8.7 KiB
Go
255 lines
8.7 KiB
Go
package main
|
|
|
|
import (
|
|
"encoding/json"
|
|
"io"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"sync/atomic"
|
|
"testing"
|
|
"time"
|
|
)
|
|
|
|
// sampleLensMarkdown is a realistic lens output: a bold verdict lead-in, an ATX
|
|
// "###" heading anchoring one finding, a numbered + bold lead-in anchoring
|
|
// another, plus a duplicate "path:line" reference that must be deduped away.
|
|
const sampleLensMarkdown = "" +
|
|
"**Blocking issues found**\n" +
|
|
"\n" +
|
|
"### Swallowed error\n" +
|
|
"In `run/executor.go:166` the returned error is ignored, so a failed step is\n" +
|
|
"reported as success.\n" +
|
|
"\n" +
|
|
"1. **Missing nil check** — `compact/compactor.go:225` dereferences `cfg`\n" +
|
|
" which can be nil when the caller passes an empty config.\n" +
|
|
"\n" +
|
|
"See again run/executor.go:166 for the same root cause.\n"
|
|
|
|
func TestParseFindings(t *testing.T) {
|
|
got := parseFindings(Specialist{Name: "security"}, sampleLensMarkdown)
|
|
if len(got) != 2 {
|
|
t.Fatalf("parseFindings returned %d findings, want 2 (deduped): %+v", len(got), got)
|
|
}
|
|
|
|
if got[0].file != "run/executor.go" || got[0].line != 166 {
|
|
t.Errorf("finding[0] location = %s:%d, want run/executor.go:166", got[0].file, got[0].line)
|
|
}
|
|
if got[0].title != "Swallowed error" {
|
|
t.Errorf("finding[0] title = %q, want %q (nearest ### heading)", got[0].title, "Swallowed error")
|
|
}
|
|
|
|
if got[1].file != "compact/compactor.go" || got[1].line != 225 {
|
|
t.Errorf("finding[1] location = %s:%d, want compact/compactor.go:225", got[1].file, got[1].line)
|
|
}
|
|
if got[1].title != "Missing nil check" {
|
|
t.Errorf("finding[1] title = %q, want %q (numbered bold lead-in)", got[1].title, "Missing nil check")
|
|
}
|
|
if got[1].detail == "" {
|
|
t.Error("finding[1] detail should carry the paragraph context, got empty")
|
|
}
|
|
}
|
|
|
|
func TestParseFindings_ErroredLensYieldsNone(t *testing.T) {
|
|
notice := "⚠️ This reviewer failed to complete: context deadline exceeded (file.go:10)"
|
|
if f := parseFindings(Specialist{Name: "security"}, notice); len(f) != 0 {
|
|
t.Errorf("a failure-notice lens should yield no findings, got %+v", f)
|
|
}
|
|
if f := parseFindings(Specialist{Name: "security"}, " "); len(f) != 0 {
|
|
t.Errorf("empty output should yield no findings, got %+v", f)
|
|
}
|
|
}
|
|
|
|
func TestParseFindings_FallbackParagraphTitle(t *testing.T) {
|
|
md := "No material issues found.\n\nThe helper in src/util.go:42 returns the wrong code. Investigate.\n"
|
|
got := parseFindings(Specialist{Name: "correctness"}, md)
|
|
if len(got) != 1 {
|
|
t.Fatalf("want 1 finding, got %d: %+v", len(got), got)
|
|
}
|
|
if got[0].title != "The helper in src/util.go:42 returns the wrong code" {
|
|
t.Errorf("fallback title = %q, want first sentence of the paragraph", got[0].title)
|
|
}
|
|
}
|
|
|
|
func TestEmit_PostsRunsAndReports(t *testing.T) {
|
|
var runs, reports int32
|
|
var runBody map[string]any
|
|
var reportBody []map[string]any
|
|
var sawAuth string
|
|
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
if a := r.Header.Get("Authorization"); a != "" {
|
|
sawAuth = a
|
|
}
|
|
data, _ := io.ReadAll(r.Body)
|
|
switch r.URL.Path {
|
|
case "/runs":
|
|
atomic.AddInt32(&runs, 1)
|
|
_ = json.Unmarshal(data, &runBody)
|
|
w.WriteHeader(http.StatusOK)
|
|
_, _ = w.Write([]byte(`{"run_id":"x"}`))
|
|
case "/reports":
|
|
atomic.AddInt32(&reports, 1)
|
|
_ = json.Unmarshal(data, &reportBody)
|
|
w.WriteHeader(http.StatusOK)
|
|
_, _ = w.Write([]byte(`{"finding_ids":[1,2]}`))
|
|
default:
|
|
http.Error(w, "not found", http.StatusNotFound)
|
|
}
|
|
}))
|
|
defer srv.Close()
|
|
|
|
t.Setenv("GADFLY_FINDINGS_URL", srv.URL)
|
|
t.Setenv("GADFLY_FINDINGS_TOKEN", "secret-token")
|
|
t.Setenv("GADFLY_REPO", "owner/repo")
|
|
t.Setenv("GADFLY_PR", "7")
|
|
t.Setenv("GADFLY_MODEL", "ollama-cloud/qwen3")
|
|
t.Setenv("GADFLY_PROVIDER", "")
|
|
|
|
results := []specialistResult{
|
|
{spec: Specialist{Name: "security", Title: "🔒 Security"}, out: sampleLensMarkdown, verdict: verdictBlocking},
|
|
}
|
|
|
|
emit(results, 3*time.Second)
|
|
|
|
if got := atomic.LoadInt32(&runs); got != 1 {
|
|
t.Fatalf("/runs received %d times, want exactly 1", got)
|
|
}
|
|
if got := atomic.LoadInt32(&reports); got != 1 {
|
|
t.Fatalf("/reports received %d times, want exactly 1", got)
|
|
}
|
|
|
|
if sawAuth != "Bearer secret-token" {
|
|
t.Errorf("Authorization header = %q, want %q", sawAuth, "Bearer secret-token")
|
|
}
|
|
|
|
// --- /runs body: exact field names + values ---
|
|
for _, k := range []string{"run_id", "repo", "pr", "model", "provider", "lenses", "duration_secs", "input_tokens", "output_tokens", "cost_usd"} {
|
|
if _, ok := runBody[k]; !ok {
|
|
t.Errorf("/runs body missing field %q (got keys %v)", k, keysOf(runBody))
|
|
}
|
|
}
|
|
if runBody["run_id"] != "owner/repo#7:ollama-cloud/qwen3" {
|
|
t.Errorf("run_id = %v, want owner/repo#7:ollama-cloud/qwen3", runBody["run_id"])
|
|
}
|
|
if runBody["repo"] != "owner/repo" {
|
|
t.Errorf("repo = %v, want owner/repo", runBody["repo"])
|
|
}
|
|
if runBody["pr"] != float64(7) {
|
|
t.Errorf("pr = %v, want 7", runBody["pr"])
|
|
}
|
|
if runBody["model"] != "ollama-cloud/qwen3" {
|
|
t.Errorf("model = %v, want ollama-cloud/qwen3", runBody["model"])
|
|
}
|
|
if runBody["provider"] != "ollama-cloud" {
|
|
t.Errorf("provider = %v, want ollama-cloud", runBody["provider"])
|
|
}
|
|
if runBody["lenses"] != float64(1) {
|
|
t.Errorf("lenses = %v, want 1", runBody["lenses"])
|
|
}
|
|
if runBody["duration_secs"] != float64(3) {
|
|
t.Errorf("duration_secs = %v, want 3", runBody["duration_secs"])
|
|
}
|
|
if v, ok := runBody["input_tokens"]; !ok || v != nil {
|
|
t.Errorf("input_tokens = %v, want JSON null", v)
|
|
}
|
|
|
|
// --- /reports body: exact field names + values ---
|
|
if len(reportBody) != 2 {
|
|
t.Fatalf("/reports array length = %d, want 2", len(reportBody))
|
|
}
|
|
for _, k := range []string{"repo", "pr", "lens", "file", "line", "title", "model", "provider", "run_id", "raw_severity", "detail"} {
|
|
if _, ok := reportBody[0][k]; !ok {
|
|
t.Errorf("/reports[0] missing field %q (got keys %v)", k, keysOf(reportBody[0]))
|
|
}
|
|
}
|
|
if reportBody[0]["lens"] != "security" {
|
|
t.Errorf("reports[0].lens = %v, want security", reportBody[0]["lens"])
|
|
}
|
|
if reportBody[0]["file"] != "run/executor.go" {
|
|
t.Errorf("reports[0].file = %v, want run/executor.go", reportBody[0]["file"])
|
|
}
|
|
if reportBody[0]["line"] != float64(166) {
|
|
t.Errorf("reports[0].line = %v, want 166", reportBody[0]["line"])
|
|
}
|
|
if reportBody[0]["raw_severity"] != "Blocking issues found" {
|
|
t.Errorf("reports[0].raw_severity = %v, want 'Blocking issues found'", reportBody[0]["raw_severity"])
|
|
}
|
|
if reportBody[0]["run_id"] != "owner/repo#7:ollama-cloud/qwen3" {
|
|
t.Errorf("reports[0].run_id = %v, want owner/repo#7:ollama-cloud/qwen3", reportBody[0]["run_id"])
|
|
}
|
|
}
|
|
|
|
// A clean "No material issues found" lens must NOT emit findings, even though
|
|
// its markdown contains path:line references (those are verification notes, not
|
|
// problems). /runs is still posted; /reports is not.
|
|
func TestEmit_SkipsCleanVerdictLens(t *testing.T) {
|
|
var runs, reports int32
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
switch r.URL.Path {
|
|
case "/runs":
|
|
atomic.AddInt32(&runs, 1)
|
|
w.WriteHeader(http.StatusOK)
|
|
_, _ = w.Write([]byte(`{"run_id":"x"}`))
|
|
case "/reports":
|
|
atomic.AddInt32(&reports, 1)
|
|
w.WriteHeader(http.StatusOK)
|
|
default:
|
|
http.Error(w, "not found", http.StatusNotFound)
|
|
}
|
|
}))
|
|
defer srv.Close()
|
|
|
|
t.Setenv("GADFLY_FINDINGS_URL", srv.URL)
|
|
t.Setenv("GADFLY_REPO", "owner/repo")
|
|
t.Setenv("GADFLY_PR", "7")
|
|
t.Setenv("GADFLY_MODEL", "ollama-cloud/qwen3")
|
|
|
|
// Clean verdict, but the markdown is full of path:line "verified X" notes.
|
|
cleanMarkdown := "No material issues found.\n\nVerified `run/executor.go:166` handles the error.\n"
|
|
results := []specialistResult{
|
|
{spec: Specialist{Name: "security"}, out: cleanMarkdown, verdict: verdictClean},
|
|
}
|
|
emit(results, time.Second)
|
|
|
|
if got := atomic.LoadInt32(&runs); got != 1 {
|
|
t.Fatalf("/runs received %d times, want 1 (the run is still recorded)", got)
|
|
}
|
|
if got := atomic.LoadInt32(&reports); got != 0 {
|
|
t.Fatalf("/reports received %d times, want 0 (clean lens emits no findings)", got)
|
|
}
|
|
}
|
|
|
|
func TestEmit_DisabledMakesNoRequests(t *testing.T) {
|
|
var hits int32
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
atomic.AddInt32(&hits, 1)
|
|
w.WriteHeader(http.StatusOK)
|
|
}))
|
|
defer srv.Close()
|
|
|
|
t.Setenv("GADFLY_FINDINGS_URL", "") // disabled
|
|
t.Setenv("GADFLY_REPO", "owner/repo")
|
|
t.Setenv("GADFLY_PR", "7")
|
|
t.Setenv("GADFLY_MODEL", "ollama-cloud/qwen3")
|
|
|
|
results := []specialistResult{
|
|
{spec: Specialist{Name: "security"}, out: sampleLensMarkdown, verdict: verdictBlocking},
|
|
}
|
|
|
|
// Must not panic and must not hit the server.
|
|
emit(results, time.Second)
|
|
emit(nil, 0)
|
|
|
|
if got := atomic.LoadInt32(&hits); got != 0 {
|
|
t.Fatalf("telemetry disabled but server received %d requests, want 0", got)
|
|
}
|
|
}
|
|
|
|
func keysOf(m map[string]any) []string {
|
|
ks := make([]string, 0, len(m))
|
|
for k := range m {
|
|
ks = append(ks, k)
|
|
}
|
|
return ks
|
|
}
|