f92e54e3ed
CI / test (push) Successful in 10m10s
Thin, stateless stdio MCP server (official Go SDK) that exposes a gadfly-reports store to an MCP client (e.g. Claude). Tools: list_findings, record_finding_grade, scoreboard (grader forced to claude). Launch via 'go run ...@latest' — nothing to install. Core logic tested against httptest, no daemon required. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
175 lines
6.2 KiB
Go
175 lines
6.2 KiB
Go
package main
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"io"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"testing"
|
|
)
|
|
|
|
// intp returns a pointer to a freshly allocated int holding i, so that
// optional integer fields can be populated inline in struct literals.
func intp(i int) *int {
	p := new(int)
	*p = i
	return p
}
|
|
// boolp returns a pointer to a freshly allocated bool holding b, so that
// optional boolean fields can be populated inline in struct literals.
func boolp(b bool) *bool {
	p := new(bool)
	*p = b
	return p
}
|
|
|
|
// sample /export rows: finding f1 reported by two models (one ungraded? no,
|
|
// graded real), f2 reported once and graded false-positive, f3 ungraded, plus a
|
|
// row for a different repo/pr to exercise filtering.
|
|
func sampleRows() []exportRow {
|
|
return []exportRow{
|
|
{FindingID: "f1", Repo: "acme/widget", PR: 7, Lens: "security", File: "a.go", Line: 10, Title: "SQL injection", Model: "gpt-4o", Provider: "openai", Graded: true, IsReal: boolp(true), Severity: "high", Usefulness: intp(5), Grader: "claude"},
|
|
{FindingID: "f1", Repo: "acme/widget", PR: 7, Lens: "security", File: "a.go", Line: 10, Title: "SQL injection", Model: "qwen2.5-coder:7b", Provider: "ollama", Graded: true, IsReal: boolp(true), Severity: "high", Usefulness: intp(5), Grader: "claude"},
|
|
{FindingID: "f1", Repo: "acme/widget", PR: 7, Lens: "security", File: "a.go", Line: 10, Title: "SQL injection", Model: "gpt-4o", Provider: "openai", Graded: true, IsReal: boolp(true), Severity: "high", Usefulness: intp(5), Grader: "claude"}, // dup model -> deduped
|
|
{FindingID: "f2", Repo: "acme/widget", PR: 7, Lens: "correctness", File: "b.go", Line: 22, Title: "off by one", Model: "gpt-4o", Provider: "openai", Graded: true, IsReal: boolp(false), Grader: "claude"},
|
|
{FindingID: "f3", Repo: "acme/widget", PR: 7, Lens: "performance", File: "c.go", Line: 3, Title: "n+1 query", Model: "qwen2.5-coder:7b", Provider: "ollama", Graded: false},
|
|
{FindingID: "f4", Repo: "other/repo", PR: 99, Lens: "docs", File: "d.go", Line: 1, Title: "typo", Model: "gpt-4o", Provider: "openai", Graded: false},
|
|
}
|
|
}
|
|
|
|
func TestGroupFindings_GroupingAndDedup(t *testing.T) {
|
|
got := groupFindings(sampleRows(), "", nil, false)
|
|
if len(got) != 4 {
|
|
t.Fatalf("want 4 grouped findings, got %d", len(got))
|
|
}
|
|
var f1 *findingOut
|
|
for i := range got {
|
|
if got[i].FindingID == "f1" {
|
|
f1 = &got[i]
|
|
}
|
|
}
|
|
if f1 == nil {
|
|
t.Fatal("f1 missing")
|
|
}
|
|
if len(f1.Models) != 2 {
|
|
t.Fatalf("f1 should have 2 distinct models, got %v", f1.Models)
|
|
}
|
|
if f1.Models[0] != "gpt-4o" || f1.Models[1] != "qwen2.5-coder:7b" {
|
|
t.Fatalf("f1 model order/dedup wrong: %v", f1.Models)
|
|
}
|
|
if !f1.Graded || f1.IsReal == nil || !*f1.IsReal || f1.Severity != "high" {
|
|
t.Fatalf("f1 grade not propagated: %+v", f1)
|
|
}
|
|
}
|
|
|
|
func TestGroupFindings_FilterRepoAndPR(t *testing.T) {
|
|
got := groupFindings(sampleRows(), "other/repo", nil, false)
|
|
if len(got) != 1 || got[0].FindingID != "f4" {
|
|
t.Fatalf("repo filter failed: %+v", got)
|
|
}
|
|
|
|
pr := 99
|
|
got = groupFindings(sampleRows(), "", &pr, false)
|
|
if len(got) != 1 || got[0].FindingID != "f4" {
|
|
t.Fatalf("pr filter failed: %+v", got)
|
|
}
|
|
|
|
pr = 7
|
|
got = groupFindings(sampleRows(), "acme/widget", &pr, false)
|
|
if len(got) != 3 {
|
|
t.Fatalf("combined repo+pr filter want 3, got %d", len(got))
|
|
}
|
|
}
|
|
|
|
func TestGroupFindings_OnlyUngraded(t *testing.T) {
|
|
got := groupFindings(sampleRows(), "", nil, true)
|
|
ids := map[string]bool{}
|
|
for _, f := range got {
|
|
ids[f.FindingID] = true
|
|
if f.Graded {
|
|
t.Fatalf("only_ungraded returned a graded finding: %s", f.FindingID)
|
|
}
|
|
}
|
|
if !ids["f3"] || !ids["f4"] || ids["f1"] || ids["f2"] {
|
|
t.Fatalf("only_ungraded set wrong: %v", ids)
|
|
}
|
|
}
|
|
|
|
func TestListFindings_EmptyModelsIsArray(t *testing.T) {
|
|
// A row with no model should still produce models:[] (not null) in JSON.
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
|
_ = json.NewEncoder(w).Encode([]exportRow{{FindingID: "x", Repo: "r", PR: 1, Lens: "l", Title: "t"}})
|
|
}))
|
|
defer srv.Close()
|
|
|
|
out, err := listFindings(context.Background(), newClient(srv.URL, ""), "", nil, false)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
var parsed []findingOut
|
|
if err := json.Unmarshal([]byte(out), &parsed); err != nil {
|
|
t.Fatalf("output not valid JSON: %v\n%s", err, out)
|
|
}
|
|
if len(parsed) != 1 || parsed[0].Models == nil {
|
|
t.Fatalf("expected models:[] non-nil, got %+v", parsed)
|
|
}
|
|
}
|
|
|
|
func TestRecordGrade_PathBodyAndAuth(t *testing.T) {
|
|
var gotPath, gotAuth string
|
|
var gotBody gradeReq
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
gotPath = r.URL.Path
|
|
gotAuth = r.Header.Get("Authorization")
|
|
b, _ := io.ReadAll(r.Body)
|
|
_ = json.Unmarshal(b, &gotBody)
|
|
_ = json.NewEncoder(w).Encode(map[string]string{"finding_id": "abc123"})
|
|
}))
|
|
defer srv.Close()
|
|
|
|
c := newClient(srv.URL, "sekret")
|
|
msg, err := recordGrade(context.Background(), c, "abc123", gradeReq{IsReal: true, Severity: "high", Usefulness: intp(4)})
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
if gotPath != "/findings/abc123/grade" {
|
|
t.Fatalf("wrong path: %s", gotPath)
|
|
}
|
|
if gotAuth != "Bearer sekret" {
|
|
t.Fatalf("auth header not sent: %q", gotAuth)
|
|
}
|
|
if gotBody.Grader != "claude" {
|
|
t.Fatalf("grader should be forced to claude, got %q", gotBody.Grader)
|
|
}
|
|
if !gotBody.IsReal || gotBody.Severity != "high" || gotBody.Usefulness == nil || *gotBody.Usefulness != 4 {
|
|
t.Fatalf("body not forwarded correctly: %+v", gotBody)
|
|
}
|
|
if msg == "" {
|
|
t.Fatal("expected a confirmation message")
|
|
}
|
|
}
|
|
|
|
func TestRecordGrade_StoreErrorSurfaced(t *testing.T) {
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
|
w.WriteHeader(http.StatusBadRequest)
|
|
_ = json.NewEncoder(w).Encode(map[string]string{"error": "unknown finding_id"})
|
|
}))
|
|
defer srv.Close()
|
|
|
|
_, err := recordGrade(context.Background(), newClient(srv.URL, ""), "nope", gradeReq{IsReal: false})
|
|
if err == nil {
|
|
t.Fatal("expected non-2xx to surface as an error")
|
|
}
|
|
}
|
|
|
|
func TestScoreboard_FilterByModel(t *testing.T) {
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
|
_ = json.NewEncoder(w).Encode([]modelStat{
|
|
{Model: "gpt-4o", Provider: "openai", Runs: 3, Findings: 10},
|
|
{Model: "qwen2.5-coder:7b", Provider: "ollama", Runs: 5, Findings: 4},
|
|
})
|
|
}))
|
|
defer srv.Close()
|
|
|
|
out, err := scoreboard(context.Background(), newClient(srv.URL, ""), "gpt-4o")
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
var parsed []modelStat
|
|
if err := json.Unmarshal([]byte(out), &parsed); err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
if len(parsed) != 1 || parsed[0].Model != "gpt-4o" {
|
|
t.Fatalf("model filter failed: %+v", parsed)
|
|
}
|
|
}
|