feat: gadfly-mcp — MCP server for grading gadfly-reports findings
CI / test (push) Successful in 10m10s

Thin, stateless stdio MCP server (official Go SDK) that exposes a gadfly-reports store to an MCP client (e.g. Claude). Tools: list_findings, record_finding_grade, scoreboard (grader forced to claude). Launch via 'go run ...@latest' — nothing to install. Core logic tested against httptest, no daemon required.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-26 23:55:24 -04:00
parent bb6bd209b4
commit f92e54e3ed
9 changed files with 730 additions and 28 deletions
+174
View File
@@ -0,0 +1,174 @@
package main
import (
"context"
"encoding/json"
"io"
"net/http"
"net/http/httptest"
"testing"
)
// intp returns a pointer to a freshly allocated int holding i, so fixtures
// can populate optional *int fields inline.
func intp(i int) *int {
	p := new(int)
	*p = i
	return p
}
// boolp returns a pointer to a freshly allocated bool holding b, so fixtures
// can populate optional *bool fields inline.
func boolp(b bool) *bool {
	p := new(bool)
	*p = b
	return p
}
// sampleRows returns a fixture of /export rows shared by the groupFindings
// tests:
//
//   - f1: three rows from two distinct models (gpt-4o appears twice), all
//     graded real with severity "high";
//   - f2: one row, graded as not real;
//   - f3: one ungraded row;
//   - f4: one ungraded row in a different repo/PR, to exercise filtering.
//
// Every row carries its own IsReal/Usefulness pointers; none are shared.
func sampleRows() []exportRow {
	f1 := exportRow{FindingID: "f1", Repo: "acme/widget", PR: 7, Lens: "security", File: "a.go", Line: 10, Title: "SQL injection", Model: "gpt-4o", Provider: "openai", Graded: true, IsReal: boolp(true), Severity: "high", Usefulness: intp(5), Grader: "claude"}

	// Same finding as reported by a second model.
	f1Qwen := f1
	f1Qwen.Model, f1Qwen.Provider = "qwen2.5-coder:7b", "ollama"
	f1Qwen.IsReal, f1Qwen.Usefulness = boolp(true), intp(5)

	// Exact repeat of the first row: dup model -> deduped.
	f1Dup := f1
	f1Dup.IsReal, f1Dup.Usefulness = boolp(true), intp(5)

	f2 := exportRow{FindingID: "f2", Repo: "acme/widget", PR: 7, Lens: "correctness", File: "b.go", Line: 22, Title: "off by one", Model: "gpt-4o", Provider: "openai", Graded: true, IsReal: boolp(false), Grader: "claude"}
	f3 := exportRow{FindingID: "f3", Repo: "acme/widget", PR: 7, Lens: "performance", File: "c.go", Line: 3, Title: "n+1 query", Model: "qwen2.5-coder:7b", Provider: "ollama", Graded: false}
	f4 := exportRow{FindingID: "f4", Repo: "other/repo", PR: 99, Lens: "docs", File: "d.go", Line: 1, Title: "typo", Model: "gpt-4o", Provider: "openai", Graded: false}

	return []exportRow{f1, f1Qwen, f1Dup, f2, f3, f4}
}
// TestGroupFindings_GroupingAndDedup runs groupFindings over the sample rows
// with no filters and checks that four findings come back, that f1 lists
// exactly the two models gpt-4o and qwen2.5-coder:7b (in that order, with the
// repeated gpt-4o row collapsed), and that f1's grade fields are carried over.
func TestGroupFindings_GroupingAndDedup(t *testing.T) {
	grouped := groupFindings(sampleRows(), "", nil, false)
	if n := len(grouped); n != 4 {
		t.Fatalf("want 4 grouped findings, got %d", n)
	}

	// Look f1 up by ID rather than by position; the last match wins.
	var target *findingOut
	for idx := range grouped {
		if grouped[idx].FindingID != "f1" {
			continue
		}
		target = &grouped[idx]
	}
	if target == nil {
		t.Fatal("f1 missing")
	}

	models := target.Models
	if len(models) != 2 {
		t.Fatalf("f1 should have 2 distinct models, got %v", models)
	}
	if !(models[0] == "gpt-4o" && models[1] == "qwen2.5-coder:7b") {
		t.Fatalf("f1 model order/dedup wrong: %v", models)
	}

	gradedReal := target.Graded && target.IsReal != nil && *target.IsReal
	if !gradedReal || target.Severity != "high" {
		t.Fatalf("f1 grade not propagated: %+v", target)
	}
}
// TestGroupFindings_FilterRepoAndPR exercises the repo filter alone, the PR
// filter alone, and both together against the sample rows.
func TestGroupFindings_FilterRepoAndPR(t *testing.T) {
	// Repo only: just the single other/repo finding should remain.
	byRepo := groupFindings(sampleRows(), "other/repo", nil, false)
	if !(len(byRepo) == 1 && byRepo[0].FindingID == "f4") {
		t.Fatalf("repo filter failed: %+v", byRepo)
	}

	// PR only: PR 99 belongs to that same finding.
	otherPR := 99
	byPR := groupFindings(sampleRows(), "", &otherPR, false)
	if !(len(byPR) == 1 && byPR[0].FindingID == "f4") {
		t.Fatalf("pr filter failed: %+v", byPR)
	}

	// Repo and PR combined: acme/widget PR 7 holds f1, f2 and f3.
	widgetPR := 7
	byBoth := groupFindings(sampleRows(), "acme/widget", &widgetPR, false)
	if n := len(byBoth); n != 3 {
		t.Fatalf("combined repo+pr filter want 3, got %d", n)
	}
}
// TestGroupFindings_OnlyUngraded checks that with the only-ungraded flag set,
// no returned finding is graded and the result contains f3 and f4 but neither
// f1 nor f2.
func TestGroupFindings_OnlyUngraded(t *testing.T) {
	ungraded := groupFindings(sampleRows(), "", nil, true)

	seen := make(map[string]bool, len(ungraded))
	for i := range ungraded {
		f := &ungraded[i]
		if f.Graded {
			t.Fatalf("only_ungraded returned a graded finding: %s", f.FindingID)
		}
		seen[f.FindingID] = true
	}

	hasUngraded := seen["f3"] && seen["f4"]
	hasGraded := seen["f1"] || seen["f2"]
	if !hasUngraded || hasGraded {
		t.Fatalf("only_ungraded set wrong: %v", seen)
	}
}
// TestListFindings_EmptyModelsIsArray serves a single export row that has no
// model and checks that listFindings emits valid JSON in which the finding's
// models field decodes to a non-nil slice, i.e. models:[] rather than null.
func TestListFindings_EmptyModelsIsArray(t *testing.T) {
	rows := []exportRow{{FindingID: "x", Repo: "r", PR: 1, Lens: "l", Title: "t"}}
	handler := func(w http.ResponseWriter, _ *http.Request) {
		// Best-effort write; a failure here shows up in the assertions below.
		_ = json.NewEncoder(w).Encode(rows)
	}
	srv := httptest.NewServer(http.HandlerFunc(handler))
	t.Cleanup(srv.Close)

	client := newClient(srv.URL, "")
	out, err := listFindings(context.Background(), client, "", nil, false)
	if err != nil {
		t.Fatal(err)
	}

	var decoded []findingOut
	if err := json.Unmarshal([]byte(out), &decoded); err != nil {
		t.Fatalf("output not valid JSON: %v\n%s", err, out)
	}
	if !(len(decoded) == 1 && decoded[0].Models != nil) {
		t.Fatalf("expected models:[] non-nil, got %+v", decoded)
	}
}
// TestRecordGrade_PathBodyAndAuth points recordGrade at a stub store and
// checks the request it sends: the path is /findings/<id>/grade, the token is
// sent as a Bearer Authorization header, the grader in the body is "claude"
// even though the caller left it unset, and the remaining grade fields are
// forwarded unchanged. It also expects a non-empty confirmation message.
func TestRecordGrade_PathBodyAndAuth(t *testing.T) {
	var gotPath, gotAuth string
	var gotBody gradeReq
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		gotPath = r.URL.Path
		gotAuth = r.Header.Get("Authorization")
		// Report body problems directly instead of letting them surface as a
		// misleading field mismatch further down. Errorf (unlike Fatalf) is
		// safe to call from the handler goroutine.
		b, err := io.ReadAll(r.Body)
		if err != nil {
			t.Errorf("reading request body: %v", err)
		}
		if err := json.Unmarshal(b, &gotBody); err != nil {
			t.Errorf("request body is not valid JSON: %v\n%s", err, b)
		}
		// Best-effort response; a write failure makes recordGrade itself fail.
		_ = json.NewEncoder(w).Encode(map[string]string{"finding_id": "abc123"})
	}))
	defer srv.Close()

	c := newClient(srv.URL, "sekret")
	msg, err := recordGrade(context.Background(), c, "abc123", gradeReq{IsReal: true, Severity: "high", Usefulness: intp(4)})
	if err != nil {
		t.Fatal(err)
	}
	if gotPath != "/findings/abc123/grade" {
		t.Fatalf("wrong path: %s", gotPath)
	}
	if gotAuth != "Bearer sekret" {
		t.Fatalf("auth header not sent: %q", gotAuth)
	}
	if gotBody.Grader != "claude" {
		t.Fatalf("grader should be forced to claude, got %q", gotBody.Grader)
	}
	if !gotBody.IsReal || gotBody.Severity != "high" || gotBody.Usefulness == nil || *gotBody.Usefulness != 4 {
		t.Fatalf("body not forwarded correctly: %+v", gotBody)
	}
	if msg == "" {
		t.Fatal("expected a confirmation message")
	}
}
func TestRecordGrade_StoreErrorSurfaced(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
w.WriteHeader(http.StatusBadRequest)
_ = json.NewEncoder(w).Encode(map[string]string{"error": "unknown finding_id"})
}))
defer srv.Close()
_, err := recordGrade(context.Background(), newClient(srv.URL, ""), "nope", gradeReq{IsReal: false})
if err == nil {
t.Fatal("expected non-2xx to surface as an error")
}
}
// TestScoreboard_FilterByModel serves stats for two models and checks that
// scoreboard, asked for "gpt-4o", outputs JSON holding only that model's
// entry.
func TestScoreboard_FilterByModel(t *testing.T) {
	stats := []modelStat{
		{Model: "gpt-4o", Provider: "openai", Runs: 3, Findings: 10},
		{Model: "qwen2.5-coder:7b", Provider: "ollama", Runs: 5, Findings: 4},
	}
	handler := func(w http.ResponseWriter, _ *http.Request) {
		// Best-effort write; a failure here shows up in the assertions below.
		_ = json.NewEncoder(w).Encode(stats)
	}
	srv := httptest.NewServer(http.HandlerFunc(handler))
	t.Cleanup(srv.Close)

	client := newClient(srv.URL, "")
	out, err := scoreboard(context.Background(), client, "gpt-4o")
	if err != nil {
		t.Fatal(err)
	}

	var filtered []modelStat
	if err := json.Unmarshal([]byte(out), &filtered); err != nil {
		t.Fatal(err)
	}
	if !(len(filtered) == 1 && filtered[0].Model == "gpt-4o") {
		t.Fatalf("model filter failed: %+v", filtered)
	}
}