feat: gadfly-mcp — MCP server for grading gadfly-reports findings

Thin, stateless stdio MCP server (official Go SDK) that exposes a gadfly-reports store to an MCP client (e.g. Claude). Tools: list_findings, record_finding_grade, scoreboard (grader forced to claude). Launch via 'go run ...@latest' — nothing to install. Core logic tested against httptest, no daemon required. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-26 23:55:24 -04:00
parent bb6bd209b4
commit f92e54e3ed
9 changed files with 730 additions and 28 deletions
@@ -0,0 +1,362 @@
+// Command gadfly-mcp is a stdio MCP server that exposes a gadfly-reports findings
+// store to an MCP client (e.g. Claude). It is a THIN HTTP client to the gadfly-reports daemon: it
+// never opens the SQLite database directly and does not import the daemon's
+// package, so it mirrors the store's JSON shapes with small local structs.
+//
+// Launch it with:
+//
+//	go run gitea.stevedudenhoeffer.com/steve/gadfly-mcp@latest
+//
+// Configuration:
+//
+//	--store        base URL of the gadfly-reports daemon
+//	               (default $GADFLY_REPORTS_URL, else http://localhost:8090)
+//	$GADFLY_REPORTS_TOKEN  if set, sent as "Authorization: Bearer <token>" on every request
+//
+// Tools: list_findings, record_finding_grade, scoreboard. The grader is always
+// "claude". gadfly-reports stores no points, so the scoreboard tool returns only
+// the raw per-model rollup; any points ranking (severity -> points, divided by
+// minutes) is computed client-side.
+package main
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"flag"
+	"fmt"
+	"io"
+	"log"
+	"net/http"
+	"net/url"
+	"os"
+	"strings"
+	"time"
+
+	"github.com/modelcontextprotocol/go-sdk/mcp"
+)
+
+// ---- local mirrors of the store's JSON shapes (see ../../store.go) ----
+
// exportRow is the local mirror of store.ExportRow: a single model's report of
// a finding, flattened together with the finding it belongs to and that
// finding's latest grade. Several rows may carry the same finding_id (one per
// reporting model), which is why list_findings folds them into one entry.
type exportRow struct {
	// The finding and where it points.
	FindingID string `json:"finding_id"`
	Repo      string `json:"repo"`
	PR        int    `json:"pr"`
	Lens      string `json:"lens"`
	File      string `json:"file"`
	Line      int    `json:"line"`
	Title     string `json:"title"`

	// The model that reported it, and the run it was reported in.
	Model    string `json:"model"`
	Provider string `json:"provider"`
	RunID    string `json:"run_id"`

	// The latest grade. IsReal and Usefulness are pointers so that a JSON
	// null (or a missing key) decodes to nil instead of false/0.
	Graded     bool   `json:"graded"`
	IsReal     *bool  `json:"is_real"`
	Severity   string `json:"severity"`
	Usefulness *int   `json:"usefulness"`
	Grader     string `json:"grader"`
}
+
// modelStat is the local mirror of store.ModelStat, the per-model rollup the
// daemon serves from /scoreboard. It carries counts only, never points.
type modelStat struct {
	// Which model the row describes.
	Model    string `json:"model"`
	Provider string `json:"provider"`

	// Run volume and cost.
	Runs         int     `json:"runs"`
	Minutes      float64 `json:"minutes"`
	InputTokens  int64   `json:"input_tokens"`
	OutputTokens int64   `json:"output_tokens"`

	// Finding counts, split by grading outcome.
	Findings      int `json:"findings"`
	Confirmed     int `json:"confirmed"`
	FalsePositive int `json:"false_positive"`
	Ungraded      int `json:"ungraded"`

	// Histogram keyed by severity name.
	BySeverity map[string]int `json:"by_severity"`
}
+
// findingOut is the shape of one entry in list_findings output: a finding with
// every model that reported it collapsed into Models. Optional fields are
// dropped from the JSON when empty.
type findingOut struct {
	// The finding and where it points.
	FindingID string `json:"finding_id"`
	Repo      string `json:"repo"`
	PR        int    `json:"pr"`
	Lens      string `json:"lens"`
	File      string `json:"file,omitempty"`
	Line      int    `json:"line,omitempty"`
	Title     string `json:"title"`

	// Distinct reporting models, in first-seen order.
	Models []string `json:"models"`

	// The latest grade, if any.
	Graded     bool   `json:"graded"`
	IsReal     *bool  `json:"is_real,omitempty"`
	Severity   string `json:"severity,omitempty"`
	Usefulness *int   `json:"usefulness,omitempty"`
	Grader     string `json:"grader,omitempty"`
}
+
// gradeReq is the JSON body POSTed to /findings/{id}/grade. Callers fill in the
// verdict; recordGrade always sets Grader to "claude" before sending.
type gradeReq struct {
	// The verdict. Severity is left out of the JSON when empty.
	IsReal   bool   `json:"is_real"`
	Severity string `json:"severity,omitempty"`

	// Optional extras, left out of the JSON when unset.
	Usefulness *int   `json:"usefulness,omitempty"`
	Notes      string `json:"notes,omitempty"`

	// Who graded it.
	Grader string `json:"grader"`
}
+
+// ---- thin HTTP client to the gadfly-reports daemon ----
+
// client is a minimal HTTP client bound to one gadfly-reports daemon.
type client struct {
	// base is the daemon's base URL with trailing slashes removed, so request
	// paths (which start with "/") can be appended directly.
	base string
	// token is the bearer token; when empty no Authorization header is sent.
	token string
	// hc performs the requests.
	hc *http.Client
}

// newClient returns a client for the daemon at base, authenticating with token
// when it is non-empty. Every request is subject to a 30-second timeout.
func newClient(base, token string) *client {
	c := new(client)
	c.base = strings.TrimRight(base, "/")
	c.token = token
	c.hc = &http.Client{Timeout: 30 * time.Second}
	return c
}
+
+// do issues a request, attaching the bearer token if configured, and returns the
+// response body. A non-2xx status becomes an error carrying the body (which the
+// daemon shapes as {"error":...}).
+func (c *client) do(ctx context.Context, method, path string, body io.Reader) ([]byte, error) {
+	req, err := http.NewRequestWithContext(ctx, method, c.base+path, body)
+	if err != nil {
+		return nil, err
+	}
+	if body != nil {
+		req.Header.Set("Content-Type", "application/json")
+	}
+	if c.token != "" {
+		req.Header.Set("Authorization", "Bearer "+c.token)
+	}
+	resp, err := c.hc.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+	b, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, err
+	}
+	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
+		return nil, fmt.Errorf("gadfly-reports %s %s: %s: %s", method, path, resp.Status, strings.TrimSpace(string(b)))
+	}
+	return b, nil
+}
+
+func (c *client) getJSON(ctx context.Context, path string, out any) error {
+	b, err := c.do(ctx, http.MethodGet, path, nil)
+	if err != nil {
+		return err
+	}
+	return json.Unmarshal(b, out)
+}
+
+func (c *client) postJSON(ctx context.Context, path string, in any) ([]byte, error) {
+	buf, err := json.Marshal(in)
+	if err != nil {
+		return nil, err
+	}
+	return c.do(ctx, http.MethodPost, path, bytes.NewReader(buf))
+}
+
+// ---- core logic (kept free of MCP types so it is directly testable) ----
+
+// groupFindings collapses export rows (one per reporting model) into one entry
+// per finding_id, preserving first-seen order, with distinct reporting models.
+// Filters: repo (exact, when non-empty), pr (when non-nil), only-ungraded.
+func groupFindings(rows []exportRow, repo string, pr *int, onlyUngraded bool) []findingOut {
+	type acc struct {
+		out  *findingOut
+		seen map[string]bool
+	}
+	byID := map[string]*acc{}
+	var order []string
+
+	for _, r := range rows {
+		if repo != "" && r.Repo != repo {
+			continue
+		}
+		if pr != nil && r.PR != *pr {
+			continue
+		}
+		a, ok := byID[r.FindingID]
+		if !ok {
+			a = &acc{
+				out: &findingOut{
+					FindingID:  r.FindingID,
+					Repo:       r.Repo,
+					PR:         r.PR,
+					Lens:       r.Lens,
+					File:       r.File,
+					Line:       r.Line,
+					Title:      r.Title,
+					Graded:     r.Graded,
+					IsReal:     r.IsReal,
+					Severity:   r.Severity,
+					Usefulness: r.Usefulness,
+					Grader:     r.Grader,
+				},
+				seen: map[string]bool{},
+			}
+			byID[r.FindingID] = a
+			order = append(order, r.FindingID)
+		}
+		if r.Model != "" && !a.seen[r.Model] {
+			a.seen[r.Model] = true
+			a.out.Models = append(a.out.Models, r.Model)
+		}
+	}
+
+	out := make([]findingOut, 0, len(order))
+	for _, id := range order {
+		f := byID[id].out
+		if onlyUngraded && f.Graded {
+			continue
+		}
+		if f.Models == nil {
+			f.Models = []string{}
+		}
+		out = append(out, *f)
+	}
+	return out
+}
+
+// listFindings fetches /export, groups + filters it, and returns pretty JSON.
+func listFindings(ctx context.Context, c *client, repo string, pr *int, onlyUngraded bool) (string, error) {
+	var rows []exportRow
+	if err := c.getJSON(ctx, "/export", &rows); err != nil {
+		return "", err
+	}
+	return prettyJSON(groupFindings(rows, repo, pr, onlyUngraded))
+}
+
+// recordGrade POSTs a grade for findingID (grader forced to "claude").
+func recordGrade(ctx context.Context, c *client, findingID string, g gradeReq) (string, error) {
+	g.Grader = "claude"
+	path := "/findings/" + url.PathEscape(findingID) + "/grade"
+	b, err := c.postJSON(ctx, path, g)
+	if err != nil {
+		return "", err
+	}
+	verdict := "false positive"
+	if g.IsReal {
+		verdict = "real"
+		if g.Severity != "" {
+			verdict += " (" + g.Severity + ")"
+		}
+	}
+	return fmt.Sprintf("graded finding %s as %s [%s]", findingID, verdict, strings.TrimSpace(string(b))), nil
+}
+
+// scoreboard fetches /scoreboard, optionally narrows to one model, returns JSON.
+func scoreboard(ctx context.Context, c *client, model string) (string, error) {
+	var stats []modelStat
+	if err := c.getJSON(ctx, "/scoreboard", &stats); err != nil {
+		return "", err
+	}
+	if model != "" {
+		filtered := make([]modelStat, 0, 1)
+		for _, s := range stats {
+			if s.Model == model {
+				filtered = append(filtered, s)
+			}
+		}
+		stats = filtered
+	}
+	return prettyJSON(stats)
+}
+
// prettyJSON renders v as JSON indented by two spaces, with no trailing
// newline.
//
// The output is shown to an MCP client as plain text, not embedded in HTML, so
// HTML escaping is turned off: "<", ">" and "&" in finding titles or file paths
// are emitted literally instead of as \u003c, \u003e and \u0026. The result is
// still valid JSON that decodes to the same value.
func prettyJSON(v any) (string, error) {
	var buf bytes.Buffer
	enc := json.NewEncoder(&buf)
	enc.SetEscapeHTML(false)
	enc.SetIndent("", "  ")
	if err := enc.Encode(v); err != nil {
		return "", err
	}
	// Encode always appends one newline; drop it to match MarshalIndent.
	return strings.TrimSuffix(buf.String(), "\n"), nil
}
+
+func textResult(s string) *mcp.CallToolResult {
+	return &mcp.CallToolResult{Content: []mcp.Content{&mcp.TextContent{Text: s}}}
+}
+
+// ---- MCP tool input shapes (json/jsonschema tags drive the input schema) ----
+
// listFindingsIn is the argument object of the list_findings tool. Every field
// is optional; leaving them all out lists every finding.
type listFindingsIn struct {
	// Repo, when non-empty, keeps only findings from that repository.
	Repo string `json:"repo,omitempty" jsonschema:"filter to this repository (exact match)"`
	// PR, when set, keeps only findings on that pull request. It is a pointer
	// so that "not given" is distinguishable from 0.
	PR *int `json:"pr,omitempty" jsonschema:"filter to this pull request number"`
	// OnlyUngraded hides findings that already have a grade.
	OnlyUngraded bool `json:"only_ungraded,omitempty" jsonschema:"when true, return only findings that have no grade yet"`
}
+
// recordGradeIn is the argument object of the record_finding_grade tool.
type recordGradeIn struct {
	// FindingID names the finding being graded.
	FindingID string `json:"finding_id" jsonschema:"the finding id to grade"`
	// IsReal is the verdict: genuine problem or false positive.
	IsReal bool `json:"is_real" jsonschema:"true if the finding is a genuine problem, false if a false positive"`
	// Severity accompanies a "real" verdict.
	Severity string `json:"severity,omitempty" jsonschema:"required when is_real is true: one of trivial, small, medium, high, critical; omit when is_real is false"`
	// Usefulness is an optional rating; nil means it was not given.
	Usefulness *int `json:"usefulness,omitempty" jsonschema:"optional 1..5 rating of how useful the finding was"`
	// Notes is optional free text explaining the grade.
	Notes string `json:"notes,omitempty" jsonschema:"optional free-text rationale for the grade"`
}
+
// scoreboardIn is the argument object of the scoreboard tool.
type scoreboardIn struct {
	// Model, when non-empty, restricts the scoreboard to that one model.
	Model string `json:"model,omitempty" jsonschema:"optional: narrow the scoreboard to a single model"`
}
+
+func main() {
+	store := flag.String("store", envOr("GADFLY_REPORTS_URL", "http://localhost:8090"), "base URL of the gadfly-reports store daemon")
+	flag.Parse()
+
+	c := newClient(*store, os.Getenv("GADFLY_REPORTS_TOKEN"))
+
+	server := mcp.NewServer(&mcp.Implementation{Name: "gadfly-mcp", Version: "0.1.0"}, nil)
+
+	mcp.AddTool(server, &mcp.Tool{
+		Name:        "list_findings",
+		Description: "List Gadfly review findings from the gadfly-reports store, one entry per finding (reports from multiple models are grouped, with the distinct reporting models listed). Optionally filter by repo, pr, or only_ungraded to focus on findings that still need a grade.",
+	}, func(ctx context.Context, _ *mcp.CallToolRequest, in listFindingsIn) (*mcp.CallToolResult, any, error) {
+		out, err := listFindings(ctx, c, in.Repo, in.PR, in.OnlyUngraded)
+		if err != nil {
+			return nil, nil, err
+		}
+		return textResult(out), nil, nil
+	})
+
+	mcp.AddTool(server, &mcp.Tool{
+		Name:        "record_finding_grade",
+		Description: "Grade a single finding in the gadfly-reports store (grader is always \"claude\"). Set is_real=true with a severity (trivial|small|medium|high|critical) for a genuine problem, or is_real=false (omit severity) for a false positive.",
+	}, func(ctx context.Context, _ *mcp.CallToolRequest, in recordGradeIn) (*mcp.CallToolResult, any, error) {
+		if strings.TrimSpace(in.FindingID) == "" {
+			return nil, nil, fmt.Errorf("finding_id is required")
+		}
+		msg, err := recordGrade(ctx, c, in.FindingID, gradeReq{
+			IsReal:     in.IsReal,
+			Severity:   in.Severity,
+			Usefulness: in.Usefulness,
+			Notes:      in.Notes,
+		})
+		if err != nil {
+			return nil, nil, err
+		}
+		return textResult(msg), nil, nil
+	})
+
+	mcp.AddTool(server, &mcp.Tool{
+		Name:        "scoreboard",
+		Description: "Per-model rollup from the gadfly-reports store (runs, minutes, tokens, findings, confirmed/false-positive/ungraded counts, and a confirmed-by-severity histogram). NOTE: gadfly-reports stores no points; any points/value-per-minute ranking is computed CLIENT-SIDE by mapping severity to points and dividing by minutes. Optionally filter to a single model.",
+	}, func(ctx context.Context, _ *mcp.CallToolRequest, in scoreboardIn) (*mcp.CallToolResult, any, error) {
+		out, err := scoreboard(ctx, c, in.Model)
+		if err != nil {
+			return nil, nil, err
+		}
+		return textResult(out), nil, nil
+	})
+
+	if err := server.Run(context.Background(), &mcp.StdioTransport{}); err != nil {
+		log.Printf("gadfly-reports mcp: %v", err)
+		os.Exit(1)
+	}
+}
+
// envOr returns the value of environment variable key, or def when the
// variable is unset or set to the empty string.
func envOr(key, def string) string {
	v := os.Getenv(key)
	if v == "" {
		return def
	}
	return v
}