feat: gadfly-mcp — MCP server for grading gadfly-reports findings
CI / test (push) Successful in 10m10s
CI / test (push) Successful in 10m10s
Thin, stateless stdio MCP server (official Go SDK) that exposes a gadfly-reports store to an MCP client (e.g. Claude). Tools: list_findings, record_finding_grade, scoreboard (grader forced to claude). Launch via 'go run ...@latest' — nothing to install. Core logic tested against httptest, no daemon required. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,362 @@
|
||||
// Command gadfly-mcp is a stdio MCP server that exposes a gadfly-reports findings
// store to an MCP client (e.g. Claude). It is a thin HTTP client to the
// gadfly-reports daemon: it never opens the SQLite database directly and does
// not import the daemon's package, so it mirrors the store's JSON shapes with
// small local structs.
//
// Launch it with:
//
//	go run gitea.stevedudenhoeffer.com/steve/gadfly-mcp@latest
//
// Configuration:
//
//	--store                base URL of the gadfly-reports daemon
//	                       (default $GADFLY_REPORTS_URL, else http://localhost:8090)
//	$GADFLY_REPORTS_TOKEN  if set, sent as "Authorization: Bearer <token>" on every request
//
// Tools: list_findings, record_finding_grade, scoreboard. The grader is always
// "claude". gadfly-reports stores no points; any points shown alongside the
// scoreboard are a client-side concern (severity -> points, divided by minutes).
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/modelcontextprotocol/go-sdk/mcp"
|
||||
)
|
||||
|
||||
// ---- local mirrors of the store's JSON shapes (see ../../store.go) ----
|
||||
|
||||
// exportRow mirrors store.ExportRow: one report joined with its finding, run
// timing, and latest grade. Many rows can share a finding_id (one per reporting
// model), which is why list_findings groups them.
//
// It is only ever decoded from the daemon's /export response. Keys in the
// payload that have no matching field here are ignored by encoding/json.
// IsReal and Usefulness are pointers so that a JSON null (or an absent key)
// decodes to nil rather than to a misleading false/0.
type exportRow struct {
	FindingID  string `json:"finding_id"`
	Repo       string `json:"repo"`
	PR         int    `json:"pr"`
	Lens       string `json:"lens"`
	File       string `json:"file"`
	Line       int    `json:"line"`
	Title      string `json:"title"`
	Model      string `json:"model"`
	Provider   string `json:"provider"`
	RunID      string `json:"run_id"`
	Graded     bool   `json:"graded"`
	IsReal     *bool  `json:"is_real"`
	Severity   string `json:"severity"`
	Usefulness *int   `json:"usefulness"`
	Grader     string `json:"grader"`
}
|
||||
|
||||
// modelStat mirrors store.ModelStat: the points-free per-model rollup.
//
// The scoreboard tool decodes a list of these from the daemon's /scoreboard
// response and re-encodes them unchanged, so the JSON keys below are both the
// wire format read from the daemon and the format shown to the MCP client.
type modelStat struct {
	Model         string         `json:"model"`
	Provider      string         `json:"provider"`
	Runs          int            `json:"runs"`
	Minutes       float64        `json:"minutes"`
	InputTokens   int64          `json:"input_tokens"`
	OutputTokens  int64          `json:"output_tokens"`
	Findings      int            `json:"findings"`
	Confirmed     int            `json:"confirmed"`
	FalsePositive int            `json:"false_positive"`
	Ungraded      int            `json:"ungraded"`
	BySeverity    map[string]int `json:"by_severity"`
}
|
||||
|
||||
// findingOut is one grouped finding as list_findings returns it.
//
// It carries the finding's identity and location, the distinct models that
// reported it, and its grade. Grade details and the file/line location are
// tagged omitempty so an ungraded or location-less finding stays compact,
// while Models has no omitempty and is therefore always present in the output.
type findingOut struct {
	FindingID  string   `json:"finding_id"`
	Repo       string   `json:"repo"`
	PR         int      `json:"pr"`
	Lens       string   `json:"lens"`
	File       string   `json:"file,omitempty"`
	Line       int      `json:"line,omitempty"`
	Title      string   `json:"title"`
	Models     []string `json:"models"`
	Graded     bool     `json:"graded"`
	IsReal     *bool    `json:"is_real,omitempty"`
	Severity   string   `json:"severity,omitempty"`
	Usefulness *int     `json:"usefulness,omitempty"`
	Grader     string   `json:"grader,omitempty"`
}
|
||||
|
||||
// gradeReq is the POST body for /findings/{id}/grade. grader is always "claude".
//
// IsReal and Grader are always serialised; Severity, Usefulness and Notes are
// dropped from the body when left at their zero value.
type gradeReq struct {
	IsReal     bool   `json:"is_real"`
	Severity   string `json:"severity,omitempty"`
	Usefulness *int   `json:"usefulness,omitempty"`
	Notes      string `json:"notes,omitempty"`
	Grader     string `json:"grader"`
}
|
||||
|
||||
// ---- thin HTTP client to the gadfly-reports daemon ----
|
||||
|
||||
// client is a minimal HTTP client for the gadfly-reports daemon. It holds the
// daemon's base URL (without a trailing slash), an optional bearer token, and
// the underlying *http.Client shared by every request.
type client struct {
	base  string
	token string
	hc    *http.Client
}

// newClient builds a client for the daemon at base. Trailing slashes on base
// are removed so that request paths (which start with "/") can be appended
// directly. An empty token disables the Authorization header. Every request is
// bounded by a 30 second timeout.
func newClient(base, token string) *client {
	return &client{
		base:  strings.TrimRight(base, "/"),
		token: token,
		hc:    &http.Client{Timeout: 30 * time.Second},
	}
}

// do issues a request, attaching the bearer token if configured, and returns the
// response body. A non-2xx status becomes an error carrying the body (which the
// daemon shapes as {"error":...}).
//
// path is appended verbatim to the base URL. When body is non-nil the request
// is sent with Content-Type: application/json. Transport errors are returned
// as produced by net/http (they already name the method and URL); a failure
// while reading the response body is wrapped with the method and path so the
// caller can tell which call broke.
func (c *client) do(ctx context.Context, method, path string, body io.Reader) ([]byte, error) {
	req, err := http.NewRequestWithContext(ctx, method, c.base+path, body)
	if err != nil {
		return nil, err
	}
	if body != nil {
		req.Header.Set("Content-Type", "application/json")
	}
	if c.token != "" {
		req.Header.Set("Authorization", "Bearer "+c.token)
	}

	resp, err := c.hc.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	// Read the body before looking at the status: on failure it is the error
	// detail, on success it is the payload.
	b, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("gadfly-reports %s %s: reading response: %w", method, path, err)
	}
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		return nil, fmt.Errorf("gadfly-reports %s %s: %s: %s", method, path, resp.Status, strings.TrimSpace(string(b)))
	}
	return b, nil
}

// getJSON performs a GET on path and decodes the JSON response body into out,
// which must be a pointer. A body that is not valid JSON for out yields an
// error that names the endpoint.
func (c *client) getJSON(ctx context.Context, path string, out any) error {
	b, err := c.do(ctx, http.MethodGet, path, nil)
	if err != nil {
		return err
	}
	if err := json.Unmarshal(b, out); err != nil {
		return fmt.Errorf("gadfly-reports GET %s: decoding response: %w", path, err)
	}
	return nil
}

// postJSON marshals in as JSON, POSTs it to path, and returns the raw response
// body. The request is not sent if in cannot be marshalled.
func (c *client) postJSON(ctx context.Context, path string, in any) ([]byte, error) {
	buf, err := json.Marshal(in)
	if err != nil {
		return nil, fmt.Errorf("gadfly-reports POST %s: encoding request: %w", path, err)
	}
	return c.do(ctx, http.MethodPost, path, bytes.NewReader(buf))
}
|
||||
|
||||
// ---- core logic (kept free of MCP types so it is directly testable) ----
|
||||
|
||||
// groupFindings collapses export rows (one per reporting model) into one entry
|
||||
// per finding_id, preserving first-seen order, with distinct reporting models.
|
||||
// Filters: repo (exact, when non-empty), pr (when non-nil), only-ungraded.
|
||||
func groupFindings(rows []exportRow, repo string, pr *int, onlyUngraded bool) []findingOut {
|
||||
type acc struct {
|
||||
out *findingOut
|
||||
seen map[string]bool
|
||||
}
|
||||
byID := map[string]*acc{}
|
||||
var order []string
|
||||
|
||||
for _, r := range rows {
|
||||
if repo != "" && r.Repo != repo {
|
||||
continue
|
||||
}
|
||||
if pr != nil && r.PR != *pr {
|
||||
continue
|
||||
}
|
||||
a, ok := byID[r.FindingID]
|
||||
if !ok {
|
||||
a = &acc{
|
||||
out: &findingOut{
|
||||
FindingID: r.FindingID,
|
||||
Repo: r.Repo,
|
||||
PR: r.PR,
|
||||
Lens: r.Lens,
|
||||
File: r.File,
|
||||
Line: r.Line,
|
||||
Title: r.Title,
|
||||
Graded: r.Graded,
|
||||
IsReal: r.IsReal,
|
||||
Severity: r.Severity,
|
||||
Usefulness: r.Usefulness,
|
||||
Grader: r.Grader,
|
||||
},
|
||||
seen: map[string]bool{},
|
||||
}
|
||||
byID[r.FindingID] = a
|
||||
order = append(order, r.FindingID)
|
||||
}
|
||||
if r.Model != "" && !a.seen[r.Model] {
|
||||
a.seen[r.Model] = true
|
||||
a.out.Models = append(a.out.Models, r.Model)
|
||||
}
|
||||
}
|
||||
|
||||
out := make([]findingOut, 0, len(order))
|
||||
for _, id := range order {
|
||||
f := byID[id].out
|
||||
if onlyUngraded && f.Graded {
|
||||
continue
|
||||
}
|
||||
if f.Models == nil {
|
||||
f.Models = []string{}
|
||||
}
|
||||
out = append(out, *f)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// listFindings fetches /export, groups + filters it, and returns pretty JSON.
|
||||
func listFindings(ctx context.Context, c *client, repo string, pr *int, onlyUngraded bool) (string, error) {
|
||||
var rows []exportRow
|
||||
if err := c.getJSON(ctx, "/export", &rows); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return prettyJSON(groupFindings(rows, repo, pr, onlyUngraded))
|
||||
}
|
||||
|
||||
// recordGrade POSTs a grade for findingID (grader forced to "claude").
|
||||
func recordGrade(ctx context.Context, c *client, findingID string, g gradeReq) (string, error) {
|
||||
g.Grader = "claude"
|
||||
path := "/findings/" + url.PathEscape(findingID) + "/grade"
|
||||
b, err := c.postJSON(ctx, path, g)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
verdict := "false positive"
|
||||
if g.IsReal {
|
||||
verdict = "real"
|
||||
if g.Severity != "" {
|
||||
verdict += " (" + g.Severity + ")"
|
||||
}
|
||||
}
|
||||
return fmt.Sprintf("graded finding %s as %s [%s]", findingID, verdict, strings.TrimSpace(string(b))), nil
|
||||
}
|
||||
|
||||
// scoreboard fetches /scoreboard, optionally narrows to one model, returns JSON.
|
||||
func scoreboard(ctx context.Context, c *client, model string) (string, error) {
|
||||
var stats []modelStat
|
||||
if err := c.getJSON(ctx, "/scoreboard", &stats); err != nil {
|
||||
return "", err
|
||||
}
|
||||
if model != "" {
|
||||
filtered := make([]modelStat, 0, 1)
|
||||
for _, s := range stats {
|
||||
if s.Model == model {
|
||||
filtered = append(filtered, s)
|
||||
}
|
||||
}
|
||||
stats = filtered
|
||||
}
|
||||
return prettyJSON(stats)
|
||||
}
|
||||
|
||||
// prettyJSON renders v as indented JSON text for display to the MCP client.
//
// HTML escaping is turned off: the output is plain text shown to a reader,
// not markup embedded in a page, so "<", ">" and "&" in finding titles are
// kept literal instead of being rewritten as \u003c, \u003e and \u0026. The
// trailing newline that json.Encoder appends is removed. On a marshalling
// error the returned string is empty.
func prettyJSON(v any) (string, error) {
	var buf bytes.Buffer
	enc := json.NewEncoder(&buf)
	enc.SetEscapeHTML(false)
	enc.SetIndent("", " ")
	if err := enc.Encode(v); err != nil {
		return "", err
	}
	return strings.TrimSuffix(buf.String(), "\n"), nil
}
|
||||
|
||||
func textResult(s string) *mcp.CallToolResult {
|
||||
return &mcp.CallToolResult{Content: []mcp.Content{&mcp.TextContent{Text: s}}}
|
||||
}
|
||||
|
||||
// ---- MCP tool input shapes (json/jsonschema tags drive the input schema) ----
|
||||
|
||||
// listFindingsIn is the input of the list_findings tool. Every field is
// optional; PR is a pointer so that "not supplied" (nil) can be told apart
// from an explicit number.
type listFindingsIn struct {
	Repo         string `json:"repo,omitempty" jsonschema:"filter to this repository (exact match)"`
	PR           *int   `json:"pr,omitempty" jsonschema:"filter to this pull request number"`
	OnlyUngraded bool   `json:"only_ungraded,omitempty" jsonschema:"when true, return only findings that have no grade yet"`
}
|
||||
|
||||
// recordGradeIn is the input of the record_finding_grade tool. FindingID and
// IsReal carry no omitempty; the remaining fields are optional. There is no
// grader field: the grader is not something the MCP client gets to choose.
type recordGradeIn struct {
	FindingID  string `json:"finding_id" jsonschema:"the finding id to grade"`
	IsReal     bool   `json:"is_real" jsonschema:"true if the finding is a genuine problem, false if a false positive"`
	Severity   string `json:"severity,omitempty" jsonschema:"required when is_real is true: one of trivial, small, medium, high, critical; omit when is_real is false"`
	Usefulness *int   `json:"usefulness,omitempty" jsonschema:"optional 1..5 rating of how useful the finding was"`
	Notes      string `json:"notes,omitempty" jsonschema:"optional free-text rationale for the grade"`
}
|
||||
|
||||
// scoreboardIn is the input of the scoreboard tool. An empty Model means the
// full scoreboard is returned.
type scoreboardIn struct {
	Model string `json:"model,omitempty" jsonschema:"optional: narrow the scoreboard to a single model"`
}
|
||||
|
||||
func main() {
|
||||
store := flag.String("store", envOr("GADFLY_REPORTS_URL", "http://localhost:8090"), "base URL of the gadfly-reports store daemon")
|
||||
flag.Parse()
|
||||
|
||||
c := newClient(*store, os.Getenv("GADFLY_REPORTS_TOKEN"))
|
||||
|
||||
server := mcp.NewServer(&mcp.Implementation{Name: "gadfly-mcp", Version: "0.1.0"}, nil)
|
||||
|
||||
mcp.AddTool(server, &mcp.Tool{
|
||||
Name: "list_findings",
|
||||
Description: "List Gadfly review findings from the gadfly-reports store, one entry per finding (reports from multiple models are grouped, with the distinct reporting models listed). Optionally filter by repo, pr, or only_ungraded to focus on findings that still need a grade.",
|
||||
}, func(ctx context.Context, _ *mcp.CallToolRequest, in listFindingsIn) (*mcp.CallToolResult, any, error) {
|
||||
out, err := listFindings(ctx, c, in.Repo, in.PR, in.OnlyUngraded)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
return textResult(out), nil, nil
|
||||
})
|
||||
|
||||
mcp.AddTool(server, &mcp.Tool{
|
||||
Name: "record_finding_grade",
|
||||
Description: "Grade a single finding in the gadfly-reports store (grader is always \"claude\"). Set is_real=true with a severity (trivial|small|medium|high|critical) for a genuine problem, or is_real=false (omit severity) for a false positive.",
|
||||
}, func(ctx context.Context, _ *mcp.CallToolRequest, in recordGradeIn) (*mcp.CallToolResult, any, error) {
|
||||
if strings.TrimSpace(in.FindingID) == "" {
|
||||
return nil, nil, fmt.Errorf("finding_id is required")
|
||||
}
|
||||
msg, err := recordGrade(ctx, c, in.FindingID, gradeReq{
|
||||
IsReal: in.IsReal,
|
||||
Severity: in.Severity,
|
||||
Usefulness: in.Usefulness,
|
||||
Notes: in.Notes,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
return textResult(msg), nil, nil
|
||||
})
|
||||
|
||||
mcp.AddTool(server, &mcp.Tool{
|
||||
Name: "scoreboard",
|
||||
Description: "Per-model rollup from the gadfly-reports store (runs, minutes, tokens, findings, confirmed/false-positive/ungraded counts, and a confirmed-by-severity histogram). NOTE: gadfly-reports stores no points; any points/value-per-minute ranking is computed CLIENT-SIDE by mapping severity to points and dividing by minutes. Optionally filter to a single model.",
|
||||
}, func(ctx context.Context, _ *mcp.CallToolRequest, in scoreboardIn) (*mcp.CallToolResult, any, error) {
|
||||
out, err := scoreboard(ctx, c, in.Model)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
return textResult(out), nil, nil
|
||||
})
|
||||
|
||||
if err := server.Run(context.Background(), &mcp.StdioTransport{}); err != nil {
|
||||
log.Printf("gadfly-reports mcp: %v", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
// envOr returns the value of the environment variable key, or def when the
// variable is unset or set to the empty string.
func envOr(key, def string) string {
	v := os.Getenv(key)
	if v == "" {
		return def
	}
	return v
}
|
||||
Reference in New Issue
Block a user