diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml new file mode 100644 index 0000000..acafab3 --- /dev/null +++ b/.gitea/workflows/ci.yml @@ -0,0 +1,26 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + types: [opened, synchronize, reopened] + workflow_dispatch: {} + +jobs: + test: + runs-on: ubuntu-latest + timeout-minutes: 10 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: + go-version: "1.26" + - name: Build + run: go build ./... + - name: Vet + run: go vet ./... + - name: gofmt + run: test -z "$(gofmt -l .)" || { gofmt -l .; echo "gofmt needed"; exit 1; } + - name: Test (race) + run: go test -race ./... diff --git a/.gitignore b/.gitignore index 5b90e79..8c3a718 100644 --- a/.gitignore +++ b/.gitignore @@ -1,27 +1,2 @@ -# ---> Go -# If you prefer the allow list template instead of the deny list, see community template: -# https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore -# -# Binaries for programs and plugins -*.exe -*.exe~ -*.dll -*.so -*.dylib - -# Test binary, built with `go test -c` -*.test - -# Output of the go coverage tool, specifically when used with LiteIDE -*.out - -# Dependency directories (remove the comment below to include it) -# vendor/ - -# Go workspace file -go.work -go.work.sum - -# env file -.env - +# build output +/gadfly-mcp diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..9a75c6d --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,50 @@ +# gadfly-mcp — Developer Guide + +A stdio [MCP](https://modelcontextprotocol.io) server exposing the +[gadfly-reports](https://gitea.stevedudenhoeffer.com/steve/gadfly-reports) findings store to an MCP +client (e.g. Claude). It is a **thin, stateless HTTP client** to the store — it never opens SQLite +and never imports the store's package. + +> This is a public, **vibe-coded** project (built largely by an AI agent). Keep that honest in the +> README; it's homelab-grade. 
+ +## Shape + +- Single `main.go` (`package main`) at the repo root, so the launch path is just + `go run gitea.stevedudenhoeffer.com/steve/gadfly-mcp@latest` — no `cmd/` subpath. This is the + whole point: the client compiles + caches it on demand; nothing to install or manage. +- Uses the official Go MCP SDK (`github.com/modelcontextprotocol/go-sdk`): `mcp.NewServer` → + `mcp.AddTool[In,Out]` (input schemas inferred from struct + `jsonschema` tags) → `server.Run(ctx, + &mcp.StdioTransport{})`. +- Config: `--store` flag (default `$GADFLY_REPORTS_URL`, else `http://localhost:8090`); bearer token + from `$GADFLY_REPORTS_TOKEN`, sent on every request when set. + +## Contract with gadfly-reports + +The store's HTTP/JSON API is the contract — its README is the **source of truth**. This client +mirrors only the subset it needs with small local structs (`exportRow`, `modelStat`, `gradeReq`, +…). If you change a field here, check it against gadfly-reports' `server.go`/`store.go`. Endpoints +used: `GET /export`, `POST /findings/{id}/grade`, `GET /scoreboard`. + +Three tools: `list_findings`, `record_finding_grade`, `scoreboard`. The grader is always forced to +`"claude"`. The store holds **no points**; ranking by points/value-per-minute is a client concern — +say so in the `scoreboard` tool description. + +## Tests + +The core logic (`groupFindings` / `listFindings` / `recordGrade` / `scoreboard`) is factored free of +MCP types and tested against an `httptest.Server`, so tests need no real daemon. Keep it that way — +add a test when you add a tool or change the grouping/filtering. + +```sh +go build ./... +go vet ./... +gofmt -l . # must be empty +go test -race ./... +``` + +## When making changes + +- Keep this a thin client: no SQLite, no business logic the store should own. +- Keep the launch path a root `package main` (don't move it under `cmd/`), so `go run …@latest` + stays the one-liner the README documents. 
diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..6ab7069 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 Steve Dudenhoeffer + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index f0f69cd..77b1aff 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,62 @@ -# gadfly-mcp +# 🪰🔌 gadfly-mcp +An [MCP](https://modelcontextprotocol.io) server that lets an MCP client (e.g. Claude) read and +**grade** [Gadfly](https://gitea.stevedudenhoeffer.com/steve/gadfly) review findings stored in +[gadfly-reports](https://gitea.stevedudenhoeffer.com/steve/gadfly-reports). It's a tiny, stateless +stdio process — a thin HTTP client to the store — so there's nothing to install or manage: your MCP +client launches it on demand with `go run …@latest`. + +> ### 🤖 Heads up: this is a vibe-coded project +> gadfly-mcp was built almost entirely by an AI agent (Claude Code) — code and docs. 
It's small and +> tested, but treat it as homelab-grade. Issues and PRs welcome. + +## Add it to Claude + +The store (`gadfly-reports`) runs persistently somewhere; this MCP server is throwaway. Point your +client at it via `go run` (first launch compiles + caches; needs Go + access to the module host): + +```jsonc +{ + "mcpServers": { + "gadfly": { + "command": "go", + "args": [ + "run", "gitea.stevedudenhoeffer.com/steve/gadfly-mcp@latest", + "--store", "https://gadfly-reports.your-host:8090" + ], + "env": { "GADFLY_REPORTS_TOKEN": "the-same-token-the-store-uses" } + } + } +} +``` + +`--store` defaults to `$GADFLY_REPORTS_URL` (else `http://localhost:8090`). If the store requires a +bearer token, set `GADFLY_REPORTS_TOKEN`. + +## Tools + +| Tool | Args | Does | +|---|---|---| +| `list_findings` | `repo?`, `pr?`, `only_ungraded?` | lists findings (one entry per finding; reports from multiple models grouped, distinct models listed) | +| `record_finding_grade` | `finding_id`, `is_real`, `severity?`, `usefulness?`, `notes?` | records a triage grade (grader is always `claude`) | +| `scoreboard` | `model?` | per-model rollup (runs, minutes, tokens, confirmed-by-severity histogram) | + +`severity` is one of `trivial|small|medium|high|critical` (set it when `is_real=true`; omit for a +false positive). **Points are not stored or returned** — gadfly-reports keeps raw facts, so any +"value per minute / per token" ranking is computed client-side (map severity → points, divide by +minutes). Use `scoreboard` for the raw material. + +Typical flow: *"List the ungraded gadfly findings on PR 2, look into each against the code, and +record a grade for each."* + +## Build / test + +```sh +go build ./... +go test ./... +gofmt -l . # must be clean +``` + +## License + +MIT © 2026 Steve Dudenhoeffer. 
diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..69fc7d6 --- /dev/null +++ b/go.mod @@ -0,0 +1,14 @@ +module gitea.stevedudenhoeffer.com/steve/gadfly-mcp + +go 1.26 + +require github.com/modelcontextprotocol/go-sdk v1.6.1 + +require ( + github.com/google/jsonschema-go v0.4.3 // indirect + github.com/segmentio/asm v1.1.3 // indirect + github.com/segmentio/encoding v0.5.4 // indirect + github.com/yosida95/uritemplate/v3 v3.0.2 // indirect + golang.org/x/oauth2 v0.35.0 // indirect + golang.org/x/sys v0.41.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..2d6f186 --- /dev/null +++ b/go.sum @@ -0,0 +1,20 @@ +github.com/golang-jwt/jwt/v5 v5.3.1 h1:kYf81DTWFe7t+1VvL7eS+jKFVWaUnK9cB1qbwn63YCY= +github.com/golang-jwt/jwt/v5 v5.3.1/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/jsonschema-go v0.4.3 h1:/DBOLZTfDow7pe2GmaJNhltueGTtDKICi8V8p+DQPd0= +github.com/google/jsonschema-go v0.4.3/go.mod h1:r5quNTdLOYEz95Ru18zA0ydNbBuYoo9tgaYcxEYhJVE= +github.com/modelcontextprotocol/go-sdk v1.6.1 h1:0zOSupjKUxPKSocPT1Wtago+mUHU2/uZ4xSOY0FGReU= +github.com/modelcontextprotocol/go-sdk v1.6.1/go.mod h1:kzm3kzFL1/+AziGOE0nUs3gvPoNxMCvkxokMkuFapXQ= +github.com/segmentio/asm v1.1.3 h1:WM03sfUOENvvKexOLp+pCqgb/WDjsi7EK8gIsICtzhc= +github.com/segmentio/asm v1.1.3/go.mod h1:Ld3L4ZXGNcSLRg4JBsZ3//1+f/TjYl0Mzen/DQy1EJg= +github.com/segmentio/encoding v0.5.4 h1:OW1VRern8Nw6ITAtwSZ7Idrl3MXCFwXHPgqESYfvNt0= +github.com/segmentio/encoding v0.5.4/go.mod h1:HS1ZKa3kSN32ZHVZ7ZLPLXWvOVIiZtyJnO1gPH1sKt0= +github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4= +github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4= +golang.org/x/oauth2 v0.35.0 h1:Mv2mzuHuZuY2+bkyWXIHMfhNdJAdwW3FuWeCPYN5GVQ= 
+golang.org/x/oauth2 v0.35.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= +golang.org/x/sys v0.41.0 h1:Ivj+2Cp/ylzLiEU89QhWblYnOE9zerudt9Ftecq2C6k= +golang.org/x/sys v0.41.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/tools v0.42.0 h1:uNgphsn75Tdz5Ji2q36v/nsFSfR/9BRFvqhGBaJGd5k= +golang.org/x/tools v0.42.0/go.mod h1:Ma6lCIwGZvHK6XtgbswSoWroEkhugApmsXyrUmBhfr0= diff --git a/main.go b/main.go new file mode 100644 index 0000000..4c23c4d --- /dev/null +++ b/main.go @@ -0,0 +1,362 @@ +// Command gadfly-mcp is a stdio MCP server that exposes a gadfly-reports findings +// store to an MCP client (e.g. Claude). It is a THIN HTTP client to the gadfly-reports daemon: it +// never opens the SQLite database directly and does not import the daemon's +// package, so it mirrors the store's JSON shapes with small local structs. +// +// Launch it with: +// +// go run gitea.stevedudenhoeffer.com/steve/gadfly-mcp@latest +// +// Configuration: +// +// --store base URL of the gadfly-reports daemon +// (default $GADFLY_REPORTS_URL, else http://localhost:8090) +// $GADFLY_REPORTS_TOKEN if set, sent as "Authorization: Bearer <token>" on every request +// +// Tools: list_findings, record_finding_grade, scoreboard. The grader is always +// "claude". gadfly-reports stores no points; any points ranking built from the +// scoreboard is a client-side concern (severity -> points, divided by minutes). +package main + +import ( + "bytes" + "context" + "encoding/json" + "flag" + "fmt" + "io" + "log" + "net/http" + "net/url" + "os" + "strings" + "time" + + "github.com/modelcontextprotocol/go-sdk/mcp" +) + +// ---- local mirrors of the store's JSON shapes (see store.go in the gadfly-reports repo) ---- + +// exportRow mirrors store.ExportRow: one report joined with its finding, run +// timing, and latest grade. Many rows can share a finding_id (one per reporting +// model), which is why list_findings groups them. 
// The three JSON shapes below are declared as one group: exportRow and
// modelStat are decoded from the store's responses, findingOut is what
// list_findings encodes back to the MCP client. Field order is significant for
// findingOut and modelStat because it is the order encoding/json emits keys in.
type (
	// exportRow is a single row of the store's /export response. IsReal and
	// Usefulness are pointers so that a JSON null (or an absent key) decodes to
	// nil instead of being confused with false / 0.
	exportRow struct {
		FindingID  string `json:"finding_id"`
		Repo       string `json:"repo"`
		PR         int    `json:"pr"`
		Lens       string `json:"lens"`
		File       string `json:"file"`
		Line       int    `json:"line"`
		Title      string `json:"title"`
		Model      string `json:"model"`
		Provider   string `json:"provider"`
		RunID      string `json:"run_id"`
		Graded     bool   `json:"graded"`
		IsReal     *bool  `json:"is_real"`
		Severity   string `json:"severity"`
		Usefulness *int   `json:"usefulness"`
		Grader     string `json:"grader"`
	}

	// modelStat mirrors store.ModelStat: the points-free per-model rollup.
	modelStat struct {
		Model         string         `json:"model"`
		Provider      string         `json:"provider"`
		Runs          int            `json:"runs"`
		Minutes       float64        `json:"minutes"`
		InputTokens   int64          `json:"input_tokens"`
		OutputTokens  int64          `json:"output_tokens"`
		Findings      int            `json:"findings"`
		Confirmed     int            `json:"confirmed"`
		FalsePositive int            `json:"false_positive"`
		Ungraded      int            `json:"ungraded"`
		BySeverity    map[string]int `json:"by_severity"`
	}

	// findingOut is one grouped finding as list_findings returns it. Models has
	// no omitempty, so it is always present in the output; the optional
	// location and grade fields are dropped from the JSON when unset.
	findingOut struct {
		FindingID  string   `json:"finding_id"`
		Repo       string   `json:"repo"`
		PR         int      `json:"pr"`
		Lens       string   `json:"lens"`
		File       string   `json:"file,omitempty"`
		Line       int      `json:"line,omitempty"`
		Title      string   `json:"title"`
		Models     []string `json:"models"`
		Graded     bool     `json:"graded"`
		IsReal     *bool    `json:"is_real,omitempty"`
		Severity   string   `json:"severity,omitempty"`
		Usefulness *int     `json:"usefulness,omitempty"`
		Grader     string   `json:"grader,omitempty"`
	}
)

// gradeReq is the POST body for /findings/{id}/grade. grader is always "claude".
+type gradeReq struct { + IsReal bool `json:"is_real"` + Severity string `json:"severity,omitempty"` + Usefulness *int `json:"usefulness,omitempty"` + Notes string `json:"notes,omitempty"` + Grader string `json:"grader"` +} + +// ---- thin HTTP client to the gadfly-reports daemon ---- + +type client struct { + base string + token string + hc *http.Client +} + +func newClient(base, token string) *client { + return &client{ + base: strings.TrimRight(base, "/"), + token: token, + hc: &http.Client{Timeout: 30 * time.Second}, + } +} + +// do issues a request, attaching the bearer token if configured, and returns the +// response body. A non-2xx status becomes an error carrying the body (which the +// daemon shapes as {"error":...}). +func (c *client) do(ctx context.Context, method, path string, body io.Reader) ([]byte, error) { + req, err := http.NewRequestWithContext(ctx, method, c.base+path, body) + if err != nil { + return nil, err + } + if body != nil { + req.Header.Set("Content-Type", "application/json") + } + if c.token != "" { + req.Header.Set("Authorization", "Bearer "+c.token) + } + resp, err := c.hc.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + b, err := io.ReadAll(resp.Body) + if err != nil { + return nil, err + } + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + return nil, fmt.Errorf("gadfly-reports %s %s: %s: %s", method, path, resp.Status, strings.TrimSpace(string(b))) + } + return b, nil +} + +func (c *client) getJSON(ctx context.Context, path string, out any) error { + b, err := c.do(ctx, http.MethodGet, path, nil) + if err != nil { + return err + } + return json.Unmarshal(b, out) +} + +func (c *client) postJSON(ctx context.Context, path string, in any) ([]byte, error) { + buf, err := json.Marshal(in) + if err != nil { + return nil, err + } + return c.do(ctx, http.MethodPost, path, bytes.NewReader(buf)) +} + +// ---- core logic (kept free of MCP types so it is directly testable) ---- + +// groupFindings collapses 
export rows (one per reporting model) into one entry +// per finding_id, preserving first-seen order, with distinct reporting models. +// Filters: repo (exact, when non-empty), pr (when non-nil), only-ungraded. +func groupFindings(rows []exportRow, repo string, pr *int, onlyUngraded bool) []findingOut { + type acc struct { + out *findingOut + seen map[string]bool + } + byID := map[string]*acc{} + var order []string + + for _, r := range rows { + if repo != "" && r.Repo != repo { + continue + } + if pr != nil && r.PR != *pr { + continue + } + a, ok := byID[r.FindingID] + if !ok { + a = &acc{ + out: &findingOut{ + FindingID: r.FindingID, + Repo: r.Repo, + PR: r.PR, + Lens: r.Lens, + File: r.File, + Line: r.Line, + Title: r.Title, + Graded: r.Graded, + IsReal: r.IsReal, + Severity: r.Severity, + Usefulness: r.Usefulness, + Grader: r.Grader, + }, + seen: map[string]bool{}, + } + byID[r.FindingID] = a + order = append(order, r.FindingID) + } + if r.Model != "" && !a.seen[r.Model] { + a.seen[r.Model] = true + a.out.Models = append(a.out.Models, r.Model) + } + } + + out := make([]findingOut, 0, len(order)) + for _, id := range order { + f := byID[id].out + if onlyUngraded && f.Graded { + continue + } + if f.Models == nil { + f.Models = []string{} + } + out = append(out, *f) + } + return out +} + +// listFindings fetches /export, groups + filters it, and returns pretty JSON. +func listFindings(ctx context.Context, c *client, repo string, pr *int, onlyUngraded bool) (string, error) { + var rows []exportRow + if err := c.getJSON(ctx, "/export", &rows); err != nil { + return "", err + } + return prettyJSON(groupFindings(rows, repo, pr, onlyUngraded)) +} + +// recordGrade POSTs a grade for findingID (grader forced to "claude"). 
+func recordGrade(ctx context.Context, c *client, findingID string, g gradeReq) (string, error) { + g.Grader = "claude" + path := "/findings/" + url.PathEscape(findingID) + "/grade" + b, err := c.postJSON(ctx, path, g) + if err != nil { + return "", err + } + verdict := "false positive" + if g.IsReal { + verdict = "real" + if g.Severity != "" { + verdict += " (" + g.Severity + ")" + } + } + return fmt.Sprintf("graded finding %s as %s [%s]", findingID, verdict, strings.TrimSpace(string(b))), nil +} + +// scoreboard fetches /scoreboard, optionally narrows to one model, returns JSON. +func scoreboard(ctx context.Context, c *client, model string) (string, error) { + var stats []modelStat + if err := c.getJSON(ctx, "/scoreboard", &stats); err != nil { + return "", err + } + if model != "" { + filtered := make([]modelStat, 0, 1) + for _, s := range stats { + if s.Model == model { + filtered = append(filtered, s) + } + } + stats = filtered + } + return prettyJSON(stats) +} + +func prettyJSON(v any) (string, error) { + b, err := json.MarshalIndent(v, "", " ") + if err != nil { + return "", err + } + return string(b), nil +} + +func textResult(s string) *mcp.CallToolResult { + return &mcp.CallToolResult{Content: []mcp.Content{&mcp.TextContent{Text: s}}} +} + +// ---- MCP tool input shapes (json/jsonschema tags drive the input schema) ---- + +type listFindingsIn struct { + Repo string `json:"repo,omitempty" jsonschema:"filter to this repository (exact match)"` + PR *int `json:"pr,omitempty" jsonschema:"filter to this pull request number"` + OnlyUngraded bool `json:"only_ungraded,omitempty" jsonschema:"when true, return only findings that have no grade yet"` +} + +type recordGradeIn struct { + FindingID string `json:"finding_id" jsonschema:"the finding id to grade"` + IsReal bool `json:"is_real" jsonschema:"true if the finding is a genuine problem, false if a false positive"` + Severity string `json:"severity,omitempty" jsonschema:"required when is_real is true: one of 
trivial, small, medium, high, critical; omit when is_real is false"` + Usefulness *int `json:"usefulness,omitempty" jsonschema:"optional 1..5 rating of how useful the finding was"` + Notes string `json:"notes,omitempty" jsonschema:"optional free-text rationale for the grade"` +} + +type scoreboardIn struct { + Model string `json:"model,omitempty" jsonschema:"optional: narrow the scoreboard to a single model"` +} + +func main() { + store := flag.String("store", envOr("GADFLY_REPORTS_URL", "http://localhost:8090"), "base URL of the gadfly-reports store daemon") + flag.Parse() + + c := newClient(*store, os.Getenv("GADFLY_REPORTS_TOKEN")) + + server := mcp.NewServer(&mcp.Implementation{Name: "gadfly-mcp", Version: "0.1.0"}, nil) + + mcp.AddTool(server, &mcp.Tool{ + Name: "list_findings", + Description: "List Gadfly review findings from the gadfly-reports store, one entry per finding (reports from multiple models are grouped, with the distinct reporting models listed). Optionally filter by repo, pr, or only_ungraded to focus on findings that still need a grade.", + }, func(ctx context.Context, _ *mcp.CallToolRequest, in listFindingsIn) (*mcp.CallToolResult, any, error) { + out, err := listFindings(ctx, c, in.Repo, in.PR, in.OnlyUngraded) + if err != nil { + return nil, nil, err + } + return textResult(out), nil, nil + }) + + mcp.AddTool(server, &mcp.Tool{ + Name: "record_finding_grade", + Description: "Grade a single finding in the gadfly-reports store (grader is always \"claude\"). 
Set is_real=true with a severity (trivial|small|medium|high|critical) for a genuine problem, or is_real=false (omit severity) for a false positive.", + }, func(ctx context.Context, _ *mcp.CallToolRequest, in recordGradeIn) (*mcp.CallToolResult, any, error) { + if strings.TrimSpace(in.FindingID) == "" { + return nil, nil, fmt.Errorf("finding_id is required") + } + msg, err := recordGrade(ctx, c, in.FindingID, gradeReq{ + IsReal: in.IsReal, + Severity: in.Severity, + Usefulness: in.Usefulness, + Notes: in.Notes, + }) + if err != nil { + return nil, nil, err + } + return textResult(msg), nil, nil + }) + + mcp.AddTool(server, &mcp.Tool{ + Name: "scoreboard", + Description: "Per-model rollup from the gadfly-reports store (runs, minutes, tokens, findings, confirmed/false-positive/ungraded counts, and a confirmed-by-severity histogram). NOTE: gadfly-reports stores no points; any points/value-per-minute ranking is computed CLIENT-SIDE by mapping severity to points and dividing by minutes. Optionally filter to a single model.", + }, func(ctx context.Context, _ *mcp.CallToolRequest, in scoreboardIn) (*mcp.CallToolResult, any, error) { + out, err := scoreboard(ctx, c, in.Model) + if err != nil { + return nil, nil, err + } + return textResult(out), nil, nil + }) + + if err := server.Run(context.Background(), &mcp.StdioTransport{}); err != nil { + log.Printf("gadfly-reports mcp: %v", err) + os.Exit(1) + } +} + +func envOr(key, def string) string { + if v := os.Getenv(key); v != "" { + return v + } + return def +} diff --git a/main_test.go b/main_test.go new file mode 100644 index 0000000..f1ad8d6 --- /dev/null +++ b/main_test.go @@ -0,0 +1,174 @@ +package main + +import ( + "context" + "encoding/json" + "io" + "net/http" + "net/http/httptest" + "testing" +) + +func intp(i int) *int { return &i } +func boolp(b bool) *bool { return &b } + +// sample /export rows: finding f1 reported by two models (one ungraded? 
no, +// graded real), f2 reported once and graded false-positive, f3 ungraded, plus a +// row for a different repo/pr to exercise filtering. +func sampleRows() []exportRow { + return []exportRow{ + {FindingID: "f1", Repo: "acme/widget", PR: 7, Lens: "security", File: "a.go", Line: 10, Title: "SQL injection", Model: "gpt-4o", Provider: "openai", Graded: true, IsReal: boolp(true), Severity: "high", Usefulness: intp(5), Grader: "claude"}, + {FindingID: "f1", Repo: "acme/widget", PR: 7, Lens: "security", File: "a.go", Line: 10, Title: "SQL injection", Model: "qwen2.5-coder:7b", Provider: "ollama", Graded: true, IsReal: boolp(true), Severity: "high", Usefulness: intp(5), Grader: "claude"}, + {FindingID: "f1", Repo: "acme/widget", PR: 7, Lens: "security", File: "a.go", Line: 10, Title: "SQL injection", Model: "gpt-4o", Provider: "openai", Graded: true, IsReal: boolp(true), Severity: "high", Usefulness: intp(5), Grader: "claude"}, // dup model -> deduped + {FindingID: "f2", Repo: "acme/widget", PR: 7, Lens: "correctness", File: "b.go", Line: 22, Title: "off by one", Model: "gpt-4o", Provider: "openai", Graded: true, IsReal: boolp(false), Grader: "claude"}, + {FindingID: "f3", Repo: "acme/widget", PR: 7, Lens: "performance", File: "c.go", Line: 3, Title: "n+1 query", Model: "qwen2.5-coder:7b", Provider: "ollama", Graded: false}, + {FindingID: "f4", Repo: "other/repo", PR: 99, Lens: "docs", File: "d.go", Line: 1, Title: "typo", Model: "gpt-4o", Provider: "openai", Graded: false}, + } +} + +func TestGroupFindings_GroupingAndDedup(t *testing.T) { + got := groupFindings(sampleRows(), "", nil, false) + if len(got) != 4 { + t.Fatalf("want 4 grouped findings, got %d", len(got)) + } + var f1 *findingOut + for i := range got { + if got[i].FindingID == "f1" { + f1 = &got[i] + } + } + if f1 == nil { + t.Fatal("f1 missing") + } + if len(f1.Models) != 2 { + t.Fatalf("f1 should have 2 distinct models, got %v", f1.Models) + } + if f1.Models[0] != "gpt-4o" || f1.Models[1] != 
"qwen2.5-coder:7b" { + t.Fatalf("f1 model order/dedup wrong: %v", f1.Models) + } + if !f1.Graded || f1.IsReal == nil || !*f1.IsReal || f1.Severity != "high" { + t.Fatalf("f1 grade not propagated: %+v", f1) + } +} + +func TestGroupFindings_FilterRepoAndPR(t *testing.T) { + got := groupFindings(sampleRows(), "other/repo", nil, false) + if len(got) != 1 || got[0].FindingID != "f4" { + t.Fatalf("repo filter failed: %+v", got) + } + + pr := 99 + got = groupFindings(sampleRows(), "", &pr, false) + if len(got) != 1 || got[0].FindingID != "f4" { + t.Fatalf("pr filter failed: %+v", got) + } + + pr = 7 + got = groupFindings(sampleRows(), "acme/widget", &pr, false) + if len(got) != 3 { + t.Fatalf("combined repo+pr filter want 3, got %d", len(got)) + } +} + +func TestGroupFindings_OnlyUngraded(t *testing.T) { + got := groupFindings(sampleRows(), "", nil, true) + ids := map[string]bool{} + for _, f := range got { + ids[f.FindingID] = true + if f.Graded { + t.Fatalf("only_ungraded returned a graded finding: %s", f.FindingID) + } + } + if !ids["f3"] || !ids["f4"] || ids["f1"] || ids["f2"] { + t.Fatalf("only_ungraded set wrong: %v", ids) + } +} + +func TestListFindings_EmptyModelsIsArray(t *testing.T) { + // A row with no model should still produce models:[] (not null) in JSON. 
+ srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + _ = json.NewEncoder(w).Encode([]exportRow{{FindingID: "x", Repo: "r", PR: 1, Lens: "l", Title: "t"}}) + })) + defer srv.Close() + + out, err := listFindings(context.Background(), newClient(srv.URL, ""), "", nil, false) + if err != nil { + t.Fatal(err) + } + var parsed []findingOut + if err := json.Unmarshal([]byte(out), &parsed); err != nil { + t.Fatalf("output not valid JSON: %v\n%s", err, out) + } + if len(parsed) != 1 || parsed[0].Models == nil { + t.Fatalf("expected models:[] non-nil, got %+v", parsed) + } +} + +func TestRecordGrade_PathBodyAndAuth(t *testing.T) { + var gotPath, gotAuth string + var gotBody gradeReq + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotPath = r.URL.Path + gotAuth = r.Header.Get("Authorization") + b, _ := io.ReadAll(r.Body) + _ = json.Unmarshal(b, &gotBody) + _ = json.NewEncoder(w).Encode(map[string]string{"finding_id": "abc123"}) + })) + defer srv.Close() + + c := newClient(srv.URL, "sekret") + msg, err := recordGrade(context.Background(), c, "abc123", gradeReq{IsReal: true, Severity: "high", Usefulness: intp(4)}) + if err != nil { + t.Fatal(err) + } + if gotPath != "/findings/abc123/grade" { + t.Fatalf("wrong path: %s", gotPath) + } + if gotAuth != "Bearer sekret" { + t.Fatalf("auth header not sent: %q", gotAuth) + } + if gotBody.Grader != "claude" { + t.Fatalf("grader should be forced to claude, got %q", gotBody.Grader) + } + if !gotBody.IsReal || gotBody.Severity != "high" || gotBody.Usefulness == nil || *gotBody.Usefulness != 4 { + t.Fatalf("body not forwarded correctly: %+v", gotBody) + } + if msg == "" { + t.Fatal("expected a confirmation message") + } +} + +func TestRecordGrade_StoreErrorSurfaced(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusBadRequest) + _ = 
json.NewEncoder(w).Encode(map[string]string{"error": "unknown finding_id"}) + })) + defer srv.Close() + + _, err := recordGrade(context.Background(), newClient(srv.URL, ""), "nope", gradeReq{IsReal: false}) + if err == nil { + t.Fatal("expected non-2xx to surface as an error") + } +} + +func TestScoreboard_FilterByModel(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + _ = json.NewEncoder(w).Encode([]modelStat{ + {Model: "gpt-4o", Provider: "openai", Runs: 3, Findings: 10}, + {Model: "qwen2.5-coder:7b", Provider: "ollama", Runs: 5, Findings: 4}, + }) + })) + defer srv.Close() + + out, err := scoreboard(context.Background(), newClient(srv.URL, ""), "gpt-4o") + if err != nil { + t.Fatal(err) + } + var parsed []modelStat + if err := json.Unmarshal([]byte(out), &parsed); err != nil { + t.Fatal(err) + } + if len(parsed) != 1 || parsed[0].Model != "gpt-4o" { + t.Fatalf("model filter failed: %+v", parsed) + } +}