ea9475da54
examples/reviewer proves the core is sufficient for a static-binary light host (gadfly's shape) with NO batteries:

- config.Env + model.Configure -> env-driven model fleet + tier overrides
- model.ParseModelForContext -> tier resolution + failover
- fanout.Run (PerKey caps) -> N models x M lenses swarm, per-provider bound
- model.GenerateWith[T] -> structured findings per (model, lens) cell
- Consolidate -> one verdict-led report section per model

Hermetic test runs the full 2x3 swarm against majordomo's fake provider and asserts the consolidated verdicts. A go list -deps CI check asserts the canary imports ZERO batteries (the light-tier invariant) — gadfly's go.sum stays free of gorm/redis/discordgo/sqlite. README + docs updated.

This is the canary; migrating the LIVE gadfly repo onto executus core is a follow-up (kept separate to not destabilize the active reviewer).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
192 lines
5.9 KiB
Go
192 lines
5.9 KiB
Go
// Command reviewer is executus's light-tier CANARY: a gadfly-shaped adversarial
// PR reviewer built on the executus CORE ONLY — no batteries, no DB, no host.
// It proves the core is sufficient for a static-binary host like gadfly:
//
//   - config.Env            → env-driven model fleet + concurrency (GADFLY_*-style)
//   - model.Configure/...   → tier resolution + failover over majordomo
//   - fanout.Run            → the N-models × M-lenses swarm, with per-provider caps
//   - model.GenerateWith[T] → structured findings per (model, lens)
//   - consolidation         → one report section per model, worst-verdict-led
//
// Beyond the standard library, the whole thing imports only executus core
// packages and majordomo's llm package, so a binary built from it keeps a
// go.sum free of gorm/redis/discordgo/sqlite — the light-tier invariant.
//
// See reviewer_test.go for the hermetic swarm test (majordomo's fake provider).
package main

import (
	"context"
	"fmt"
	"sort"
	"strings"

	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"

	"gitea.stevedudenhoeffer.com/steve/executus/fanout"
	"gitea.stevedudenhoeffer.com/steve/executus/model"
)

// Severity is the impact level attached to a Finding. severityRank orders the
// levels, and that rank drives a model's worst-verdict header.
type Severity string

// Severity levels, listed from least to most severe.
const (
	SevTrivial  Severity = "trivial"
	SevSmall    Severity = "small"
	SevMedium   Severity = "medium"
	SevHigh     Severity = "high"
	SevCritical Severity = "critical"
)

// severityRank maps a Severity to an integer so severities can be compared:
// a higher rank is more severe (SevCritical is 4, down to SevSmall at 1).
// SevTrivial and any value that is not one of the declared levels rank 0.
func severityRank(s Severity) int {
	switch s {
	case SevCritical:
		return 4
	case SevHigh:
		return 3
	case SevMedium:
		return 2
	case SevSmall:
		return 1
	default:
		return 0
	}
}

// Finding is one issue a lens reports. It is the structured-output schema the
|
||
// model must satisfy (majordomo derives the JSON schema from this struct).
|
||
type Finding struct {
|
||
Severity Severity `json:"severity" jsonschema:"enum=trivial,enum=small,enum=medium,enum=high,enum=critical"`
|
||
Title string `json:"title"`
|
||
Detail string `json:"detail"`
|
||
}
|
||
|
||
// lensReport is the per-(model,lens) structured response.
|
||
type lensReport struct {
|
||
Findings []Finding `json:"findings"`
|
||
}
|
||
|
||
// Lens is one review dimension (security / correctness / …).
type Lens struct {
	Name  string // label recorded on each LensResult and shown in the lens prompt
	Focus string // appended to the base system prompt; an empty Focus adds nothing
}

// NamedModel is a resolved model plus the label + provider used for fan-out
|
||
// keying (per-provider concurrency) and reporting.
|
||
type NamedModel struct {
|
||
Name string // display label (the tier/spec the host configured)
|
||
Provider string // fan-out key for PerKey concurrency (e.g. "ollama-cloud")
|
||
Model llm.Model
|
||
}
|
||
|
||
// LensResult is one swarm cell's outcome.
|
||
type LensResult struct {
|
||
Model string
|
||
Lens string
|
||
Findings []Finding
|
||
Err error
|
||
}
|
||
|
||
// baseSystemPrompt is the system prompt shared by every swarm cell; Review
// appends the lens name and focus to it when the lens has a Focus.
const baseSystemPrompt = "You are an adversarial code reviewer. Review the diff for real, verifiable problems only — no style nits. Return ONLY JSON matching the schema. Report nothing if you find nothing."

// Review runs every (model × lens) cell of the swarm concurrently, bounded by
|
||
// opts (total + per-provider caps), and returns one LensResult per cell. A cell
|
||
// whose model call fails carries the error in LensResult.Err — one bad cell
|
||
// never aborts the swarm (fanout captures per-item errors).
|
||
func Review(ctx context.Context, models []NamedModel, lenses []Lens, diff string, opts fanout.Options[cell]) []LensResult {
|
||
cells := make([]cell, 0, len(models)*len(lenses))
|
||
for _, m := range models {
|
||
for _, l := range lenses {
|
||
cells = append(cells, cell{model: m, lens: l})
|
||
}
|
||
}
|
||
// Key each cell by its provider so PerKey throttles per backend (the
|
||
// GADFLY_PROVIDER_CONCURRENCY analogue).
|
||
if opts.Key == nil {
|
||
opts.Key = func(c cell) string { return c.model.Provider }
|
||
}
|
||
results := fanout.Run(ctx, cells, opts, func(ctx context.Context, c cell) (LensResult, error) {
|
||
sys := baseSystemPrompt
|
||
if c.lens.Focus != "" {
|
||
sys += "\n\nLens — " + c.lens.Name + ": " + c.lens.Focus
|
||
}
|
||
msgs := []llm.Message{{Role: llm.RoleUser, Parts: []llm.Part{llm.Text("Diff under review:\n" + diff)}}}
|
||
rep, err := model.GenerateWith[lensReport](ctx, c.model.Model, sys, msgs)
|
||
lr := LensResult{Model: c.model.Name, Lens: c.lens.Name, Findings: rep.Findings, Err: err}
|
||
// Return the value either way (err embedded) so every cell reports.
|
||
return lr, nil
|
||
})
|
||
out := make([]LensResult, 0, len(results))
|
||
for _, r := range results {
|
||
if r.Err != nil { // a swarm-level error (ctx cancel) with no value
|
||
out = append(out, LensResult{Err: r.Err})
|
||
continue
|
||
}
|
||
out = append(out, r.Value)
|
||
}
|
||
return out
|
||
}
|
||
|
||
// cell is one (model, lens) swarm task.
|
||
type cell struct {
|
||
model NamedModel
|
||
lens Lens
|
||
}
|
||
|
||
// Consolidate renders the swarm's results into one report: a section per model,
|
||
// each led by that model's worst finding severity, mirroring gadfly's
|
||
// one-comment-per-model output.
|
||
func Consolidate(results []LensResult) string {
|
||
byModel := map[string][]LensResult{}
|
||
var order []string
|
||
for _, r := range results {
|
||
if r.Model == "" {
|
||
continue
|
||
}
|
||
if _, ok := byModel[r.Model]; !ok {
|
||
order = append(order, r.Model)
|
||
}
|
||
byModel[r.Model] = append(byModel[r.Model], r)
|
||
}
|
||
sort.Strings(order)
|
||
|
||
var b strings.Builder
|
||
for _, m := range order {
|
||
rs := byModel[m]
|
||
var all []Finding
|
||
worst := -1
|
||
errored := 0
|
||
for _, r := range rs {
|
||
if r.Err != nil {
|
||
errored++
|
||
continue
|
||
}
|
||
all = append(all, r.Findings...)
|
||
for _, f := range r.Findings {
|
||
if severityRank(f.Severity) > worst {
|
||
worst = severityRank(f.Severity)
|
||
}
|
||
}
|
||
}
|
||
verdict := "no issues found"
|
||
if worst >= severityRank(SevHigh) {
|
||
verdict = "blocking issues found"
|
||
} else if worst >= 0 {
|
||
verdict = "minor issues"
|
||
}
|
||
fmt.Fprintf(&b, "## %s — %s", m, verdict)
|
||
if errored > 0 {
|
||
fmt.Fprintf(&b, " (⚠ %d lens(es) errored)", errored)
|
||
}
|
||
b.WriteString("\n")
|
||
sort.SliceStable(all, func(i, j int) bool {
|
||
return severityRank(all[i].Severity) > severityRank(all[j].Severity)
|
||
})
|
||
for _, f := range all {
|
||
fmt.Fprintf(&b, "- [%s] %s — %s\n", f.Severity, f.Title, f.Detail)
|
||
}
|
||
b.WriteString("\n")
|
||
}
|
||
return b.String()
|
||
}
|