executus/examples/reviewer/reviewer.go

// Command reviewer is executus's light-tier CANARY: a gadfly-shaped adversarial
// PR reviewer built on the executus CORE ONLY — no batteries, no DB, no host.
// It proves the core is sufficient for a static-binary host like gadfly:
//
//   - config.Env            → env-driven model fleet + concurrency (GADFLY_*-style)
//   - model.Configure/...   → tier resolution + failover over majordomo
//   - fanout.Run            → the N-models × M-lenses swarm, with per-provider caps
//   - model.GenerateWith[T] → structured findings per (model, lens)
//   - consolidation         → one report section per model, worst-verdict-led
//
// The whole thing imports only executus core packages, so a binary built from it
// keeps a go.sum free of gorm/redis/discordgo/sqlite — the light-tier invariant.
//
// See reviewer_test.go for the hermetic swarm test (majordomo's fake provider).
package main

import (
	"context"
	"fmt"
	"sort"
	"strings"

	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"

	"gitea.stevedudenhoeffer.com/steve/executus/fanout"
	"gitea.stevedudenhoeffer.com/steve/executus/model"
)

// Severity is the string label attached to a finding. severityRank maps each
// label to an integer, and Consolidate uses that rank both to choose a model's
// verdict header and to order the model's findings.
type Severity string

// The severity labels, listed from least to most severe. severityRank assigns
// them ranks 0 (trivial) through 4 (critical); the string values are the same
// ones listed in the jsonschema enum tag on Finding.Severity.
const (
	SevTrivial  Severity = "trivial"
	SevSmall    Severity = "small"
	SevMedium   Severity = "medium"
	SevHigh     Severity = "high"
	SevCritical Severity = "critical"
)

// severityRank converts a severity label into a comparable integer: trivial is
// 0, small 1, medium 2, high 3 and critical 4. A label that is not one of the
// Sev* constants also ranks 0, i.e. it is treated the same as trivial.
func severityRank(s Severity) int {
	// Ordered from least to most severe, so a label's index is its rank.
	ladder := [...]Severity{SevTrivial, SevSmall, SevMedium, SevHigh, SevCritical}
	for rank, label := range ladder {
		if label == s {
			return rank
		}
	}
	return 0
}

// Finding is one issue a lens reports. It is the structured-output schema the
// model must satisfy (majordomo derives the JSON schema from this struct).
// Consolidate renders each finding as a "- [severity] title — detail" bullet.
type Finding struct {
	// Severity is the finding's label; the jsonschema tag restricts it to the
	// five Sev* values.
	Severity Severity `json:"severity" jsonschema:"enum=trivial,enum=small,enum=medium,enum=high,enum=critical"`
	// Title is the short headline printed right after the severity tag.
	Title    string   `json:"title"`
	// Detail is the explanation printed after the title.
	Detail   string   `json:"detail"`
}

// lensReport is the per-(model,lens) structured response. Review passes it as
// the type parameter to model.GenerateWith and copies its Findings into the
// cell's LensResult.
type lensReport struct {
	// Findings is the list of issues the model reported for this lens.
	Findings []Finding `json:"findings"`
}

// Lens is one review dimension (security / correctness / …).
type Lens struct {
	// Name labels the lens; Review copies it into LensResult.Lens and, when
	// Focus is set, includes it in the system prompt.
	Name  string
	// Focus is appended to the base system prompt as "Lens — <Name>: <Focus>".
	// When it is empty, Review sends the base prompt unchanged.
	Focus string // appended to the base system prompt
}

// NamedModel is a resolved model plus the label + provider used for fan-out
// keying (per-provider concurrency) and reporting.
type NamedModel struct {
	// Name is copied into LensResult.Model by Review; Consolidate groups and
	// titles report sections by it.
	Name     string // display label (the tier/spec the host configured)
	// Provider is what Review's default fan-out key function returns for a
	// cell using this model.
	Provider string // fan-out key for PerKey concurrency (e.g. "ollama-cloud")
	// Model is the handle Review passes to model.GenerateWith.
	Model    llm.Model
}

// LensResult is one swarm cell's outcome.
type LensResult struct {
	// Model is the NamedModel.Name of the cell. Review leaves it empty on the
	// results it synthesizes for swarm-level errors.
	Model    string
	// Lens is the Lens.Name of the cell; also empty for swarm-level errors.
	Lens     string
	// Findings holds whatever the model call reported for this cell.
	Findings []Finding
	// Err is the model-call error, or the swarm-level error reported by
	// fanout.Run. Consolidate ignores the Findings of a result whose Err is
	// non-nil.
	Err      error
}

// baseSystemPrompt is the system prompt Review sends for every cell. When a
// lens has a non-empty Focus, Review appends a "Lens — <name>: <focus>"
// paragraph to it.
const baseSystemPrompt = "You are an adversarial code reviewer. Review the diff for real, verifiable problems only — no style nits. Return ONLY JSON matching the schema. Report nothing if you find nothing."

// Review fans the diff out to every (model × lens) pairing through fanout.Run
// and returns what each pairing produced.
//
// Cells are built model-major (all lenses of the first model, then the next
// model, …). Concurrency is governed by opts; when opts.Key is nil, cells are
// keyed by their model's Provider so per-key limits apply per backend (the
// GADFLY_PROVIDER_CONCURRENCY analogue).
//
// The per-cell callback never returns an error: a failed model call is stored
// in that cell's LensResult.Err, so one bad cell does not abort the others.
// When fanout.Run itself reports an error for an item (e.g. ctx cancellation),
// the corresponding entry carries only Err — its Model and Lens are empty.
func Review(ctx context.Context, models []NamedModel, lenses []Lens, diff string, opts fanout.Options[cell]) []LensResult {
	if opts.Key == nil {
		opts.Key = func(c cell) string { return c.model.Provider }
	}

	swarm := make([]cell, 0, len(models)*len(lenses))
	for i := range models {
		for j := range lenses {
			swarm = append(swarm, cell{model: models[i], lens: lenses[j]})
		}
	}

	// Every cell sends the same user message; only the system prompt varies.
	userText := "Diff under review:\n" + diff

	reviewCell := func(ctx context.Context, c cell) (LensResult, error) {
		prompt := baseSystemPrompt
		if c.lens.Focus != "" {
			prompt = prompt + "\n\nLens — " + c.lens.Name + ": " + c.lens.Focus
		}
		conversation := []llm.Message{{
			Role:  llm.RoleUser,
			Parts: []llm.Part{llm.Text(userText)},
		}}
		report, callErr := model.GenerateWith[lensReport](ctx, c.model.Model, prompt, conversation)
		// The call error travels inside the value so that every cell reports.
		return LensResult{
			Model:    c.model.Name,
			Lens:     c.lens.Name,
			Findings: report.Findings,
			Err:      callErr,
		}, nil
	}

	outcomes := fanout.Run(ctx, swarm, opts, reviewCell)

	collected := make([]LensResult, 0, len(outcomes))
	for _, o := range outcomes {
		if o.Err == nil {
			collected = append(collected, o.Value)
			continue
		}
		// Swarm-level failure: there is no value, so only the error is kept.
		collected = append(collected, LensResult{Err: o.Err})
	}
	return collected
}

// cell is one (model, lens) swarm task: the item type Review hands to
// fanout.Run and the argument of the fan-out key function.
type cell struct {
	model NamedModel // model to query; its Provider is the default fan-out key
	lens  Lens       // review dimension applied to the diff
}

// Consolidate renders the swarm's results into one Markdown report: a "## "
// section per model, sorted by model name, each headed by a verdict derived
// from that model's worst finding severity and followed by its findings from
// most to least severe (findings of equal severity keep their input order).
// This mirrors gadfly's one-comment-per-model output.
//
// Verdicts:
//   - "blocking issues found": at least one finding ranks high or above.
//   - "minor issues": there are findings, but none ranks high or above.
//   - "no issues found": at least one lens succeeded and nothing was reported.
//   - "review failed": every lens for the model errored, so the absence of
//     findings says nothing about the diff.
//
// Results whose Err is non-nil contribute no findings; their count is appended
// to the model's header. Results with an empty Model cannot be attributed to a
// section: those carrying an error are tallied in a trailing "(swarm)" section
// so the failures stay visible, and those without an error are ignored.
// An empty input yields the empty string.
func Consolidate(results []LensResult) string {
	byModel := make(map[string][]LensResult)
	unattributed := 0
	for _, r := range results {
		if r.Model == "" {
			if r.Err != nil {
				unattributed++
			}
			continue
		}
		byModel[r.Model] = append(byModel[r.Model], r)
	}

	// Map iteration order is random; sorting makes the report deterministic.
	names := make([]string, 0, len(byModel))
	for name := range byModel {
		names = append(names, name)
	}
	sort.Strings(names)

	var b strings.Builder
	for _, name := range names {
		cells := byModel[name]
		var findings []Finding
		worst := -1 // -1 means no finding seen; real ranks start at 0
		errored := 0
		for _, r := range cells {
			if r.Err != nil {
				errored++
				continue
			}
			for _, f := range r.Findings {
				findings = append(findings, f)
				if rank := severityRank(f.Severity); rank > worst {
					worst = rank
				}
			}
		}

		var verdict string
		switch {
		case worst >= severityRank(SevHigh):
			verdict = "blocking issues found"
		case worst >= 0:
			verdict = "minor issues"
		case errored == len(cells):
			// No lens produced an answer; claiming "no issues" would be false.
			verdict = "review failed"
		default:
			verdict = "no issues found"
		}

		fmt.Fprintf(&b, "## %s — %s", name, verdict)
		if errored > 0 {
			fmt.Fprintf(&b, " (⚠ %d lens(es) errored)", errored)
		}
		b.WriteString("\n")

		sort.SliceStable(findings, func(i, j int) bool {
			return severityRank(findings[i].Severity) > severityRank(findings[j].Severity)
		})
		for _, f := range findings {
			fmt.Fprintf(&b, "- [%s] %s — %s\n", f.Severity, f.Title, f.Detail)
		}
		b.WriteString("\n")
	}

	if unattributed > 0 {
		fmt.Fprintf(&b, "## (swarm) — ⚠ %d cell(s) failed before producing a result\n\n", unattributed)
	}
	return b.String()
}