From 4b8f9aa39bede61fa31d9e01c9534fc88608dae9 Mon Sep 17 00:00:00 2001 From: Steve Dudenhoeffer Date: Thu, 25 Jun 2026 19:35:59 -0400 Subject: [PATCH] feat: dynamic `auto` specialist selection + worker-tier delegation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two Phase-2 swarm upgrades: - auto.go: GADFLY_SPECIALISTS=auto routes the review — a selector model (GADFLY_SELECTOR_MODEL, else the review model) reads the changed files + PR description and picks the smallest relevant lens set from the catalog, and may propose ad-hoc lenses for gaps (e.g. migrations). Structured output via majordomo.Generate[T]; capped + de-duped; falls back to the default suite. - delegate.go: GADFLY_WORKER_MODEL adds a delegate_investigation tool so the reviewer offloads mechanical legwork (trace callers, gather usages) to a cheap worker sub-agent that returns an evidence-cited digest — the top model reasons over summaries, not raw file dumps. Workers get an fs-only toolbox (no sub-delegation). Unset = off. resolveSpecialists now also returns the registry + an auto flag. Docs (README Specialists + config table, CLAUDE.md, main.go header) + tests updated. Co-Authored-By: Claude Opus 4.8 (1M context) --- CLAUDE.md | 9 +- README.md | 20 ++++- cmd/gadfly/auto.go | 148 +++++++++++++++++++++++++++++++++ cmd/gadfly/delegate.go | 65 +++++++++++++++ cmd/gadfly/main.go | 36 +++++++- cmd/gadfly/model.go | 15 ++++ cmd/gadfly/specialists.go | 42 ++++++++-- cmd/gadfly/specialists_test.go | 27 ++++-- cmd/gadfly/tools.go | 34 ++++++-- 9 files changed, 370 insertions(+), 26 deletions(-) create mode 100644 cmd/gadfly/auto.go create mode 100644 cmd/gadfly/delegate.go diff --git a/CLAUDE.md b/CLAUDE.md index 25cfa70..29b4160 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -31,8 +31,10 @@ verifies each one against the actual code, and posts its findings as a comment. cmd/gadfly/ the reviewer binary — pure producer of review markdown (stdout) main.go orchestration: loop specialists, each a review pass + adversarial recheck specialists.go specialist lenses: built-ins, default suite, env + .gadfly.yml resolution + auto.go dynamic `auto` selection: a selector model picks lenses per-diff (may invent) + delegate.go worker-tier delegate_investigation tool (cheap sub-agent does legwork) consolidate.go verdict parsing + one-comment consolidation (a section per specialist) - model.go provider/model resolution (majordomo.Parse) + env endpoint aliases + model.go provider/model + selector + worker resolution (majordomo.Parse) + endpoint aliases tools.go the 5 read-only repo tools (read_file/list_dir/grep/find_files/get_diff) recheck.go second-pass verification prompt + verdict recompute *_test.go sandbox, recheck, wrap-up, spec/endpoint-parse, specialist-resolution tests @@ -92,7 +94,10 @@ error-handling; opt-in built-ins = tests/docs/conventions/improvements. Select v `GADFLY_SPECIALISTS` (csv or `all`); define/override via `GADFLY_SPECIALIST_` env or a repo `.gadfly.yml` (`specialists:` + `define:`). See `cmd/gadfly/specialists.go`. Cost ≈ specialists × models × 2 passes — keep the default model count low (entrypoint defaults to one). -Dynamic `auto` selection (a cheap model picks lenses per-diff) is the planned next step. +**Dynamic `auto`** (`GADFLY_SPECIALISTS=auto`): a selector (`GADFLY_SELECTOR_MODEL` or the review +model) picks lenses per-diff and may invent ad-hoc ones (`cmd/gadfly/auto.go`). **Worker-tier** +(`GADFLY_WORKER_MODEL`): a `delegate_investigation` tool offloads grep/read legwork to a cheap +sub-agent (`cmd/gadfly/delegate.go`). **Tested vs untested:** only the Ollama paths (local + OpenAI-compatible pointed at Ollama) are actually exercised. OpenAI/Anthropic/Google come from majordomo's abstraction and are diff --git a/README.md b/README.md index 226bacf..6bccdfb 100644 --- a/README.md +++ b/README.md @@ -138,9 +138,21 @@ define: focus: "Review schema migrations for destructive ops, missing indexes, table locks." ``` +**Dynamic selection (`auto`):** set `GADFLY_SPECIALISTS: auto` and a selector model reads the +changed files + PR description and picks only the lenses that materially apply (and may invent +an ad-hoc one — e.g. a "migrations" lens for a schema change). The selector is +`GADFLY_SELECTOR_MODEL` if set (a cheap tier is ideal), else the review model. Capped and +de-duplicated; falls back to the default suite if selection fails. + +**Worker-tier delegation:** set `GADFLY_WORKER_MODEL` (a cheap/fast model) to give every +reviewer a `delegate_investigation` tool — it offloads mechanical legwork (trace all callers, +gather every usage, check a pattern across files) to a worker sub-agent that returns a concise, +evidence-cited digest, so the expensive model reasons over summaries instead of raw file dumps. +Unset = no delegation (current behavior). + > **Cost:** each specialist is its own review+recheck, so cost ≈ *specialists × models × 2*. -> The default suite runs on a **single** model. Trim with `GADFLY_SPECIALISTS`, and a future -> `auto` mode will let a cheap model pick only the lenses a given diff actually needs. +> The default suite runs on a **single** model. Trim with `GADFLY_SPECIALISTS`, let `auto` pick +> only what a diff needs, and point heavy legwork at a cheap `GADFLY_WORKER_MODEL`. ### Triggers @@ -178,6 +190,10 @@ The reviewer binary reads these (the stub/entrypoint set sane defaults): | `GADFLY_PROVIDER` | `ollama-cloud` | provider prefix for a bare model id | | `GADFLY_BASE_URL` | — | override endpoint (OpenAI/Ollama-compatible servers) | | `GADFLY_API_KEY` | — | provider key; falls back to the provider's standard env | +| `GADFLY_SPECIALISTS` | default suite | csv of lenses, `all`, or `auto` (dynamic selection) | +| `GADFLY_SELECTOR_MODEL` | review model | model that picks lenses in `auto` mode | +| `GADFLY_WORKER_MODEL` | — | cheap model for `delegate_investigation`; unset = no delegation | +| `GADFLY_WORKER_MAX_STEPS` | 8 | tool-step cap for a delegated worker run | | `GADFLY_MAX_STEPS` | 24 | review-pass tool-step cap | | `GADFLY_RECHECK` | on | set `0`/`false` to skip the recheck pass | | `GADFLY_RECHECK_MAX_STEPS` | 16 | recheck-pass step cap | diff --git a/cmd/gadfly/auto.go b/cmd/gadfly/auto.go new file mode 100644 index 0000000..019f36b --- /dev/null +++ b/cmd/gadfly/auto.go @@ -0,0 +1,148 @@ +package main + +import ( + "context" + "fmt" + "os" + "sort" + "strings" + + "gitea.stevedudenhoeffer.com/steve/majordomo" + llm "gitea.stevedudenhoeffer.com/steve/majordomo/llm" +) + +// maxAutoSpecialists bounds how many lenses dynamic selection may run, so a +// runaway selector can't blow up cost. +const maxAutoSpecialists = 6 + +// autoCustom is an ad-hoc specialist the selector may propose when the change +// needs a lens the catalog lacks (e.g. DB migrations, i18n). +type autoCustom struct { + Name string `json:"name" description:"short lowercase id, e.g. migrations"` + Title string `json:"title" description:"human label for the section, e.g. 🗃️ DB migrations"` + Focus string `json:"focus" description:"one or two sentences telling the reviewer what to look for"` +} + +// autoSelection is the structured result the selector returns. +type autoSelection struct { + Specialists []string `json:"specialists" description:"names of EXISTING catalog specialists that materially apply to this change"` + Custom []autoCustom `json:"custom" description:"ad-hoc specialists to add ONLY when the catalog lacks a clearly-needed lens; usually empty"` +} + +// resolveSelectorModel returns the model that picks specialists in auto mode: +// GADFLY_SELECTOR_MODEL (a cheap tier is ideal) if set, else the review model. +func resolveSelectorModel(fallback llm.Model) (llm.Model, error) { + spec := strings.TrimSpace(os.Getenv("GADFLY_SELECTOR_MODEL")) + if spec == "" { + return fallback, nil + } + provider := strings.TrimSpace(os.Getenv("GADFLY_PROVIDER")) + if provider == "" { + provider = defaultProvider + } + return majordomo.Parse(buildSpec(provider, spec)) +} + +// autoSelectSpecialists asks the selector model which lenses a given diff needs. +// It picks from the registry and may invent a few ad-hoc lenses, capped and +// de-duplicated. On any failure it returns an error and the caller falls back +// to the default suite. +func autoSelectSpecialists(ctx context.Context, selector llm.Model, title, body, diff string, registry map[string]Specialist) ([]Specialist, error) { + system := "You are the router for an adversarial code review. Given a pull request's " + + "changed files and description plus a catalog of available review specialists (lenses), " + + "choose the SMALLEST set that materially applies to THIS change — usually 2 to 4. Skip " + + "lenses the diff doesn't touch (e.g. no 'performance' for a docs-only change). Strongly " + + "prefer existing catalog lenses; only propose a custom one when the change clearly needs a " + + "lens the catalog lacks (e.g. database migrations, i18n, build/CI). Be conservative." + + user := fmt.Sprintf("PR title: %s\n\nPR description:\n%s\n\nChanged files:\n%s\n\nAvailable specialists (name — focus):\n%s\n\nSelect the lenses to run.", + strings.TrimSpace(title), strings.TrimSpace(body), changedFilesList(diff), catalog(registry)) + + sel, err := majordomo.Generate[autoSelection](ctx, selector, majordomo.Request{ + System: system, + Messages: []llm.Message{llm.UserText(user)}, + }) + if err != nil { + return nil, fmt.Errorf("auto-select: %w", err) + } + + var out []Specialist + seen := map[string]bool{} + add := func(sp Specialist) { + if sp.Name == "" || seen[sp.Name] || len(out) >= maxAutoSpecialists { + return + } + seen[sp.Name] = true + out = append(out, sp) + } + for _, name := range sel.Specialists { + if sp, ok := registry[strings.ToLower(strings.TrimSpace(name))]; ok { + add(sp) + } + } + for _, c := range sel.Custom { + name := strings.ToLower(strings.TrimSpace(c.Name)) + if name == "" || strings.TrimSpace(c.Focus) == "" { + continue + } + if sp, ok := registry[name]; ok { // prefer the catalog definition if it exists + add(sp) + continue + } + add(Specialist{Name: name, Title: titleOr(c.Title, name), Focus: c.Focus}) + } + + if len(out) == 0 { + return nil, fmt.Errorf("auto-select returned no usable specialists") + } + return out, nil +} + +// catalog renders the registry as a compact name — focus list for the selector. +func catalog(registry map[string]Specialist) string { + names := make([]string, 0, len(registry)) + for k := range registry { + names = append(names, k) + } + sort.Strings(names) + var b strings.Builder + for _, n := range names { + fmt.Fprintf(&b, "- %s — %s\n", n, firstSentence(registry[n].Focus)) + } + return b.String() +} + +// changedFilesList extracts the changed file paths from a unified diff. +func changedFilesList(diff string) string { + var files []string + seen := map[string]bool{} + for _, line := range strings.Split(diff, "\n") { + if !strings.HasPrefix(line, "+++ ") && !strings.HasPrefix(line, "--- ") { + continue + } + p := strings.TrimSpace(line[4:]) + if p == "/dev/null" { + continue + } + p = strings.TrimPrefix(strings.TrimPrefix(p, "a/"), "b/") + if p != "" && !seen[p] { + seen[p] = true + files = append(files, p) + } + } + if len(files) == 0 { + return "(could not parse file list; review the whole diff)" + } + return "- " + strings.Join(files, "\n- ") +} + +func firstSentence(s string) string { + s = strings.TrimSpace(s) + if i := strings.IndexByte(s, '.'); i > 0 && i < 140 { + return s[:i] + } + if len(s) > 140 { + return s[:140] + "…" + } + return s +} diff --git a/cmd/gadfly/delegate.go b/cmd/gadfly/delegate.go new file mode 100644 index 0000000..63e7900 --- /dev/null +++ b/cmd/gadfly/delegate.go @@ -0,0 +1,65 @@ +package main + +import ( + "context" + "fmt" + "strings" + + "gitea.stevedudenhoeffer.com/steve/majordomo/agent" + llm "gitea.stevedudenhoeffer.com/steve/majordomo/llm" +) + +// defaultWorkerMaxSteps bounds a delegated worker run. +const defaultWorkerMaxSteps = 8 + +// workerSystemPrompt drives a delegated investigation sub-agent. It does the +// grunt work (grep/read) and returns a tight, evidence-cited digest so the +// top reviewer reasons over a summary instead of raw file dumps. +const workerSystemPrompt = "You are a fast code investigator. Use the read-only tools " + + "(read_file, grep, find_files, list_dir, get_diff) to answer the SPECIFIC question you are " + + "given about this repository — nothing more. Be precise and concise: report concrete findings " + + "with `path:line` evidence, quote only the few lines that matter, and state plainly when " + + "something is absent. Do NOT speculate or pad. When you have the answer, stop and report it." + +type delegateArgs struct { + Task string `json:"task" description:"A specific investigation to carry out by reading/grepping the repo, e.g. 'find every caller of parseAmount and list which ones don't check the error'. Ask for a concrete answer, not an opinion."` +} + +// delegateTool lets the reviewer offload mechanical investigation to a (cheaper) +// worker model. Present only when GADFLY_WORKER_MODEL is configured. +func (r *repoFS) delegateTool() llm.Tool { + return llm.DefineTool[delegateArgs]( + "delegate_investigation", + "Delegate focused legwork to a fast worker agent that reads/greps this repo and reports back "+ + "a concise, evidence-cited digest. Use it to offload mechanical investigation (tracing all "+ + "callers, gathering every usage of a symbol, checking a pattern across many files) so you can "+ + "reason over the summary instead of pulling raw files into your own context.", + func(ctx context.Context, args delegateArgs) (any, error) { + task := strings.TrimSpace(args.Task) + if task == "" { + return nil, fmt.Errorf("task is required") + } + box, err := r.workerToolbox() + if err != nil { + return nil, err + } + w := agent.New(r.worker, workerSystemPrompt, + agent.WithToolbox(box), + agent.WithMaxSteps(envInt("GADFLY_WORKER_MAX_STEPS", defaultWorkerMaxSteps)), + agent.WithToolErrorLimits(3, 3), + ) + res, runErr := w.Run(ctx, task) + out := "" + if res != nil { + out = strings.TrimSpace(res.Output) + } + if out == "" { + if runErr != nil { + return nil, fmt.Errorf("worker investigation failed: %w", runErr) + } + return "worker returned no findings", nil + } + return out, nil + }, + ) +} diff --git a/cmd/gadfly/main.go b/cmd/gadfly/main.go index 7adc073..e61fb34 100644 --- a/cmd/gadfly/main.go +++ b/cmd/gadfly/main.go @@ -30,6 +30,9 @@ // GADFLY_API_KEY provider key; optional — falls back to the provider's // standard env (OLLAMA_API_KEY / OPENAI_API_KEY / // ANTHROPIC_API_KEY / GOOGLE_API_KEY|GEMINI_API_KEY). +// GADFLY_SPECIALISTS csv of review lenses, "all", or "auto" (see specialists.go). +// GADFLY_SELECTOR_MODEL model that picks lenses in "auto" mode (default: review model). +// GADFLY_WORKER_MODEL cheap model for the delegate_investigation tool (unset = off). // GADFLY_REPO_DIR path to the checked-out repo (required; the FS sandbox root). // GADFLY_DIFF_FILE path to a file holding the full unified diff (required). // GADFLY_SYSTEM_FILE path to the reviewer system prompt (required). @@ -132,18 +135,43 @@ func run() error { return fmt.Errorf("resolve model: %w", err) } - specialists, serrs := resolveSpecialists(repoDir) + // Optional cheap worker for delegate_investigation. Non-fatal: a bad worker + // spec just disables delegation rather than sinking the review. + if worker, werr := resolveWorkerModel(); werr != nil { + fmt.Fprintln(os.Stderr, "gadfly: worker model disabled:", werr) + } else if worker != nil { + fsTools.worker = worker + } + + specialists, registry, auto, serrs := resolveSpecialists(repoDir) for _, e := range serrs { fmt.Fprintln(os.Stderr, "gadfly:", e) } - if len(specialists) == 0 { - return errors.New("no specialists resolved to run") - } timeout := time.Duration(envInt("GADFLY_TIMEOUT_SECS", defaultTimeoutSecs)) * time.Second ctx, cancel := context.WithTimeout(context.Background(), timeout) defer cancel() + // Dynamic selection: a (cheap) model picks the lenses this diff needs. + if auto { + selector, serr := resolveSelectorModel(mdl) + if serr != nil { + return fmt.Errorf("resolve selector model: %w", serr) + } + picked, aerr := autoSelectSpecialists(ctx, selector, os.Getenv("GADFLY_TITLE"), os.Getenv("GADFLY_BODY"), diff, registry) + if aerr != nil { + fmt.Fprintln(os.Stderr, "gadfly: auto-select failed; falling back to the default suite:", aerr) + specialists = suiteFromRegistry(registry, defaultSuite) + } else { + specialists = picked + fmt.Fprintln(os.Stderr, "gadfly: auto-selected specialists:", specialistNamesOf(specialists)) + } + } + + if len(specialists) == 0 { + return errors.New("no specialists resolved to run") + } + base := string(systemBytes) task := buildTask(diff) results := make([]specialistResult, 0, len(specialists)) diff --git a/cmd/gadfly/model.go b/cmd/gadfly/model.go index b3f9e41..097baf1 100644 --- a/cmd/gadfly/model.go +++ b/cmd/gadfly/model.go @@ -96,6 +96,21 @@ func resolveModel() (llm.Model, error) { } } +// resolveWorkerModel builds the optional worker model for delegate_investigation +// from GADFLY_WORKER_MODEL (a cheap tier is ideal). Returns (nil, nil) when +// unset — delegation is simply off. Honors GADFLY_PROVIDER for a bare id. +func resolveWorkerModel() (llm.Model, error) { + spec := strings.TrimSpace(os.Getenv("GADFLY_WORKER_MODEL")) + if spec == "" { + return nil, nil + } + provider := strings.TrimSpace(os.Getenv("GADFLY_PROVIDER")) + if provider == "" { + provider = defaultProvider + } + return majordomo.Parse(buildSpec(provider, spec)) +} + // buildSpec turns (provider, model) into a majordomo spec. A model id that // already carries a "provider/" prefix (or is a multi-element failover chain) // is passed through verbatim; a bare id is prefixed with the provider. diff --git a/cmd/gadfly/specialists.go b/cmd/gadfly/specialists.go index 9c8a002..f7b58e4 100644 --- a/cmd/gadfly/specialists.go +++ b/cmd/gadfly/specialists.go @@ -80,14 +80,17 @@ type fileConfig struct { // resolveSpecialists builds the active specialist list from (in increasing // precedence) the built-ins, a repo .gadfly.yml, and GADFLY_SPECIALIST_ -// env definitions. The active SELECTION is GADFLY_SPECIALISTS (csv or "all"), -// else the file's `specialists:`, else the default suite. Unknown names are -// reported as errors and skipped; valid ones still run. -func resolveSpecialists(repoDir string) ([]Specialist, []error) { - var errs []error +// env definitions. The active SELECTION is GADFLY_SPECIALISTS (csv, "all", or +// "auto"), else the file's `specialists:`, else the default suite. Unknown +// names are reported as errors and skipped; valid ones still run. +// +// It also returns the full registry (the catalog of every defined specialist) +// and an auto flag: when the selection is "auto", specs is nil and the caller +// runs the dynamic selector (see auto.go) against the registry instead. +func resolveSpecialists(repoDir string) (specs []Specialist, registry map[string]Specialist, auto bool, errs []error) { // 1. registry seeded with built-ins. - registry := make(map[string]Specialist, len(builtinSpecialists)) + registry = make(map[string]Specialist, len(builtinSpecialists)) for k, v := range builtinSpecialists { registry[k] = v } @@ -137,6 +140,10 @@ func resolveSpecialists(repoDir string) ([]Specialist, []error) { default: names = defaultSuite } + if len(names) == 1 && strings.EqualFold(names[0], "auto") { + // Dynamic selection: caller runs the selector against the registry. + return nil, registry, true, errs + } if len(names) == 1 && strings.EqualFold(names[0], "all") { names = sortedKeys(registry) } @@ -158,7 +165,28 @@ func resolveSpecialists(repoDir string) ([]Specialist, []error) { } out = append(out, sp) } - return out, errs + return out, registry, false, errs +} + +// suiteFromRegistry resolves names against the registry, skipping any missing. +// Used as the auto-mode fallback (the default suite always exists in builtins). +func suiteFromRegistry(registry map[string]Specialist, names []string) []Specialist { + var out []Specialist + for _, n := range names { + if sp, ok := registry[n]; ok { + out = append(out, sp) + } + } + return out +} + +// specialistNamesOf returns the names of a specialist slice (for logging). +func specialistNamesOf(specs []Specialist) []string { + names := make([]string, len(specs)) + for i, s := range specs { + names[i] = s.Name + } + return names } // composeSpecialistPrompt appends a specialist's lens to the base system prompt. diff --git a/cmd/gadfly/specialists_test.go b/cmd/gadfly/specialists_test.go index 1b73756..c720747 100644 --- a/cmd/gadfly/specialists_test.go +++ b/cmd/gadfly/specialists_test.go @@ -28,7 +28,7 @@ func eq(a, b []string) bool { func TestResolveSpecialists_DefaultSuite(t *testing.T) { t.Setenv("GADFLY_SPECIALISTS", "") - specs, errs := resolveSpecialists(t.TempDir()) + specs, _, _, errs := resolveSpecialists(t.TempDir()) if len(errs) != 0 { t.Fatalf("unexpected errors: %v", errs) } @@ -39,7 +39,7 @@ func TestResolveSpecialists_DefaultSuite(t *testing.T) { func TestResolveSpecialists_EnvSelection(t *testing.T) { t.Setenv("GADFLY_SPECIALISTS", "security, tests") - specs, errs := resolveSpecialists(t.TempDir()) + specs, _, _, errs := resolveSpecialists(t.TempDir()) if len(errs) != 0 { t.Fatalf("unexpected errors: %v", errs) } @@ -48,9 +48,26 @@ func TestResolveSpecialists_EnvSelection(t *testing.T) { } } +func TestResolveSpecialists_AutoFlag(t *testing.T) { + t.Setenv("GADFLY_SPECIALISTS", "auto") + specs, registry, auto, errs := resolveSpecialists(t.TempDir()) + if len(errs) != 0 { + t.Fatalf("unexpected errors: %v", errs) + } + if !auto { + t.Error("expected auto=true for GADFLY_SPECIALISTS=auto") + } + if specs != nil { + t.Errorf("auto mode should return nil specs, got %v", names(specs)) + } + if len(registry) == 0 { + t.Error("auto mode should still return the registry catalog") + } +} + func TestResolveSpecialists_UnknownNameErrors(t *testing.T) { t.Setenv("GADFLY_SPECIALISTS", "security,bogus") - specs, errs := resolveSpecialists(t.TempDir()) + specs, _, _, errs := resolveSpecialists(t.TempDir()) if len(errs) == 0 { t.Fatal("expected an error for unknown specialist") } @@ -62,7 +79,7 @@ func TestResolveSpecialists_UnknownNameErrors(t *testing.T) { func TestResolveSpecialists_EnvCustomDefinition(t *testing.T) { t.Setenv("GADFLY_SPECIALIST_MIGRATIONS", "Review DB migrations for destructive ops.") t.Setenv("GADFLY_SPECIALISTS", "migrations") - specs, errs := resolveSpecialists(t.TempDir()) + specs, _, _, errs := resolveSpecialists(t.TempDir()) if len(errs) != 0 { t.Fatalf("unexpected errors: %v", errs) } @@ -83,7 +100,7 @@ define: t.Fatal(err) } t.Setenv("GADFLY_SPECIALISTS", "") // let the file drive selection - specs, errs := resolveSpecialists(dir) + specs, _, _, errs := resolveSpecialists(dir) if len(errs) != 0 { t.Fatalf("unexpected errors: %v", errs) } diff --git a/cmd/gadfly/tools.go b/cmd/gadfly/tools.go index 8d4e3ea..6a36868 100644 --- a/cmd/gadfly/tools.go +++ b/cmd/gadfly/tools.go @@ -37,8 +37,9 @@ var skipDirs = map[string]bool{ // escapes (symlink or `..` traversal), so a hostile diff can never make the // reviewer read outside the checkout. type repoFS struct { - root string // absolute, symlink-resolved repo root - diff string // the full PR unified diff (served by get_diff) + root string // absolute, symlink-resolved repo root + diff string // the full PR unified diff (served by get_diff) + worker llm.Model // optional cheap model for delegate_investigation; nil = no delegation } // newRepoFS resolves root to an absolute, symlink-free path. @@ -96,16 +97,25 @@ func (r *repoFS) contains(abs string) error { return nil } -// toolbox builds the read-only review toolbox over this sandbox. -func (r *repoFS) toolbox() (*llm.Toolbox, error) { - box := llm.NewToolbox("gadfly") - tools := []llm.Tool{ +// fsTools is the set of read-only repository tools. +func (r *repoFS) fsTools() []llm.Tool { + return []llm.Tool{ r.readFileTool(), r.listDirTool(), r.grepTool(), r.findFilesTool(), r.getDiffTool(), } +} + +// toolbox builds the reviewer's toolbox: the read-only repo tools, plus the +// delegate_investigation tool when a worker model is configured. +func (r *repoFS) toolbox() (*llm.Toolbox, error) { + box := llm.NewToolbox("gadfly") + tools := r.fsTools() + if r.worker != nil { + tools = append(tools, r.delegateTool()) + } for _, t := range tools { if err := box.Add(t); err != nil { return nil, fmt.Errorf("add tool %q: %w", t.Name, err) @@ -114,6 +124,18 @@ func (r *repoFS) toolbox() (*llm.Toolbox, error) { return box, nil } +// workerToolbox is the toolbox handed to a delegated worker sub-agent: the +// read-only repo tools only (no delegate tool — workers don't sub-delegate). +func (r *repoFS) workerToolbox() (*llm.Toolbox, error) { + box := llm.NewToolbox("gadfly-worker") + for _, t := range r.fsTools() { + if err := box.Add(t); err != nil { + return nil, fmt.Errorf("add tool %q: %w", t.Name, err) + } + } + return box, nil +} + type readFileArgs struct { Path string `json:"path" description:"Repository-relative path of the file to read, e.g. pkg/logic/agentexec/pipeline.go"` StartLine int `json:"start_line,omitempty" description:"Optional 1-based line to start from (default 1)."`