feat: re-platform agentic review onto executus + large-PR cost controls #20

Merged
steve merged 2 commits from feat/executus-replatform into main 2026-06-30 15:41:03 +00:00
8 changed files with 176 additions and 71 deletions
Showing only changes of commit f5ca813af3 - Show all commits
+23 -16
View File
1
@@ -56,6 +56,11 @@ const (
// compactor defaults; surfaced as gadfly env knobs for tuning.
defaultCompactKeepRecent = 8
defaultCompactSummaryWords = 200
// contextTokenLookupTimeout bounds the one-shot /api/show call that resolves a
// cloud model's context window at executor-build time. Kept short so a slow or
// unreachable endpoint adds at most this to startup before degrading to
// no-compaction (rather than the provider cache's default 15s).
contextTokenLookupTimeout = 5 * time.Second
)
// runSeq mints a unique-per-process RunID suffix for each executor run so audit
1
@@ -79,10 +84,7 @@ func (w wrappedTool) BuildLLM(_ exectool.Invocation) llm.Tool { return w.t }
// returns it along with the tool names for RunnableAgent.LowLevelTools.
func gadflyToolRegistry(fs *repoFS) (exectool.Registry, []string, error) {
reg := exectool.NewRegistry()
tools := fs.fsTools()
if fs.worker != nil {
tools = append(tools, fs.delegateTool())
}
tools := fs.allTools()
names := make([]string, 0, len(tools))
for _, t := range tools {
if err := reg.Register(wrappedTool{t: t}); err != nil {
@@ -100,6 +102,11 @@ func gadflyToolRegistry(fs *repoFS) (exectool.Registry, []string, error) {
// the engine calls addUsage after each pass with run.Result.Usage. A nil
// *gadflyBudget is never installed — caps of 0 mean "unlimited", so the port is
// only wired when at least one cap is set.
//
// The guard is PASS-granular: Check runs before each pass, so it stops the NEXT
// pass once the budget is spent but cannot abort a single runaway pass mid-flight.
// The swarm-wide GADFLY_PR_BUDGET_SECS wall-clock backstop (entrypoint.sh) is what
// bounds a mid-pass runaway.
type gadflyBudget struct {
mu sync.Mutex
maxTokens int64
1
@@ -254,8 +261,10 @@ func newReviewExecutor(fs *repoFS, mdl, summarizer llm.Model, modelSpec string,
Compactor: compactor,
ContextTokens: ctxTokens,
Defaults: exrun.Defaults{
MaxIterations: envInt("GADFLY_MAX_STEPS", defaultMaxSteps),
MaxRuntime: time.Duration(envInt("GADFLY_TIMEOUT_SECS", defaultTimeoutSecs)) * time.Second,
// MaxIterations/MaxRuntime are intentionally omitted: every pass sets its
// own per-run cap on the RunnableAgent below (the review and recheck caps
// differ), so a Defaults value here would always be overridden — dead. This
// leaves only the cross-pass guards + the compaction ratio.
MaxConsecutiveToolErrors: 4,
MaxSameToolCallRepeats: 4,
CompactionThresholdRatio: compactRatio(),
1
@@ -282,7 +291,7 @@ func (r *reviewExecutor) run(ctx context.Context, system, task string, maxSteps
SystemPrompt: system,
ModelTier: r.modelSpec,
MaxIterations: maxSteps,
MaxRuntime: time.Duration(envInt("GADFLY_TIMEOUT_SECS", defaultTimeoutSecs)) * time.Second,
MaxRuntime: reviewTimeout(),
LowLevelTools: r.toolNames,
Critic: exrun.CriticConfig{Enabled: true},
}, exectool.Invocation{
1
@@ -316,14 +325,7 @@ func prCallerID() string {
// compactionEnabled reports whether context compaction should be wired. On
// unless GADFLY_COMPACT is explicitly falsey.
func compactionEnabled() bool {
switch strings.ToLower(strings.TrimSpace(os.Getenv("GADFLY_COMPACT"))) {
case "0", "false", "no", "off":
return false
default:
return true
}
}
func compactionEnabled() bool { return envBool("GADFLY_COMPACT", true) }
// compactRatio is the compaction threshold as a fraction of the model context
// window (GADFLY_COMPACT_RATIO), clamped to (0,1]; default defaultCompactRatio.
@@ -355,10 +357,15 @@ func resolveContextTokens(modelSpec string) int {
key = strings.TrimSpace(os.Getenv("OLLAMA_API_KEY"))
Review

🟠 resolveContextTokens silently disables compaction on network errors with no log

error-handling · flagged by 2 models

🪰 Gadfly · advisory

🟠 **resolveContextTokens silently disables compaction on network errors with no log** _error-handling · flagged by 2 models_ <sub>🪰 Gadfly · advisory</sub>
}
cache := model.NewCloudOllamaLimitCache("", key, nil)
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
ctx, cancel := context.WithTimeout(context.Background(), contextTokenLookupTimeout)
defer cancel()
if n, ok := model.MaxContextTokensResolving(ctx, modelSpec, cache); ok {
return n
}
// Unknown model or a failed lookup (e.g. no key / unreachable endpoint): don't
// guess — compaction is disabled. Log it so a misconfiguration is debuggable
// rather than silently dropping the protection. Set GADFLY_MODEL_CONTEXT_TOKENS
// to force a window for an endpoint executus can't introspect.
fmt.Fprintf(os.Stderr, "gadfly: no context window resolved for %q; compaction disabled (set GADFLY_MODEL_CONTEXT_TOKENS to enable it)\n", modelSpec)
return 0
}
+26 -8
View File
@@ -39,10 +39,9 @@
// GADFLY_TITLE PR title (optional).
// GADFLY_BODY PR description (optional).
// GADFLY_MAX_STEPS review-pass step cap (optional, default 24).
// GADFLY_WRAPUP_RESERVE steps before the cap at which the agent is told to
// stop investigating and write its answer (optional,
// default 4). Plus a tool-free finalization fallback
// guarantees a step-exhausted pass still emits output.
// GADFLY_WRAPUP_RESERVE steps before the cap at which the wrap-up critic nudges
// the agent to stop investigating and write its answer
// (optional, default 4).
// GADFLY_RECHECK set to 0/false to skip the recheck pass (optional, default on).
// GADFLY_RECHECK_MAX_STEPS recheck-pass step cap (optional, default 16).
// GADFLY_TIMEOUT_SECS overall deadline in seconds, shared by both passes (optional, default 300).
@@ -55,8 +54,8 @@
// lanes as GADFLY_PROVIDER_CONCURRENCY (e.g.
// "ollama-cloud=3,m1=1"). Wins over GADFLY_LENS_CONCURRENCY
// for the model's provider; falls back to it otherwise.
// GADFLY_MAX_DIFF_CHARS diff chars embedded in the prompt (optional, default 60000;
// the full diff is always available via the get_diff tool).
// GADFLY_MAX_DIFF_CHARS diff chars embedded in the review prompt (optional, default 60000;
// the full diff is reachable via the paginated get_diff tool).
//
// On success it prints the review to stdout and exits 0. On a usage/config or
// model error it prints a diagnostic to stderr and exits non-zero; run.sh then
@@ -322,8 +321,7 @@ func providerOverride(envName, provider string) (int, bool) {
// returned bool is true when the review pass failed (rendered as an inline
// notice — advisory; one lens failing never sinks the others or the job).
func reviewWithSpecialist(eng reviewEngine, base string, sp Specialist, task, diff string) (string, bool) {
timeout := time.Duration(envInt("GADFLY_TIMEOUT_SECS", defaultTimeoutSecs)) * time.Second
ctx, cancel := context.WithTimeout(context.Background(), timeout)
ctx, cancel := context.WithTimeout(context.Background(), reviewTimeout())
defer cancel()
draft, err := eng.runPass(ctx, composeSpecialistPrompt(base, sp), task,
@@ -389,3 +387,23 @@ func envInt(name string, def int) int {
}
return n
}
// envBool reads a boolean-ish env var: def when unset, false for an explicit
// falsey value (0/false/no/off), true otherwise. The shared spelling for
// gadfly's "on unless disabled" opt-out flags (GADFLY_RECHECK, GADFLY_COMPACT).
func envBool(name string, def bool) bool {
switch strings.ToLower(strings.TrimSpace(os.Getenv(name))) {
case "":
return def
case "0", "false", "no", "off":
return false
default:
return true
}
}
// reviewTimeout is the per-specialist-lens deadline (GADFLY_TIMEOUT_SECS), shared
// across a lens's review+recheck passes and applied as each pass's run cap.
func reviewTimeout() time.Duration {
return time.Duration(envInt("GADFLY_TIMEOUT_SECS", defaultTimeoutSecs)) * time.Second
}
+1 -9
View File
@@ -2,7 +2,6 @@ package main
import (
"fmt"
"os"
"strings"
)
@@ -65,14 +64,7 @@ Output rules:
// recheckEnabled reports whether the verification pass should run. On unless
// GADFLY_RECHECK is explicitly a falsey value.
func recheckEnabled() bool {
switch strings.ToLower(strings.TrimSpace(os.Getenv("GADFLY_RECHECK"))) {
case "0", "false", "no", "off":
return false
default:
return true
}
}
func recheckEnabled() bool { return envBool("GADFLY_RECHECK", true) }
// shouldRecheck decides whether to run the verification pass for a given draft.
// A clean "no material issues" draft has nothing to verify, so it is skipped
+1 -1
View File
@@ -49,7 +49,7 @@ func TestRecheckEnabled(t *testing.T) {
}
func TestBuildRecheckTask(t *testing.T) {
t.Setenv("GADFLY_MAX_DIFF_CHARS", "")
t.Setenv("GADFLY_RECHECK_DIFF_CHARS", "")
draft := "VERDICT: Blocking issues found\n- foo.go:1 broken"
out := buildRecheckTask(draft, "diff --git a/x b/x\n+y\n")
if !strings.Contains(out, draft) {
+84 -30
View File
@@ -9,6 +9,7 @@ import (
"regexp"
"sort"
"strings"
"sync"
llm "gitea.stevedudenhoeffer.com/steve/majordomo/llm"
)
@@ -42,6 +43,12 @@ type repoFS struct {
root string // absolute, symlink-resolved repo root
diff string // the full PR unified diff (served by get_diff)
worker llm.Model // optional cheap model for delegate_investigation; nil = no delegation
// diffLines caches the split diff so paging through get_diff doesn't re-split
// the whole (possibly large) diff on every call. The diff is immutable, so the
// cache is computed once and safe to share across concurrent lenses.
diffOnce sync.Once
diffLines []string
}
// newRepoFS resolves root to an absolute, symlink-free path.
@@ -110,15 +117,24 @@ func (r *repoFS) fsTools() []llm.Tool {
}
}
// toolbox builds the reviewer's toolbox: the read-only repo tools, plus the
// delegate_investigation tool when a worker model is configured.
func (r *repoFS) toolbox() (*llm.Toolbox, error) {
box := llm.NewToolbox("gadfly")
// allTools is the single source of truth for the reviewer's tool set: the
// read-only repo tools, plus delegate_investigation when a worker model is
// configured. Both the executus registry (gadflyToolRegistry, the production
// path) and toolbox() build from this list.
func (r *repoFS) allTools() []llm.Tool {
tools := r.fsTools()
if r.worker != nil {
tools = append(tools, r.delegateTool())
}
for _, t := range tools {
return tools
}
// toolbox builds a majordomo toolbox from allTools(). The production review path
// now goes through executus's tool.Registry (see executus.go); this remains for
// the toolbox-level tests (the `call` helper).
func (r *repoFS) toolbox() (*llm.Toolbox, error) {
box := llm.NewToolbox("gadfly")
for _, t := range r.allTools() {
if err := box.Add(t); err != nil {
return nil, fmt.Errorf("add tool %q: %w", t.Name, err)
}
@@ -409,30 +425,49 @@ func (r *repoFS) getDiffTool() llm.Tool {
"get_diff",
"Return the unified diff under review as a numbered, PAGINATED window (like read_file) — not the whole diff at once, so a huge PR can't blow the context window. Pass `path` to fetch just one changed file's hunks, or `start_line`/`limit` to page through. A truncated copy of the diff is also embedded in the task message.",
func(_ context.Context, args getDiffArgs) (any, error) {
diff := r.diff
scope := "the diff"
var lines []string
if p := strings.TrimSpace(args.Path); p != "" {
diff = diffForPath(r.diff, p)
if strings.TrimSpace(diff) == "" {
lines = diffLinesForPath(r.diff, p)
if len(lines) == 0 {
return fmt.Sprintf("(no diff hunks for path %q; check it against the changed-files list — the path must match a file the diff touches)", p), nil
}
scope = "the diff for " + p
Review

🟡 windowDiff re-splits the whole diff per get_diff call; paging a large diff is O(n^2/limit) on the large-PR hot path

correctness, performance · flagged by 2 models

  • cmd/gadfly/tools.go:435 (windowDiff) re-splits the entire diff on every get_diff call, even to return one 800-line window. strings.Split(diff, "\n") materializes a []string over the whole diff regardless of start/limit. On exactly the large PRs this feature targets, the agent is steered to call get_diff repeatedly — paging via start_line and re-fetching per path. Each call is O(total-diff-lines) work plus a fresh line-slice allocation, so **paging a large diff to its…

🪰 Gadfly · advisory

🟡 **windowDiff re-splits the whole diff per get_diff call; paging a large diff is O(n^2/limit) on the large-PR hot path** _correctness, performance · flagged by 2 models_ - **`cmd/gadfly/tools.go:435` (`windowDiff`) re-splits the entire diff on every `get_diff` call, even to return one 800-line window.** `strings.Split(diff, "\n")` materializes a `[]string` over the *whole* diff regardless of `start`/`limit`. On exactly the large PRs this feature targets, the agent is steered to call `get_diff` repeatedly — paging via `start_line` and re-fetching per `path`. Each call is O(total-diff-lines) work plus a fresh line-slice allocation, so **paging a large diff to its… <sub>🪰 Gadfly · advisory</sub>
} else {
lines = r.diffAllLines()
if len(lines) == 0 {
return "(empty diff)", nil
}
}
if strings.TrimSpace(diff) == "" {
return "(empty diff)", nil
}
return windowDiff(diff, scope, args.StartLine, args.Limit), nil
return windowDiff(lines, scope, args.StartLine, args.Limit), nil
},
)
}
// windowDiff returns a numbered, paginated slice of a unified diff, mirroring
// read_file's caps (maxGetDiffLines / maxGetDiffBytes / maxLineLen) so a single
// get_diff call can never dump a multi-hundred-KB diff into the transcript — the
// amplifier behind the large-PR token burn. The full diff stays reachable by
// paging with start_line, or scoped per file via the path arg.
func windowDiff(diff, scope string, start, limit int) string {
// diffAllLines returns the whole diff split into lines, cached so paging never
// re-splits the (possibly large) diff.
func (r *repoFS) diffAllLines() []string {
r.diffOnce.Do(func() { r.diffLines = splitDiffLines(r.diff) })
return r.diffLines
}
// splitDiffLines splits a unified diff into lines, dropping the single trailing
// empty element a trailing newline produces — otherwise windowDiff would emit a
// blank final line and over-count the total by one.
func splitDiffLines(diff string) []string {
lines := strings.Split(diff, "\n")
if n := len(lines); n > 0 && lines[n-1] == "" {
lines = lines[:n-1]
}
Outdated
Review

🟡 windowDiff truncation message off-by-one: says 'truncated at line N' when N was the last emitted line

error-handling · flagged by 1 model

  • windowDiff truncation message off-by-one (cmd/gadfly/tools.go:461): The message says truncated at line %d using i (0-based index of the first unemitted line), but the last emitted line was i (1-based). For example, after emitting lines 1–800, i=800 (0-based), and the message reads "truncated at line 800 of 850" — but line 800 was the last line shown; truncation actually happened at line 801. The start_line hint correctly uses i+1 (801), so the paging instruction is righ…

🪰 Gadfly · advisory

🟡 **windowDiff truncation message off-by-one: says 'truncated at line N' when N was the last emitted line** _error-handling · flagged by 1 model_ - **`windowDiff` truncation message off-by-one** (`cmd/gadfly/tools.go:461`): The message says `truncated at line %d` using `i` (0-based index of the first *unemitted* line), but the last *emitted* line was `i` (1-based). For example, after emitting lines 1–800, `i=800` (0-based), and the message reads "truncated at line 800 of 850" — but line 800 was the last line shown; truncation actually happened at line 801. The `start_line` hint correctly uses `i+1` (801), so the paging instruction is righ… <sub>🪰 Gadfly · advisory</sub>
return lines
}
// windowDiff returns a numbered, paginated slice of pre-split diff lines,
// mirroring read_file's caps (maxGetDiffLines / maxGetDiffBytes / maxLineLen) so
// a single get_diff call can never dump a multi-hundred-KB diff into the
// transcript — the amplifier behind the large-PR token burn. The full diff stays
// reachable by paging with start_line, or scoped per file via the path arg.
func windowDiff(lines []string, scope string, start, limit int) string {
Outdated
Review

🟠 diffForPath re-splits the full diff on every scoped get_diff call, allocating O(diff_size) per invocation

correctness, error-handling, performance · flagged by 5 models

The finding is confirmed. The code at cmd/gadfly/tools.go:470-483 matches the draft's description exactly: inSection = strings.Contains(ln, path) is an unanchored substring match against the full diff --git a/… b/… header line, so a path that is a substring of another changed file's path (e.g. foo.go vs vendor/foo.go) pulls in both sections. This survives verification.

🪰 Gadfly · advisory

🟠 **diffForPath re-splits the full diff on every scoped get_diff call, allocating O(diff_size) per invocation** _correctness, error-handling, performance · flagged by 5 models_ The finding is confirmed. The code at `cmd/gadfly/tools.go:470-483` matches the draft's description exactly: `inSection = strings.Contains(ln, path)` is an unanchored substring match against the full `diff --git a/… b/…` header line, so a `path` that is a substring of another changed file's path (e.g. `foo.go` vs `vendor/foo.go`) pulls in both sections. This survives verification. <sub>🪰 Gadfly · advisory</sub>
total := len(lines)
if start < 1 {
start = 1
@@ -458,26 +493,45 @@ func windowDiff(diff, scope string, start, limit int) string {
emitted++
}
if i < total {
fmt.Fprintf(&b, "... (%s truncated at line %d of %d; call get_diff again with start_line=%d for more, or pass a `path` to scope to one file)\n", scope, i, total, i+1)
// i (0-based) is the first line NOT emitted; line i was the last shown.
fmt.Fprintf(&b, "... (%s truncated after line %d of %d; call get_diff again with start_line=%d for the rest, or pass a `path` to scope to one file)\n", scope, i, total, i+1)
}
return b.String()
}
// diffForPath extracts the unified-diff section for one changed file: from the
// `diff --git` header that names the path through to the next `diff --git`
// header (or end). Matching is a substring on the header line, so either side
// (a/<path> or b/<path>) hits, including a rename's two paths.
func diffForPath(diff, path string) string {
var b strings.Builder
// diffLinesForPath returns the unified-diff lines for one changed file: from the
// `diff --git` header that names path through to the next header (or end). The
// header names two path tokens (a/<old> b/<new>); a match is on a WHOLE token,
// so path "foo.go" does not pull in "barfoo.go" — and a trailing "/" scopes to a
// directory (e.g. "pkg/foo/" matches pkg/foo/bar.go).
func diffLinesForPath(diff, path string) []string {
want := strings.TrimPrefix(strings.TrimPrefix(strings.TrimSpace(path), "a/"), "b/")
var out []string
inSection := false
for _, ln := range strings.Split(diff, "\n") {
for _, ln := range splitDiffLines(diff) {
if strings.HasPrefix(ln, "diff --git ") {
inSection = strings.Contains(ln, path)
inSection = diffHeaderNames(ln, want)
}
if inSection {
b.WriteString(ln)
b.WriteByte('\n')
out = append(out, ln)
}
}
return b.String()
return out
}
// diffHeaderNames reports whether a `diff --git a/X b/Y` header names want as one
// of its (a/b-stripped) path tokens — exact whole-token match, or a directory
// prefix when want ends with "/".
func diffHeaderNames(header, want string) bool {
fields := strings.Fields(header)
if len(fields) < 3 {
return false
}
for _, f := range fields[2:] {
p := strings.TrimPrefix(strings.TrimPrefix(f, "a/"), "b/")
if p == want || (strings.HasSuffix(want, "/") && strings.HasPrefix(p, want)) {
return true
}
}
return false
}
+25 -1
View File
@@ -247,7 +247,7 @@ func TestGetDiffTool_Paginates(t *testing.T) {
if err != nil {
t.Fatalf("get_diff: %v", err)
}
if !strings.Contains(out, "truncated at line") {
if !strings.Contains(out, "truncated after line") {
t.Error("a diff longer than the per-call cap should be truncated with a paging hint")
}
if strings.Contains(out, "801\t") {
@@ -263,6 +263,30 @@ func TestGetDiffTool_Paginates(t *testing.T) {
}
}
// TestDiffLinesForPath_Anchored: get_diff path= matches on a WHOLE path token,
// so "foo.go" never pulls in "barfoo.go" (the unanchored-substring weakness the
// swarm flagged), while a trailing "/" still scopes to a directory.
func TestDiffLinesForPath_Anchored(t *testing.T) {
diff := "diff --git a/foo.go b/foo.go\n+foo change\n" +
"diff --git a/barfoo.go b/barfoo.go\n+barfoo change\n" +
"diff --git a/pkg/x.go b/pkg/x.go\n+x change\n"
joined := strings.Join(diffLinesForPath(diff, "foo.go"), "\n")
if !strings.Contains(joined, "foo change") {
t.Errorf("foo.go should match its own hunk:\n%s", joined)
}
if strings.Contains(joined, "barfoo change") {
t.Errorf("foo.go must NOT match barfoo.go (unanchored substring regression):\n%s", joined)
}
if !strings.Contains(strings.Join(diffLinesForPath(diff, "pkg/"), "\n"), "x change") {
t.Error("a trailing-slash path should scope to the directory (pkg/ -> pkg/x.go)")
}
if len(diffLinesForPath(diff, "nope.go")) != 0 {
t.Error("an unknown path should yield no lines")
}
}
func TestNewRepoFS_BadRoot(t *testing.T) {
// A file (not a directory) is rejected.
f := filepath.Join(t.TempDir(), "afile")
+15 -5
View File
@@ -205,7 +205,9 @@ if [ "$HUGE_DIFF_BYTES" -gt 0 ] 2>/dev/null; then
export GADFLY_MAX_STEPS="${GADFLY_HUGE_DIFF_MAX_STEPS:-12}"
export GADFLY_RECHECK_MAX_STEPS="${GADFLY_HUGE_DIFF_RECHECK_MAX_STEPS:-8}"
export GADFLY_RECHECK="${GADFLY_HUGE_DIFF_RECHECK:-0}" # skip recheck on huge PRs
export MAX_DIFF_CHARS="${GADFLY_HUGE_DIFF_MAX_DIFF_CHARS:-20000}" # run.sh -> GADFLY_MAX_DIFF_CHARS
# The Go-visible name directly (run.sh prefers GADFLY_MAX_DIFF_CHARS over its
Outdated
Review

downshift exports MAX_DIFF_CHARS (not GADFLY_MAX_DIFF_CHARS); the aliasing lives only in run.sh and is invisible from the Go side

maintainability · flagged by 1 model

  • entrypoint.sh:208MAX_DIFF_CHARS env name is a shell-only alias invisible from the Go side. The downshift sets export MAX_DIFF_CHARS=... with a comment "run.sh -> GADFLY_MAX_DIFF_CHARS." Confirmed: scripts/run.sh:180 translates MAX_DIFF_CHARS to GADFLY_MAX_DIFF_CHARS when invoking the binary, and the Go tree reads only GADFLY_MAX_DIFF_CHARS (main.go:361) / GADFLY_RECHECK_DIFF_CHARS (recheck.go). MAX_DIFF_CHARS appears nowhere in Go. The indirection lives entirely in she…

🪰 Gadfly · advisory

⚪ **downshift exports MAX_DIFF_CHARS (not GADFLY_MAX_DIFF_CHARS); the aliasing lives only in run.sh and is invisible from the Go side** _maintainability · flagged by 1 model_ - **`entrypoint.sh:208` — `MAX_DIFF_CHARS` env name is a shell-only alias invisible from the Go side.** The downshift sets `export MAX_DIFF_CHARS=...` with a comment "run.sh -> GADFLY_MAX_DIFF_CHARS." Confirmed: `scripts/run.sh:180` translates `MAX_DIFF_CHARS` to `GADFLY_MAX_DIFF_CHARS` when invoking the binary, and the Go tree reads only `GADFLY_MAX_DIFF_CHARS` (main.go:361) / `GADFLY_RECHECK_DIFF_CHARS` (recheck.go). `MAX_DIFF_CHARS` appears nowhere in Go. The indirection lives entirely in she… <sub>🪰 Gadfly · advisory</sub>
# own MAX_DIFF_CHARS), so the cap is honored without relying on run.sh's alias.
export GADFLY_MAX_DIFF_CHARS="${GADFLY_HUGE_DIFF_MAX_DIFF_CHARS:-20000}"
# Surfaced on each posted comment so the shallower review is self-explaining.
export GADFLY_NOTICE="⚠️ Large PR (${PR_DIFF_BYTES} bytes): Gadfly downshifted to a focused, single-model review to stay within budget — coverage is intentionally shallower. Consider splitting the PR for a deeper review."
fi
@@ -333,7 +335,7 @@ fi
# findings were gathered are still posted and the job never fails (advisory).
# GADFLY_PR_BUDGET_SECS=0 (default) disables it.
KILLER_PID=""
rm -f "${WORKDIR}/.budget_killed" 2>/dev/null || true
rm -f "${WORKDIR}/.budget_killed" "${WORKDIR}/.disarmed" 2>/dev/null || true
if [ "${GADFLY_PR_BUDGET_SECS:-0}" -gt 0 ] 2>/dev/null; then
(
sleep "${GADFLY_PR_BUDGET_SECS}"
@@ -342,7 +344,10 @@ if [ "${GADFLY_PR_BUDGET_SECS:-0}" -gt 0 ] 2>/dev/null; then
pkill -TERM -f '/usr/local/bin/gadfly' 2>/dev/null || true
pkill -TERM -f "${SCRIPTS_DIR}/run.sh" 2>/dev/null || true
Review

🟠 Backstop KILL pkill can race the disarm and kill the consolidation gadfly pass

error-handling · flagged by 1 model

🪰 Gadfly · advisory

🟠 **Backstop KILL pkill can race the disarm and kill the consolidation gadfly pass** _error-handling · flagged by 1 model_ <sub>🪰 Gadfly · advisory</sub>
sleep 5
pkill -KILL -f '/usr/local/bin/gadfly' 2>/dev/null || true
# Guard the delayed SIGKILL on the disarm marker: once the lanes finished and
# the watchdog was disarmed, the consolidation gadfly pass runs next, and a
# name-based KILL here must NOT catch it.
[ -f "${WORKDIR}/.disarmed" ] || pkill -KILL -f '/usr/local/bin/gadfly' 2>/dev/null || true
) &
KILLER_PID=$!
log "PR budget watchdog armed (${GADFLY_PR_BUDGET_SECS}s, pid ${KILLER_PID})"
1
@@ -360,8 +365,13 @@ done
[ "${#LANE_PIDS[@]}" -gt 0 ] && wait "${LANE_PIDS[@]}"
# Reviews finished (or the watchdog killed them): disarm the watchdog so its
# delayed SIGKILL can't catch the consolidation pass that runs next.
if [ -n "$KILLER_PID" ]; then kill "$KILLER_PID" 2>/dev/null || true; fi
# delayed SIGKILL can't catch the consolidation pass that runs next. Drop the
# disarm marker FIRST so even a racing watchdog that already reached its KILL line
# skips it (the kill below also tears the watchdog subshell down during its sleep).
if [ -n "$KILLER_PID" ]; then
: > "${WORKDIR}/.disarmed"
kill "$KILLER_PID" 2>/dev/null || true
fi
# If the backstop fired, note it on the consensus comment (per-model comments
# were already posted during the run; a killed model surfaces as a failed lane).
+1 -1
View File
@@ -177,7 +177,7 @@ case "$PROVIDER" in
GADFLY_SYSTEM_FILE="${SCRIPT_DIR}/system-prompt.txt" \
GADFLY_TITLE="$TITLE" \
GADFLY_BODY="$BODY" \
GADFLY_MAX_DIFF_CHARS="$MAX_DIFF_CHARS" \
GADFLY_MAX_DIFF_CHARS="${GADFLY_MAX_DIFF_CHARS:-$MAX_DIFF_CHARS}" \
GADFLY_STATUS_FILE="${GADFLY_STATUS_FILE:-}" \
GADFLY_FINDINGS_OUT="${GADFLY_FINDINGS_OUT:-}" \
"$BIN" 2>"$ERR_FILE"