feat: re-platform agentic review onto executus + large-PR cost controls (#20)
Build & push image / build-and-push (push) Successful in 33s
Build & push image / build-and-push (push) Successful in 33s
Makes gadfly a consumer of executus (run.Executor compaction/bounding/budget/critic + fanout) and fixes the large-PR token burn with three size-gated layers: a paginated get_diff, a fleet downshift above GADFLY_HUGE_DIFF_BYTES, and a swarm-wide GADFLY_PR_BUDGET_SECS backstop. Small PRs are untouched; the advisory-only behavior and the static binary are preserved. A dogfood swarm reviewed the change (6 models; 21 real findings graded and folded in).

Co-authored-by: Steve Dudenhoeffer <steve@stevedudenhoeffer.com>
Co-committed-by: Steve Dudenhoeffer <steve@stevedudenhoeffer.com>
This commit was merged in pull request #20.
This commit is contained in:
+74
-1
@@ -183,6 +183,36 @@ export GADFLY_FINDINGS_TOKEN="${GADFLY_FINDINGS_TOKEN:-}"
|
||||
MODELS="${GADFLY_MODELS:-${OLLAMA_REVIEW_MODELS:-$DEFAULT_MODELS}}"
DEFAULT_CONC="${GADFLY_CONCURRENCY:-1}"

# --- huge-PR downshift ------------------------------------------------------
# A very large diff is what burns the model budget: every review step re-sends
# it, multiplied across models × lenses × passes × steps (this is what nuked a
# whole Ollama Cloud block on one giant PR). entrypoint is the only process that
# spans the whole fleet, so the fleet-wide size decision lives here: size the PR
# diff ONCE, and above GADFLY_HUGE_DIFF_BYTES collapse to a single cheap model +
# a focused lens subset, fewer steps, no recheck, and a smaller embedded diff.
# A finished shallow review beats a budget-nuking one. All knobs override; set
# GADFLY_HUGE_DIFF_BYTES=0 to disable. Small PRs are never touched.
#
# Reads:   GADFLY_HUGE_DIFF_BYTES and the GADFLY_HUGE_DIFF_* overrides,
#          GITEA_API, PR, MODELS.
# Writes:  HUGE_PR (0|1), HUGE_DIFF_BYTES, PR_DIFF_BYTES; on a downshift also
#          MODELS and the exported GADFLY_SPECIALISTS, GADFLY_MAX_STEPS,
#          GADFLY_RECHECK_MAX_STEPS, GADFLY_RECHECK, GADFLY_MAX_DIFF_CHARS and
#          GADFLY_NOTICE.
HUGE_PR=0
HUGE_DIFF_BYTES="${GADFLY_HUGE_DIFF_BYTES:-600000}"
if [ "$HUGE_DIFF_BYTES" -gt 0 ] 2>/dev/null; then
  # Only the byte count is kept; the diff itself is streamed straight into wc.
  # pipefail is scoped to the substitution's subshell so a failing fetch is
  # visible here instead of being indistinguishable from an empty diff.
  # NOTE(review): assumes the API helper exits non-zero on failure — confirm.
  PR_DIFF_BYTES="$(
    set -o pipefail
    API "${GITEA_API}/pulls/${PR}.diff" 2>/dev/null | wc -c | tr -d '[:space:]'
  )" || log "huge-PR check: could not fetch the PR diff to size it — byte count is best-effort"
  PR_DIFF_BYTES="${PR_DIFF_BYTES:-0}"
  if [ "$PR_DIFF_BYTES" -gt "$HUGE_DIFF_BYTES" ] 2>/dev/null; then
    HUGE_PR=1
    log "huge PR: diff ${PR_DIFF_BYTES}B > ${HUGE_DIFF_BYTES}B — downshifting the fleet (advisory)"
    MODELS="${GADFLY_HUGE_DIFF_MODELS:-${MODELS%%,*}}" # first model only by default
    export GADFLY_SPECIALISTS="${GADFLY_HUGE_DIFF_SPECIALISTS:-security,correctness,error-handling}"
    export GADFLY_MAX_STEPS="${GADFLY_HUGE_DIFF_MAX_STEPS:-12}"
    export GADFLY_RECHECK_MAX_STEPS="${GADFLY_HUGE_DIFF_RECHECK_MAX_STEPS:-8}"
    export GADFLY_RECHECK="${GADFLY_HUGE_DIFF_RECHECK:-0}" # skip recheck on huge PRs
    # The Go-visible name directly (run.sh prefers GADFLY_MAX_DIFF_CHARS over its
    # own MAX_DIFF_CHARS), so the cap is honored without relying on run.sh's alias.
    export GADFLY_MAX_DIFF_CHARS="${GADFLY_HUGE_DIFF_MAX_DIFF_CHARS:-20000}"
    # Surfaced on each posted comment so the shallower review is self-explaining.
    export GADFLY_NOTICE="⚠️ Large PR (${PR_DIFF_BYTES} bytes): Gadfly downshifted to a focused, single-model review to stay within budget — coverage is intentionally shallower. Consider splitting the PR for a deeper review."
  fi
elif ! [ "$HUGE_DIFF_BYTES" -eq 0 ] 2>/dev/null; then
  # Anything other than a literal 0 that still failed the "-gt 0" test is a
  # typo or a negative number. The guard stays off (as before), but say so:
  # a silently disabled cost control is the expensive kind of surprise.
  log "GADFLY_HUGE_DIFF_BYTES='${HUGE_DIFF_BYTES}' is not a non-negative integer — huge-PR downshift disabled"
fi
|
||||
|
||||
# Print the provider that serves a model id.
#   $1 - model id, either "provider/model" or a bare model name.
# A namespaced id yields everything before its first '/'; a bare name yields
# GADFLY_PROVIDER, or "ollama-cloud" when that is unset or empty.
provider_of() {
  local model_id="$1"
  if [[ "$model_id" == */* ]]; then
    echo "${model_id%%/*}"
  else
    echo "${GADFLY_PROVIDER:-ollama-cloud}"
  fi
}
|
||||
|
||||
# Per-model status file path for the live board. The model id can contain '/'
|
||||
@@ -297,6 +327,32 @@ if [ "${GADFLY_STATUS_BOARD:-1}" != "0" ]; then
|
||||
log "status board started (pid ${BOARD_PID})"
|
||||
fi
|
||||
|
||||
# --- swarm-wide hard backstop ----------------------------------------------
# A wall-clock ceiling across the WHOLE fleet, so a pathological PR can never
# drain the usage block however the models behave. entrypoint is the only
# process spanning every model, so a single "never exceed X" guard lives here.
# On expiry it stops the review subtrees (the binary + run.sh); whatever partial
# findings were gathered are still posted and the job never fails (advisory).
# GADFLY_PR_BUDGET_SECS=0 (default) disables it.
#
# Marker files under WORKDIR:
#   .budget_killed - written by the watchdog when the budget actually fired.
#   .disarmed      - tells the watchdog the lanes are done and it must not
#                    signal anything any more.
# KILLER_PID is the watchdog subshell's pid, or empty when it is not armed.
KILLER_PID=""
rm -f "${WORKDIR}/.budget_killed" "${WORKDIR}/.disarmed" 2>/dev/null || true
if [ "${GADFLY_PR_BUDGET_SECS:-0}" -gt 0 ] 2>/dev/null; then
  (
    sleep "${GADFLY_PR_BUDGET_SECS}"
    # The budget can expire in the instant between the lanes finishing and this
    # subshell being killed. If the disarm marker is already there, the review
    # work is over: do not record a budget kill and do not TERM anything, or a
    # completed run would be reported as partial and the name-based pkill could
    # hit the consolidation gadfly pass.
    if [ -f "${WORKDIR}/.disarmed" ]; then
      exit 0
    fi
    log "PR wall-clock budget (${GADFLY_PR_BUDGET_SECS}s) reached — stopping the review fleet (advisory; partial findings still posted)"
    : > "${WORKDIR}/.budget_killed"
    # Polite stop first; both pkill calls match on the full command line (-f).
    pkill -TERM -f '/usr/local/bin/gadfly' 2>/dev/null || true
    pkill -TERM -f "${SCRIPTS_DIR}/run.sh" 2>/dev/null || true
    sleep 5
    # Guard the delayed SIGKILL on the disarm marker: once the lanes finished and
    # the watchdog was disarmed, the consolidation gadfly pass runs next, and a
    # name-based KILL here must NOT catch it.
    [ -f "${WORKDIR}/.disarmed" ] || pkill -KILL -f '/usr/local/bin/gadfly' 2>/dev/null || true
  ) &
  KILLER_PID=$!
  log "PR budget watchdog armed (${GADFLY_PR_BUDGET_SECS}s, pid ${KILLER_PID})"
fi
|
||||
|
||||
log "providers: ${PROVIDERS:-none}"
|
||||
# Each provider lane runs in parallel; cap is enforced within each lane. Track
|
||||
# the lane PIDs so we wait ONLY for the review work — not the status board,
|
||||
@@ -308,6 +364,21 @@ for p in $PROVIDERS; do
|
||||
done
|
||||
[ "${#LANE_PIDS[@]}" -gt 0 ] && wait "${LANE_PIDS[@]}"
|
||||
|
||||
# Reviews finished (or the watchdog killed them): disarm the watchdog so its
# delayed SIGKILL can't catch the consolidation pass that runs next. Drop the
# disarm marker FIRST so even a racing watchdog that already reached its KILL line
# skips it (the kill below also tears the watchdog subshell down during its sleep).
# Nothing happens when the watchdog was never armed (KILLER_PID is empty).
if [ -n "$KILLER_PID" ]; then
  : > "${WORKDIR}/.disarmed"
  # Best-effort: the watchdog may already have exited on its own.
  kill "$KILLER_PID" 2>/dev/null || true
fi
|
||||
|
||||
# If the backstop fired, note it on the consensus comment (per-model comments
# were already posted during the run; a killed model surfaces as a failed lane).
# The text is appended to any notice already in GADFLY_NOTICE, separated by a
# single space, so an earlier notice is kept rather than replaced.
if [ -f "${WORKDIR}/.budget_killed" ]; then
  export GADFLY_NOTICE="${GADFLY_NOTICE:+${GADFLY_NOTICE} }⏱️ This review was stopped early by the per-PR time budget (GADFLY_PR_BUDGET_SECS); findings are partial."
fi
|
||||
|
||||
# Reviews are done: signal the board to render the final state once and exit.
|
||||
if [ -n "$BOARD_PID" ]; then
|
||||
touch "${STATUS_DIR}/.done" 2>/dev/null || true
|
||||
@@ -331,7 +402,9 @@ if [ "$CONSOLIDATE" = "1" ]; then
|
||||
CONSENSUS="$(GADFLY_CONSOLIDATE_DIR="$FINDINGS_DIR" GADFLY_DIFF_FILE="$DIFF_FILE" \
|
||||
/usr/local/bin/gadfly 2>"${WORKDIR}/consolidate.err" || true)"
|
||||
if [ -n "$CONSENSUS" ]; then
|
||||
BODY="$(printf '%s\n\n<sub>Automated adversarial review by Gadfly — consensus across the model swarm. Advisory only — does not block merge.</sub>' "$CONSENSUS")"
|
||||
NOTICE_BLOCK=""
|
||||
[ -n "${GADFLY_NOTICE:-}" ] && NOTICE_BLOCK="> ${GADFLY_NOTICE}"$'\n\n'
|
||||
BODY="$(printf '%s%s\n\n<sub>Automated adversarial review by Gadfly — consensus across the model swarm. Advisory only — does not block merge.</sub>' "$NOTICE_BLOCK" "$CONSENSUS")"
|
||||
upsert_comment_body "<!-- gadfly-consensus -->" "$BODY"
|
||||
log "consensus comment posted"
|
||||
else
|
||||
|
||||
Reference in New Issue
Block a user