88f74aa768
Build & push image / build-and-push (push) Successful in 9s
Co-authored-by: Steve Dudenhoeffer <steve@stevedudenhoeffer.com> Co-committed-by: Steve Dudenhoeffer <steve@stevedudenhoeffer.com>
345 lines
17 KiB
Bash
345 lines
17 KiB
Bash
#!/usr/bin/env bash
# Gadfly container entrypoint.
#
# This is the brains that used to live in the Gitea Actions workflow YAML. A
# consuming repo only commits a ~15-line stub workflow that runs this image and
# passes the event context as env; ALL the gating, cloning, model-looping and
# comment I/O happen here, so the stub stays dumb (act_runner has weak YAML
# expression support — keep logic in the image, not the workflow).
#
# What it does:
#   1. Decides whether this event should trigger a review (draft skip, comment
#      trigger phrase + allowed-user gate, PR detection). Non-triggers exit 0.
#   2. Acknowledges a comment trigger with a 👀 reaction.
#   3. Shallow-clones the PR's head branch (the agentic reviewer reads the
#      checked-out tree to VERIFY findings, not just the diff).
#   4. Runs the gadfly reviewer once per configured model via run.sh, which
#      upserts one labeled PR comment per model (or, when consolidation is on,
#      a single consensus comment is posted instead).
#
# Advisory only: it never blocks a merge. Config/usage errors exit non-zero;
# everything review-related is posted as a comment, never a failed check.
#
# Env (set by the consumer's stub workflow from the github.* context):
#   GITEA_API              https://HOST/api/v1/repos/OWNER/REPO              (required)
#   GITEA_TOKEN            built-in Actions token (posts comments)           (required)
#   OLLAMA_CLOUD_API_KEY   Ollama Cloud key; empty => "not configured" notice
#   EVENT_NAME             pull_request | issue_comment | workflow_dispatch  (required)
#   PR                     pull request number                               (required)
#   PR_BRANCH              head branch (github.head_ref); empty => fetched from API
#   IS_DRAFT               'true' on a draft PR => skipped
#   COMMENT_BODY           comment text (issue_comment only)
#   COMMENT_ID             comment id, for the 👀 reaction (issue_comment only)
#   ACTOR                  github.actor (the user who triggered)
# Optional config:
#   GADFLY_MODELS          comma-separated model ids/specs (alias: OLLAMA_REVIEW_MODELS)
#   GADFLY_PROVIDER        majordomo provider for bare model ids (default ollama-cloud;
#                          e.g. "ollama" local, "openai", "anthropic", "google")
#   GADFLY_BASE_URL        override backend endpoint (OpenAI/Ollama-compatible servers)
#   GADFLY_API_KEY         provider key (else provider's standard env: OPENAI_API_KEY, …)
#   CLAUDE_CODE_OAUTH_TOKEN  auth for the claude-code engine (GADFLY_MODELS entry
#                          "claude-code"/"claude-code/<model>"); Pro/Max subscription
#                          token from `claude setup-token`. Else ANTHROPIC_API_KEY.
#   GADFLY_TRIGGER_PHRASE  comment phrase that triggers a re-review (default "@gadfly review")
#   GADFLY_ALLOWED_USERS   comma-separated usernames allowed to comment-trigger;
#                          empty => fall back to "is a repo collaborator"
#   GADFLY_FINDINGS_URL    optional gadfly-reports store base URL; set to POST the run +
#                          findings for model-quality tracking (off when empty)
#   GADFLY_FINDINGS_TOKEN  optional bearer token for the gadfly-reports store
#   GADFLY_CONSOLIDATE     cross-model consensus comment: "auto" (default; on for >=2
#                          models), "1" force on, "0" force off (one comment per model)
#   GADFLY_CONCURRENCY     models run at once within one provider lane (default 1)
#   GADFLY_PROVIDER_CONCURRENCY  per-provider overrides, "provider=N,provider=N"
#   GADFLY_STATUS_BOARD    "0" disables the live status-board comment (default on)
#   WORKDIR                scratch directory for the clone, status and findings
#                          (default /tmp/gadfly)
# Unset variables and failed pipeline stages are errors. `-e` is not enabled,
# so a failing command does not abort the script on its own; each step below
# decides whether its failure is fatal.
set -uo pipefail

# A single model by default: the specialist suite already supplies breadth, so
# defaulting to several models would multiply cost (models × specialists × 2
# passes).
DEFAULT_MODELS='qwen3-coder:480b-cloud'

# Comment text that requests a re-review (overridable via GADFLY_TRIGGER_PHRASE).
TRIGGER_PHRASE="${GADFLY_TRIGGER_PHRASE:-@gadfly review}"

# Directory holding the helper scripts this entrypoint invokes (run.sh,
# status-board.sh).
SCRIPTS_DIR='/app/scripts'

# Scratch directory for the clone, status files and findings; keeps a
# caller-supplied WORKDIR, otherwise falls back to /tmp/gadfly.
: "${WORKDIR:=/tmp/gadfly}"

log() {
  # Emit one "[gadfly] ..." diagnostic line on stderr; all arguments are joined
  # with single spaces. stdout is left untouched.
  printf '[gadfly] %s\n' "$*" >&2
}
|
||
die() {
  # Report a fatal error through log (prefixed "ERROR: "), then terminate the
  # script with status 1.
  log "ERROR: $*"
  exit 1
}
|
||
|
||
# Fail fast on missing inputs: `${VAR:?msg}` aborts the script with msg when VAR
# is unset or empty. Expansions are evaluated left to right, so the first
# missing variable in this order is the one reported.
: "${GITEA_API:?GITEA_API required}" \
  "${GITEA_TOKEN:?GITEA_TOKEN required}" \
  "${PR:?PR required}" \
  "${EVENT_NAME:?EVENT_NAME required}"

API() {
  # Authenticated curl wrapper: fails on HTTP errors (-f), stays quiet except
  # for error messages (-sS), and bounds connect/total time. All arguments are
  # appended to the curl invocation unchanged; curl's exit status is returned.
  local -a request=(
    curl -fsS --connect-timeout 20 --max-time 30
    -H "Authorization: token ${GITEA_TOKEN}"
  )
  "${request[@]}" "$@"
}
|
||
|
||
# upsert_comment_body MARKER BODY — create or update a single PR comment.
#
# Scans the PR's comments (up to 10 pages of 50) for the first one whose body
# starts with MARKER; PATCHes that comment if found, otherwise POSTs a new one.
# Used for the consensus comment and for the per-model fallback when
# consolidation is on.
#
# Globals:   GITEA_API, PR (read); API helper (called)
# Arguments: $1 - marker the comment body starts with
#            $2 - full comment body (should itself start with the marker)
# Returns:   0 when the write succeeded; non-zero when the JSON payload could
#            not be built or the server rejected the write. Writes go through
#            API (curl -f), so an HTTP 4xx/5xx is a failure and curl prints
#            the reason on stderr instead of it being silently discarded.
upsert_comment_body() {
  local marker="$1" body="$2"
  local payload comments found_id="" page

  # Let jq do the JSON string escaping of the body.
  payload="$(jq -n --arg b "$body" '{body:$b}')" || return 1

  for ((page = 1; page <= 10; page++)); do
    # A failed page fetch is treated as an empty page (ends the scan).
    comments="$(API "${GITEA_API}/issues/${PR}/comments?limit=50&page=${page}" || echo '[]')"
    [ "$(jq 'length' <<<"$comments")" = "0" ] && break
    found_id="$(jq -r --arg m "$marker" \
      '.[] | select(.body != null and (.body | startswith($m))) | .id' <<<"$comments" | head -n1)"
    [ -n "$found_id" ] && break
  done

  if [ -n "$found_id" ]; then
    API -X PATCH -H "Content-Type: application/json" \
      "${GITEA_API}/issues/comments/${found_id}" -d "$payload" >/dev/null
  else
    API -X POST -H "Content-Type: application/json" \
      "${GITEA_API}/issues/${PR}/comments" -d "$payload" >/dev/null
  fi
}

# --- is the commenter allowed to trigger a re-review? ----------------------
# actor_allowed ACTOR
#
# With GADFLY_ALLOWED_USERS set: succeeds only when ACTOR equals one of the
# comma-separated entries (all whitespace inside an entry is ignored).
# Without it: succeeds when the Gitea collaborators endpoint answers 204 for
# ACTOR.
#
# Globals:   GADFLY_ALLOWED_USERS, GITEA_API, GITEA_TOKEN (read)
# Arguments: $1 - username that triggered the event
# Returns:   0 if allowed, 1 otherwise (an empty ACTOR is never allowed)
actor_allowed() {
  local actor="$1"
  [ -n "$actor" ] || return 1

  if [ -n "${GADFLY_ALLOWED_USERS:-}" ]; then
    local -a allowed=()
    local entry
    # Split into an array instead of iterating an unquoted expansion: the
    # latter is subject to pathname expansion, so an entry such as "*" would
    # be replaced by file names from the current directory and could match.
    # -d '' reads the whole value (newlines included); read then reports EOF,
    # hence the `|| true`.
    IFS=',' read -r -d '' -a allowed <<<"$GADFLY_ALLOWED_USERS" || true
    for entry in ${allowed[@]+"${allowed[@]}"}; do
      [ "${entry//[[:space:]]/}" = "$actor" ] && return 0
    done
    return 1
  fi

  # No explicit allow-list: allow anyone with collaborator (write) access.
  local code
  code="$(curl -s -o /dev/null -w '%{http_code}' --connect-timeout 20 --max-time 30 \
    -H "Authorization: token ${GITEA_TOKEN}" "${GITEA_API}/collaborators/${actor}")"
  [ "$code" = "204" ]
}

# --- trigger gating --------------------------------------------------------
# Decide from EVENT_NAME whether this run should review PR #$PR. Every
# non-trigger path logs the reason and exits 0.
case "$EVENT_NAME" in
  workflow_dispatch)
    log "manual dispatch for PR #${PR}"
    ;;
  pull_request)
    if [ "${IS_DRAFT:-false}" = "true" ]; then
      log "PR #${PR} is a draft; skipping"
      exit 0
    fi
    log "new/updated PR #${PR}"
    ;;
  issue_comment)
    # The comment must contain the trigger phrase (literal substring match).
    case "${COMMENT_BODY:-}" in
      *"$TRIGGER_PHRASE"*) : ;;
      *)
        log "comment does not contain trigger phrase ${TRIGGER_PHRASE}; skipping"
        exit 0
        ;;
    esac
    if ! actor_allowed "${ACTOR:-}"; then
      log "actor '${ACTOR:-}' not allowed to trigger; skipping"
      exit 0
    fi
    # Must be a comment on a PR, not a plain issue.
    if ! API "${GITEA_API}/pulls/${PR}" >/dev/null 2>&1; then
      log "issue #${PR} is not a pull request; skipping"
      exit 0
    fi
    # Acknowledge with 👀. Best-effort: failures are ignored, and the request
    # carries the same connect/total timeouts as the other API calls so a
    # stalled server cannot hang the run before the review starts.
    if [ -n "${COMMENT_ID:-}" ]; then
      curl -s --connect-timeout 20 --max-time 30 -X POST \
        -H "Authorization: token ${GITEA_TOKEN}" -H "Content-Type: application/json" \
        "${GITEA_API}/issues/comments/${COMMENT_ID}/reactions" -d '{"content":"eyes"}' >/dev/null 2>&1 || true
    fi
    log "comment-triggered review for PR #${PR} by ${ACTOR:-?}"
    ;;
  *)
    log "event '${EVENT_NAME}' not handled; skipping"
    exit 0
    ;;
esac

# --- resolve head branch ---------------------------------------------------
# Prefer the branch handed in by the workflow; when it is empty, ask the API
# for the PR's head ref. Having no branch at all is fatal.
BRANCH="${PR_BRANCH:-}"
[ -n "$BRANCH" ] || BRANCH="$(API "${GITEA_API}/pulls/${PR}" | jq -r '.head.ref // ""')"
if [ -z "$BRANCH" ]; then
  die "could not determine PR #${PR} head branch"
fi

# --- clone the PR's checked-out tree (shallow) -----------------------------
# Derive the clone URL from GITEA_API and shallow-clone the head branch into
# ${WORKDIR}/repo, replacing any previous checkout. A failed clone is fatal.
HOST="${GITEA_API%%/api/v1/*}"            # scheme://host (everything before /api/v1/)
REPO_PATH="${GITEA_API##*/api/v1/repos/}" # owner/repo

# Keep whatever scheme GITEA_API uses. Stripping a hard-coded "https://" and
# re-adding "https://" produced "https://token:…@http://host/…" for instances
# served over plain http. A value without a scheme is treated as https.
case "$HOST" in
  *://*)
    CLONE_SCHEME="${HOST%%://*}"
    CLONE_AUTHORITY="${HOST#*://}"
    ;;
  *)
    CLONE_SCHEME="https"
    CLONE_AUTHORITY="$HOST"
    ;;
esac
CLONE_URL="${CLONE_SCHEME}://token:${GITEA_TOKEN}@${CLONE_AUTHORITY}/${REPO_PATH}.git"

# Guard the destructive path: refuse to build it from an empty WORKDIR.
REPO_DIR="${WORKDIR:?WORKDIR must be set}/repo"
rm -rf -- "$REPO_DIR"
mkdir -p -- "$WORKDIR"
log "cloning ${REPO_PATH} @ ${BRANCH}"
# git's stderr is discarded; only the summary below is reported on failure.
# NOTE(review): presumably this keeps the token-bearing URL out of the job log
# — confirm before removing the redirect.
git clone --depth=1 --branch "$BRANCH" "$CLONE_URL" "$REPO_DIR" 2>/dev/null \
  || die "clone of ${REPO_PATH}@${BRANCH} failed"

# --- findings telemetry (optional) -----------------------------------------
# Hand the run context to the gadfly binary through the environment (it is
# inherited via run.sh). Per the store contract described here, the binary only
# emits when GADFLY_FINDINGS_URL is non-empty, in which case it POSTs the run and
# its findings to a gadfly-reports store. URL and token come from the consumer's
# stub env; both are normalised to "" when absent and exported either way, so
# the binary always sees them defined (empty => disabled).
GADFLY_REPO="$REPO_PATH"
GADFLY_PR="$PR"
: "${GADFLY_FINDINGS_URL:=}" "${GADFLY_FINDINGS_TOKEN:=}"
export GADFLY_REPO GADFLY_PR GADFLY_FINDINGS_URL GADFLY_FINDINGS_TOKEN

# --- review once per model, with per-provider concurrency -------------------
# Model list: GADFLY_MODELS is the provider-agnostic name, OLLAMA_REVIEW_MODELS
# a back-compat alias, DEFAULT_MODELS the fallback. GADFLY_PROVIDER /
# GADFLY_BASE_URL / GADFLY_API_KEY and any provider key envs (OPENAI_API_KEY, …)
# are not handled here; run.sh and the binary inherit them from the environment.
#
# Concurrency model: every PROVIDER gets its own lane and the lanes run in
# PARALLEL, so a fast cloud provider is not stuck behind a slow local box.
# Inside one lane at most `cap` models run at once, where cap is the
# "provider=N" entry of GADFLY_PROVIDER_CONCURRENCY if present, otherwise
# GADFLY_CONCURRENCY (default 1). A model's provider is the first path segment
# of its spec ("m1pro/qwen3.6:35b-mlx" -> m1pro); a bare id uses GADFLY_PROVIDER,
# falling back to ollama-cloud. With the default cap of 1 a single-provider
# pool runs strictly one model after another.
MODELS="${GADFLY_MODELS:-}"
[ -n "$MODELS" ] || MODELS="${OLLAMA_REVIEW_MODELS:-}"
[ -n "$MODELS" ] || MODELS="$DEFAULT_MODELS"

DEFAULT_CONC="${GADFLY_CONCURRENCY:-1}"

provider_of() {
  # Print the provider of a model spec: the text before the first '/' when the
  # spec contains one, otherwise GADFLY_PROVIDER (default "ollama-cloud").
  if [[ "$1" == */* ]]; then
    echo "${1%%/*}"
  else
    echo "${GADFLY_PROVIDER:-ollama-cloud}"
  fi
}
|
||
|
||
# Directory holding one JSON status file per model for the live board.
STATUS_DIR="${WORKDIR}/status"

# status_file_for MODEL — print the status file path for a model spec.
#
# A spec can contain '/' and ':' (e.g. m1/qwen3:14b), so every character outside
# [A-Za-z0-9._-] is flattened to '_'. Flattening alone is not unique
# ("foo:bar" and "foo/bar" both become "foo_bar"), which would make two models
# share one status file, so a checksum of the raw spec is appended — the same
# scheme findings_file_for uses. The JSON inside carries the real
# model/provider; the name only has to be unique and stable per model.
#
# Globals:   STATUS_DIR (read)
# Arguments: $1 - model spec
# Outputs:   "<STATUS_DIR>/<flattened spec>-<cksum>.json" on stdout
status_file_for() {
  local safe sum
  # printf (not echo) so no trailing newline gets flattened into an extra '_'.
  safe="$(printf '%s' "$1" | tr -c '[:alnum:]._-' '_')"
  sum="$(printf '%s' "$1" | cksum | cut -d' ' -f1)"
  echo "${STATUS_DIR}/${safe}-${sum}.json"
}

# provider_cap PROVIDER — print the concurrency cap for a provider lane.
#
# Looks PROVIDER up in GADFLY_PROVIDER_CONCURRENCY ("p=N,q=M", whitespace
# ignored) and prints the first non-empty value whose key matches; otherwise
# prints DEFAULT_CONC. Entries without '=' are ignored rather than being read
# as "key equals value". The value is not validated here; the caller coerces
# anything that is not a number >= 1.
#
# Globals:   GADFLY_PROVIDER_CONCURRENCY, DEFAULT_CONC (read)
# Arguments: $1 - provider name
# Outputs:   the cap on stdout
provider_cap() {
  local p="$1" item k v
  local -a caps=()

  # Skip the parse entirely when no override map is configured; this also
  # avoids expanding an empty array under `set -u` on older bash.
  if [ -n "${GADFLY_PROVIDER_CONCURRENCY:-}" ]; then
    IFS=',' read -ra caps <<<"$GADFLY_PROVIDER_CONCURRENCY"
    for item in ${caps[@]+"${caps[@]}"}; do
      case "$item" in
        *=*) ;;
        *) continue ;;
      esac
      k="${item%%=*}"
      k="${k//[[:space:]]/}"
      v="${item#*=}"
      v="${v//[[:space:]]/}"
      if [ "$k" = "$p" ] && [ -n "$v" ]; then
        echo "$v"
        return 0
      fi
    done
  fi

  echo "${DEFAULT_CONC:-1}"
}

# review_one MODEL — run run.sh once for MODEL, then settle its status file.
#
# Passes the model, binary path, checkout dir and (when enabled) the status and
# findings file paths to run.sh through the environment. A failing run.sh is
# logged and otherwise ignored.
#
# Globals:   GADFLY_STATUS_BOARD, CONSOLIDATE, SCRIPTS_DIR, REPO_DIR (read)
# Arguments: $1 - model spec
# Returns:   always 0 — the review is best-effort, and a non-zero status here
#            would only disturb the lane scheduler that waits on this job.
review_one() {
  local sf="" ff="" tmp

  if [ "${GADFLY_STATUS_BOARD:-1}" != "0" ]; then
    sf="$(status_file_for "$1")"
  fi
  if [ "$CONSOLIDATE" = "1" ]; then
    ff="$(findings_file_for "$1")"
  fi

  PROVIDER=ollama MODEL="$1" GADFLY_BIN="/usr/local/bin/gadfly" GADFLY_REPO_DIR="$REPO_DIR" \
    GADFLY_STATUS_FILE="$sf" GADFLY_FINDINGS_OUT="$ff" GADFLY_CONSOLIDATE="$CONSOLIDATE" \
    bash "${SCRIPTS_DIR}/run.sh" || log "model $1 failed (continuing)"

  # If the binary never wrote real status (run.sh skipped it: empty diff, no
  # key, binary missing), the pre-seeded file still says {started:0,
  # done:false} and the board would show this model as waiting forever and
  # never reach N/N. Mark such a never-started file done. The binary stamps a
  # nonzero `started`, which is what separates "ran" from "skipped".
  if [ -n "$sf" ] && [ -f "$sf" ] && [ "$(jq -r '.started // 0' "$sf" 2>/dev/null)" = "0" ]; then
    # Render first, write second: a jq failure must not truncate the file.
    tmp="$(jq '.done = true' "$sf" 2>/dev/null)" && printf '%s' "$tmp" > "$sf"
  fi
  return 0
}

# normalize_models LIST — fill MODEL_LIST from a model list string.
#
# Entries are separated by commas and/or newlines; all whitespace inside an
# entry is removed and blank entries are dropped. Reading the whole value
# (instead of a single line) matters for multi-line settings: a plain `read`
# stops at the first newline and would silently drop every model after it.
#
# Globals:   MODEL_LIST (written)
# Arguments: $1 - raw list, e.g. "a, b/c:d"
normalize_models() {
  local -a parts=()
  local part
  MODEL_LIST=()
  # -d '' reads to end of input; read then reports EOF, hence the `|| true`.
  IFS=$',\n' read -r -d '' -a parts <<<"$1" || true
  for part in ${parts[@]+"${parts[@]}"}; do
    part="${part//[[:space:]]/}"
    if [ -n "$part" ]; then
      MODEL_LIST+=("$part")
    fi
  done
  return 0
}

# Normalize the configured model list (trim, drop blanks) into MODEL_LIST.
MODEL_LIST=()
normalize_models "$MODELS"

# --- cross-model consolidation decision ------------------------------------
# With two or more models, ONE consensus comment (findings clustered and ranked
# by cross-model agreement) replaces N per-model walls of prose. Each model
# writes its findings into FINDINGS_DIR and a final pass (the binary run with
# GADFLY_CONSOLIDATE_DIR set) renders the consensus comment.
# GADFLY_CONSOLIDATE selects the mode: "1" forces it on, "0" forces it off
# (per-model comments are kept), anything else — including the default "auto" —
# turns it on only when at least two models are configured.
FINDINGS_DIR="${WORKDIR}/findings"
CONSOLIDATE=0
if [ "${GADFLY_CONSOLIDATE:-auto}" = "1" ]; then
  CONSOLIDATE=1
elif [ "${GADFLY_CONSOLIDATE:-auto}" != "0" ] && [ "${#MODEL_LIST[@]}" -ge 2 ]; then
  CONSOLIDATE=1
fi

# findings_file_for MODEL — print the findings file path for a model spec.
#
# Specs may contain '/' and ':' (e.g. claude-code/opus, qwen3:14b), so every
# character outside [A-Za-z0-9._-] is flattened to '_'. Two specs can flatten
# to the same text (foo:bar vs foo/bar -> foo_bar); a checksum of the raw spec
# is appended so they never share a file and silently drop a model from the
# consensus. The newline fed to tr is flattened too, which is why the name
# carries a '_' right before the checksum.
#
# Globals:   FINDINGS_DIR (read)
# Arguments: $1 - model spec
# Outputs:   "<FINDINGS_DIR>/<flattened spec>_-<cksum>.json" on stdout
findings_file_for() {
  local spec="$1" flat digest
  flat="$(tr -c '[:alnum:]._-' '_' <<<"$spec")"
  digest="$(printf '%s' "$spec" | cksum)"
  digest="${digest%% *}" # cksum prints "<crc> <length>"; keep the crc
  echo "${FINDINGS_DIR}/${flat}-${digest}.json"
}
# Consolidation on: start from an empty findings directory so files left by a
# previous run cannot be picked up by the consensus pass.
if [[ "$CONSOLIDATE" == "1" ]]; then
  rm -rf "$FINDINGS_DIR"
  mkdir -p "$FINDINGS_DIR"
  log "consolidation ON: ${#MODEL_LIST[@]} models -> one consensus comment"
fi

# Build PROVIDERS: the distinct providers of MODEL_LIST as a space-separated
# string in first-seen order (a plain string rather than an associative array,
# for portability). Membership is tested on the space-padded string so one
# provider name cannot match as a substring of another.
PROVIDERS=""
for m in "${MODEL_LIST[@]}"; do
  p="$(provider_of "$m")"
  if [[ " $PROVIDERS " != *" $p "* ]]; then
    PROVIDERS+="${PROVIDERS:+ }$p"
  fi
done

# run_lane PROVIDER — review every model of one provider, at most `cap` at once.
#
# cap comes from provider_cap; anything that is not a number >= 1 becomes 1.
# Each model is started as a background review_one job. Once `cap` jobs are in
# flight the lane waits for one of them to finish (`wait -n`, bash >= 4.3)
# before starting the next. On older bash it waits for all of them and resets
# the counter. Job exit statuses are ignored: a failed review must neither
# stall nor serialize the rest of the lane.
#
# Globals:   MODEL_LIST (read)
# Arguments: $1 - provider name
# Returns:   after every job of the lane has finished
run_lane() {
  local p="$1" cap inflight=0 m have_wait_n=0
  local -a mine=()

  cap="$(provider_cap "$p")"
  [ "$cap" -ge 1 ] 2>/dev/null || cap=1

  # Decide once whether `wait -n` exists instead of inferring it from its exit
  # status, which is also non-zero whenever the reaped job itself failed.
  if [ "${BASH_VERSINFO[0]}" -gt 4 ] \
    || { [ "${BASH_VERSINFO[0]}" -eq 4 ] && [ "${BASH_VERSINFO[1]}" -ge 3 ]; }; then
    have_wait_n=1
  fi

  for m in ${MODEL_LIST[@]+"${MODEL_LIST[@]}"}; do
    if [ "$(provider_of "$m")" = "$p" ]; then
      mine+=("$m")
    fi
  done
  log "lane ${p}: cap ${cap}; models: ${mine[*]-}"

  for m in ${mine[@]+"${mine[@]}"}; do
    review_one "$m" &
    inflight=$((inflight + 1))
    if [ "$inflight" -ge "$cap" ]; then
      if [ "$have_wait_n" = "1" ]; then
        wait -n || true # exactly one job reaped, whatever its status
        inflight=$((inflight - 1))
      else
        wait # no wait -n: drain the whole batch
        inflight=0
      fi
    fi
  done
  wait
}

# --- live status board (optional, default on) ------------------------------
# Each model process publishes per-lens progress into its own JSON file under
# STATUS_DIR, and a background renderer (status-board.sh) upserts ONE
# consolidated PR comment from them, so progress across all models/lenses is
# visible at a glance and a watcher can tell when the whole swarm has finished.
# Advisory and best-effort; the per-model findings still land in each model's
# own comment. Set GADFLY_STATUS_BOARD=0 to turn the board off.
BOARD_PID=""
if [ "${GADFLY_STATUS_BOARD:-1}" != "0" ]; then
  rm -rf "$STATUS_DIR"
  mkdir -p "$STATUS_DIR"

  # Seed a "queued" record for every model so the board shows the full swarm
  # from t=0, including models still waiting on their lane's concurrency cap.
  # Each binary overwrites its own file with real per-lens detail once it
  # starts. Seeding failures are ignored.
  for m in "${MODEL_LIST[@]}"; do
    jq -n --arg model "$m" --arg provider "$(provider_of "$m")" \
      '{model:$model, provider:$provider, started:0, updated:0, done:false, lenses:[]}' \
      > "$(status_file_for "$m")" 2>/dev/null || true
  done

  # The renderer runs in the background; its PID is kept so it can be waited
  # on after the reviews finish.
  GITEA_API="$GITEA_API" GITEA_TOKEN="$GITEA_TOKEN" PR="$PR" GADFLY_STATUS_DIR="$STATUS_DIR" \
    bash "${SCRIPTS_DIR}/status-board.sh" &
  BOARD_PID=$!
  log "status board started (pid ${BOARD_PID})"
fi

log "providers: ${PROVIDERS:-none}"

# Start one background lane per provider; the per-provider cap is enforced
# inside run_lane. The lane PIDs are collected so the wait below covers ONLY
# the review work and not the status board, which keeps running until it is
# signalled afterwards.
LANE_PIDS=()
for p in $PROVIDERS; do # PROVIDERS is a space-separated list; split on purpose
  run_lane "$p" &
  LANE_PIDS+=("$!")
done
if [ "${#LANE_PIDS[@]}" -gt 0 ]; then
  wait "${LANE_PIDS[@]}"
fi

# All lanes have finished. If a board was started, drop the ".done" sentinel
# that tells it to render the final state once and exit, then wait for it.
# Both steps are best-effort.
[ -z "$BOARD_PID" ] || {
  touch "${STATUS_DIR}/.done" 2>/dev/null || true
  wait "$BOARD_PID" 2>/dev/null || true
}

# --- cross-model consensus comment -----------------------------------------
# Render ONE consensus comment from the per-model findings the swarm wrote.
# Advisory and best-effort: if the consolidation pass prints nothing, fall back
# to posting each model's review as its own comment (the per-model comments
# were suppressed during the run), so a consolidation hiccup never loses
# output. Posting failures are logged instead of being reported as success.
if [ "$CONSOLIDATE" = "1" ]; then
  # Count findings files with a glob rather than by parsing `ls` output.
  n_files=0
  for f in "${FINDINGS_DIR}"/*.json; do
    [ -f "$f" ] && n_files=$((n_files + 1))
  done
  log "consolidating findings from ${n_files} model(s)"

  # The binary prints the consensus markdown on stdout; its stderr is kept in
  # consolidate.err for the fallback log below.
  CONSENSUS="$(GADFLY_CONSOLIDATE_DIR="$FINDINGS_DIR" /usr/local/bin/gadfly 2>"${WORKDIR}/consolidate.err" || true)"

  if [ -n "$CONSENSUS" ]; then
    BODY="$(printf '%s\n\n<sub>Automated adversarial review by Gadfly — consensus across the model swarm. Advisory only — does not block merge.</sub>' "$CONSENSUS")"
    if upsert_comment_body "<!-- gadfly-consensus -->" "$BODY"; then
      log "consensus comment posted"
    else
      log "failed to post consensus comment"
    fi
  else
    log "consolidation produced no output; falling back to per-model comments"
    err_tail="$(tail -c 500 "${WORKDIR}/consolidate.err" 2>/dev/null)"
    [ -n "$err_tail" ] && log "$err_tail"
    for f in "${FINDINGS_DIR}"/*.json; do
      [ -f "$f" ] || continue
      m="$(jq -r '.model // ""' "$f" 2>/dev/null)"
      [ -z "$m" ] && continue
      prov="$(jq -r '.provider // ""' "$f" 2>/dev/null)"
      md="$(jq -r '.markdown // ""' "$f" 2>/dev/null)"
      # Per-model marker: the comment is found by this prefix on later runs.
      marker="<!-- gadfly-review:ollama:${m} -->"
      body="$(printf '%s\n### 🪰 Gadfly review — `%s` (%s)\n\n%s\n\n<sub>Automated adversarial review by Gadfly. Advisory only — does not block merge.</sub>' \
        "$marker" "$m" "$prov" "$md")"
      upsert_comment_body "$marker" "$body" \
        || log "failed to post fallback comment for model ${m}"
    done
  fi
fi

log "done"