gadfly/entrypoint.sh

#!/usr/bin/env bash
# Gadfly container entrypoint.
#
# This is the brains that used to live in the Gitea Actions workflow YAML. A
# consuming repo only commits a ~15-line stub workflow that runs this image and
# passes the event context as env; ALL the gating, cloning, model-looping and
# comment I/O happens here, so the stub stays dumb (act_runner has weak YAML
# expression support — keep logic in the image, not the workflow).
#
# What it does:
#   1. Decides whether this event should trigger a review (draft skip, comment
#      trigger phrase + allowed-user gate, PR detection). Non-triggers exit 0.
#   2. Acknowledges a comment trigger with a 👀 reaction.
#   3. Shallow-clones the PR's head branch (the agentic reviewer reads the
#      checked-out tree to VERIFY findings, not just the diff).
#   4. Runs the gadfly reviewer once per configured model via run.sh, which
#      upserts one labeled PR comment per model.
#
# Advisory only: it never blocks a merge. Config/usage errors exit non-zero;
# everything review-related is posted as a comment, never a failed check.
#
# Env (set by the consumer's stub workflow from the github.* context):
#   GITEA_API             https://HOST/api/v1/repos/OWNER/REPO            (required)
#   GITEA_TOKEN           built-in Actions token (posts comments)         (required)
#   OLLAMA_CLOUD_API_KEY  Ollama Cloud key; empty => "not configured" notice
#   EVENT_NAME            pull_request | issue_comment | workflow_dispatch (required)
#   PR                    pull request number                             (required)
#   PR_BRANCH             head branch (github.head_ref); empty => fetched from API
#   IS_DRAFT              'true' on a draft PR => skipped
#   COMMENT_BODY          comment text (issue_comment only)
#   COMMENT_ID            comment id, for the 👀 reaction (issue_comment only)
#   ACTOR                 github.actor (the user who triggered)
# Optional config:
#   GADFLY_MODELS         comma-separated model ids/specs (alias: OLLAMA_REVIEW_MODELS)
#   GADFLY_PROVIDER       majordomo provider for bare model ids (default ollama-cloud;
#                         e.g. "ollama" local, "openai", "anthropic", "google")
#   GADFLY_BASE_URL       override backend endpoint (OpenAI/Ollama-compatible servers)
#   GADFLY_API_KEY        provider key (else provider's standard env: OPENAI_API_KEY, …)
#   CLAUDE_CODE_OAUTH_TOKEN  auth for the claude-code engine (GADFLY_MODELS entry
#                         "claude-code"/"claude-code/<model>"); Pro/Max subscription
#                         token from `claude setup-token`. Else ANTHROPIC_API_KEY.
#   GADFLY_TRIGGER_PHRASE comment phrase that triggers a re-review (default "@gadfly review")
#   GADFLY_ALLOWED_USERS  comma-separated usernames allowed to comment-trigger;
#                         empty => fall back to "is a repo collaborator"
#   GADFLY_FINDINGS_URL   optional gadfly-reports store base URL; set to POST the run +
#                         findings for model-quality tracking (off when empty)
#   GADFLY_FINDINGS_TOKEN optional bearer token for the gadfly-reports store
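#   GADFLY_CONSOLIDATE    cross-model consensus comment: "auto" (default; on for >=2
#                         models), "1" force on, "0" force off (one comment per model)
#   GADFLY_CONCURRENCY    max models run at once within one provider lane (default 1)
#   GADFLY_PROVIDER_CONCURRENCY  per-provider overrides of the above, "provider=N,..."
#   GADFLY_STATUS_BOARD   "0" disables the live status-board comment (default: on)
#   WORKDIR               scratch directory for the clone, status and findings files
#                         (default /tmp/gadfly)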
# Treat unset variables as errors and make a pipeline fail when any stage
# fails. errexit (-e) is not enabled.
set -u
set -o pipefail

# Single-model default. Breadth already comes from the specialist suite, so a
# multi-model default would multiply cost (models × specialists × 2 passes).
DEFAULT_MODELS="qwen3-coder:480b-cloud"

# Comment phrase that requests a re-review; GADFLY_TRIGGER_PHRASE overrides it.
if [ -n "${GADFLY_TRIGGER_PHRASE:-}" ]; then
  TRIGGER_PHRASE="$GADFLY_TRIGGER_PHRASE"
else
  TRIGGER_PHRASE="@gadfly review"
fi

# Directory holding the helper scripts invoked below (run.sh, status-board.sh).
SCRIPTS_DIR="/app/scripts"

# Scratch root for the clone, status and findings files; a non-empty WORKDIR
# from the environment wins.
: "${WORKDIR:=/tmp/gadfly}"

# log MESSAGE... — write one "[gadfly] "-prefixed line to stderr.
log() {
  printf '%s\n' "[gadfly] $*" >&2
}

# die MESSAGE... — log the message as an error, then exit the shell with 1.
die() {
  log "ERROR: $*"
  exit 1
}

# Required inputs. Each ":?" expansion makes the shell print the given message
# and abort the script when the variable is unset or empty; ":" itself is a
# no-op that only exists to evaluate the expansion.
: "${GITEA_API:?GITEA_API required}"
: "${GITEA_TOKEN:?GITEA_TOKEN required}"
: "${PR:?PR required}"
: "${EVENT_NAME:?EVENT_NAME required}"

# API [CURL_ARGS...] — curl with the Gitea token header and the standard
# options: fail on HTTP errors (-f), quiet but show errors (-sS), 20 s connect
# timeout, 30 s overall timeout. Caller arguments are appended after them.
API() {
  local -a base=(-fsS --connect-timeout 20 --max-time 30)
  curl "${base[@]}" -H "Authorization: token ${GITEA_TOKEN}" "$@"
}

# upsert_comment_body MARKER BODY — create or update a single PR comment,
# identified by its leading MARKER. Mirrors run.sh's per-model upsert; used for
# the consensus comment and the per-model fallback when consolidation is on.
#
# Arguments: $1 - marker the existing comment's body must start with
#            $2 - full comment body to store
# Globals:   GITEA_API, PR (read); uses the API and log helpers
# Returns:   0 when the comment was written; 1 on an empty marker or when the
#            PATCH/POST request fails (the failure is also logged)
upsert_comment_body() {
  local marker="$1" body="$2"
  local payload existing_id="" page=1 cmts method url

  # An empty marker would "match" every comment (startswith("") is always
  # true) and overwrite the first comment on the PR — refuse it.
  if [ -z "$marker" ]; then
    log "upsert_comment_body: empty marker; refusing to touch comments"
    return 1
  fi

  payload="$(jq -n --arg b "$body" '{body:$b}')"

  # Look for an existing comment carrying the marker: at most 10 pages of 50.
  # A failed listing is treated as an empty page, which ends the search.
  while [ "$page" -le 10 ]; do
    cmts="$(API "${GITEA_API}/issues/${PR}/comments?limit=50&page=${page}" || echo '[]')"
    [ "$(printf '%s' "$cmts" | jq 'length')" = "0" ] && break
    existing_id="$(printf '%s' "$cmts" | jq -r --arg m "$marker" \
      '.[] | select(.body != null and (.body | startswith($m))) | .id' | head -n1)"
    [ -n "$existing_id" ] && break
    page=$((page+1))
  done

  if [ -n "$existing_id" ]; then
    method="PATCH"; url="${GITEA_API}/issues/comments/${existing_id}"
  else
    method="POST"; url="${GITEA_API}/issues/${PR}/comments"
  fi

  # Go through API so the write gets the same -f/timeouts/auth as the reads; an
  # HTTP error (e.g. 403) is now reported instead of passing silently.
  if ! API -X "$method" -H "Content-Type: application/json" "$url" -d "$payload" >/dev/null; then
    log "failed to ${method} comment (marker ${marker})"
    return 1
  fi
}

# --- is the commenter allowed to trigger a re-review? ----------------------
# actor_allowed ACTOR — return 0 when ACTOR may comment-trigger a review.
#   * An empty ACTOR is always rejected.
#   * With GADFLY_ALLOWED_USERS set, ACTOR must equal one of its comma-separated
#     entries (whitespace inside an entry is ignored); no API call is made.
#   * Otherwise ACTOR is accepted only when the repo's collaborators endpoint
#     answers HTTP 204.
actor_allowed() {
  local actor="$1"
  [ -z "$actor" ] && return 1
  if [ -n "${GADFLY_ALLOWED_USERS:-}" ]; then
    local entry
    local -a allowed=()
    # Split into an array instead of word-splitting an unquoted expansion: the
    # latter also glob-expands each entry, so a "*" in the list would turn into
    # the names of files in the current directory. -d '' reads the whole value
    # (newlines included); read then reports EOF, hence the "|| true".
    IFS=',' read -r -d '' -a allowed <<< "$GADFLY_ALLOWED_USERS" || true
    for entry in ${allowed[@]+"${allowed[@]}"}; do
      [ "${entry//[[:space:]]/}" = "$actor" ] && return 0
    done
    return 1
  fi
  # No explicit allow-list: allow anyone with collaborator (write) access.
  local code
  code="$(curl -s -o /dev/null -w '%{http_code}' --connect-timeout 20 --max-time 30 \
    -H "Authorization: token ${GITEA_TOKEN}" "${GITEA_API}/collaborators/${actor}")"
  [ "$code" = "204" ]
}

# --- trigger gating --------------------------------------------------------
# Decide from EVENT_NAME whether this run should review PR #${PR}. Every
# non-trigger path logs the reason and exits 0.
case "$EVENT_NAME" in
  workflow_dispatch)
    log "manual dispatch for PR #${PR}" ;;
  pull_request)
    if [ "${IS_DRAFT:-false}" = "true" ]; then
      log "PR #${PR} is a draft; skipping"; exit 0
    fi
    log "new/updated PR #${PR}" ;;
  issue_comment)
    # The phrase is quoted inside the pattern, so it is matched literally
    # anywhere in the comment body.
    case "${COMMENT_BODY:-}" in
      *"$TRIGGER_PHRASE"*) : ;;
      *) log "comment does not contain trigger phrase ${TRIGGER_PHRASE}; skipping"; exit 0 ;;
    esac
    if ! actor_allowed "${ACTOR:-}"; then
      log "actor '${ACTOR:-}' not allowed to trigger; skipping"; exit 0
    fi
    # Must be a comment on a PR, not a plain issue. Any failure of this lookup
    # (API uses curl -f) is treated as "not a pull request".
    if ! API "${GITEA_API}/pulls/${PR}" >/dev/null 2>&1; then
      log "issue #${PR} is not a pull request; skipping"; exit 0
    fi
    # Acknowledge with 👀. Best-effort, but bounded by the same timeouts as
    # every other request so a stalled server cannot hang the job here.
    if [ -n "${COMMENT_ID:-}" ]; then
      curl -s --connect-timeout 20 --max-time 30 -X POST \
        -H "Authorization: token ${GITEA_TOKEN}" -H "Content-Type: application/json" \
        "${GITEA_API}/issues/comments/${COMMENT_ID}/reactions" -d '{"content":"eyes"}' >/dev/null 2>&1 || true
    fi
    log "comment-triggered review for PR #${PR} by ${ACTOR:-?}" ;;
  *)
    log "event '${EVENT_NAME}' not handled; skipping"; exit 0 ;;
esac

# --- resolve head branch ---------------------------------------------------
# Use the branch handed in by the workflow (PR_BRANCH) when it is non-empty;
# otherwise read .head.ref from the pull-request API object. Abort when neither
# yields a branch name.
if [ -n "${PR_BRANCH:-}" ]; then
  BRANCH="$PR_BRANCH"
else
  BRANCH="$(API "${GITEA_API}/pulls/${PR}" | jq -r '.head.ref // ""')"
fi
if [ -z "$BRANCH" ]; then
  die "could not determine PR #${PR} head branch"
fi

# --- clone the PR's checked-out tree (shallow) -----------------------------
# Derive the clone URL from GITEA_API (<base>/api/v1/repos/<owner>/<repo>) and
# shallow-clone BRANCH into ${WORKDIR}/repo, replacing any previous checkout.
HOST="${GITEA_API%%/api/v1/*}"            # scheme://host[:port][/prefix]
REPO_PATH="${GITEA_API##*/api/v1/repos/}" # owner/repo
# Keep the API's scheme: only stripping "https://" produced a malformed
# "https://token:…@http://host/…" URL for plain-http Gitea instances.
case "$HOST" in
  http://*) CLONE_SCHEME="http" ;;
  *)        CLONE_SCHEME="https" ;;
esac
# The token is embedded in the URL as the password for user "token".
# NOTE(review): git records this URL as the origin remote inside the clone, so
# the token is readable from the checked-out tree — confirm that is acceptable.
CLONE_URL="${CLONE_SCHEME}://token:${GITEA_TOKEN}@${HOST#*://}/${REPO_PATH}.git"
REPO_DIR="${WORKDIR}/repo"
# ":?" aborts instead of running rm -rf on an empty path.
rm -rf "${REPO_DIR:?}"; mkdir -p "$WORKDIR"
log "cloning ${REPO_PATH} @ ${BRANCH}"
# git's stderr is discarded; a failure is reported only through die.
git clone --depth=1 --branch "$BRANCH" "$CLONE_URL" "$REPO_DIR" 2>/dev/null \
  || die "clone of ${REPO_PATH}@${BRANCH} failed"

# --- findings telemetry (optional) -----------------------------------------
# Export the run context (repo path, PR number) plus the two gadfly-reports
# settings so child processes inherit them. GADFLY_FINDINGS_URL and
# GADFLY_FINDINGS_TOKEN come from the consumer's stub env; when unset they are
# exported as empty strings (empty => disabled).
GADFLY_REPO="$REPO_PATH"
GADFLY_PR="$PR"
GADFLY_FINDINGS_URL="${GADFLY_FINDINGS_URL:-}"
GADFLY_FINDINGS_TOKEN="${GADFLY_FINDINGS_TOKEN:-}"
export GADFLY_REPO GADFLY_PR GADFLY_FINDINGS_URL GADFLY_FINDINGS_TOKEN

# --- review once per model, with per-provider concurrency -------------------
# Model list precedence: GADFLY_MODELS (provider-agnostic name), then the
# back-compat alias OLLAMA_REVIEW_MODELS, then DEFAULT_MODELS. GADFLY_PROVIDER /
# GADFLY_BASE_URL / GADFLY_API_KEY and any provider key envs (OPENAI_API_KEY, …)
# are inherited by run.sh and the binary.
#
# Concurrency: every PROVIDER is its own lane and lanes run in PARALLEL, so a
# fast cloud provider isn't stuck behind a slow local box. Inside a lane at most
# `cap` models run at once, where cap is the "provider=N" entry of
# GADFLY_PROVIDER_CONCURRENCY if present, else GADFLY_CONCURRENCY (default 1).
# A model's provider is the spec's first path segment
# ("m1pro/qwen3.6:35b-mlx" -> m1pro), or GADFLY_PROVIDER / ollama-cloud for a
# bare id. With the default cap of 1 a single-provider pool stays fully
# sequential.
if [ -n "${GADFLY_MODELS:-}" ]; then
  MODELS="$GADFLY_MODELS"
elif [ -n "${OLLAMA_REVIEW_MODELS:-}" ]; then
  MODELS="$OLLAMA_REVIEW_MODELS"
else
  MODELS="$DEFAULT_MODELS"
fi
DEFAULT_CONC=1
if [ -n "${GADFLY_CONCURRENCY:-}" ]; then
  DEFAULT_CONC="$GADFLY_CONCURRENCY"
fi

# provider_of MODEL_SPEC — print the provider of a model spec: the text before
# the first "/" when the spec contains one, otherwise GADFLY_PROVIDER
# (default "ollama-cloud").
provider_of() {
  if [[ "$1" == */* ]]; then
    echo "${1%%/*}"
  else
    echo "${GADFLY_PROVIDER:-ollama-cloud}"
  fi
}

# Per-model status file path for the live board. The model id can contain '/'
# and ':' (e.g. m1/qwen3:14b), so it is sanitized to a flat filename. Sanitizing
# alone is not unique — "foo:bar" and "foo/bar" both become "foo_bar" and would
# share (and overwrite) one status file — so a checksum of the raw id is
# appended, the same scheme findings_file_for uses. The JSON inside carries the
# real model/provider; the filename only has to be unique per model.
STATUS_DIR="${WORKDIR}/status"

# status_file_for MODEL — print the status file path for MODEL under STATUS_DIR.
status_file_for() {
  local safe sum
  safe="$(printf '%s' "$1" | tr -c '[:alnum:]._-' '_')"
  sum="$(printf '%s' "$1" | cksum | cut -d' ' -f1)"
  echo "${STATUS_DIR}/${safe}-${sum}.json"
}

# provider_cap PROVIDER — print the concurrency cap for PROVIDER: the value of
# its "provider=N" entry in GADFLY_PROVIDER_CONCURRENCY (comma-separated,
# whitespace ignored, first valid match wins), else DEFAULT_CONC.
# Entries without "=" or with a non-numeric value are ignored, so a malformed
# override falls back to the default instead of being returned as the cap.
provider_cap() {
  local p="$1" item k v
  local -a caps=()
  IFS=',' read -ra caps <<< "${GADFLY_PROVIDER_CONCURRENCY:-}"
  for item in ${caps[@]+"${caps[@]}"}; do
    case "$item" in
      *=*) ;;
      *) continue ;;   # no "=": not a provider=N pair
    esac
    k="${item%%=*}"; k="${k//[[:space:]]/}"
    v="${item#*=}";  v="${v//[[:space:]]/}"
    if [ "$k" = "$p" ] && [[ "$v" =~ ^[0-9]+$ ]]; then
      echo "$v"
      return
    fi
  done
  echo "$DEFAULT_CONC"
}

# review_one MODEL — run one model's review through run.sh.
# Passes the model, binary path, repo checkout, status file (empty when the
# board is disabled), findings output file (empty unless consolidation is on)
# and the consolidation flag to run.sh via its environment. A run.sh failure is
# logged and otherwise ignored so the remaining models still run.
review_one() {
  local model="$1" sf="" ff="" patched
  if [ "${GADFLY_STATUS_BOARD:-1}" != "0" ]; then
    sf="$(status_file_for "$model")"
  fi
  if [ "$CONSOLIDATE" = "1" ]; then
    ff="$(findings_file_for "$model")"
  fi
  PROVIDER=ollama MODEL="$model" GADFLY_BIN="/usr/local/bin/gadfly" GADFLY_REPO_DIR="$REPO_DIR" \
    GADFLY_STATUS_FILE="$sf" GADFLY_FINDINGS_OUT="$ff" GADFLY_CONSOLIDATE="$CONSOLIDATE" \
    bash "${SCRIPTS_DIR}/run.sh" || log "model ${model} failed (continuing)"
  # If the binary never wrote real status (run.sh skipped it: empty diff, no key,
  # binary missing), the pre-seed stays {started:0, done:false} and the board
  # would show this model "waiting to start" forever and never reach N/N. Mark
  # such a never-started file done so the board can complete. The binary stamps a
  # nonzero `started`, so that reliably distinguishes "ran" from "skipped".
  if [ -n "$sf" ] && [ -f "$sf" ] && [ "$(jq -r '.started // 0' "$sf" 2>/dev/null)" = "0" ]; then
    # `patched` is local so the scratch value does not leak into the caller.
    patched="$(jq '.done = true' "$sf" 2>/dev/null)" && printf '%s' "$patched" > "$sf"
  fi
}

# Build MODEL_LIST from the comma-separated MODELS string: whitespace is
# stripped from every entry and entries that end up empty are dropped.
MODEL_LIST=()
IFS=',' read -ra _raw <<< "$MODELS" || true
for raw in "${_raw[@]}"; do
  m="$(echo "$raw" | tr -d '[:space:]')"
  if [ -n "$m" ]; then
    MODEL_LIST+=("$m")
  fi
done

# --- cross-model consolidation decision ------------------------------------
# With >=2 models, post ONE consensus comment (findings clustered + ranked by
# cross-model agreement) instead of N per-model walls of prose. Each model
# writes its findings to FINDINGS_DIR; a final pass (the binary in
# GADFLY_CONSOLIDATE_DIR mode) renders the consensus comment.
# GADFLY_CONSOLIDATE: "1" forces it on, "0" forces it off (per-model comments);
# any other value, including the default "auto", turns it on for >=2 models.
FINDINGS_DIR="${WORKDIR}/findings"
CONSOLIDATE=0
if [ "${GADFLY_CONSOLIDATE:-auto}" = "1" ]; then
  CONSOLIDATE=1
elif [ "${GADFLY_CONSOLIDATE:-auto}" = "0" ]; then
  CONSOLIDATE=0
elif [ "${#MODEL_LIST[@]}" -ge 2 ]; then
  CONSOLIDATE=1
fi
# findings_file_for MODEL_SPEC — print the findings file path for a model.
# A spec can contain '/' and ':' (e.g. claude-code/opus, qwen3:14b), so it is
# flattened to a safe filename. A checksum of the raw spec is appended so that
# two specs flattening to the same name (foo:bar vs foo/bar -> foo_bar) get
# distinct files instead of one model silently dropping out of the consensus.
findings_file_for() {
  local flat crc
  flat="$(echo "$1" | tr -c '[:alnum:]._-' '_')"
  crc="$(printf '%s' "$1" | cksum)"
  crc="${crc%% *}"   # cksum prints "<crc> <size>"; keep the first field
  echo "${FINDINGS_DIR}/${flat}-${crc}.json"
}

# With consolidation on, start from an empty findings directory.
if [ "$CONSOLIDATE" = "1" ]; then
  rm -rf "$FINDINGS_DIR"
  mkdir -p "$FINDINGS_DIR"
  log "consolidation ON: ${#MODEL_LIST[@]} models -> one consensus comment"
fi

# Collect the distinct providers of MODEL_LIST as a space-separated string, in
# first-seen order (a plain string instead of an associative array — portable).
PROVIDERS=""
for m in "${MODEL_LIST[@]}"; do
  p="$(provider_of "$m")"
  # Space-padding both sides makes the match whole-word.
  if [[ " $PROVIDERS " != *" $p "* ]]; then
    PROVIDERS="${PROVIDERS:+$PROVIDERS }$p"
  fi
done

# run_lane PROVIDER — review every model in MODEL_LIST that belongs to PROVIDER,
# keeping at most `cap` reviews in flight, and return once all have finished.
# cap comes from provider_cap; anything that is not an integer >= 1 becomes 1.
run_lane() {
  local p="$1" cap inflight=0 m have_wait_n=0
  local -a mine=()
  cap="$(provider_cap "$p")"
  [ "$cap" -ge 1 ] 2>/dev/null || cap=1

  # `wait -n` (reap exactly one finished job) needs bash >= 4.3. Decide that up
  # front: `wait -n || wait` cannot tell "unsupported" from "the reaped job
  # exited non-zero", so a failed review used to stall the lane until EVERY
  # in-flight job had finished.
  if [ "${BASH_VERSINFO[0]}" -gt 4 ] \
    || { [ "${BASH_VERSINFO[0]}" -eq 4 ] && [ "${BASH_VERSINFO[1]}" -ge 3 ]; }; then
    have_wait_n=1
  fi

  for m in "${MODEL_LIST[@]}"; do
    [ "$(provider_of "$m")" = "$p" ] && mine+=("$m")
  done
  log "lane ${p}: cap ${cap}; models: ${mine[*]}"

  for m in ${mine[@]+"${mine[@]}"}; do
    review_one "$m" &
    inflight=$((inflight+1))
    if [ "$inflight" -ge "$cap" ]; then
      if [ "$have_wait_n" = "1" ]; then
        # The reaped job's exit status is irrelevant here; only the free slot matters.
        wait -n 2>/dev/null || true
        inflight=$((inflight-1))
      else
        # No wait -n: drain the whole batch, after which nothing is in flight.
        wait
        inflight=0
      fi
    fi
  done
  wait
}

# --- live status board (optional, default on) ------------------------------
# Every model process publishes per-lens progress to its own JSON file in
# STATUS_DIR; a background renderer (status-board.sh) upserts ONE consolidated
# PR comment so progress across all models/lenses is visible at a glance — and
# a watcher can tell when the whole swarm is finished. Advisory/best-effort; the
# per-model findings still land in each model's own comment. Set
# GADFLY_STATUS_BOARD=0 to disable. BOARD_PID stays empty when disabled.
BOARD_PID=""
if [ "${GADFLY_STATUS_BOARD:-1}" != "0" ]; then
  rm -rf "$STATUS_DIR"
  mkdir -p "$STATUS_DIR"
  # Pre-seed every model as queued so the board shows the full swarm from t=0,
  # even models still waiting on their provider lane's concurrency cap. Each
  # binary overwrites its own file with real per-lens detail once it starts.
  for m in "${MODEL_LIST[@]}"; do
    if ! jq -n \
        --arg model "$m" \
        --arg provider "$(provider_of "$m")" \
        '{model:$model, provider:$provider, started:0, updated:0, done:false, lenses:[]}' \
        > "$(status_file_for "$m")" 2>/dev/null; then
      :   # seeding is best-effort; a failed seed is ignored
    fi
  done
  # The renderer runs in the background until it is signalled after the lanes finish.
  GITEA_API="$GITEA_API" GITEA_TOKEN="$GITEA_TOKEN" PR="$PR" GADFLY_STATUS_DIR="$STATUS_DIR" \
    bash "${SCRIPTS_DIR}/status-board.sh" &
  BOARD_PID=$!
  log "status board started (pid ${BOARD_PID})"
fi

log "providers: ${PROVIDERS:-none}"
# Start one background lane per provider (the cap is enforced inside each
# lane) and remember the lane PIDs, so the wait below covers ONLY the review
# work — not the status board, which keeps running until it is signalled.
LANE_PIDS=()
for p in $PROVIDERS; do
  run_lane "$p" &
  LANE_PIDS[${#LANE_PIDS[@]}]="$!"
done
if [ "${#LANE_PIDS[@]}" -gt 0 ]; then
  wait "${LANE_PIDS[@]}"
fi

# Reviews are done. When a board was started, drop the ".done" sentinel into
# STATUS_DIR so it renders the final state once and exits, then wait for it.
# Both steps are best-effort.
[ -z "$BOARD_PID" ] || {
  touch "${STATUS_DIR}/.done" 2>/dev/null || true
  wait "$BOARD_PID" 2>/dev/null || true
}

# --- cross-model consensus comment -----------------------------------------
# Render ONE consensus comment from the per-model findings the swarm wrote. This
# is advisory and best-effort: if the consolidation pass produces nothing, fall
# back to posting each model's review as its own comment (the per-model comments
# were suppressed during the run), so a consolidation hiccup never loses output.
if [ "$CONSOLIDATE" = "1" ]; then
  # Count findings files with a glob rather than by parsing `ls` output.
  n_files=0
  for f in "${FINDINGS_DIR}"/*.json; do
    [ -f "$f" ] && n_files=$((n_files+1))
  done
  log "consolidating findings from ${n_files} model(s)"
  # The binary's stdout is the comment text; its stderr is kept for the
  # fallback log below. A non-zero exit is tolerated (empty output => fallback).
  CONSENSUS="$(GADFLY_CONSOLIDATE_DIR="$FINDINGS_DIR" /usr/local/bin/gadfly 2>"${WORKDIR}/consolidate.err" || true)"
  if [ -n "$CONSENSUS" ]; then
    BODY="$(printf '%s\n\n<sub>Automated adversarial review by Gadfly — consensus across the model swarm. Advisory only — does not block merge.</sub>' "$CONSENSUS")"
    # NOTE(review): the upsert finds the previous comment by a leading marker,
    # so this presumably relies on the binary's output starting with
    # "<!-- gadfly-consensus -->" — confirm.
    if upsert_comment_body "<!-- gadfly-consensus -->" "$BODY"; then
      log "consensus comment posted"
    else
      log "failed to post consensus comment"
    fi
  else
    log "consolidation produced no output; falling back to per-model comments"
    err_tail="$(tail -c 500 "${WORKDIR}/consolidate.err" 2>/dev/null)"
    if [ -n "$err_tail" ]; then
      log "$err_tail"
    fi
    for f in "${FINDINGS_DIR}"/*.json; do
      [ -f "$f" ] || continue   # unmatched glob stays literal; skip it
      m="$(jq -r '.model // ""' "$f" 2>/dev/null)"
      [ -z "$m" ] && continue
      prov="$(jq -r '.provider // ""' "$f" 2>/dev/null)"
      md="$(jq -r '.markdown // ""' "$f" 2>/dev/null)"
      marker="<!-- gadfly-review:ollama:${m} -->"
      body="$(printf '%s\n### 🪰 Gadfly review — `%s` (%s)\n\n%s\n\n<sub>Automated adversarial review by Gadfly. Advisory only — does not block merge.</sub>' \
        "$marker" "$m" "$prov" "$md")"
      upsert_comment_body "$marker" "$body"
    done
  fi
fi
log "done"