gadfly/entrypoint.sh

#!/usr/bin/env bash
# Gadfly container entrypoint.
#
# This is the brains that used to live in the Gitea Actions workflow YAML. A
# consuming repo only commits a ~15-line stub workflow that runs this image and
# passes the event context as env; ALL the gating, cloning, model-looping and
# comment I/O happens here, so the stub stays dumb (act_runner has weak YAML
# expression support — keep logic in the image, not the workflow).
#
# What it does:
#   1. Decides whether this event should trigger a review (draft skip, comment
#      trigger phrase + allowed-user gate, PR detection). Non-triggers exit 0.
#   2. Acknowledges a comment trigger with a 👀 reaction.
#   3. Shallow-clones the PR's head branch (the agentic reviewer reads the
#      checked-out tree to VERIFY findings, not just the diff).
#   4. Runs the gadfly reviewer once per configured model via run.sh, which
#      upserts one labeled PR comment per model.
#
# Advisory only: it never blocks a merge. Config/usage errors exit non-zero;
# everything review-related is posted as a comment, never a failed check.
#
# Env (set by the consumer's stub workflow from the github.* context):
#   GITEA_API             https://HOST/api/v1/repos/OWNER/REPO            (required)
#   GITEA_TOKEN           built-in Actions token (posts comments)         (required)
#   OLLAMA_CLOUD_API_KEY  Ollama Cloud key; empty => "not configured" notice
#   EVENT_NAME            pull_request | issue_comment | workflow_dispatch (required)
#   PR                    pull request number                             (required)
#   PR_BRANCH             head branch (github.head_ref); empty => fetched from API
#   IS_DRAFT              'true' on a draft PR => skipped
#   COMMENT_BODY          comment text (issue_comment only)
#   COMMENT_ID            comment id, for the 👀 reaction (issue_comment only)
#   ACTOR                 github.actor (the user who triggered)
# Optional config:
#   GADFLY_MODELS         comma-separated model ids/specs (alias: OLLAMA_REVIEW_MODELS)
#   GADFLY_PROVIDER       majordomo provider for bare model ids (default ollama-cloud;
#                         e.g. "ollama" local, "openai", "anthropic", "google")
#   GADFLY_BASE_URL       override backend endpoint (OpenAI/Ollama-compatible servers)
#   GADFLY_API_KEY        provider key (else provider's standard env: OPENAI_API_KEY, …)
#   GADFLY_TRIGGER_PHRASE comment phrase that triggers a re-review (default "@gadfly review")
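#   GADFLY_ALLOWED_USERS  comma-separated usernames allowed to comment-trigger;
#                         empty => fall back to "is a repo collaborator"
#   GADFLY_CONCURRENCY    max models run at once within one provider lane (default 1)
#   GADFLY_PROVIDER_CONCURRENCY
#                         per-provider overrides, "provider=N,..."; providers not
#                         listed use GADFLY_CONCURRENCY
#   WORKDIR               scratch directory the PR is cloned under (default /tmp/gadfly)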
# Shell options: referencing an unset variable is an error, and a pipeline
# fails when any of its stages fails. errexit (-e) is not enabled here.
set -o nounset -o pipefail

# Location of the helper scripts (run.sh) and the scratch directory used for
# the clone; WORKDIR may be overridden from the environment.
SCRIPTS_DIR='/app/scripts'
WORKDIR="${WORKDIR:-/tmp/gadfly}"

# Phrase a PR comment must contain to trigger a re-review.
TRIGGER_PHRASE="${GADFLY_TRIGGER_PHRASE:-@gadfly review}"

# One model by default: the specialist suite already provides breadth, so a
# multi-model default would multiply cost (models × specialists × 2 passes).
DEFAULT_MODELS='qwen3-coder:480b-cloud'

# Emit one "[gadfly]"-prefixed diagnostic line on stderr. All arguments are
# joined into a single message.
log() {
  printf '%s\n' "[gadfly] $*" >&2
}
# Log the arguments as an "ERROR: ..." message via log, then terminate the
# script with exit status 1.
die() {
  log "ERROR: $*"
  exit 1
}

# Fail fast on missing required context. `${VAR:?msg}` makes the shell print
# `msg` on stderr and exit non-zero when VAR is unset or empty; `:` is a no-op
# that exists only to force the expansion. Checked in this order, so the first
# missing variable is the one reported.
: "${GITEA_API:?GITEA_API required}"      # repo-scoped API base URL
: "${GITEA_TOKEN:?GITEA_TOKEN required}"  # token sent in the Authorization header
: "${PR:?PR required}"                    # pull request (or issue) number
: "${EVENT_NAME:?EVENT_NAME required}"    # selects the gating branch below

# Authenticated curl wrapper for the Gitea API.
# Arguments: any extra curl options plus the URL, appended after the defaults.
# Defaults: fail on HTTP errors (-f), silent but still show errors (-sS),
# 20 s connect timeout, 30 s total timeout, token Authorization header.
# Outputs: the response body on stdout; returns curl's exit status.
API() {
  local -a curl_args=(
    -fsS
    --connect-timeout 20
    --max-time 30
    -H "Authorization: token ${GITEA_TOKEN}"
  )
  curl "${curl_args[@]}" "$@"
}

# --- is the commenter allowed to trigger a re-review? ----------------------
# Decide whether a user may trigger a re-review by comment.
# Arguments: $1 - username of the commenter.
# Globals:   GADFLY_ALLOWED_USERS (read), GITEA_API (read), GITEA_TOKEN (read)
# Returns:   0 if allowed, non-zero otherwise (an empty username is never allowed).
#
# With a non-empty GADFLY_ALLOWED_USERS the decision is made purely from that
# comma-separated list (whitespace around/inside entries is ignored) and no
# network call is made. Otherwise the user is allowed only when the
# collaborators endpoint answers HTTP 204.
actor_allowed() {
  local actor="$1"
  [ -z "$actor" ] && return 1
  if [ -n "${GADFLY_ALLOWED_USERS:-}" ]; then
    # Walk the list with parameter expansion rather than an unquoted
    # `for u in $LIST`: the unquoted form is subject to pathname expansion, so
    # an entry like "*" would be replaced by file names from the current
    # directory and could match an actor it was never meant to.
    local remaining entry
    remaining="${GADFLY_ALLOWED_USERS//[[:space:]]/}"
    while [ -n "$remaining" ]; do
      entry="${remaining%%,*}"
      [ "$entry" = "$actor" ] && return 0
      case "$remaining" in
        *,*) remaining="${remaining#*,}" ;;
        *) remaining="" ;;
      esac
    done
    return 1
  fi
  # No explicit allow-list: allow anyone with collaborator (write) access.
  # Only the status code is inspected, so curl runs without -f and the body is
  # discarded.
  local code
  code="$(curl -s -o /dev/null -w '%{http_code}' --connect-timeout 20 --max-time 30 \
    -H "Authorization: token ${GITEA_TOKEN}" "${GITEA_API}/collaborators/${actor}")"
  [ "$code" = "204" ]
}

# --- trigger gating --------------------------------------------------------
# Decide, per event type, whether to go on to a review. Every "skip" path logs
# the reason and exits 0; falling out of the case means "review PR #${PR}".
case "$EVENT_NAME" in
  workflow_dispatch)
    log "manual dispatch for PR #${PR}" ;;
  pull_request)
    if [ "${IS_DRAFT:-false}" = "true" ]; then
      log "PR #${PR} is a draft; skipping"; exit 0
    fi
    log "new/updated PR #${PR}" ;;
  issue_comment)
    # The trigger phrase is quoted inside the pattern, so it is matched as a
    # literal substring of the comment body (no glob interpretation).
    case "${COMMENT_BODY:-}" in
      *"$TRIGGER_PHRASE"*) : ;;
      *) log "comment does not contain trigger phrase ${TRIGGER_PHRASE}; skipping"; exit 0 ;;
    esac
    if ! actor_allowed "${ACTOR:-}"; then
      log "actor '${ACTOR:-}' not allowed to trigger; skipping"; exit 0
    fi
    # Must be a comment on a PR, not a plain issue.
    # NOTE(review): any failure of this request (network, auth, 5xx) is also
    # reported as "not a pull request" and skipped.
    if ! API "${GITEA_API}/pulls/${PR}" >/dev/null 2>&1; then
      log "issue #${PR} is not a pull request; skipping"; exit 0
    fi
    # Acknowledge with 👀. Best-effort: output and failures are ignored. The
    # timeouts match the other API calls so an unresponsive server cannot stall
    # the review on this call.
    if [ -n "${COMMENT_ID:-}" ]; then
      curl -s --connect-timeout 20 --max-time 30 -X POST \
        -H "Authorization: token ${GITEA_TOKEN}" -H "Content-Type: application/json" \
        "${GITEA_API}/issues/comments/${COMMENT_ID}/reactions" -d '{"content":"eyes"}' >/dev/null 2>&1 || true
    fi
    log "comment-triggered review for PR #${PR} by ${ACTOR:-?}" ;;
  *)
    log "event '${EVENT_NAME}' not handled; skipping"; exit 0 ;;
esac

# --- resolve head branch ---------------------------------------------------
# Use the head branch supplied by the workflow when there is one; otherwise
# ask the API for the PR and take its .head.ref (empty string if absent).
if [ -n "${PR_BRANCH:-}" ]; then
  BRANCH="$PR_BRANCH"
else
  BRANCH="$(API "${GITEA_API}/pulls/${PR}" | jq -r '.head.ref // ""')"
fi
# Without a branch name there is nothing to clone.
if [ -z "$BRANCH" ]; then
  die "could not determine PR #${PR} head branch"
fi

# --- clone the PR's checked-out tree (shallow) -----------------------------
# Split GITEA_API (<root>/api/v1/repos/OWNER/REPO) into the server root and the
# repository path.
HOST="${GITEA_API%%/api/v1/*}"            # scheme://host
REPO_PATH="${GITEA_API##*/api/v1/repos/}" # owner/repo
REPO_PATH="${REPO_PATH%/}"                # tolerate a trailing slash on GITEA_API
# Build the clone URL with the API URL's own scheme. Always prefixing
# "https://" and stripping only "https://" produced a malformed URL
# ("https://token:…@http://host/…") for an http:// GITEA_API. A value with no
# scheme still defaults to https.
case "$HOST" in
  *://*) _clone_scheme="${HOST%%://*}"; _clone_authority="${HOST#*://}" ;;
  *)     _clone_scheme="https";         _clone_authority="$HOST" ;;
esac
CLONE_URL="${_clone_scheme}://token:${GITEA_TOKEN}@${_clone_authority}/${REPO_PATH}.git"
REPO_DIR="${WORKDIR}/repo"
# Start from a clean checkout directory on every run.
rm -rf "$REPO_DIR"; mkdir -p "$WORKDIR"
log "cloning ${REPO_PATH} @ ${BRANCH}"
# git's stderr is discarded; a failure is reported only through die below.
# NOTE(review): the token is embedded in the remote URL, so git stores it in
# ${REPO_DIR}/.git/config, inside the tree handed to the reviewer. Consider
# scrubbing it after the clone if nothing later needs an authenticated remote.
git clone --depth=1 --branch "$BRANCH" "$CLONE_URL" "$REPO_DIR" 2>/dev/null \
  || die "clone of ${REPO_PATH}@${BRANCH} failed"

# --- review once per model, with per-provider concurrency -------------------
# GADFLY_MODELS is the provider-agnostic name; OLLAMA_REVIEW_MODELS is a
# back-compat alias. GADFLY_PROVIDER / GADFLY_BASE_URL / GADFLY_API_KEY and any
# provider key envs (OPENAI_API_KEY, …) are inherited by run.sh and the binary.
#
# Concurrency: each PROVIDER is its own lane and lanes run in PARALLEL, so a fast
# cloud provider isn't stuck behind a slow local box. Within a lane, at most
# `cap` models run at once. cap = GADFLY_PROVIDER_CONCURRENCY's "provider=N"
# entry, else GADFLY_CONCURRENCY (default 1). A model's provider is the spec's
# first path segment ("m1pro/qwen3.6:35b-mlx" -> m1pro), or GADFLY_PROVIDER /
# ollama-cloud for a bare id. Default (cap 1) keeps a single-provider pool fully
# sequential, exactly as before.
# Model list precedence: GADFLY_MODELS, then the OLLAMA_REVIEW_MODELS alias,
# then the built-in default. An empty value counts as "not set" at each step.
MODELS="${GADFLY_MODELS:-}"
[ -n "$MODELS" ] || MODELS="${OLLAMA_REVIEW_MODELS:-}"
[ -n "$MODELS" ] || MODELS="$DEFAULT_MODELS"
# Per-lane concurrency used when no per-provider override applies.
DEFAULT_CONC="${GADFLY_CONCURRENCY:-1}"

# Print the provider a model spec belongs to.
# Arguments: $1 - model spec.
# Outputs:   for a spec containing "/", everything before the first "/";
#            otherwise GADFLY_PROVIDER, or "ollama-cloud" when that is
#            unset or empty.
provider_of() {
  local spec="$1"
  if [[ "$spec" == */* ]]; then
    echo "${spec%%/*}"
  else
    echo "${GADFLY_PROVIDER:-ollama-cloud}"
  fi
}

# Print the concurrency cap for a provider.
# Arguments: $1 - provider name.
# Globals:   GADFLY_PROVIDER_CONCURRENCY (read; "provider=N,..." map),
#            DEFAULT_CONC (read; fallback value).
# Outputs:   the value of the first "provider=N" entry whose key equals $1 and
#            whose value is non-empty; otherwise DEFAULT_CONC. The value is
#            printed as written; numeric validation is left to the caller.
provider_cap() { # provider -> concurrency (override map "p=N,...", else default)
  local p="$1" map item k v
  local -a caps=()
  # Strip all whitespace up front so " openai = 4" and multi-line values parse
  # the same as "openai=4".
  map="${GADFLY_PROVIDER_CONCURRENCY:-}"
  map="${map//[[:space:]]/}"
  IFS=',' read -ra caps <<< "$map"
  # The ${caps[@]+...} guard keeps an empty map from tripping `set -u` on
  # bash releases older than 4.4.
  for item in ${caps[@]+"${caps[@]}"}; do
    case "$item" in
      *=*) k="${item%%=*}"; v="${item#*=}" ;;
      # An entry without "=" carries no value; skip it rather than echoing the
      # provider name back as if it were a cap.
      *) continue ;;
    esac
    if [ "$k" = "$p" ] && [ -n "$v" ]; then echo "$v"; return; fi
  done
  echo "$DEFAULT_CONC"
}

# Run the reviewer for a single model by invoking run.sh with the model and
# checkout location passed through the environment.
# Arguments: $1 - model spec.
# Globals:   REPO_DIR, SCRIPTS_DIR (read).
# Returns:   0 in both cases; a run.sh failure is logged, not propagated, so
#            one failing model does not stop the others.
# NOTE(review): PROVIDER is pinned to "ollama" regardless of the model's
# provider prefix; how run.sh interprets it is not visible from this file.
review_one() {
  local model="$1"
  if ! PROVIDER=ollama MODEL="$model" GADFLY_BIN="/usr/local/bin/gadfly" \
      GADFLY_REPO_DIR="$REPO_DIR" bash "${SCRIPTS_DIR}/run.sh"; then
    log "model ${model} failed (continuing)"
  fi
}

# Normalize the model list (trim, drop blanks) into MODEL_LIST.
# Split MODELS on commas, delete all whitespace from each entry, and collect
# the non-empty results, in order, into MODEL_LIST.
MODEL_LIST=()
IFS=',' read -ra _model_fields <<< "$MODELS" || true
for _field in "${_model_fields[@]}"; do
  _trimmed="$(echo "$_field" | tr -d '[:space:]')"
  if [ -n "$_trimmed" ]; then
    MODEL_LIST+=("$_trimmed")
  fi
done

# Distinct providers, in first-seen order (no associative arrays — portable).
# Build PROVIDERS: a space-separated list of the distinct providers behind
# MODEL_LIST, in order of first appearance. Membership is tested by padding
# both sides with spaces so only whole names match.
PROVIDERS=""
for _model in "${MODEL_LIST[@]}"; do
  _prov="$(provider_of "$_model")"
  if [[ " ${PROVIDERS} " != *" ${_prov} "* ]]; then
    PROVIDERS="${PROVIDERS:+${PROVIDERS} }${_prov}"
  fi
done

# Review every model belonging to one provider, keeping at most `cap` reviews
# in flight at a time.
# Arguments: $1 - provider name.
# Globals:   MODEL_LIST (read).
# The cap comes from provider_cap; anything that is not an integer >= 1 is
# replaced by 1. Returns after all of the lane's reviews have finished.
run_lane() { # $1=provider: run its models, at most `cap` at a time
  local lane="$1" limit running=0 model
  limit="$(provider_cap "$lane")"
  if ! [ "$limit" -ge 1 ] 2>/dev/null; then
    limit=1
  fi
  local -a lane_models=()
  for model in "${MODEL_LIST[@]}"; do
    if [ "$(provider_of "$model")" = "$lane" ]; then
      lane_models+=("$model")
    fi
  done
  log "lane ${lane}: cap ${limit}; models: ${lane_models[*]}"
  for model in "${lane_models[@]}"; do
    review_one "$model" &
    running=$((running + 1))
    if [ "$running" -ge "$limit" ]; then
      # Free one slot: reap the next job to finish, or fall back to waiting
      # for all of them when `wait -n` fails.
      wait -n 2>/dev/null || wait
      running=$((running - 1))
    fi
  done
  # Barrier: do not return while any review is still running.
  wait
}

log "providers: ${PROVIDERS:-none}"
# Start one background lane per provider so lanes progress independently; the
# concurrency cap is applied inside each lane by run_lane.
# PROVIDERS is a space-separated list, so the unquoted expansion is what
# splits it into individual provider names.
for _lane in $PROVIDERS; do
  run_lane "$_lane" &
done
# Block until every lane has finished before reporting completion.
wait
log "done"