4237a18d09
Phase 1 of the gadfly-games build. Adds a second review engine alongside the majordomo agent loop: for each lens, shell out to the Claude Code CLI (`claude -p`) inside the checked-out repo so it verifies findings with its OWN read tools, then reuse gadfly's verdict-parse + recheck + consolidate + emit pipeline unchanged. - cmd/gadfly/engine.go: new reviewEngine interface with two impls — majordomoEngine (wraps the existing runAgent path) and claudeCodeEngine (exec `claude -p ... --output-format json`, parse `.result`). main.go's runSpecialists/reviewWithSpecialist are now engine-agnostic. - Select via a model id: `claude-code` (CLI default) or `claude-code/<model>` (suffix → --model). Auth inherits from the env: Pro/Max via CLAUDE_CODE_OAUTH_TOKEN (no --bare), else ANTHROPIC_API_KEY. Read-only by default (--permission-mode plan); tunable via GADFLY_CLAUDE_*. - auto-select + delegate worker are majordomo-only and are skipped with this engine (Claude Code does its own legwork). - Dockerfile bundles Node + @anthropic-ai/claude-code (larger image). - Docs: README "Claude Code engine" section + config rows, examples/ claude-code.yml stub, examples/README + CLAUDE.md updated. Honest note that subscription-auth-in-CI is untested here / a ToS gray area. - Bumps the dogfood image pin to :sha-c3d09d3 so gadfly's own PRs now review with the live status board from Phase 3. New engine_test.go covers spec detection, model derivation, and argv building (no live CLI call). gofmt clean, go vet quiet, go test -race green. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
211 lines
9.4 KiB
Bash
211 lines
9.4 KiB
Bash
#!/usr/bin/env bash
|
|
# Adversarial PR review runner.
|
|
#
|
|
# Fetches a PR's unified diff + metadata from Gitea, asks ONE model to review it
|
|
# adversarially, then upserts the result as a single labeled PR comment (so
|
|
# re-runs on new commits update the comment in place instead of stacking dupes).
|
|
#
|
|
# The ollama lane is AGENTIC: it runs the cmd/gadfly Go binary, which drives a
|
|
# tool-using agent (majordomo + Ollama Cloud) over the PR's checked-out repo so
|
|
# the model can read_file/grep/etc. to VERIFY findings instead of guessing from
|
|
# the diff alone. The antigravity lane stays a one-shot `agy` call (agy has its
|
|
# own file tools).
|
|
#
|
|
# Required env:
|
|
# GITEA_API e.g. https://gitea.stevedudenhoeffer.com/api/v1/repos/steve/mort
|
|
# GITEA_TOKEN token with repo write access (posts the comment)
|
|
# PR pull request index/number
|
|
# PROVIDER "ollama" | "antigravity"
|
|
# MODEL model id (e.g. qwen3-coder:480b-cloud, gemini-3-pro)
|
|
#
|
|
# Provider-specific env:
|
|
# ollama: OLLAMA_CLOUD_API_KEY, GADFLY_BIN (path to the built reviewer),
|
|
# GADFLY_REPO_DIR (checked-out repo; default: this script's repo)
|
|
# antigravity: `agy` on PATH with credentials already seeded (~/.gemini)
|
|
#
|
|
# claude-code engine: when MODEL is "claude-code" or "claude-code/<model>" the
|
|
# binary shells out to the bundled `claude` CLI instead of a majordomo model.
|
|
# Its auth (CLAUDE_CODE_OAUTH_TOKEN, else ANTHROPIC_API_KEY) and GADFLY_CLAUDE_*
|
|
# tuning are read straight from the inherited environment — same as the other
|
|
# provider keys (OPENAI_API_KEY, …) — so no extra wiring is needed here.
|
|
#
|
|
# Optional:
|
|
# MAX_DIFF_CHARS diff truncation cap for the prompt (default 60000)
|
|
# GADFLY_STATUS_FILE per-model JSON path for the live status board (set by
|
|
# entrypoint.sh; empty/unset disables status publishing)
|
|
#
|
|
# This script is advisory: it never fails the job for review content. It exits
|
|
# non-zero only on a usage/configuration error.
|
|
# Unset variables and a failure in any pipeline stage are treated as errors.
# errexit (-e) is not enabled: command failures are handled case by case.
set -uo pipefail

# Absolute directory holding this script (system-prompt.txt lives beside it).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Prompt diff cap in characters; a non-empty caller-supplied value wins.
: "${MAX_DIFF_CHARS:=60000}"

# Required configuration — the shell aborts on the first one that is unset or
# empty, printing the message after ":?".
: "${GITEA_API:?GITEA_API required}"
: "${GITEA_TOKEN:?GITEA_TOKEN required}"
: "${PR:?PR required}"
: "${PROVIDER:?PROVIDER required}"
: "${MODEL:?MODEL required}"

# Hidden HTML marker placed at the very start of this provider/model's comment;
# it is how a later run finds the comment to update instead of adding another.
MARKER="<!-- gadfly-review:${PROVIDER}:${MODEL} -->"
|
|
say() {
  # Log one diagnostic line to stderr, prefixed with this run's provider and
  # model. All arguments are joined with spaces into a single message.
  printf '%s\n' "[gadfly-review:${PROVIDER}:${MODEL}] $*" >&2
}
|
|
|
|
# Display the model's ACTUAL backend: the provider segment of the spec
# ("m1pro/qwen3.6:35b-mlx" -> "m1pro"); a bare id uses GADFLY_PROVIDER (default
# ollama-cloud). This is what the comment header shows, not the run.sh lane.
#
# model_provider_for MODEL_SPEC — print the backend label for a model spec.
#   claude-code          -> claude-code (bare engine id; it is NOT served by
#                           GADFLY_PROVIDER, so it must not inherit that label
#                           or the ollama-cloud API-key requirement)
#   <provider>/<model>   -> <provider>  (text before the first "/")
#   <model>              -> $GADFLY_PROVIDER, or ollama-cloud when unset/empty
model_provider_for() {
  case "$1" in
    claude-code) echo "claude-code" ;;
    */*) echo "${1%%/*}" ;;
    *) echo "${GADFLY_PROVIDER:-ollama-cloud}" ;;
  esac
}

MODEL_PROVIDER="$(model_provider_for "${MODEL:-}")"
|
|
|
|
# jq is required for payload building / response parsing; install if missing.
# The install is attempted with apt-get directly, then again through sudo; if
# both attempts fail the script exits 1.
if ! command -v jq >/dev/null 2>&1; then
  say "jq not found; attempting install"
  if { apt-get update -qq && apt-get install -y -qq jq; } >/dev/null 2>&1; then
    : # direct install succeeded
  elif { sudo apt-get update -qq && sudo apt-get install -y -qq jq; } >/dev/null 2>&1; then
    : # sudo install succeeded
  else
    say "could not install jq"
    exit 1
  fi
fi

# curl timeouts: Gitea API calls are quick. Word-split on purpose so the flags
# expand as separate args — always use this variable UNQUOTED. (The LLM call's
# own deadline lives in the reviewer binary / agy, not here.)
API_TIMEOUT="--connect-timeout 20 --max-time 30"
|
|
|
|
# upsert_comment BODY — create or update (by MARKER) this model's single comment.
#
# Globals (read): MARKER, GITEA_API, GITEA_TOKEN, PR, API_TIMEOUT
# Arguments:      $1 - full comment body. It must begin with MARKER, because an
#                      existing comment is recognised by that prefix.
# Returns:        0 when the comment was written; 1 when the body could not be
#                 encoded or the write request failed (the failure is logged).
#
# Lookup scans at most 10 pages of 50 comments; if no match is found in that
# window a new comment is created.
upsert_comment() {
  local body="$1" post_body existing_id="" page=1 cmts method url

  if ! post_body="$(jq -n --arg b "$body" '{body:$b}')"; then
    say "could not JSON-encode the comment body"
    return 1
  fi

  while [ "$page" -le 10 ]; do
    # $API_TIMEOUT is intentionally unquoted (it holds several flags).
    if ! cmts="$(curl $API_TIMEOUT -fsS -H "Authorization: token ${GITEA_TOKEN}" \
      "${GITEA_API}/issues/${PR}/comments?limit=50&page=${page}")"; then
      # Best effort: keep going as if the page were empty, but say so — a
      # failed lookup is what makes a duplicate comment possible.
      say "could not list PR comments (page ${page}); treating as none found"
      cmts='[]'
    fi
    # An empty page means we have walked past the last comment.
    [ "$(printf '%s\n' "$cmts" | jq 'length')" = "0" ] && break
    existing_id="$(printf '%s\n' "$cmts" | jq -r --arg m "$MARKER" \
      '.[] | select(.body != null and (.body | startswith($m))) | .id' | head -n1)"
    [ -n "$existing_id" ] && break
    page=$((page + 1))
  done

  if [ -n "$existing_id" ]; then
    method="PATCH"
    url="${GITEA_API}/issues/comments/${existing_id}"
  else
    method="POST"
    url="${GITEA_API}/issues/${PR}/comments"
  fi

  # -f turns HTTP 4xx/5xx into a non-zero exit so a rejected write is noticed
  # instead of being silently discarded.
  if ! curl $API_TIMEOUT -fsS -X "$method" \
    -H "Authorization: token ${GITEA_TOKEN}" -H "Content-Type: application/json" \
    "$url" -d "$post_body" >/dev/null; then
    say "failed to ${method} review comment"
    return 1
  fi
}
|
|
|
|
# fmt_duration SECONDS -> "1m 23s" / "45s"
|
|
fmt_duration() {
  # $1 is a whole number of seconds. Below one minute it prints "<n>s";
  # otherwise "<m>m <s>s" (minutes are not rolled up into hours).
  local secs="$1"
  [ "$secs" -ge 60 ] || { echo "${secs}s"; return; }
  echo "$((secs / 60))m $((secs % 60))s"
}
|
|
|
|
# --- fetch PR context -------------------------------------------------------
say "fetching PR #${PR} context"

# Both requests are best-effort: a failed diff fetch yields an empty DIFF and a
# failed metadata fetch yields '{}' (so title and description come out empty).
DIFF="$(curl $API_TIMEOUT -fsS -H "Authorization: token ${GITEA_TOKEN}" \
  "${GITEA_API}/pulls/${PR}.diff" || true)"
META="$(curl $API_TIMEOUT -fsS -H "Authorization: token ${GITEA_TOKEN}" \
  "${GITEA_API}/pulls/${PR}" || echo '{}')"
TITLE="$(jq -r '.title // ""' <<<"$META")"
BODY="$(jq -r '.body // ""' <<<"$META")"

# Without a diff there is nothing to review; this is not an error.
[ -n "$DIFF" ] || {
  say "empty diff; nothing to review"
  exit 0
}

# Keep the FULL diff for the agentic (ollama) reviewer — it can pull the whole
# thing via the get_diff tool and embeds a truncated copy in the prompt itself.
# The truncated copy below is only for the one-shot antigravity prompt.
FULL_DIFF="$DIFF"
TRUNC_NOTE=""
if [ "${#DIFF}" -gt "$MAX_DIFF_CHARS" ]; then
  DIFF="${DIFF:0:$MAX_DIFF_CHARS}"
  TRUNC_NOTE=$'\n\n[NOTE: diff truncated to '"${MAX_DIFF_CHARS}"' chars for length; review the rest manually.]'
fi

# SYS is the system prompt shipped next to this script; USR is the per-PR user
# prompt (title, description, fenced diff, optional truncation note).
SYS="$(cat "${SCRIPT_DIR}/system-prompt.txt")"
USR="$(printf 'PR #%s: %s\n\nDescription:\n%s\n\nUnified diff to review:\n```diff\n%s\n```%s' \
  "$PR" "$TITLE" "$BODY" "$DIFF" "$TRUNC_NOTE")"
|
|
|
|
# --- announce start (placeholder comment) -----------------------------------
# START_TS is the epoch second the review began; the final comment reports the
# elapsed time measured from it.
START_TS="$(date +%s)"
say "starting review with ${MODEL}"

# Post (or reset) this model's comment to a "Reviewing…" placeholder. It starts
# with MARKER so the final upsert replaces it in place.
upsert_comment "$(
  printf '%s\n### 🪰 Gadfly review — `%s` (%s)\n\n⏳ Reviewing… this comment will update with findings and run time.' \
    "$MARKER" "$MODEL" "$MODEL_PROVIDER"
)"
|
|
|
|
# --- call the model ---------------------------------------------------------
# Each lane leaves its outcome in REVIEW: either the review text or a "⚠️ …"
# note explaining why there is none. Only an unknown PROVIDER aborts the script.
REVIEW=""
case "$PROVIDER" in
  ollama)
    # Agentic lane: hand off to the cmd/gadfly binary, which runs a tool-using
    # agent over the checked-out repo so it can verify findings instead of
    # guessing from the diff. The reviewer is majordomo-powered, so GADFLY_PROVIDER
    # selects the backend (default ollama-cloud); local Ollama, OpenAI, Anthropic,
    # Google and OpenAI/Ollama-compatible endpoints all work — see the README.

    # Back-compat: map the consumer's OLLAMA_CLOUD_API_KEY secret onto the
    # OLLAMA_API_KEY env the ollama-cloud provider reads.
    if [ -n "${OLLAMA_CLOUD_API_KEY:-}" ] && [ -z "${OLLAMA_API_KEY:-}" ]; then
      export OLLAMA_API_KEY="$OLLAMA_CLOUD_API_KEY"
    fi
    GADFLY_PROVIDER_EFF="$MODEL_PROVIDER"
    BIN="${GADFLY_BIN:-gadfly}"

    # Only the default cloud provider strictly needs a key up front; local Ollama
    # and other providers either need none or read their own standard env var.
    if [ "$GADFLY_PROVIDER_EFF" = "ollama-cloud" ] && [ -z "${OLLAMA_API_KEY:-}" ] && [ -z "${GADFLY_API_KEY:-}" ]; then
      REVIEW="⚠️ No Ollama Cloud key configured (set \`OLLAMA_CLOUD_API_KEY\`) and \`GADFLY_PROVIDER\` is the default \`ollama-cloud\`; this reviewer was skipped."
    # BIN may be a command name on PATH or a path to an executable file.
    elif ! command -v "$BIN" >/dev/null 2>&1 && [ ! -x "$BIN" ]; then
      REVIEW="⚠️ Agentic reviewer binary not found (\`GADFLY_BIN=${BIN}\`); the workflow build step may have failed."
    # Without a temp file there is nowhere to hand the diff to the binary.
    elif ! DIFF_FILE="$(mktemp)"; then
      REVIEW="⚠️ Could not create a temporary file for the diff; this reviewer was skipped."
    else
      REPO_DIR="${GADFLY_REPO_DIR:-$(cd "${SCRIPT_DIR}/../../.." && pwd)}"
      ERR_FILE="${DIFF_FILE}.err"
      printf '%s' "$FULL_DIFF" > "$DIFF_FILE"
      # GADFLY_PROVIDER / GADFLY_BASE_URL / GADFLY_API_KEY and provider key
      # envs (OPENAI_API_KEY, …) are inherited from the process environment.
      # stdout is the review; stderr is kept aside for the failure report.
      REVIEW="$(
        GADFLY_MODEL="$MODEL" \
        GADFLY_REPO_DIR="$REPO_DIR" \
        GADFLY_DIFF_FILE="$DIFF_FILE" \
        GADFLY_SYSTEM_FILE="${SCRIPT_DIR}/system-prompt.txt" \
        GADFLY_TITLE="$TITLE" \
        GADFLY_BODY="$BODY" \
        GADFLY_MAX_DIFF_CHARS="$MAX_DIFF_CHARS" \
        GADFLY_STATUS_FILE="${GADFLY_STATUS_FILE:-}" \
        "$BIN" 2>"$ERR_FILE"
      )"
      # Must directly follow the assignment: $? is the binary's exit status.
      rc=$?
      if [ "$rc" -ne 0 ] || [ -z "$REVIEW" ]; then
        REVIEW="⚠️ Agentic reviewer for \`${MODEL}\` failed (exit ${rc}):
\`\`\`
$(tail -c 1500 "$ERR_FILE" 2>/dev/null)
\`\`\`"
      fi
      rm -f "$DIFF_FILE" "$ERR_FILE"
    fi
    ;;
  antigravity)
    if ! command -v agy >/dev/null 2>&1; then
      REVIEW="⚠️ Antigravity CLI (\`agy\`) not found on PATH."
    else
      FULL="$(printf '%s\n\n%s' "$SYS" "$USR")"
      # Capture stderr in a private temp file rather than a fixed ./agy.err,
      # which was never removed and could be clobbered by a concurrent run.
      # If mktemp itself fails, fall back to the old fixed name.
      AGY_ERR="$(mktemp 2>/dev/null || echo agy.err)"
      if ! REVIEW="$(agy -p "$FULL" --model "$MODEL" 2>"$AGY_ERR")"; then
        REVIEW="⚠️ Antigravity CLI failed:
\`\`\`
$(tail -c 1500 "$AGY_ERR" 2>/dev/null)
\`\`\`"
      fi
      rm -f -- "$AGY_ERR"
      # A zero exit with no output still leaves nothing to post.
      if [ -z "$REVIEW" ]; then
        REVIEW="⚠️ Antigravity CLI returned no output (auth/quota?)."
      fi
    fi
    ;;
  *)
    say "unknown provider: ${PROVIDER}"
    exit 1
    ;;
esac
|
|
|
|
# --- assemble + post final comment (with run time) --------------------------
# Wall-clock seconds since START_TS, rendered for humans.
ELAPSED=$(( $(date +%s) - START_TS ))
DUR="$(fmt_duration "$ELAPSED")"

# Same MARKER-first header as the placeholder, so this upsert overwrites it;
# then the review text, then the advisory footer with the run time.
COMMENT="$(
  printf '%s\n### 🪰 Gadfly review — `%s` (%s)\n\n%s\n\n<sub>Automated adversarial review by Gadfly. Advisory only — does not block merge. · ⏱️ reviewed in %s</sub>' \
    "$MARKER" "$MODEL" "$MODEL_PROVIDER" "$REVIEW" "$DUR"
)"
upsert_comment "$COMMENT"
say "done in ${DUR}"
|