From a5adc6f4d1674c992209efb4404de1931d12e025 Mon Sep 17 00:00:00 2001 From: Steve Dudenhoeffer Date: Sat, 27 Jun 2026 14:31:48 -0400 Subject: [PATCH 1/3] ci: add Gadfly adversarial PR reviewer workflow Installs the standalone Gadfly agentic adversarial reviewer (advisory, never blocks merge), mirroring executus's setup on the latest pinned image (sha-d7f364d). Reviews majordomo PRs with the full fleet: 9 ollama-cloud models plus the M1/M5 Macs via foreman, each running the 3-lens suite (security, correctness, error-handling). Posts one consolidated comment per model. Co-Authored-By: Claude Opus 4.8 (1M context) --- .gitea/workflows/adversarial-review.yml | 92 +++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 .gitea/workflows/adversarial-review.yml diff --git a/.gitea/workflows/adversarial-review.yml b/.gitea/workflows/adversarial-review.yml new file mode 100644 index 0000000..6b04e80 --- /dev/null +++ b/.gitea/workflows/adversarial-review.yml @@ -0,0 +1,92 @@ +# Gadfly — agentic adversarial PR reviewer (https://gitea.stevedudenhoeffer.com/steve/gadfly). +# +# Runs the published Gadfly image (pinned to an immutable :sha- tag — act_runner +# caches :latest, and this build is what carries foreman provider-type support) +# as a specialist swarm and posts +# ONE consolidated review comment as gitea-actions. Advisory only — never blocks a +# merge. This reviews majordomo PRs with 9 ollama-cloud models + M1/M5 Macs +# (3-lens suite). Gadfly is a simple system — findings are advisory; always +# double-check before acting. 
+ +name: Adversarial Review (Gadfly) + +on: + pull_request: + types: [opened, reopened, ready_for_review] + issue_comment: + types: [created] + workflow_dispatch: + inputs: + pr_number: + description: "PR number to review" + required: true + +permissions: + contents: read + issues: write + pull-requests: write + +concurrency: + group: gadfly-${{ github.event.issue.number || github.event.pull_request.number || github.event.inputs.pr_number }} + cancel-in-progress: true + +jobs: + review: + # Security: only trusted users may trigger a secret-bearing run via a PR + # comment (pull_request + workflow_dispatch are already trusted). Mirrors + # GADFLY_ALLOWED_USERS, the in-container belt-and-suspenders check. + if: >- + github.event_name != 'issue_comment' + || (github.event.issue.pull_request + && (github.actor == 'steve' + || github.actor == 'fizi' + || github.actor == 'dazed')) + runs-on: ubuntu-latest + # Full fleet: 9 cloud (lens fan-out) + M1/M5 Macs via foreman. The slow local + # lanes dominate wall time, so allow plenty of headroom. + timeout-minutes: 90 + steps: + - uses: docker://gitea.stevedudenhoeffer.com/steve/gadfly:sha-d7f364d + env: + GITEA_API: ${{ github.server_url }}/api/v1/repos/${{ github.repository }} + GITEA_TOKEN: ${{ secrets.GITEA_TOKEN }} + OLLAMA_CLOUD_API_KEY: ${{ secrets.OLLAMA_CLOUD_API_KEY }} + # Local Macs, reached through their foreman queues (native Ollama on the + # wire). GADFLY_ENDPOINT_M1 registers provider "m1", _M5 registers "m5", + # each a foreman-preset Ollama client at the secret's URL, of the form: + # foreman|https://| + # Needs an image with foreman provider-type support (this one). If a Mac + # is offline that model's comment shows an error and the others still post. + # (Gitea secrets aren't auto-exposed — map each explicitly.) + GADFLY_ENDPOINT_M1: ${{ secrets.GADFLY_ENDPOINT_M1 }} + GADFLY_ENDPOINT_M5: ${{ secrets.GADFLY_ENDPOINT_M5 }} + # Full fleet: 9 cloud + M1 Pro + M5 Max. 
Cloud concurrency lives in the + # LENSES: cloud models run a few at a time (ollama-cloud=3) with their 3 + # lenses concurrent (LENS ollama-cloud=3) so comments land sooner; each + # Mac runs one model, lenses serial (its foreman queue serializes anyway). + # All three provider lanes run parallel. + GADFLY_MODELS: "minimax-m3:cloud,glm-5.2:cloud,glm-5.1:cloud,kimi-k2.7-code:cloud,deepseek-v4-pro:cloud,nemotron-3-super:cloud,gpt-oss:120b-cloud,qwen3-coder:480b-cloud,gemma4:cloud,m1/qwen3:14b,m5/qwen3.6:35b-mlx" + GADFLY_PROVIDER_CONCURRENCY: "ollama-cloud=3,m1=1,m5=1" + GADFLY_PROVIDER_LENS_CONCURRENCY: "ollama-cloud=3" + # Default => the 3-lens suite (security, correctness, error-handling). + # Set the repo var GADFLY_SPECIALISTS to override (csv / "all" / "auto"). + GADFLY_SPECIALISTS: ${{ vars.GADFLY_SPECIALISTS || 'security,correctness,error-handling' }} + # Per-lens deadline + bounded steps so the slow local models stay sane. + GADFLY_TIMEOUT_SECS: "600" + GADFLY_MAX_STEPS: "14" + # Allow-list for the comment trigger (mirrors the job-level if: guard). + GADFLY_ALLOWED_USERS: "steve,fizi,dazed" + # --- findings telemetry: POST runs + findings to the gadfly-reports store --- + # Advisory & off unless GADFLY_FINDINGS_URL is set; failures only log to + # stderr and never affect the review. GADFLY_REPO / GADFLY_PR are derived + # in-container; the URL + token are user-scope secrets. 
+ GADFLY_FINDINGS_URL: ${{ secrets.GADFLY_FINDINGS_URL }} + GADFLY_FINDINGS_TOKEN: ${{ secrets.GADFLY_FINDINGS_TOKEN }} + # --- event context (leave as-is) --- + EVENT_NAME: ${{ github.event_name }} + PR: ${{ github.event.pull_request.number || github.event.issue.number || github.event.inputs.pr_number }} + PR_BRANCH: ${{ github.head_ref }} + IS_DRAFT: ${{ github.event.pull_request.draft }} + COMMENT_BODY: ${{ github.event.comment.body }} + COMMENT_ID: ${{ github.event.comment.id }} + ACTOR: ${{ github.actor }} From 8dae9cc94108790dd27bb2cd2dccc80015f80f04 Mon Sep 17 00:00:00 2001 From: Steve Dudenhoeffer Date: Sat, 27 Jun 2026 14:32:25 -0400 Subject: [PATCH 2/3] docs: document the Gadfly adversarial review loop in CLAUDE.md Records the PR workflow: push work to a PR (never straight to main), wait for Gadfly to finish and weigh its findings, then grade each finding back to the gadfly-reports MCP (record_finding_grade / list_findings / scoreboard) so the telemetry can measure whether each model earns its keep. Co-Authored-By: Claude Opus 4.8 (1M context) --- CLAUDE.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/CLAUDE.md b/CLAUDE.md index 104ddfb..5b57da7 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -129,6 +129,27 @@ CI: `.gitea/workflows/ci.yaml` (Gitea Actions, mirrors foreman). README.md must match reality in the same commit that changes behavior — no aspirational docs; unbuilt features are marked pending in the matrix. +## Adversarial review loop (Gadfly) + +Ship work through PRs and let Gadfly review it before merge: + +- **Push to a PR, never straight to `main`.** Branch, push, open a PR. + `.gitea/workflows/adversarial-review.yml` runs Gadfly (the standalone + agentic adversarial reviewer) — the cloud and foreman fleet listed in that + workflow's `GADFLY_MODELS`, each running the 3-lens suite (security, + correctness, error-handling). Advisory only; it never blocks the merge. 
+- **Wait for Gadfly to finish, then read its output.** Don't merge while the + review is still running. Each model posts one consolidated comment; weigh + every finding on its merits and fix the real ones (Gadfly is a simple + system — findings are advisory, so confirm before acting). +- **Grade the findings back to the Gadfly MCP.** For each finding, call + `mcp__gadfly__record_finding_grade`: `is_real=true` + a `severity` + (trivial|small|medium|high|critical) for a genuine problem, or + `is_real=false` for a false positive; add `notes`/`usefulness` when + useful. Use `mcp__gadfly__list_findings` (`only_ungraded=true`) to find + what still needs grading and `mcp__gadfly__scoreboard` for the per-model + rollup. This telemetry is how we measure whether each model earns its keep. + ## Out of scope (anti-creep) No persistent store (health is in-memory behind the registry), no From 43eb15575981bba50d6edbc7d5d34e1ca27dfca2 Mon Sep 17 00:00:00 2001 From: Steve Dudenhoeffer Date: Sat, 27 Jun 2026 14:52:11 -0400 Subject: [PATCH 3/3] ci(gadfly): drop the M1 Mac from the review swarm M1 was consistently slow (26-29 min) for zero real findings, so pull it before this workflow ever fires. Leaves the 9 ollama-cloud models + the M5 Mac; removes GADFLY_ENDPOINT_M1 and the m1 concurrency entry. Mirrors the same change on executus. Co-Authored-By: Claude Opus 4.8 (1M context) --- .gitea/workflows/adversarial-review.yml | 30 ++++++++++++------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/.gitea/workflows/adversarial-review.yml b/.gitea/workflows/adversarial-review.yml index 6b04e80..1831136 100644 --- a/.gitea/workflows/adversarial-review.yml +++ b/.gitea/workflows/adversarial-review.yml @@ -4,7 +4,7 @@ # caches :latest, and this build is what carries foreman provider-type support) # as a specialist swarm and posts # ONE consolidated review comment as gitea-actions. Advisory only — never blocks a -# merge. 
This reviews majordomo PRs with 9 ollama-cloud models + M1/M5 Macs +# merge. This reviews majordomo PRs with 9 ollama-cloud models + the M5 Mac # (3-lens suite). Gadfly is a simple system — findings are advisory; always # double-check before acting. @@ -42,8 +42,9 @@ jobs: || github.actor == 'fizi' || github.actor == 'dazed')) runs-on: ubuntu-latest - # Full fleet: 9 cloud (lens fan-out) + M1/M5 Macs via foreman. The slow local - # lanes dominate wall time, so allow plenty of headroom. + # Fleet: 9 cloud (lens fan-out) + the M5 Mac via foreman. The slow local + # lane dominates wall time, so allow plenty of headroom. (M1 was dropped — + # consistently slow for zero real findings.) timeout-minutes: 90 steps: - uses: docker://gitea.stevedudenhoeffer.com/steve/gadfly:sha-d7f364d @@ -51,22 +52,21 @@ jobs: GITEA_API: ${{ github.server_url }}/api/v1/repos/${{ github.repository }} GITEA_TOKEN: ${{ secrets.GITEA_TOKEN }} OLLAMA_CLOUD_API_KEY: ${{ secrets.OLLAMA_CLOUD_API_KEY }} - # Local Macs, reached through their foreman queues (native Ollama on the - # wire). GADFLY_ENDPOINT_M1 registers provider "m1", _M5 registers "m5", - # each a foreman-preset Ollama client at the secret's URL, of the form: + # Local Mac, reached through its foreman queue (native Ollama on the + # wire). GADFLY_ENDPOINT_M5 registers provider "m5", a foreman-preset + # Ollama client at the secret's URL, of the form: # foreman|https://| - # Needs an image with foreman provider-type support (this one). If a Mac + # Needs an image with foreman provider-type support (this one). If the Mac # is offline that model's comment shows an error and the others still post. # (Gitea secrets aren't auto-exposed — map each explicitly.) - GADFLY_ENDPOINT_M1: ${{ secrets.GADFLY_ENDPOINT_M1 }} GADFLY_ENDPOINT_M5: ${{ secrets.GADFLY_ENDPOINT_M5 }} - # Full fleet: 9 cloud + M1 Pro + M5 Max. 
Cloud concurrency lives in the - # LENSES: cloud models run a few at a time (ollama-cloud=3) with their 3 - # lenses concurrent (LENS ollama-cloud=3) so comments land sooner; each - # Mac runs one model, lenses serial (its foreman queue serializes anyway). - # All three provider lanes run parallel. - GADFLY_MODELS: "minimax-m3:cloud,glm-5.2:cloud,glm-5.1:cloud,kimi-k2.7-code:cloud,deepseek-v4-pro:cloud,nemotron-3-super:cloud,gpt-oss:120b-cloud,qwen3-coder:480b-cloud,gemma4:cloud,m1/qwen3:14b,m5/qwen3.6:35b-mlx" - GADFLY_PROVIDER_CONCURRENCY: "ollama-cloud=3,m1=1,m5=1" + # Fleet: 9 cloud + M5 Max. Cloud concurrency lives in the LENSES: cloud + # models run a few at a time (ollama-cloud=3) with their 3 lenses + # concurrent (LENS ollama-cloud=3) so comments land sooner; the Mac runs + # one model, lenses serial (its foreman queue serializes anyway). Both + # provider lanes run in parallel. + GADFLY_MODELS: "minimax-m3:cloud,glm-5.2:cloud,glm-5.1:cloud,kimi-k2.7-code:cloud,deepseek-v4-pro:cloud,nemotron-3-super:cloud,gpt-oss:120b-cloud,qwen3-coder:480b-cloud,gemma4:cloud,m5/qwen3.6:35b-mlx" + GADFLY_PROVIDER_CONCURRENCY: "ollama-cloud=3,m5=1" GADFLY_PROVIDER_LENS_CONCURRENCY: "ollama-cloud=3" # Default => the 3-lens suite (security, correctness, error-handling). # Set the repo var GADFLY_SPECIALISTS to override (csv / "all" / "auto").