From 7bc3c982fa7b72367034c673f7812bf05e9c503e Mon Sep 17 00:00:00 2001 From: steve Date: Sun, 28 Jun 2026 06:00:18 +0000 Subject: [PATCH] feat(reusable): runtime-variable swarm config (cache-immune, no more re-pinning to retune) (#14) --- .gitea/workflows/review-reusable.yml | 84 ++++++++++++++-------------- CLAUDE.md | 13 +++-- README.md | 29 ++++++++-- 3 files changed, 74 insertions(+), 52 deletions(-) diff --git a/.gitea/workflows/review-reusable.yml b/.gitea/workflows/review-reusable.yml index c4c4e25..bd5c273 100644 --- a/.gitea/workflows/review-reusable.yml +++ b/.gitea/workflows/review-reusable.yml @@ -12,12 +12,12 @@ # CLAUDE_CODE_OAUTH_TOKEN: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} # with: { allowed_users: "..." } # config inputs are optional (see below) # -# Inputs ship the DEFAULT swarm (see the inputs block): 3 cloud models + the -# Claude Code engine + a local 4090 Ti (qwen3.6-27b via llama-swap), 5-lens suite -# (3 claude models concurrent / 5 lenses each; the 4090 Ti runs 1 model × 1 lens). A consumer -# inherits it by omitting `with:` entirely, or overrides any field (e.g. -# `models:` for a cloud-only / different-provider setup; "" falls back to the -# image's built-in default). Secrets are DECLARED below (workflow_call.secrets) so a +# The swarm config (models, specialists, concurrency) is resolved at RUNTIME from, +# in order: a consumer's `with:` input → the owner's user-scope GADFLY_DEFAULT_* +# variable → the image's built-in default. Because variables are injected per-run +# (not part of this cached file), the owner retunes the whole fleet by editing ONE +# variable — see the inputs block and README "Central config via variables". +# Secrets are DECLARED below (workflow_call.secrets) so a # caller forwards only the credentials the reviewer actually uses — least # privilege — rather than `secrets: inherit`, which leaks every caller secret # (registry/deploy/db creds) into this workflow. 
`secrets: inherit` still works @@ -34,27 +34,27 @@ name: Gadfly review (reusable) on: workflow_call: - # Inputs ship the DEFAULT Gadfly swarm so a consumer can just call this - # workflow (no `with:` block) and inherit it. The default is opinionated — - # 3 strong cloud models + the Claude Code engine (sonnet/opus/opus:max) + a - # local 4090 Ti (qwen3.6-27b via llama-swap at GADFLY_ENDPOINT_RAGNAROS), the - # 5-lens suite, with all 3 claude models concurrent and each running its 5 - # lenses at once (the 4090 Ti runs 1 model × 1 lens — a single local GPU). It - # needs OLLAMA_CLOUD_API_KEY and CLAUDE_CODE_OAUTH_TOKEN; a consumer - # with only one (or a different provider) overrides `models:` (and forwards - # just the secrets it uses). Set any input to "" to fall back to the - # image/entrypoint built-in default. + # The swarm config (models, specialists, concurrency) is resolved at RUNTIME, + # in priority order: a consumer's explicit `with:` input → the owner's + # user/org-level variable (GADFLY_DEFAULT_*) → the image's built-in default. + # Variables are injected per-run by Gitea (not baked into this file), so the + # owner can retune the whole fleet by editing ONE variable — it propagates even + # though long-lived act_runners CACHE this workflow file by ref (a moved tag is + # NOT re-fetched; only a runtime value or a fresh @<sha> bypasses the cache). # - # Peak claude concurrency = provider_concurrency × provider_lens_concurrency - # (3 models × 5 lenses = up to 15 concurrent `claude -p` per pass). If you hit - # subscription rate limits or runner load, dial claude-code down in either knob. + # Owner-set user-scope variables (see README "Central config via variables"): + # GADFLY_DEFAULT_MODELS, GADFLY_DEFAULT_SPECIALISTS, + # GADFLY_DEFAULT_PROVIDER_CONCURRENCY, GADFLY_DEFAULT_PROVIDER_LENS_CONCURRENCY, + # GADFLY_ENDPOINT_RAGNAROS (the 4090 Ti endpoint).
+ # An unset variable + no input → the image default (one model, default suite), + # so a public consumer with neither still gets a sane minimal review. inputs: - models: { type: string, default: "minimax-m3:cloud,glm-5.2:cloud,deepseek-v4-pro:cloud,claude-code/sonnet,claude-code/opus,claude-code/opus:max,ragnaros/qwen3.6-27b" } # GADFLY_MODELS (csv); ragnaros/* = the 4090 Ti via llama-swap (see GADFLY_ENDPOINT_RAGNAROS) - specialists: { type: string, default: "security,correctness,maintainability,performance,error-handling" } # GADFLY_SPECIALISTS (5-lens default suite) + models: { type: string, default: "" } # GADFLY_MODELS — empty falls back to user var GADFLY_DEFAULT_MODELS, then the image default + specialists: { type: string, default: "" } # GADFLY_SPECIALISTS — empty falls back to user var GADFLY_DEFAULT_SPECIALISTS provider: { type: string, default: "" } # GADFLY_PROVIDER base_url: { type: string, default: "" } # GADFLY_BASE_URL - provider_concurrency: { type: string, default: "ollama-cloud=3,claude-code=3,ragnaros=1" } # GADFLY_PROVIDER_CONCURRENCY (claude all 3 at once; ragnaros 4090 Ti one model at a time) - provider_lens_concurrency: { type: string, default: "ollama-cloud=3,claude-code=5,ragnaros=1" } # GADFLY_PROVIDER_LENS_CONCURRENCY (claude 5 lenses at once; ragnaros 1 lens at a time) + provider_concurrency: { type: string, default: "" } # GADFLY_PROVIDER_CONCURRENCY — empty falls back to user var GADFLY_DEFAULT_PROVIDER_CONCURRENCY + provider_lens_concurrency: { type: string, default: "" } # GADFLY_PROVIDER_LENS_CONCURRENCY — empty falls back to user var GADFLY_DEFAULT_PROVIDER_LENS_CONCURRENCY timeout_secs: { type: string, default: "600" } # GADFLY_TIMEOUT_SECS (per lens) max_steps: { type: string, default: "14" } # GADFLY_MAX_STEPS worker_model: { type: string, default: "" } # GADFLY_WORKER_MODEL @@ -68,8 +68,8 @@ on: # workflow every secret in the caller's repo (registry/deploy/db creds the # review never touches). 
All optional — an unset/unpassed secret resolves to # empty, harmless for the providers a given consumer doesn't use. GITEA_TOKEN - # is the automatic job token and need not be declared/forwarded. Consumers - # with bespoke GADFLY_ENDPOINT_<NAME>s beyond M1/M5 need the full stub. + # is the automatic job token and need not be declared/forwarded. Named + # endpoints (GADFLY_ENDPOINT_*) come from user/org VARS now, not secrets. secrets: OLLAMA_CLOUD_API_KEY: { required: false } OPENAI_API_KEY: { required: false } @@ -77,8 +77,6 @@ on: GOOGLE_API_KEY: { required: false } GADFLY_API_KEY: { required: false } CLAUDE_CODE_OAUTH_TOKEN: { required: false } - GADFLY_ENDPOINT_M1: { required: false } - GADFLY_ENDPOINT_M5: { required: false } GADFLY_FINDINGS_URL: { required: false } GADFLY_FINDINGS_TOKEN: { required: false } @@ -117,27 +115,29 @@ jobs: GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} GADFLY_API_KEY: ${{ secrets.GADFLY_API_KEY }} CLAUDE_CODE_OAUTH_TOKEN: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} - # Common named foreman/LAN endpoints (optional). Consumers with other - # GADFLY_ENDPOINT_<NAME>s need the full stub (examples/), since a - # reusable workflow can't enumerate arbitrary names. - GADFLY_ENDPOINT_M1: ${{ secrets.GADFLY_ENDPOINT_M1 }} - GADFLY_ENDPOINT_M5: ${{ secrets.GADFLY_ENDPOINT_M5 }} - # ragnaros = the 4090 Ti, reached over the LAN through its llama-swap - # proxy (lazy-loads models on demand). Plain https URL, no credential — - # set here so the default `ragnaros/qwen3.6-27b` model resolves for all - # consumers. Registers provider "ragnaros". NB: use the un-hyphenated - # `llamaswap` provider spelling — the pinned image accepts that form. - GADFLY_ENDPOINT_RAGNAROS: "llamaswap|https://llama-swap.ragnaros.dudenhoeffer.casa" + # Named LAN endpoints, defined in user/org vars (format + # "<provider>|<url>[|<token>]"). Adding a NEW name still needs a line + # here — a reusable workflow can't enumerate arbitrary vars.GADFLY_ENDPOINT_*.
+ # NB: vars are NOT masked like secrets — if an endpoint embeds an auth + # token, keep that one a secret instead. + GADFLY_ENDPOINT_M1: ${{ vars.GADFLY_ENDPOINT_M1 }} + GADFLY_ENDPOINT_M5: ${{ vars.GADFLY_ENDPOINT_M5 }} + # ragnaros = the 4090 Ti via its llama-swap proxy. Defined in the user + # var GADFLY_ENDPOINT_RAGNAROS (format "<provider>|<url>[|<token>]") + # so the URL can change without editing this file; the matching model is + # ragnaros/qwen3.6-27b in GADFLY_DEFAULT_MODELS. NB: use the un-hyphenated + # `llamaswap` provider spelling in the var — the pinned image needs it. + GADFLY_ENDPOINT_RAGNAROS: ${{ vars.GADFLY_ENDPOINT_RAGNAROS }} # --- findings telemetry (optional) -------------------------------- GADFLY_FINDINGS_URL: ${{ secrets.GADFLY_FINDINGS_URL }} GADFLY_FINDINGS_TOKEN: ${{ secrets.GADFLY_FINDINGS_TOKEN }} # --- config (from inputs; empty => image default) ----------------- - GADFLY_MODELS: ${{ inputs.models }} - GADFLY_SPECIALISTS: ${{ inputs.specialists }} + GADFLY_MODELS: ${{ inputs.models || vars.GADFLY_DEFAULT_MODELS }} + GADFLY_SPECIALISTS: ${{ inputs.specialists || vars.GADFLY_DEFAULT_SPECIALISTS }} GADFLY_PROVIDER: ${{ inputs.provider }} GADFLY_BASE_URL: ${{ inputs.base_url }} - GADFLY_PROVIDER_CONCURRENCY: ${{ inputs.provider_concurrency }} - GADFLY_PROVIDER_LENS_CONCURRENCY: ${{ inputs.provider_lens_concurrency }} + GADFLY_PROVIDER_CONCURRENCY: ${{ inputs.provider_concurrency || vars.GADFLY_DEFAULT_PROVIDER_CONCURRENCY }} + GADFLY_PROVIDER_LENS_CONCURRENCY: ${{ inputs.provider_lens_concurrency || vars.GADFLY_DEFAULT_PROVIDER_LENS_CONCURRENCY }} GADFLY_TIMEOUT_SECS: ${{ inputs.timeout_secs }} GADFLY_MAX_STEPS: ${{ inputs.max_steps }} GADFLY_WORKER_MODEL: ${{ inputs.worker_model }} diff --git a/CLAUDE.md b/CLAUDE.md index 7c5b1ae..b9d3d1a 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -46,11 +46,14 @@ entrypoint.sh container brains: trigger gating, PR clone, model loop (t used to live in workflow YAML) Dockerfile multi-stage; private-module creds via BuildKit
secrets never reach the final image .gitea/workflows/build-image.yml push main → :latest; tag v* → :<tag>+:latest; PR → build-only -.gitea/workflows/review-reusable.yml reusable (workflow_call) review job; ships the DEFAULT swarm as - input defaults (3 cloud + Claude Code sonnet/opus/opus:max + a local 4090 Ti - via llama-swap, 5-lens suite; 3 claude models concurrent / 5 lenses each, the - 4090 Ti 1 model × 1 lens) so consumers inherit it by omitting `with:`. Consumers subscribe - with an ~8-line caller forwarding only the secrets the reviewer needs (Phase 4); +.gitea/workflows/review-reusable.yml reusable (workflow_call) review job; resolves swarm config at + RUNTIME: consumer `with:` input → owner user-scope var (GADFLY_DEFAULT_MODELS / + _SPECIALISTS / _PROVIDER_CONCURRENCY / _PROVIDER_LENS_CONCURRENCY, + + GADFLY_ENDPOINT_RAGNAROS) → image default. Vars are injected per-run, so editing + one var retunes the whole fleet even though long-lived act_runners CACHE this file + by ref (a moved tag is NOT re-fetched — only a runtime value or a fresh @<sha> + bypasses the cache). Consumers subscribe with an ~8-line caller forwarding only the + secrets the reviewer needs and pinned to an immutable @<sha> (Phase 4); gadfly's own adversarial-review.yml is a thin caller of it (dogfoods the path). examples/ copy-paste consumer stub workflows for different providers ``` diff --git a/README.md b/README.md index be01c81..e27388b 100644 --- a/README.md +++ b/README.md @@ -39,8 +39,8 @@ it. Drop one file in your repo and set a couple of secrets/vars: 1. Copy a stub from [`examples/`](examples/) to `.gitea/workflows/adversarial-review.yml` in your repo.
Two flavors: the slim [`reusable.yml`](examples/reusable.yml) — a tiny caller of Gadfly's **reusable workflow** (`uses: steve/gadfly/.gitea/workflows/review-reusable.yml@…`, - forwarding only the secrets the reviewer needs), which ships a **default swarm** (3 cloud models + - the Claude Code engine + a local 4090 Ti via llama-swap, 5-lens suite) you inherit by omitting `with:` or override per-input — or the full self-contained + forwarding only the secrets the reviewer needs), whose **default swarm is set centrally via owner + variables** (see [Central config via variables](#central-config-via-variables)) and inherited by omitting `with:` — or the full self-contained [`adversarial-review.yml`](examples/adversarial-review.yml) (Ollama Cloud default, with inline notes for every provider / local Ollama / OpenAI-compatible / endpoint aliases). See the [examples index](examples/README.md). @@ -313,9 +313,28 @@ examples/ the ~15-line stub a consuming repo drops in The image is published to `gitea.stevedudenhoeffer.com/steve/gadfly`. Every push to `main` rebuilds and republishes `:latest` (plus `:sha-<short>`); pushing a `v*` tag publishes that pinned version (plus `:latest`). Pin full-stub consumers to a `:vN` image tag for stability, or track -`:latest` to ride main. Reusable-workflow consumers likewise pin the workflow ref — -`review-reusable.yml@v1` (a curated tag moved on releases, so central swarm tuning propagates) or -a full `@<sha>` for an immutable pin; avoid `@main`. +`:latest` to ride main. **Reusable-workflow consumers should pin the workflow ref to an immutable +`review-reusable.yml@<sha>`** — long-lived act_runners *cache the workflow file by ref*, so a moved tag +(`@v1`) or `@main` is often **not** re-fetched and silently runs a stale copy. A fresh `@<sha>` is the +only reliable way to roll out a *structural* change to the reusable.
+ +### Central config via variables + +So you don't have to re-pin every consumer just to retune the swarm, the reusable resolves its config +at **runtime** — `with:` input → owner **user/org-level variable** → image default — and variables are +injected per-run (not part of the cached file), so changing one variable propagates to every consumer +on its next review **without** a re-pin or a tag move: + +| Variable (user/org scope) | Sets | +|---|---| +| `GADFLY_DEFAULT_MODELS` | `GADFLY_MODELS` (csv) | +| `GADFLY_DEFAULT_SPECIALISTS` | the lens suite | +| `GADFLY_DEFAULT_PROVIDER_CONCURRENCY` | models-at-once per provider | +| `GADFLY_DEFAULT_PROVIDER_LENS_CONCURRENCY` | lenses-at-once per provider | +| `GADFLY_ENDPOINT_RAGNAROS` | a named endpoint, e.g. `llamaswap\|https://host` | + +Adding a *new* named endpoint still needs a one-line reusable edit (Gitea can't auto-expose arbitrary +`vars.GADFLY_ENDPOINT_*`); the values of already-wired ones are pure variables. ## Configuration (advanced)