diff --git a/.gitea/workflows/adversarial-review.yml b/.gitea/workflows/adversarial-review.yml index 419d6d4..09c7378 100644 --- a/.gitea/workflows/adversarial-review.yml +++ b/.gitea/workflows/adversarial-review.yml @@ -4,8 +4,8 @@ # caches :latest, and this build is what carries foreman provider-type support) # as a specialist swarm and posts # ONE consolidated review comment as gitea-actions. Advisory only — never blocks a -# merge. Gadfly reviewing its OWN PRs — dogfooding, full fleet (3 cloud + the M1/M5 -# Macs), copied from mort's setup. +# merge. Gadfly reviewing its OWN PRs — dogfooding, full fleet (9 cloud + +# the M5 Mac; M1 dropped as too slow), copied from mort's setup. name: Adversarial Review (Gadfly) @@ -41,8 +41,8 @@ jobs: || github.actor == 'fizi' || github.actor == 'dazed')) runs-on: ubuntu-latest - # Full fleet (3 cloud + 2 local Macs, all running concurrently) reviewing - # every PR with the 3-lens suite — the slow local lanes dominate wall time. + # Fleet (9 cloud, 3 at a time, + 1 local Mac/M5, lanes in parallel) reviewing + # every PR with the 3-lens suite — the slow local lane dominates wall time. timeout-minutes: 90 steps: - uses: docker://gitea.stevedudenhoeffer.com/steve/gadfly:sha-d7f364d @@ -50,10 +50,12 @@ jobs: GITEA_API: ${{ github.server_url }}/api/v1/repos/${{ github.repository }} GITEA_TOKEN: ${{ secrets.GITEA_TOKEN }} OLLAMA_CLOUD_API_KEY: ${{ secrets.OLLAMA_CLOUD_API_KEY }} - # Local Macs, reached through their foreman queues (native Ollama on the + # Local Mac (M5), reached through its foreman queue (native Ollama on the # wire). Gadfly's GADFLY_ENDPOINT_* form with the "foreman" provider - # type: GADFLY_ENDPOINT_M1 registers provider "m1", _M5 registers "m5", - # each building a foreman-preset Ollama client at the given URL. Values + # type: GADFLY_ENDPOINT_M5 registers provider "m5", building a + # foreman-preset Ollama client at the given URL. 
(M1 is dropped from + # gadfly's swarm — too slow/low-signal — so its endpoint isn't mapped.) + # Values # (host + token) live in gitea secrets, each of the form: # foreman|https://<host>|<token> # (converted from the komodo LLM_* DSNs foreman://<token>@<host>). @@ -64,12 +66,16 @@ jobs: # NOTE: the Mac behind each foreman must still be awake/reachable; if a # box is offline, that model's comment shows an error and the others # still post. (Gitea secrets aren't auto-exposed — map each explicitly.) - GADFLY_ENDPOINT_M1: ${{ secrets.GADFLY_ENDPOINT_M1 }} GADFLY_ENDPOINT_M5: ${{ secrets.GADFLY_ENDPOINT_M5 }} - # 3 cloud (parallel) + M1 Pro + M5 Max — one consolidated comment each. - GADFLY_MODELS: "minimax-m3:cloud,deepseek-v4-flash:cloud,glm-5.2:cloud,m1/qwen3:14b,m5/qwen3.6:35b-mlx" - # cloud runs 3 at once; each Mac one at a time; all three lanes parallel. - GADFLY_PROVIDER_CONCURRENCY: "ollama-cloud=3,m1=1,m5=1" + # Fleet: 9 cloud (3 at a time) + M5 Max — one consolidated comment each. + # Matches mort's cloud set so the model-quality scoreboard is comparable + # across both repos. NOTE: M1 Pro is intentionally dropped here (too slow + # / low-signal for gadfly's own PRs); mort still runs it. + GADFLY_MODELS: "minimax-m3:cloud,glm-5.2:cloud,glm-5.1:cloud,kimi-k2.7-code:cloud,deepseek-v4-pro:cloud,nemotron-3-super:cloud,gpt-oss:120b-cloud,qwen3-coder:480b-cloud,gemma4:cloud,m5/qwen3.6:35b-mlx" + # cloud runs 3 at once; the Mac one at a time; both lanes parallel. + GADFLY_PROVIDER_CONCURRENCY: "ollama-cloud=3,m5=1" + # 3 cloud models x 3 lenses = 9 concurrent ollama-cloud queries (under the 10 budget). + GADFLY_PROVIDER_LENS_CONCURRENCY: "ollama-cloud=3" # Default => the 3-lens suite (security, correctness, error-handling). # Set the repo var GADFLY_SPECIALISTS to override (csv / "all" / "auto"). 
GADFLY_SPECIALISTS: ${{ vars.GADFLY_SPECIALISTS || 'security,correctness,error-handling' }} diff --git a/CLAUDE.md b/CLAUDE.md index ce156e0..8d49c16 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -39,6 +39,7 @@ cmd/gadfly/ the reviewer binary — pure producer of review markdown recheck.go second-pass verification prompt + verdict recompute *_test.go sandbox, recheck, wrap-up, spec/endpoint-parse, specialist-resolution tests scripts/run.sh fetch PR diff+meta, run the binary, upsert ONE labeled PR comment +scripts/status-board.sh render+upsert ONE live status-board comment (per-model/per-lens progress) scripts/system-prompt.txt the reviewer persona + verification discipline (generic, not repo-specific) entrypoint.sh container brains: trigger gating, PR clone, model loop (the logic that used to live in workflow YAML) diff --git a/Dockerfile b/Dockerfile index a76891f..b2ae511 100644 --- a/Dockerfile +++ b/Dockerfile @@ -28,5 +28,5 @@ RUN apk add --no-cache bash git curl jq ca-certificates COPY --from=build /out/gadfly /usr/local/bin/gadfly COPY scripts /app/scripts COPY entrypoint.sh /entrypoint.sh -RUN chmod +x /entrypoint.sh /app/scripts/run.sh /usr/local/bin/gadfly +RUN chmod +x /entrypoint.sh /app/scripts/run.sh /app/scripts/status-board.sh /usr/local/bin/gadfly ENTRYPOINT ["/entrypoint.sh"] diff --git a/README.md b/README.md index b6f799b..78ffaba 100644 --- a/README.md +++ b/README.md @@ -192,6 +192,30 @@ GADFLY_PROVIDER_LENS_CONCURRENCY: "ollama-cloud=3,m1=1" GADFLY_SPECIALISTS: "security,correctness,error-handling" ``` +### Live status board + +When several models (each with several lenses) review a PR, the individual findings land in +**one comment per model** — but while that's in flight all you'd see is a row of +`⏳ Reviewing…` placeholders. 
So Gadfly also upserts **one consolidated status-board comment** +that aggregates every model's per-lens progress as it happens: + +``` +## 🪰 Gadfly — live review status +1/3 reviewers finished · updated 2026-06-27 18:14:56Z + +#### `glm-5.2:cloud` · ollama-cloud — ⏳ 2/4 lenses +- ✅ security — No material issues found +- 🔄 correctness — running +- ⏸️ performance — queued +… +``` + +Each model process publishes its lenses (queued → running → finished + verdict) to a small +JSON file, and a background renderer in `entrypoint.sh` re-renders + upserts the single comment +every `GADFLY_STATUS_POLL_SECS` (default 12s) until the swarm finishes. It's advisory and +best-effort — the per-model findings comments are unaffected — and entirely separate from those. +Turn it off with `GADFLY_STATUS_BOARD=0`. + ### Triggers 1. A **new/reopened/ready** non-draft PR — automatic. @@ -217,6 +241,7 @@ fixes. This keeps usage down.) ``` cmd/gadfly/ the agentic reviewer binary (majordomo + Ollama Cloud); zero deps beyond stdlib + majordomo scripts/run.sh fetches the PR diff, runs the reviewer, upserts one labeled comment +scripts/status-board.sh renders + upserts the single live status-board comment (per-lens progress) scripts/system-prompt.txt the reviewer persona + verification discipline entrypoint.sh the container brains: trigger gating, clone, model loop (logic lives here, not in YAML) Dockerfile multi-stage; build-time module creds (BuildKit secrets) never reach the final image @@ -252,6 +277,8 @@ The reviewer binary reads these (the stub/entrypoint set sane defaults): | `GADFLY_RECHECK` | on | set `0`/`false` to skip the recheck pass | | `GADFLY_RECHECK_MAX_STEPS` | 16 | recheck-pass step cap | | `GADFLY_MAX_DIFF_CHARS` | 60000 | diff chars embedded in the prompt (full diff via `get_diff`) | +| `GADFLY_STATUS_BOARD` | on | set `0` to disable the live status-board comment | +| `GADFLY_STATUS_POLL_SECS` | 12 | how often the status board re-renders/upserts | | `GADFLY_TRIGGER_PHRASE` 
| `@gadfly review` | comment phrase that re-triggers | | `GADFLY_ALLOWED_USERS` | *(collaborators)* | comma-separated allow-list for comment triggers | | `GADFLY_FINDINGS_URL` | — | gadfly-reports store base URL; set to enable findings telemetry (off when empty) | diff --git a/cmd/gadfly/main.go b/cmd/gadfly/main.go index 19eecb9..3718c56 100644 --- a/cmd/gadfly/main.go +++ b/cmd/gadfly/main.go @@ -218,6 +218,11 @@ func run() error { func runSpecialists(mdl llm.Model, fsTools *repoFS, base string, specialists []Specialist, task, diff string) []specialistResult { results := make([]specialistResult, len(specialists)) + // Optional live status board: publishes this model's per-lens progress to a + // file the entrypoint board renders. Inert (no-op) unless GADFLY_STATUS_FILE + // is set, so plain runs are unaffected. + sw := newStatusWriter(os.Getenv("GADFLY_MODEL"), modelProvider(), specialists) + conc := min(lensConcurrency(), len(specialists)) sem := make(chan struct{}, conc) @@ -228,8 +233,21 @@ func runSpecialists(mdl llm.Model, fsTools *repoFS, base string, specialists []S go func(i int, sp Specialist) { defer wg.Done() defer func() { <-sem }() + // A panic in one lens must not crash the whole binary (which would + // kill every other lens's output) or leave this lens stuck at + // "running" on the status board. Recover, record it as an errored + // result, and mark the lens finished so the board can complete. 
+ defer func() { + if r := recover(); r != nil { + results[i] = specialistResult{spec: sp, out: fmt.Sprintf("⚠️ This reviewer panicked: %v", r), verdict: verdictUnknown, errored: true} + sw.set(sp.Name, lensFinished, "", true) + } + }() + sw.set(sp.Name, lensRunning, "", false) out, errored := reviewWithSpecialist(mdl, fsTools, base, sp, task, diff) - results[i] = specialistResult{spec: sp, out: out, verdict: parseVerdict(out), errored: errored} + v := parseVerdict(out) + results[i] = specialistResult{spec: sp, out: out, verdict: v, errored: errored} + sw.set(sp.Name, lensFinished, v.label(), errored) }(i, sp) } wg.Wait() diff --git a/cmd/gadfly/status.go b/cmd/gadfly/status.go new file mode 100644 index 0000000..8681672 --- /dev/null +++ b/cmd/gadfly/status.go @@ -0,0 +1,131 @@ +package main + +import ( + "encoding/json" + "os" + "path/filepath" + "strings" + "sync" + "time" +) + +// Lens states for the live status board. A lens starts queued, becomes running +// when its pass begins, and ends finished (with a verdict, or errored). +const ( + lensQueued = "queued" + lensRunning = "running" + lensFinished = "finished" +) + +// lensStatus is one specialist lens's progress, as rendered by the entrypoint +// status board (scripts/status-board.sh). +type lensStatus struct { + Name string `json:"name"` + State string `json:"state"` // queued | running | finished + Verdict string `json:"verdict,omitempty"` // set when finished (the lens's label) + Errored bool `json:"errored,omitempty"` // the lens failed to complete +} + +// modelStatus is the on-disk shape one model process publishes for the live +// status board: a snapshot of this model's lenses as they progress. The board +// reads every model's file and renders a single consolidated PR comment. 
+type modelStatus struct { + Model string `json:"model"` + Provider string `json:"provider"` + Started int64 `json:"started"` // unix seconds + Updated int64 `json:"updated"` // unix seconds, bumped on every change + Done bool `json:"done"` // all lenses finished + Lenses []lensStatus `json:"lenses"` +} + +// statusWriter maintains a model's status file as its lenses progress. It is +// purely opt-in: when GADFLY_STATUS_FILE is unset the writer's path is empty and +// every method is a no-op, so a plain run (and the unit tests) never touch the +// filesystem and behave exactly as before. Writes are atomic (temp file + +// rename within the same dir) so the board never reads a half-written file even +// though lenses can finish concurrently. +type statusWriter struct { + path string + mu sync.Mutex + st modelStatus +} + +// newStatusWriter seeds a writer with every lens queued and flushes the initial +// snapshot. model/provider are echoed into the file so the board can render +// them without re-deriving from the filename (which is sanitized). The status +// file path comes from GADFLY_STATUS_FILE (set by run.sh per model); when empty +// the writer is inert. +func newStatusWriter(model, provider string, specialists []Specialist) *statusWriter { + w := &statusWriter{path: strings.TrimSpace(os.Getenv("GADFLY_STATUS_FILE"))} + w.st = modelStatus{ + Model: model, + Provider: provider, + Started: time.Now().Unix(), + } + for _, sp := range specialists { + w.st.Lenses = append(w.st.Lenses, lensStatus{Name: sp.Name, State: lensQueued}) + } + w.flush() + return w +} + +// set transitions a lens to a new state (and verdict/errored when finished), +// recomputes the overall done flag, and atomically rewrites the file. Unknown +// lens names are ignored. Safe for concurrent callers (one goroutine per lens). 
+func (w *statusWriter) set(name, state, verdict string, errored bool) { + if w == nil || w.path == "" { + return + } + w.mu.Lock() + defer w.mu.Unlock() + for i := range w.st.Lenses { + if w.st.Lenses[i].Name == name { + w.st.Lenses[i].State = state + w.st.Lenses[i].Verdict = verdict + w.st.Lenses[i].Errored = errored + break + } + } + done := true + for _, l := range w.st.Lenses { + if l.State != lensFinished { + done = false + break + } + } + w.st.Done = done + w.flush() +} + +// flush writes the current snapshot atomically. Best-effort: any error is +// swallowed (the status board is advisory and must never affect the review). +func (w *statusWriter) flush() { + if w.path == "" { + return + } + w.st.Updated = time.Now().Unix() + data, err := json.MarshalIndent(&w.st, "", " ") + if err != nil { + return + } + dir := filepath.Dir(w.path) + tmp, err := os.CreateTemp(dir, ".status-*.tmp") + if err != nil { + return + } + tmpName := tmp.Name() + if _, err := tmp.Write(data); err != nil { + tmp.Close() + os.Remove(tmpName) + return + } + if err := tmp.Close(); err != nil { + os.Remove(tmpName) + return + } + // Rename is atomic within the same filesystem, so the board reader sees + // either the old complete file or the new complete file — never a partial. + if err := os.Rename(tmpName, w.path); err != nil { + os.Remove(tmpName) + } +} diff --git a/cmd/gadfly/status_test.go b/cmd/gadfly/status_test.go new file mode 100644 index 0000000..6e556bd --- /dev/null +++ b/cmd/gadfly/status_test.go @@ -0,0 +1,103 @@ +package main + +import ( + "encoding/json" + "os" + "path/filepath" + "testing" +) + +// readStatus loads a modelStatus written by the statusWriter. 
+func readStatus(t *testing.T, path string) modelStatus { + t.Helper() + data, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read status file: %v", err) + } + var st modelStatus + if err := json.Unmarshal(data, &st); err != nil { + t.Fatalf("unmarshal status: %v", err) + } + return st +} + +func TestStatusWriterLifecycle(t *testing.T) { + path := filepath.Join(t.TempDir(), "glm.json") + t.Setenv("GADFLY_STATUS_FILE", path) + + specs := []Specialist{ + {Name: "security", Title: "Security"}, + {Name: "correctness", Title: "Correctness"}, + } + w := newStatusWriter("glm-5.2:cloud", "ollama-cloud", specs) + + // Initial snapshot: both lenses queued, model not done, metadata echoed. + st := readStatus(t, path) + if st.Model != "glm-5.2:cloud" || st.Provider != "ollama-cloud" { + t.Fatalf("model/provider not echoed: %+v", st) + } + if len(st.Lenses) != 2 { + t.Fatalf("want 2 lenses, got %d", len(st.Lenses)) + } + for _, l := range st.Lenses { + if l.State != lensQueued { + t.Fatalf("lens %q want queued, got %q", l.Name, l.State) + } + } + if st.Done { + t.Fatal("model marked done while lenses still queued") + } + if st.Started == 0 { + t.Fatal("started timestamp not set") + } + + // Transition one lens through running -> finished; model not yet done. + w.set("security", lensRunning, "", false) + if got := readStatus(t, path); got.Lenses[0].State != lensRunning { + t.Fatalf("security want running, got %q", got.Lenses[0].State) + } + w.set("security", lensFinished, "No material issues found", false) + st = readStatus(t, path) + if st.Lenses[0].State != lensFinished || st.Lenses[0].Verdict != "No material issues found" { + t.Fatalf("security finish not recorded: %+v", st.Lenses[0]) + } + if st.Done { + t.Fatal("model marked done with one lens still queued") + } + + // Finish the second lens (errored) -> model done. 
+ w.set("correctness", lensFinished, "Reviewed", true) + st = readStatus(t, path) + if !st.Done { + t.Fatal("model should be done after all lenses finished") + } + if !st.Lenses[1].Errored { + t.Fatal("errored flag not recorded for correctness") + } + if st.Updated < st.Started { + t.Fatalf("updated (%d) should be >= started (%d)", st.Updated, st.Started) + } +} + +// With GADFLY_STATUS_FILE unset the writer is inert: no file, no panic. +func TestStatusWriterDisabled(t *testing.T) { + t.Setenv("GADFLY_STATUS_FILE", "") + w := newStatusWriter("m", "p", []Specialist{{Name: "security"}}) + w.set("security", lensFinished, "Minor issues", false) + // Nothing to assert beyond "did not panic / did not write" — a nil-safe set + // on the disabled writer is the contract. + if w.path != "" { + t.Fatalf("expected empty path when disabled, got %q", w.path) + } +} + +// set must ignore unknown lens names rather than panic or append. +func TestStatusWriterUnknownLens(t *testing.T) { + path := filepath.Join(t.TempDir(), "s.json") + t.Setenv("GADFLY_STATUS_FILE", path) + w := newStatusWriter("m", "p", []Specialist{{Name: "security"}}) + w.set("does-not-exist", lensRunning, "", false) + if st := readStatus(t, path); len(st.Lenses) != 1 || st.Lenses[0].State != lensQueued { + t.Fatalf("unknown lens mutated state: %+v", st.Lenses) + } +} diff --git a/entrypoint.sh b/entrypoint.sh index b25f358..23649b3 100644 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -155,6 +155,12 @@ DEFAULT_CONC="${GADFLY_CONCURRENCY:-1}" provider_of() { case "$1" in */*) echo "${1%%/*}";; *) echo "${GADFLY_PROVIDER:-ollama-cloud}";; esac; } +# Per-model status file path for the live board. The model id can contain '/' +# and ':' (e.g. m1/qwen3:14b), so sanitize to a flat filename; the JSON inside +# carries the real model/provider, so this just needs to be unique per model. 
+STATUS_DIR="${WORKDIR}/status" +status_file_for() { echo "${STATUS_DIR}/$(printf '%s' "$1" | tr -c '[:alnum:]._-' '_').json"; } + provider_cap() { # provider -> concurrency (override map "p=N,...", else default) local p="$1" item k v IFS=',' read -ra _caps <<< "${GADFLY_PROVIDER_CONCURRENCY:-}" @@ -167,8 +173,19 @@ provider_cap() { # provider -> concurrency (override map "p=N,...", else default } review_one() { + local sf="" tmp + [ "${GADFLY_STATUS_BOARD:-1}" != "0" ] && sf="$(status_file_for "$1")" PROVIDER=ollama MODEL="$1" GADFLY_BIN="/usr/local/bin/gadfly" GADFLY_REPO_DIR="$REPO_DIR" \ + GADFLY_STATUS_FILE="$sf" \ bash "${SCRIPTS_DIR}/run.sh" || log "model $1 failed (continuing)" + # run.sh has returned, so this model will make no further progress. If its + # status file is still not done — run.sh skipped the binary (empty diff, no + # key, binary missing) or the binary died mid-run — the board would never + # reach N/N. Mark the file done and flag unfinished lenses as errored so they + # render as "could not complete"; write via temp + rename to stay atomic. + if [ -n "$sf" ] && [ -f "$sf" ] && [ "$(jq -r '.done // false' "$sf" 2>/dev/null)" != "true" ]; then + tmp="$(jq '.done = true | .lenses = ((.lenses // []) | map(if .state == "finished" then . else (.state = "finished" | .errored = true) end))' "$sf" 2>/dev/null)" && printf '%s' "$tmp" > "${sf}.tmp" && mv -f "${sf}.tmp" "$sf" + fi } # Normalize the model list (trim, drop blanks) into MODEL_LIST. @@ -197,10 +214,44 @@ run_lane() { # $1=provider: run its models, at most `cap` at a time wait } +# --- live status board (optional, default on) ------------------------------ +# Each model process publishes per-lens progress to STATUS_DIR/<model>.json; a +# background renderer (status-board.sh) upserts ONE consolidated PR comment so +# progress across all models/lenses is visible at a glance — and a watcher can +# tell when the whole swarm is finished. Advisory/best-effort; the per-model +# findings still land in each model's own comment. Disable with +# GADFLY_STATUS_BOARD=0. 
+BOARD_PID="" +if [ "${GADFLY_STATUS_BOARD:-1}" != "0" ]; then + rm -rf "$STATUS_DIR"; mkdir -p "$STATUS_DIR" + # Pre-seed every model as queued so the board shows the full swarm from t=0, + # even models still waiting on their provider lane's concurrency cap. Each + # binary overwrites its own file with real per-lens detail once it starts. + for m in "${MODEL_LIST[@]}"; do + jq -n --arg model "$m" --arg provider "$(provider_of "$m")" \ + '{model:$model, provider:$provider, started:0, updated:0, done:false, lenses:[]}' \ + > "$(status_file_for "$m")" 2>/dev/null || true + done + GITEA_API="$GITEA_API" GITEA_TOKEN="$GITEA_TOKEN" PR="$PR" GADFLY_STATUS_DIR="$STATUS_DIR" \ + bash "${SCRIPTS_DIR}/status-board.sh" & + BOARD_PID=$! + log "status board started (pid ${BOARD_PID})" +fi + log "providers: ${PROVIDERS:-none}" -# Each provider lane runs in parallel; cap is enforced within each lane. +# Each provider lane runs in parallel; cap is enforced within each lane. Track +# the lane PIDs so we wait ONLY for the review work — not the status board, +# which intentionally runs until we signal it below. +LANE_PIDS=() for p in $PROVIDERS; do run_lane "$p" & + LANE_PIDS+=("$!") done -wait +[ "${#LANE_PIDS[@]}" -gt 0 ] && wait "${LANE_PIDS[@]}" + +# Reviews are done: signal the board to render the final state once and exit. 
+if [ -n "$BOARD_PID" ]; then + touch "${STATUS_DIR}/.done" 2>/dev/null || true + wait "$BOARD_PID" 2>/dev/null || true +fi log "done" diff --git a/examples/adversarial-review.yml b/examples/adversarial-review.yml index 66a2f0a..82befb7 100644 --- a/examples/adversarial-review.yml +++ b/examples/adversarial-review.yml @@ -65,6 +65,10 @@ jobs: # GADFLY_PROVIDER_LENS_CONCURRENCY: "ollama-cloud=3,m1=1" # GADFLY_LENS_CONCURRENCY: ${{ vars.GADFLY_LENS_CONCURRENCY }} # GADFLY_PROVIDER_LENS_CONCURRENCY: ${{ vars.GADFLY_PROVIDER_LENS_CONCURRENCY }} + # Live status board (optional; ON by default): one consolidated comment + # showing every model's per-lens progress as it runs. Disable with + # GADFLY_STATUS_BOARD=0; tune the refresh with GADFLY_STATUS_POLL_SECS. + # GADFLY_STATUS_BOARD: ${{ vars.GADFLY_STATUS_BOARD }} # --- Models & providers (optional; default = Ollama Cloud) ---------- # Gadfly is majordomo-powered, so it can target other backends. Set a # provider for bare model ids; point at a different endpoint with a diff --git a/scripts/run.sh b/scripts/run.sh index 2cbb097..0394f9e 100644 --- a/scripts/run.sh +++ b/scripts/run.sh @@ -24,7 +24,9 @@ # antigravity: `agy` on PATH with credentials already seeded (~/.gemini) # # Optional: -# MAX_DIFF_CHARS diff truncation cap for the prompt (default 60000) +# MAX_DIFF_CHARS diff truncation cap for the prompt (default 60000) +# GADFLY_STATUS_FILE per-model JSON path for the live status board (set by +# entrypoint.sh; empty/unset disables status publishing) # # This script is advisory: it never fails the job for review content. It exits # non-zero only on a usage/configuration error. @@ -161,6 +163,7 @@ case "$PROVIDER" in GADFLY_TITLE="$TITLE" \ GADFLY_BODY="$BODY" \ GADFLY_MAX_DIFF_CHARS="$MAX_DIFF_CHARS" \ + GADFLY_STATUS_FILE="${GADFLY_STATUS_FILE:-}" \ "$BIN" 2>"$ERR_FILE" )" rc=$? 
diff --git a/scripts/status-board.sh b/scripts/status-board.sh
new file mode 100755
index 0000000..f207a1d
--- /dev/null
+++ b/scripts/status-board.sh
@@ -0,0 +1,204 @@
+#!/usr/bin/env bash
+# Live status board for a Gadfly review.
+#
+# Each model process (the cmd/gadfly binary) publishes its per-lens progress to
+# $GADFLY_STATUS_DIR/<sanitized-model>.json as lenses go queued -> running ->
+# finished. This script polls that directory and upserts ONE consolidated PR
+# comment that aggregates every model's per-lens status, so a human (or an agent
+# watching the PR) can see the whole swarm's progress at a glance and know when
+# it's done — instead of staring at N separate "⏳ Reviewing…" placeholders.
+#
+# It is advisory and best-effort: a failed render/post is logged and retried on
+# the next tick; nothing here can fail the review or block a merge. It runs in
+# the background from entrypoint.sh and exits once the $GADFLY_STATUS_DIR/.done
+# sentinel appears (the entrypoint touches it after all model lanes finish),
+# after one final render that starts only once the sentinel has been observed.
+#
+# Required env:
+#   GITEA_API          https://HOST/api/v1/repos/OWNER/REPO
+#   GITEA_TOKEN        token with repo write access (posts the comment)
+#   PR                 pull request number
+#   GADFLY_STATUS_DIR  directory holding the per-model .json files
+# Optional:
+#   GADFLY_STATUS_POLL_SECS  render/upsert interval (default 12)
+set -uo pipefail
+
+: "${GITEA_API:?GITEA_API required}"
+: "${GITEA_TOKEN:?GITEA_TOKEN required}"
+: "${PR:?PR required}"
+: "${GADFLY_STATUS_DIR:?GADFLY_STATUS_DIR required}"
+
+POLL="${GADFLY_STATUS_POLL_SECS:-12}"
+# Guard against a non-numeric poll interval: with `set -uo pipefail` (no set -e)
+# a bad interval would make the wait fail silently and the main loop spin,
+# hammering the Gitea API. Coerce anything non-integer (or <1) back to 12.
+case "$POLL" in '' | *[!0-9]*) POLL=12 ;; esac
+[ "$POLL" -ge 1 ] 2>/dev/null || POLL=12
+POLL=$((10#$POLL)) # force base 10 so "08" is not parsed as invalid octal below
+DONE_FILE="${GADFLY_STATUS_DIR}/.done"
+# Hidden HTML-comment marker identifying the board comment. It MUST be
+# non-empty: find_existing matches with startswith(), every string starts with
+# "", and an empty marker would therefore select — and PATCH over — the first
+# comment on the PR, whoever wrote it.
+# NOTE(review): confirm this value cannot collide with the per-model comment
+# labels run.sh uses.
+MARKER="<!-- gadfly-status-board -->"
+# curl flags as an array so each stays its own word without unquoted splitting.
+CURL_OPTS=(--connect-timeout 20 --max-time 30 -fsS)
+FINAL_ATTEMPTS=3 # tries for the last render; no later tick exists to retry on
+BOARD_ID=""      # cached comment id, so we PATCH in place instead of re-searching
+
+say() { echo "[gadfly-status-board] $*" >&2; }
+
+command -v jq >/dev/null 2>&1 || { say "jq not found; status board disabled"; exit 0; }
+
+# api METHOD URL [JSON] -> response body on stdout. Returns non-zero on any
+# transport failure or HTTP error status (curl -f).
+api() {
+  local method="$1" url="$2"
+  local -a args=(-X "$method" -H "Authorization: token ${GITEA_TOKEN}")
+  if [ "$#" -ge 3 ]; then
+    args+=(-H "Content-Type: application/json" -d "$3")
+  fi
+  curl "${CURL_OPTS[@]}" "${args[@]}" "$url" 2>/dev/null
+}
+
+# render_section FILE -> markdown for one model (its header + per-lens bullets).
+# Reads the JSON the binary writes; tolerates a half-written/missing file by
+# emitting nothing (jq exits non-zero -> caller skips it this tick). A missing
+# or null `lenses` is treated as an empty list.
+render_section() {
+  jq -r '
+    def icon(state; errored):
+      if state == "finished" then (if errored then "⚠️" else "✅" end)
+      elif state == "running" then "🔄"
+      else "⏸️" end;
+    def lensline:
+      "- " + icon(.state; (.errored // false)) + " **" + .name + "** — " +
+      ( if .state == "finished" then (if (.errored // false) then "could not complete" else (.verdict // "done") end)
+        elif .state == "running" then "running"
+        else "queued" end );
+    (.lenses // []) as $lenses
+    | ( [$lenses[] | select(.state == "finished")] | length ) as $fin
+    | ( $lenses | length ) as $tot
+    | ( if .done then "✅ done"
+        elif $tot == 0 then "⏳ waiting to start"
+        else "⏳ " + ($fin|tostring) + "/" + ($tot|tostring) + " lenses" end ) as $sum
+    | "#### `" + .model + "` · " + .provider + " — " + $sum + "\n" +
+      ( if $tot > 0 then ([$lenses[] | lensline] | join("\n"))
+        elif .done then "- ⏭️ _did not run — no lenses reported_"
+        else "- ⏸️ _no lenses reported yet_" end )
+  ' "$1" 2>/dev/null
+}
+
+# render_body -> the full consolidated comment body (marker + header + sections).
+render_body() {
+  local f sec sections="" total=0 finished=0 ts
+  local footer="Live status board. Findings are posted in each model's own comment. Advisory only — does not block merge."
+  for f in "${GADFLY_STATUS_DIR}"/*.json; do
+    [ -f "$f" ] || continue # unmatched glob stays literal; skip it
+    sec="$(render_section "$f")" || continue
+    [ -z "$sec" ] && continue
+    total=$((total + 1))
+    if [ "$(jq -r 'if .done then 1 else 0 end' "$f" 2>/dev/null)" = "1" ]; then
+      finished=$((finished + 1))
+    fi
+    sections="${sections}${sec}"$'\n\n'
+  done
+  ts="$(date -u '+%Y-%m-%d %H:%M:%SZ')"
+  if [ "$total" -eq 0 ]; then
+    sections="_Waiting for reviewers to start…_"$'\n'
+  fi
+  printf '%s\n## 🪰 Gadfly — live review status\n\n%d/%d reviewers finished · updated %s\n\n%s\n%s' \
+    "$MARKER" "$finished" "$total" "$ts" "$sections" "$footer"
+}
+
+# find_existing -> id of the board comment if it already exists (paginate by
+# marker), else nothing. Used to recover the comment across container restarts.
+# Returns non-zero when the comment list cannot be fetched, so the caller can
+# tell "not found" from "lookup failed" and avoid posting a duplicate board.
+find_existing() {
+  local page=1 cmts id
+  [ -n "$MARKER" ] || return 0 # never match on an empty marker (see MARKER)
+  while [ "$page" -le 10 ]; do
+    cmts="$(api GET "${GITEA_API}/issues/${PR}/comments?limit=50&page=${page}")" || return 1
+    [ "$(jq 'length' <<<"$cmts" 2>/dev/null || echo 0)" = "0" ] && break
+    id="$(jq -r --arg m "$MARKER" \
+      '[.[] | select(.body != null and (.body | startswith($m))) | .id][0] // empty' \
+      <<<"$cmts" 2>/dev/null)"
+    [ -n "$id" ] && { printf '%s\n' "$id"; return 0; }
+    page=$((page + 1))
+  done
+  return 0
+}
+
+# upsert BODY — PATCH the cached/known board comment, else POST a new one and
+# cache its id. A failed PATCH (e.g. comment deleted) clears the cache so the
+# next tick re-discovers or re-creates it. Returns 0 only when the comment was
+# written, so callers can retry.
+upsert() {
+  local body="$1" payload resp
+  payload="$(jq -n --arg b "$body" '{body:$b}')" || return 1
+  if [ -z "$BOARD_ID" ]; then
+    if ! BOARD_ID="$(find_existing)"; then
+      BOARD_ID=""
+      say "comment lookup failed; will retry"
+      return 1
+    fi
+  fi
+  if [ -n "$BOARD_ID" ]; then
+    if api PATCH "${GITEA_API}/issues/comments/${BOARD_ID}" "$payload" >/dev/null; then
+      return 0
+    fi
+    say "patch of comment ${BOARD_ID} failed; will re-discover"
+    BOARD_ID=""
+    return 1
+  fi
+  if ! resp="$(api POST "${GITEA_API}/issues/${PR}/comments" "$payload")"; then
+    say "post of board comment failed; will retry"
+    return 1
+  fi
+  # An unparsable response leaves the id empty; the next tick finds the comment
+  # again by its marker.
+  BOARD_ID="$(jq -r '.id // ""' <<<"$resp" 2>/dev/null)"
+  return 0
+}
+
+# wait_for_tick -> sleep up to POLL seconds, returning early once the sentinel
+# appears so the final render is not delayed by a whole poll interval.
+wait_for_tick() {
+  local remaining="$POLL"
+  while [ "$remaining" -gt 0 ] && [ ! -f "$DONE_FILE" ]; do
+    sleep 1
+    remaining=$((remaining - 1))
+  done
+}
+
+# publish_final -> render and upsert the finished board, retrying a few times
+# because no later tick will repair a failed last write.
+publish_final() {
+  local attempt=1
+  while [ "$attempt" -le "$FINAL_ATTEMPTS" ]; do
+    upsert "$(render_body)" && return 0
+    say "final render attempt ${attempt}/${FINAL_ATTEMPTS} failed"
+    attempt=$((attempt + 1))
+    if [ "$attempt" -le "$FINAL_ATTEMPTS" ]; then
+      sleep 2
+    fi
+  done
+  return 1
+}
+
+say "starting (poll ${POLL}s, dir ${GADFLY_STATUS_DIR})"
+while :; do
+  # Look for the sentinel BEFORE rendering. Checking only after the render can
+  # exit on a snapshot taken before the last model wrote its final status,
+  # leaving the board permanently stale.
+  if [ -f "$DONE_FILE" ]; then
+    publish_final || say "final render failed; board may be stale"
+    break
+  fi
+  upsert "$(render_body)" || true
+  wait_for_tick
+done
+say "done"