feat: live status-board comment — per-model/per-lens review progress #1
@@ -4,8 +4,8 @@
|
|||||||
# caches :latest, and this build is what carries foreman provider-type support)
|
# caches :latest, and this build is what carries foreman provider-type support)
|
||||||
# as a specialist swarm and posts
|
# as a specialist swarm and posts
|
||||||
# ONE consolidated review comment as gitea-actions. Advisory only — never blocks a
|
# ONE consolidated review comment as gitea-actions. Advisory only — never blocks a
|
||||||
# merge. Gadfly reviewing its OWN PRs — dogfooding, full fleet (3 cloud + the M1/M5
|
# merge. Gadfly reviewing its OWN PRs — dogfooding, full fleet (9 cloud +
|
||||||
# Macs), copied from mort's setup.
|
# the M5 Mac; M1 dropped as too slow), copied from mort's setup.
|
||||||
|
|
||||||
name: Adversarial Review (Gadfly)
|
name: Adversarial Review (Gadfly)
|
||||||
|
|
||||||
@@ -41,8 +41,8 @@ jobs:
|
|||||||
|| github.actor == 'fizi'
|
|| github.actor == 'fizi'
|
||||||
|| github.actor == 'dazed'))
|
|| github.actor == 'dazed'))
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
# Full fleet (3 cloud + 2 local Macs, all running concurrently) reviewing
|
# Fleet (9 cloud + 1 local Mac/M5; the cloud and Mac lanes run in parallel) reviewing
|
||||||
# every PR with the 3-lens suite — the slow local lanes dominate wall time.
|
# every PR with the 3-lens suite — the slow local lane dominates wall time.
|
||||||
timeout-minutes: 90
|
timeout-minutes: 90
|
||||||
steps:
|
steps:
|
||||||
- uses: docker://gitea.stevedudenhoeffer.com/steve/gadfly:sha-d7f364d
|
- uses: docker://gitea.stevedudenhoeffer.com/steve/gadfly:sha-d7f364d
|
||||||
@@ -50,10 +50,12 @@ jobs:
|
|||||||
GITEA_API: ${{ github.server_url }}/api/v1/repos/${{ github.repository }}
|
GITEA_API: ${{ github.server_url }}/api/v1/repos/${{ github.repository }}
|
||||||
GITEA_TOKEN: ${{ secrets.GITEA_TOKEN }}
|
GITEA_TOKEN: ${{ secrets.GITEA_TOKEN }}
|
||||||
OLLAMA_CLOUD_API_KEY: ${{ secrets.OLLAMA_CLOUD_API_KEY }}
|
OLLAMA_CLOUD_API_KEY: ${{ secrets.OLLAMA_CLOUD_API_KEY }}
|
||||||
# Local Macs, reached through their foreman queues (native Ollama on the
|
# Local Mac (M5), reached through its foreman queue (native Ollama on the
|
||||||
# wire). Gadfly's GADFLY_ENDPOINT_* form with the "foreman" provider
|
# wire). Gadfly's GADFLY_ENDPOINT_* form with the "foreman" provider
|
||||||
# type: GADFLY_ENDPOINT_M1 registers provider "m1", _M5 registers "m5",
|
# type: GADFLY_ENDPOINT_M5 registers provider "m5", building a
|
||||||
# each building a foreman-preset Ollama client at the given URL. Values
|
# foreman-preset Ollama client at the given URL. (M1 is dropped from
|
||||||
|
# gadfly's swarm — too slow/low-signal — so its endpoint isn't mapped.)
|
||||||
|
# Values
|
||||||
# (host + token) live in gitea secrets, each of the form:
|
# (host + token) live in gitea secrets, each of the form:
|
||||||
# foreman|https://<foreman-host>|<token>
|
# foreman|https://<foreman-host>|<token>
|
||||||
# (converted from the komodo LLM_* DSNs foreman://<token>@<host>).
|
# (converted from the komodo LLM_* DSNs foreman://<token>@<host>).
|
||||||
@@ -64,12 +66,16 @@ jobs:
|
|||||||
# NOTE: the Mac behind each foreman must still be awake/reachable; if a
|
# NOTE: the Mac behind the foreman must still be awake/reachable; if the
|
||||||
# box is offline, that model's comment shows an error and the others
|
# box is offline, that model's comment shows an error and the others
|
||||||
# still post. (Gitea secrets aren't auto-exposed — map each explicitly.)
|
# still post. (Gitea secrets aren't auto-exposed — map each explicitly.)
|
||||||
GADFLY_ENDPOINT_M1: ${{ secrets.GADFLY_ENDPOINT_M1 }}
|
|
||||||
GADFLY_ENDPOINT_M5: ${{ secrets.GADFLY_ENDPOINT_M5 }}
|
GADFLY_ENDPOINT_M5: ${{ secrets.GADFLY_ENDPOINT_M5 }}
|
||||||
# 3 cloud (parallel) + M1 Pro + M5 Max — one consolidated comment each.
|
# Fleet: 9 cloud (3 at a time) + M5 Max — one consolidated comment each.
|
||||||
GADFLY_MODELS: "minimax-m3:cloud,deepseek-v4-flash:cloud,glm-5.2:cloud,m1/qwen3:14b,m5/qwen3.6:35b-mlx"
|
# Matches mort's cloud set so the model-quality scoreboard is comparable
|
||||||
# cloud runs 3 at once; each Mac one at a time; all three lanes parallel.
|
# across both repos. NOTE: M1 Pro is intentionally dropped here (too slow
|
||||||
GADFLY_PROVIDER_CONCURRENCY: "ollama-cloud=3,m1=1,m5=1"
|
# / low-signal for gadfly's own PRs); mort still runs it.
|
||||||
|
GADFLY_MODELS: "minimax-m3:cloud,glm-5.2:cloud,glm-5.1:cloud,kimi-k2.7-code:cloud,deepseek-v4-pro:cloud,nemotron-3-super:cloud,gpt-oss:120b-cloud,qwen3-coder:480b-cloud,gemma4:cloud,m5/qwen3.6:35b-mlx"
|
||||||
|
# cloud runs 3 at once; the Mac one at a time; both lanes parallel.
|
||||||
|
GADFLY_PROVIDER_CONCURRENCY: "ollama-cloud=3,m5=1"
|
||||||
|
# 3 cloud models x 3 lenses = 9 concurrent ollama-cloud queries (under the budget of 10).
|
||||||
|
GADFLY_PROVIDER_LENS_CONCURRENCY: "ollama-cloud=3"
|
||||||
# Default => the 3-lens suite (security, correctness, error-handling).
|
# Default => the 3-lens suite (security, correctness, error-handling).
|
||||||
# Set the repo var GADFLY_SPECIALISTS to override (csv / "all" / "auto").
|
# Set the repo var GADFLY_SPECIALISTS to override (csv / "all" / "auto").
|
||||||
GADFLY_SPECIALISTS: ${{ vars.GADFLY_SPECIALISTS || 'security,correctness,error-handling' }}
|
GADFLY_SPECIALISTS: ${{ vars.GADFLY_SPECIALISTS || 'security,correctness,error-handling' }}
|
||||||
|
|||||||
@@ -39,6 +39,7 @@ cmd/gadfly/ the reviewer binary — pure producer of review markdown
|
|||||||
recheck.go second-pass verification prompt + verdict recompute
|
recheck.go second-pass verification prompt + verdict recompute
|
||||||
*_test.go sandbox, recheck, wrap-up, spec/endpoint-parse, specialist-resolution tests
|
*_test.go sandbox, recheck, wrap-up, spec/endpoint-parse, specialist-resolution tests
|
||||||
scripts/run.sh fetch PR diff+meta, run the binary, upsert ONE labeled PR comment
|
scripts/run.sh fetch PR diff+meta, run the binary, upsert ONE labeled PR comment
|
||||||
|
scripts/status-board.sh render+upsert ONE live status-board comment (per-model/per-lens progress)
|
||||||
scripts/system-prompt.txt the reviewer persona + verification discipline (generic, not repo-specific)
|
scripts/system-prompt.txt the reviewer persona + verification discipline (generic, not repo-specific)
|
||||||
entrypoint.sh container brains: trigger gating, PR clone, model loop (the logic that
|
entrypoint.sh container brains: trigger gating, PR clone, model loop (the logic that
|
||||||
used to live in workflow YAML)
|
used to live in workflow YAML)
|
||||||
|
|||||||
+1
-1
@@ -28,5 +28,5 @@ RUN apk add --no-cache bash git curl jq ca-certificates
|
|||||||
COPY --from=build /out/gadfly /usr/local/bin/gadfly
|
COPY --from=build /out/gadfly /usr/local/bin/gadfly
|
||||||
COPY scripts /app/scripts
|
COPY scripts /app/scripts
|
||||||
COPY entrypoint.sh /entrypoint.sh
|
COPY entrypoint.sh /entrypoint.sh
|
||||||
RUN chmod +x /entrypoint.sh /app/scripts/run.sh /usr/local/bin/gadfly
|
RUN chmod +x /entrypoint.sh /app/scripts/run.sh /app/scripts/status-board.sh /usr/local/bin/gadfly
|
||||||
ENTRYPOINT ["/entrypoint.sh"]
|
ENTRYPOINT ["/entrypoint.sh"]
|
||||||
|
|||||||
@@ -192,6 +192,30 @@ GADFLY_PROVIDER_LENS_CONCURRENCY: "ollama-cloud=3,m1=1"
|
|||||||
GADFLY_SPECIALISTS: "security,correctness,error-handling"
|
GADFLY_SPECIALISTS: "security,correctness,error-handling"
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Live status board
|
||||||
|
|
||||||
|
When several models (each with several lenses) review a PR, the individual findings land in
|
||||||
|
**one comment per model** — but while that's in flight all you'd see is a row of
|
||||||
|
`⏳ Reviewing…` placeholders. So Gadfly also upserts **one consolidated status-board comment**
|
||||||
|
that aggregates every model's per-lens progress as it happens:
|
||||||
|
|
||||||
|
```
|
||||||
|
## 🪰 Gadfly — live review status
|
||||||
|
1/3 reviewers finished · updated 2026-06-27 18:14:56Z
|
||||||
|
|
||||||
|
#### `glm-5.2:cloud` · ollama-cloud — ⏳ 2/4 lenses
|
||||||
|
- ✅ security — No material issues found
|
||||||
|
- 🔄 correctness — running
|
||||||
|
- ⏸️ performance — queued
|
||||||
|
…
|
||||||
|
```
|
||||||
|
|
||||||
|
Each model process publishes its lenses (queued → running → finished + verdict) to a small
|
||||||
|
JSON file, and a background renderer in `entrypoint.sh` re-renders + upserts the single comment
|
||||||
|
every `GADFLY_STATUS_POLL_SECS` (default 12s) until the swarm finishes. It's advisory and
|
||||||
|
best-effort — entirely separate from the per-model findings comments, which are unaffected.
|
||||||
|
Turn it off with `GADFLY_STATUS_BOARD=0`.
|
||||||
|
|
||||||
### Triggers
|
### Triggers
|
||||||
|
|
||||||
1. A **new/reopened/ready** non-draft PR — automatic.
|
1. A **new/reopened/ready** non-draft PR — automatic.
|
||||||
@@ -217,6 +241,7 @@ fixes. This keeps usage down.)
|
|||||||
```
|
```
|
||||||
cmd/gadfly/ the agentic reviewer binary (majordomo + Ollama Cloud); zero deps beyond stdlib + majordomo
|
cmd/gadfly/ the agentic reviewer binary (majordomo + Ollama Cloud); zero deps beyond stdlib + majordomo
|
||||||
scripts/run.sh fetches the PR diff, runs the reviewer, upserts one labeled comment
|
scripts/run.sh fetches the PR diff, runs the reviewer, upserts one labeled comment
|
||||||
|
scripts/status-board.sh renders + upserts the single live status-board comment (per-lens progress)
|
||||||
scripts/system-prompt.txt the reviewer persona + verification discipline
|
scripts/system-prompt.txt the reviewer persona + verification discipline
|
||||||
entrypoint.sh the container brains: trigger gating, clone, model loop (logic lives here, not in YAML)
|
entrypoint.sh the container brains: trigger gating, clone, model loop (logic lives here, not in YAML)
|
||||||
Dockerfile multi-stage; build-time module creds (BuildKit secrets) never reach the final image
|
Dockerfile multi-stage; build-time module creds (BuildKit secrets) never reach the final image
|
||||||
@@ -252,6 +277,8 @@ The reviewer binary reads these (the stub/entrypoint set sane defaults):
|
|||||||
| `GADFLY_RECHECK` | on | set `0`/`false` to skip the recheck pass |
|
| `GADFLY_RECHECK` | on | set `0`/`false` to skip the recheck pass |
|
||||||
| `GADFLY_RECHECK_MAX_STEPS` | 16 | recheck-pass step cap |
|
| `GADFLY_RECHECK_MAX_STEPS` | 16 | recheck-pass step cap |
|
||||||
| `GADFLY_MAX_DIFF_CHARS` | 60000 | diff chars embedded in the prompt (full diff via `get_diff`) |
|
| `GADFLY_MAX_DIFF_CHARS` | 60000 | diff chars embedded in the prompt (full diff via `get_diff`) |
|
||||||
|
| `GADFLY_STATUS_BOARD` | on | set `0` to disable the live status-board comment |
|
||||||
|
| `GADFLY_STATUS_POLL_SECS` | 12 | how often the status board re-renders/upserts |
|
||||||
| `GADFLY_TRIGGER_PHRASE` | `@gadfly review` | comment phrase that re-triggers |
|
| `GADFLY_TRIGGER_PHRASE` | `@gadfly review` | comment phrase that re-triggers |
|
||||||
| `GADFLY_ALLOWED_USERS` | *(collaborators)* | comma-separated allow-list for comment triggers |
|
| `GADFLY_ALLOWED_USERS` | *(collaborators)* | comma-separated allow-list for comment triggers |
|
||||||
| `GADFLY_FINDINGS_URL` | — | gadfly-reports store base URL; set to enable findings telemetry (off when empty) |
|
| `GADFLY_FINDINGS_URL` | — | gadfly-reports store base URL; set to enable findings telemetry (off when empty) |
|
||||||
|
|||||||
+19
-1
@@ -218,6 +218,11 @@ func run() error {
|
|||||||
func runSpecialists(mdl llm.Model, fsTools *repoFS, base string, specialists []Specialist, task, diff string) []specialistResult {
|
func runSpecialists(mdl llm.Model, fsTools *repoFS, base string, specialists []Specialist, task, diff string) []specialistResult {
|
||||||
results := make([]specialistResult, len(specialists))
|
results := make([]specialistResult, len(specialists))
|
||||||
|
|
||||||
|
// Optional live status board: publishes this model's per-lens progress to a
|
||||||
|
// file the entrypoint board renders. Inert (no-op) unless GADFLY_STATUS_FILE
|
||||||
|
// is set, so plain runs are unaffected.
|
||||||
|
sw := newStatusWriter(os.Getenv("GADFLY_MODEL"), modelProvider(), specialists)
|
||||||
|
|
||||||
conc := min(lensConcurrency(), len(specialists))
|
conc := min(lensConcurrency(), len(specialists))
|
||||||
|
|
||||||
sem := make(chan struct{}, conc)
|
sem := make(chan struct{}, conc)
|
||||||
@@ -228,8 +233,21 @@ func runSpecialists(mdl llm.Model, fsTools *repoFS, base string, specialists []S
|
|||||||
go func(i int, sp Specialist) {
|
go func(i int, sp Specialist) {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
defer func() { <-sem }()
|
defer func() { <-sem }()
|
||||||
|
// A panic in one lens must not crash the whole binary (which would
|
||||||
|
// kill every other lens's output) or leave this lens stuck at
|
||||||
|
// "running" on the status board. Recover, record it as an errored
|
||||||
|
// result, and mark the lens finished so the board can complete.
|
||||||
|
defer func() {
|
||||||
|
if r := recover(); r != nil {
|
||||||
|
results[i] = specialistResult{spec: sp, out: fmt.Sprintf("⚠️ This reviewer panicked: %v", r), verdict: verdictUnknown, errored: true}
|
||||||
|
sw.set(sp.Name, lensFinished, "", true)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
sw.set(sp.Name, lensRunning, "", false)
|
||||||
out, errored := reviewWithSpecialist(mdl, fsTools, base, sp, task, diff)
|
out, errored := reviewWithSpecialist(mdl, fsTools, base, sp, task, diff)
|
||||||
results[i] = specialistResult{spec: sp, out: out, verdict: parseVerdict(out), errored: errored}
|
v := parseVerdict(out)
|
||||||
|
results[i] = specialistResult{spec: sp, out: out, verdict: v, errored: errored}
|
||||||
|
sw.set(sp.Name, lensFinished, v.label(), errored)
|
||||||
}(i, sp)
|
}(i, sp)
|
||||||
}
|
}
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
|
|||||||
@@ -0,0 +1,131 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Lens states for the live status board. A lens starts queued, becomes running
|
||||||
|
// when its pass begins, and ends finished (with a verdict, or errored).
|
||||||
|
const (
|
||||||
|
lensQueued = "queued"
|
||||||
|
lensRunning = "running"
|
||||||
|
lensFinished = "finished"
|
||||||
|
)
|
||||||
|
|
||||||
|
// lensStatus is one specialist lens's progress, as rendered by the entrypoint
|
||||||
|
// status board (scripts/status-board.sh).
|
||||||
|
type lensStatus struct {
|
||||||
|
Name string `json:"name"`
|
||||||
|
State string `json:"state"` // queued | running | finished
|
||||||
|
Verdict string `json:"verdict,omitempty"` // set when finished (the lens's label)
|
||||||
|
Errored bool `json:"errored,omitempty"` // the lens failed to complete
|
||||||
|
}
|
||||||
|
|
||||||
|
// modelStatus is the on-disk shape one model process publishes for the live
|
||||||
|
// status board: a snapshot of this model's lenses as they progress. The board
|
||||||
|
// reads every model's file and renders a single consolidated PR comment.
|
||||||
|
type modelStatus struct {
|
||||||
|
Model string `json:"model"`
|
||||||
|
Provider string `json:"provider"`
|
||||||
|
Started int64 `json:"started"` // unix seconds
|
||||||
|
Updated int64 `json:"updated"` // unix seconds, bumped on every change
|
||||||
|
Done bool `json:"done"` // all lenses finished
|
||||||
|
Lenses []lensStatus `json:"lenses"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// statusWriter maintains a model's status file as its lenses progress. It is
|
||||||
|
// purely opt-in: when GADFLY_STATUS_FILE is unset the writer's path is empty and
|
||||||
|
// every method is a no-op, so a plain run (and the unit tests) never touch the
|
||||||
|
// filesystem and behave exactly as before. Writes are atomic (temp file +
|
||||||
|
// rename within the same dir) so the board never reads a half-written file even
|
||||||
|
// though lenses can finish concurrently.
|
||||||
|
type statusWriter struct {
|
||||||
|
path string
|
||||||
|
mu sync.Mutex
|
||||||
|
st modelStatus
|
||||||
|
}
|
||||||
|
|
||||||
|
// newStatusWriter seeds a writer with every lens queued and flushes the initial
|
||||||
|
// snapshot. model/provider are echoed into the file so the board can render
|
||||||
|
// them without re-deriving from the filename (which is sanitized). The status
|
||||||
|
// file path comes from GADFLY_STATUS_FILE (set by run.sh per model); when empty
|
||||||
|
// the writer is inert.
|
||||||
|
func newStatusWriter(model, provider string, specialists []Specialist) *statusWriter {
|
||||||
|
w := &statusWriter{path: strings.TrimSpace(os.Getenv("GADFLY_STATUS_FILE"))}
|
||||||
|
w.st = modelStatus{
|
||||||
|
Model: model,
|
||||||
|
Provider: provider,
|
||||||
|
Started: time.Now().Unix(),
|
||||||
|
}
|
||||||
|
for _, sp := range specialists {
|
||||||
|
w.st.Lenses = append(w.st.Lenses, lensStatus{Name: sp.Name, State: lensQueued})
|
||||||
|
}
|
||||||
|
w.flush()
|
||||||
|
return w
|
||||||
|
}
|
||||||
|
|
||||||
|
// set transitions a lens to a new state (and verdict/errored when finished),
|
||||||
|
// recomputes the overall done flag, and atomically rewrites the file. Unknown
|
||||||
|
// lens names are ignored. Safe for concurrent callers (one goroutine per lens).
|
||||||
|
func (w *statusWriter) set(name, state, verdict string, errored bool) {
|
||||||
|
if w == nil || w.path == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w.mu.Lock()
|
||||||
|
defer w.mu.Unlock()
|
||||||
|
for i := range w.st.Lenses {
|
||||||
|
if w.st.Lenses[i].Name == name {
|
||||||
|
w.st.Lenses[i].State = state
|
||||||
|
w.st.Lenses[i].Verdict = verdict
|
||||||
|
w.st.Lenses[i].Errored = errored
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
done := true
|
||||||
|
for _, l := range w.st.Lenses {
|
||||||
|
if l.State != lensFinished {
|
||||||
|
done = false
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
w.st.Done = done
|
||||||
|
w.flush()
|
||||||
|
}
|
||||||
|
|
||||||
|
// flush writes the current snapshot atomically. Best-effort: any error is
|
||||||
|
// swallowed (the status board is advisory and must never affect the review).
|
||||||
|
func (w *statusWriter) flush() {
|
||||||
|
if w.path == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w.st.Updated = time.Now().Unix()
|
||||||
|
data, err := json.MarshalIndent(&w.st, "", " ")
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
dir := filepath.Dir(w.path)
|
||||||
|
tmp, err := os.CreateTemp(dir, ".status-*.tmp")
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
tmpName := tmp.Name()
|
||||||
|
if _, err := tmp.Write(data); err != nil {
|
||||||
|
tmp.Close()
|
||||||
|
os.Remove(tmpName)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err := tmp.Close(); err != nil {
|
||||||
|
os.Remove(tmpName)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Rename is atomic within the same filesystem, so the board reader sees
|
||||||
|
// either the old complete file or the new complete file — never a partial.
|
||||||
|
if err := os.Rename(tmpName, w.path); err != nil {
|
||||||
|
os.Remove(tmpName)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,103 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// readStatus loads a modelStatus written by the statusWriter.
|
||||||
|
func readStatus(t *testing.T, path string) modelStatus {
|
||||||
|
t.Helper()
|
||||||
|
data, err := os.ReadFile(path)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("read status file: %v", err)
|
||||||
|
}
|
||||||
|
var st modelStatus
|
||||||
|
if err := json.Unmarshal(data, &st); err != nil {
|
||||||
|
t.Fatalf("unmarshal status: %v", err)
|
||||||
|
}
|
||||||
|
return st
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStatusWriterLifecycle(t *testing.T) {
|
||||||
|
path := filepath.Join(t.TempDir(), "glm.json")
|
||||||
|
t.Setenv("GADFLY_STATUS_FILE", path)
|
||||||
|
|
||||||
|
specs := []Specialist{
|
||||||
|
{Name: "security", Title: "Security"},
|
||||||
|
{Name: "correctness", Title: "Correctness"},
|
||||||
|
}
|
||||||
|
w := newStatusWriter("glm-5.2:cloud", "ollama-cloud", specs)
|
||||||
|
|
||||||
|
// Initial snapshot: both lenses queued, model not done, metadata echoed.
|
||||||
|
st := readStatus(t, path)
|
||||||
|
if st.Model != "glm-5.2:cloud" || st.Provider != "ollama-cloud" {
|
||||||
|
t.Fatalf("model/provider not echoed: %+v", st)
|
||||||
|
}
|
||||||
|
if len(st.Lenses) != 2 {
|
||||||
|
t.Fatalf("want 2 lenses, got %d", len(st.Lenses))
|
||||||
|
}
|
||||||
|
for _, l := range st.Lenses {
|
||||||
|
if l.State != lensQueued {
|
||||||
|
t.Fatalf("lens %q want queued, got %q", l.Name, l.State)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if st.Done {
|
||||||
|
t.Fatal("model marked done while lenses still queued")
|
||||||
|
}
|
||||||
|
if st.Started == 0 {
|
||||||
|
t.Fatal("started timestamp not set")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Transition one lens through running -> finished; model not yet done.
|
||||||
|
w.set("security", lensRunning, "", false)
|
||||||
|
if got := readStatus(t, path); got.Lenses[0].State != lensRunning {
|
||||||
|
t.Fatalf("security want running, got %q", got.Lenses[0].State)
|
||||||
|
}
|
||||||
|
w.set("security", lensFinished, "No material issues found", false)
|
||||||
|
st = readStatus(t, path)
|
||||||
|
if st.Lenses[0].State != lensFinished || st.Lenses[0].Verdict != "No material issues found" {
|
||||||
|
t.Fatalf("security finish not recorded: %+v", st.Lenses[0])
|
||||||
|
}
|
||||||
|
if st.Done {
|
||||||
|
t.Fatal("model marked done with one lens still queued")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Finish the second lens (errored) -> model done.
|
||||||
|
w.set("correctness", lensFinished, "Reviewed", true)
|
||||||
|
st = readStatus(t, path)
|
||||||
|
if !st.Done {
|
||||||
|
t.Fatal("model should be done after all lenses finished")
|
||||||
|
}
|
||||||
|
if !st.Lenses[1].Errored {
|
||||||
|
t.Fatal("errored flag not recorded for correctness")
|
||||||
|
}
|
||||||
|
if st.Updated < st.Started {
|
||||||
|
t.Fatalf("updated (%d) should be >= started (%d)", st.Updated, st.Started)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// With GADFLY_STATUS_FILE unset the writer is inert: no file, no panic.
|
||||||
|
func TestStatusWriterDisabled(t *testing.T) {
|
||||||
|
t.Setenv("GADFLY_STATUS_FILE", "")
|
||||||
|
w := newStatusWriter("m", "p", []Specialist{{Name: "security"}})
|
||||||
|
w.set("security", lensFinished, "Minor issues", false)
|
||||||
|
// Nothing to assert beyond "did not panic / did not write" — a nil-safe set
|
||||||
|
// on the disabled writer is the contract.
|
||||||
|
if w.path != "" {
|
||||||
|
t.Fatalf("expected empty path when disabled, got %q", w.path)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// set must ignore unknown lens names rather than panic or append.
|
||||||
|
func TestStatusWriterUnknownLens(t *testing.T) {
|
||||||
|
path := filepath.Join(t.TempDir(), "s.json")
|
||||||
|
t.Setenv("GADFLY_STATUS_FILE", path)
|
||||||
|
w := newStatusWriter("m", "p", []Specialist{{Name: "security"}})
|
||||||
|
w.set("does-not-exist", lensRunning, "", false)
|
||||||
|
if st := readStatus(t, path); len(st.Lenses) != 1 || st.Lenses[0].State != lensQueued {
|
||||||
|
t.Fatalf("unknown lens mutated state: %+v", st.Lenses)
|
||||||
|
}
|
||||||
|
}
|
||||||
+53
-2
@@ -155,6 +155,12 @@ DEFAULT_CONC="${GADFLY_CONCURRENCY:-1}"
|
|||||||
|
|
||||||
# Resolve the provider for a model id: an id of the form "<provider>/<model>"
# yields the text before the first '/'; a bare id falls back to
# $GADFLY_PROVIDER, or "ollama-cloud" when that is unset or empty.
provider_of() {
  local model_id="$1"
  if [[ "$model_id" == */* ]]; then
    echo "${model_id%%/*}"
  else
    echo "${GADFLY_PROVIDER:-ollama-cloud}"
  fi
}
|
# Resolve the provider for a model id: an id of the form "<provider>/<model>"
# yields the text before the first '/'; a bare id falls back to
# $GADFLY_PROVIDER, or "ollama-cloud" when that is unset or empty.
provider_of() {
  local model_id="$1"
  if [[ "$model_id" == */* ]]; then
    echo "${model_id%%/*}"
  else
    echo "${GADFLY_PROVIDER:-ollama-cloud}"
  fi
}
|
||||||
|
|
||||||
|
# Per-model status file path for the live board. The model id can contain '/'
|
||||||
|
# and ':' (e.g. m1/qwen3:14b), so sanitize to a flat filename; the JSON inside
|
||||||
|
# carries the real model/provider, so this just needs to be unique per model.
|
||||||
|
STATUS_DIR="${WORKDIR}/status"
|
||||||
|
# Print the status-file path for model id $1: "${STATUS_DIR}/<sanitized>.json".
# Every character other than an alphanumeric, '.', '_' or '-' (e.g. the '/' and
# ':' in "m1/qwen3:14b") is replaced with '_'.
# Globals:   STATUS_DIR (read)
# Arguments: $1 - model id
# Outputs:   the path, on stdout
status_file_for() {
  local safe
  # printf '%s' instead of echo: echo's trailing newline would itself be
  # translated by `tr -c` into a stray trailing '_', and an id that looks like
  # an echo option (e.g. "-n") would be swallowed rather than sanitized.
  safe="$(printf '%s' "$1" | tr -c '[:alnum:]._-' '_')"
  printf '%s\n' "${STATUS_DIR}/${safe}.json"
}
|
||||||
|
|
||||||
provider_cap() { # provider -> concurrency (override map "p=N,...", else default)
|
provider_cap() { # provider -> concurrency (override map "p=N,...", else default)
|
||||||
local p="$1" item k v
|
local p="$1" item k v
|
||||||
IFS=',' read -ra _caps <<< "${GADFLY_PROVIDER_CONCURRENCY:-}"
|
IFS=',' read -ra _caps <<< "${GADFLY_PROVIDER_CONCURRENCY:-}"
|
||||||
@@ -167,8 +173,19 @@ provider_cap() { # provider -> concurrency (override map "p=N,...", else default
|
|||||||
}
|
}
|
||||||
|
|
||||||
review_one() {
|
review_one() {
|
||||||
|
local sf=""
|
||||||
|
[ "${GADFLY_STATUS_BOARD:-1}" != "0" ] && sf="$(status_file_for "$1")"
|
||||||
PROVIDER=ollama MODEL="$1" GADFLY_BIN="/usr/local/bin/gadfly" GADFLY_REPO_DIR="$REPO_DIR" \
|
PROVIDER=ollama MODEL="$1" GADFLY_BIN="/usr/local/bin/gadfly" GADFLY_REPO_DIR="$REPO_DIR" \
|
||||||
|
GADFLY_STATUS_FILE="$sf" \
|
||||||
bash "${SCRIPTS_DIR}/run.sh" || log "model $1 failed (continuing)"
|
bash "${SCRIPTS_DIR}/run.sh" || log "model $1 failed (continuing)"
|
||||||
|
# If the binary never wrote real status (run.sh skipped it: empty diff, no key,
|
||||||
|
# binary missing), the pre-seed stays {started:0, done:false} and the board
|
||||||
|
# would show this model "waiting to start" forever and never reach N/N. Mark
|
||||||
|
# such a never-started file done so the board can complete. The binary stamps a
|
||||||
|
# nonzero `started`, so that reliably distinguishes "ran" from "skipped".
|
||||||
|
if [ -n "$sf" ] && [ -f "$sf" ] && [ "$(jq -r '.started // 0' "$sf" 2>/dev/null)" = "0" ]; then
|
||||||
|
tmp="$(jq '.done = true' "$sf" 2>/dev/null)" && printf '%s' "$tmp" > "$sf"
|
||||||
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
# Normalize the model list (trim, drop blanks) into MODEL_LIST.
|
# Normalize the model list (trim, drop blanks) into MODEL_LIST.
|
||||||
@@ -197,10 +214,44 @@ run_lane() { # $1=provider: run its models, at most `cap` at a time
|
|||||||
wait
|
wait
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# --- live status board (optional, default on) ------------------------------
|
||||||
|
# Each model process publishes per-lens progress to STATUS_DIR/<model>.json; a
|
||||||
|
# background renderer (status-board.sh) upserts ONE consolidated PR comment so
|
||||||
|
# progress across all models/lenses is visible at a glance — and a watcher can
|
||||||
|
# tell when the whole swarm is finished. Advisory/best-effort; the per-model
|
||||||
|
# findings still land in each model's own comment. Disable with
|
||||||
|
# GADFLY_STATUS_BOARD=0.
|
||||||
|
BOARD_PID=""
|
||||||
|
if [ "${GADFLY_STATUS_BOARD:-1}" != "0" ]; then
|
||||||
|
rm -rf "$STATUS_DIR"; mkdir -p "$STATUS_DIR"
|
||||||
|
# Pre-seed every model as queued so the board shows the full swarm from t=0,
|
||||||
|
# even models still waiting on their provider lane's concurrency cap. Each
|
||||||
|
# binary overwrites its own file with real per-lens detail once it starts.
|
||||||
|
for m in "${MODEL_LIST[@]}"; do
|
||||||
|
jq -n --arg model "$m" --arg provider "$(provider_of "$m")" \
|
||||||
|
'{model:$model, provider:$provider, started:0, updated:0, done:false, lenses:[]}' \
|
||||||
|
> "$(status_file_for "$m")" 2>/dev/null || true
|
||||||
|
done
|
||||||
|
GITEA_API="$GITEA_API" GITEA_TOKEN="$GITEA_TOKEN" PR="$PR" GADFLY_STATUS_DIR="$STATUS_DIR" \
|
||||||
|
bash "${SCRIPTS_DIR}/status-board.sh" &
|
||||||
|
BOARD_PID=$!
|
||||||
|
log "status board started (pid ${BOARD_PID})"
|
||||||
|
fi
|
||||||
|
|
||||||
log "providers: ${PROVIDERS:-none}"
|
log "providers: ${PROVIDERS:-none}"
|
||||||
# Each provider lane runs in parallel; cap is enforced within each lane.
|
# Each provider lane runs in parallel; cap is enforced within each lane. Track
|
||||||
|
# the lane PIDs so we wait ONLY for the review work — not the status board,
|
||||||
|
# which intentionally runs until we signal it below.
|
||||||
|
LANE_PIDS=()
|
||||||
for p in $PROVIDERS; do
|
for p in $PROVIDERS; do
|
||||||
run_lane "$p" &
|
run_lane "$p" &
|
||||||
|
LANE_PIDS+=("$!")
|
||||||
done
|
done
|
||||||
wait
|
[ "${#LANE_PIDS[@]}" -gt 0 ] && wait "${LANE_PIDS[@]}"
|
||||||
|
|
||||||
|
# Reviews are done: signal the board to render the final state once and exit.
|
||||||
|
if [ -n "$BOARD_PID" ]; then
|
||||||
|
touch "${STATUS_DIR}/.done" 2>/dev/null || true
|
||||||
|
wait "$BOARD_PID" 2>/dev/null || true
|
||||||
|
fi
|
||||||
log "done"
|
log "done"
|
||||||
|
|||||||
@@ -65,6 +65,10 @@ jobs:
|
|||||||
# GADFLY_PROVIDER_LENS_CONCURRENCY: "ollama-cloud=3,m1=1"
|
# GADFLY_PROVIDER_LENS_CONCURRENCY: "ollama-cloud=3,m1=1"
|
||||||
# GADFLY_LENS_CONCURRENCY: ${{ vars.GADFLY_LENS_CONCURRENCY }}
|
# GADFLY_LENS_CONCURRENCY: ${{ vars.GADFLY_LENS_CONCURRENCY }}
|
||||||
# GADFLY_PROVIDER_LENS_CONCURRENCY: ${{ vars.GADFLY_PROVIDER_LENS_CONCURRENCY }}
|
# GADFLY_PROVIDER_LENS_CONCURRENCY: ${{ vars.GADFLY_PROVIDER_LENS_CONCURRENCY }}
|
||||||
|
# Live status board (optional; ON by default): one consolidated comment
|
||||||
|
# showing every model's per-lens progress as it runs. Disable with
|
||||||
|
# GADFLY_STATUS_BOARD=0; tune the refresh with GADFLY_STATUS_POLL_SECS.
|
||||||
|
# GADFLY_STATUS_BOARD: ${{ vars.GADFLY_STATUS_BOARD }}
|
||||||
# --- Models & providers (optional; default = Ollama Cloud) ----------
|
# --- Models & providers (optional; default = Ollama Cloud) ----------
|
||||||
# Gadfly is majordomo-powered, so it can target other backends. Set a
|
# Gadfly is majordomo-powered, so it can target other backends. Set a
|
||||||
# provider for bare model ids; point at a different endpoint with a
|
# provider for bare model ids; point at a different endpoint with a
|
||||||
|
|||||||
+4
-1
@@ -24,7 +24,9 @@
|
|||||||
# antigravity: `agy` on PATH with credentials already seeded (~/.gemini)
|
# antigravity: `agy` on PATH with credentials already seeded (~/.gemini)
|
||||||
#
|
#
|
||||||
# Optional:
|
# Optional:
|
||||||
# MAX_DIFF_CHARS diff truncation cap for the prompt (default 60000)
|
# MAX_DIFF_CHARS diff truncation cap for the prompt (default 60000)
|
||||||
|
# GADFLY_STATUS_FILE per-model JSON path for the live status board (set by
|
||||||
|
# entrypoint.sh; empty/unset disables status publishing)
|
||||||
#
|
#
|
||||||
# This script is advisory: it never fails the job for review content. It exits
|
# This script is advisory: it never fails the job for review content. It exits
|
||||||
# non-zero only on a usage/configuration error.
|
# non-zero only on a usage/configuration error.
|
||||||
@@ -161,6 +163,7 @@ case "$PROVIDER" in
|
|||||||
GADFLY_TITLE="$TITLE" \
|
GADFLY_TITLE="$TITLE" \
|
||||||
GADFLY_BODY="$BODY" \
|
GADFLY_BODY="$BODY" \
|
||||||
GADFLY_MAX_DIFF_CHARS="$MAX_DIFF_CHARS" \
|
GADFLY_MAX_DIFF_CHARS="$MAX_DIFF_CHARS" \
|
||||||
|
GADFLY_STATUS_FILE="${GADFLY_STATUS_FILE:-}" \
|
||||||
"$BIN" 2>"$ERR_FILE"
|
"$BIN" 2>"$ERR_FILE"
|
||||||
)"
|
)"
|
||||||
rc=$?
|
rc=$?
|
||||||
|
|||||||
Executable
+137
@@ -0,0 +1,137 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# Live status board for a Gadfly review.
|
||||||
|
#
|
||||||
|
# Each model process (the cmd/gadfly binary) publishes its per-lens progress to
|
||||||
|
# $GADFLY_STATUS_DIR/<model>.json as lenses go queued -> running -> finished.
|
||||||
|
# This script polls that directory and upserts ONE consolidated PR comment that
|
||||||
|
# aggregates every model's per-lens status, so a human (or an agent watching the
|
||||||
|
# PR) can see the whole swarm's progress at a glance and know when it's done —
|
||||||
|
# instead of staring at N separate "⏳ Reviewing…" placeholders.
|
||||||
|
#
|
||||||
|
# It is advisory and best-effort: a failed render/post is logged and retried on
|
||||||
|
# the next tick; nothing here can fail the review or block a merge. It runs in
|
||||||
|
# the background from entrypoint.sh and exits once the $GADFLY_STATUS_DIR/.done
|
||||||
|
# sentinel appears (the entrypoint touches it after all model lanes finish),
|
||||||
|
# after one final render.
|
||||||
|
#
|
||||||
|
# Required env:
|
||||||
|
# GITEA_API https://HOST/api/v1/repos/OWNER/REPO
|
||||||
|
# GITEA_TOKEN token with repo write access (posts the comment)
|
||||||
|
# PR pull request number
|
||||||
|
# GADFLY_STATUS_DIR directory holding the per-model <model>.json files
|
||||||
|
# Optional:
|
||||||
|
# GADFLY_STATUS_POLL_SECS render/upsert interval (default 12)
|
||||||
|
# Shell options: referencing an unset variable is an error, and a pipeline
# fails when any stage fails. errexit (-e) is not enabled, so an individual
# command failing does not abort the script.
set -u
set -o pipefail

# Required configuration. Each expansion aborts the script with the given
# message when the variable is unset or empty.
: "${GITEA_API:?GITEA_API required}"
: "${GITEA_TOKEN:?GITEA_TOKEN required}"
: "${PR:?PR required}"
: "${GADFLY_STATUS_DIR:?GADFLY_STATUS_DIR required}"
|
||||||
|
|
||||||
|
# Seconds between board refreshes (default 12).
POLL="${GADFLY_STATUS_POLL_SECS:-12}"

# Only a positive integer is accepted; anything else falls back to 12. errexit
# is off, so a `sleep "$POLL"` rejected for a bad interval would not stop the
# script and the polling loop would spin, hammering the Gitea API.
if [ -z "$POLL" ] || [ -n "${POLL//[0-9]/}" ]; then
  # Empty, or something is left over once every digit is stripped.
  POLL=12
fi
if ! [ "$POLL" -ge 1 ] 2>/dev/null; then
  # Zero, or a digit string `[` cannot evaluate as an integer.
  POLL=12
fi

# Sentinel file: its presence tells the polling loop to stop.
DONE_FILE="${GADFLY_STATUS_DIR}/.done"
# Hidden HTML marker that opens the board comment; used to find it again.
MARKER="<!-- gadfly-status-board -->"
# curl time limits; expanded UNQUOTED at the call sites so it splits into options.
API_TIMEOUT="--connect-timeout 20 --max-time 30"
# Cached id of the board comment, so it is PATCHed in place instead of re-searched.
BOARD_ID=""
|
||||||
|
|
||||||
|
# say WORD... — write one diagnostic line, prefixed with the script's tag, to
# stderr (stdout stays clean for callers that capture it).
say() {
  printf '%s\n' "[gadfly-status-board] $*" >&2
}
|
||||||
|
|
||||||
|
# jq does all JSON reading and payload building below. Without it the board
# cannot work, so log the reason and exit 0: a missing tool disables the board
# rather than failing the job.
if ! command -v jq >/dev/null 2>&1; then
  say "jq not found; status board disabled"
  exit 0
fi
|
||||||
|
|
||||||
|
# render_section FILE -> markdown for one model (its header + per-lens bullets).
#
# Arguments: $1 - path to one per-model status JSON file
# Outputs:   a "#### `model` · provider — summary" header followed by one
#            bullet per lens, on stdout
# Returns:   jq's exit status. A missing, half-written or otherwise
#            unparseable file makes jq fail and print nothing, so the caller
#            skips that model for this tick.
#
# A null or absent "lenses" value is treated as an empty list, so a model that
# has not reported any lens yet is shown as "waiting to start" instead of being
# dropped (iterating null is a jq error).
render_section() {
  jq -r '
    # Icon for a lens: finished splits into ok / errored; unknown states count as queued.
    def icon(state; errored):
      if state == "finished" then (if errored then "⚠️" else "✅" end)
      elif state == "running" then "🔄"
      else "⏸️" end;

    # One bullet: icon, bold lens name, then verdict / running / queued.
    def lensline:
      "- " + icon(.state; (.errored // false)) + " **" + .name + "** — " +
      ( if .state == "finished" then (if (.errored // false) then "could not complete" else (.verdict // "done") end)
        elif .state == "running" then "running"
        else "queued" end );

    (.lenses // []) as $lenses
    | ( [$lenses[] | select(.state == "finished")] | length ) as $fin
    | ( $lenses | length ) as $tot
    | ( if .done then "✅ done"
        elif $tot == 0 then "⏳ waiting to start"
        else "⏳ " + ($fin|tostring) + "/" + ($tot|tostring) + " lenses" end ) as $sum
    | "#### `" + .model + "` · " + .provider + " — " + $sum + "\n"
      + ( if $tot == 0 then "- ⏸️ _no lenses reported yet_"
          else ([$lenses[] | lensline] | join("\n")) end )
  ' "$1" 2>/dev/null
}
|
||||||
|
|
||||||
|
# render_body -> the full consolidated comment body (marker + header + sections).
#
# Globals:   GADFLY_STATUS_DIR (read) - directory of per-model *.json files
#            MARKER (read)            - first line of the comment
# Outputs:   the markdown body on stdout
#
# Every *.json file that render_section can render contributes one section and
# counts towards the "N/M reviewers finished" line; files that fail to render
# (e.g. half-written) are skipped for this tick.
render_body() {
  local f sec is_done sections="" total=0 finished=0 ts

  # Iterate the glob directly instead of via a nullglob-filled array: this
  # leaves the caller's nullglob setting untouched and avoids expanding an empty
  # array, which is an "unbound variable" error under `set -u` on bash < 4.4.
  for f in "${GADFLY_STATUS_DIR}"/*.json; do
    # With no match the pattern stays literal; skip anything that is not there.
    [ -e "$f" ] || continue
    sec="$(render_section "$f")" || continue
    [ -n "$sec" ] || continue
    total=$((total + 1))
    is_done="$(jq -r 'if .done then 1 else 0 end' "$f" 2>/dev/null)"
    if [ "$is_done" = "1" ]; then
      finished=$((finished + 1))
    fi
    sections="${sections}${sec}"$'\n\n'
  done

  ts="$(date -u '+%Y-%m-%d %H:%M:%SZ')"
  if [ "$total" -eq 0 ]; then
    sections="_Waiting for reviewers to start…_"$'\n'
  fi

  # Data is passed as printf arguments, never as part of the format string.
  printf '%s\n## 🪰 Gadfly — live review status\n\n%d/%d reviewers finished · updated %s\n\n%s\n<sub>Live status board. Findings are posted in each model'\''s own comment. Advisory only — does not block merge.</sub>' \
    "$MARKER" "$finished" "$total" "$ts" "$sections"
}
|
||||||
|
|
||||||
|
# find_existing -> id of the board comment if it already exists.
#
# Walks the PR's comments page by page (at most 10 pages of 50) looking for a
# body that starts with $MARKER. Used to recover the comment across container
# restarts.
#
# Globals:   API_TIMEOUT, GITEA_API, GITEA_TOKEN, PR, MARKER (all read)
# Outputs:   the comment id on stdout, or an empty line when there is none
# Returns:   0 when the lookup completed (found or not found);
#            1 when a request failed or the response was not a JSON array, i.e.
#            it is UNKNOWN whether a board exists. Callers must not create a new
#            comment in that case or a transient API error yields a duplicate.
find_existing() {
  local page=1 cmts prev="" summary count id
  while [ "$page" -le 10 ]; do
    # $API_TIMEOUT is left unquoted on purpose: it holds several curl options.
    # shellcheck disable=SC2086
    cmts="$(curl $API_TIMEOUT -fsS -H "Authorization: token ${GITEA_TOKEN}" \
      "${GITEA_API}/issues/${PR}/comments?limit=50&page=${page}" 2>/dev/null)" || return 1

    # One jq pass yields "<page length> <first matching id or nothing>".
    summary="$(printf '%s' "$cmts" | jq -r --arg m "$MARKER" '
      if type != "array" then error("unexpected response") else . end
      | "\(length) \([.[] | select((.body | type) == "string" and (.body | startswith($m))) | .id] | .[0] // "")"
    ' 2>/dev/null)" || return 1
    count="${summary%% *}"
    id="${summary#* }"

    [ "$count" = "0" ] && break
    if [ -n "$id" ]; then
      echo "$id"
      return 0
    fi
    # A page identical to the previous one means the server is not paginating
    # this listing (it returned everything at once); more requests are useless.
    [ "$cmts" = "$prev" ] && break
    prev="$cmts"
    page=$((page + 1))
  done
  echo ""
}

# upsert BODY — PATCH the cached/known board comment, else POST a new one and
# cache its id.
#
# Arguments: $1 - full markdown body of the board comment
# Globals:   BOARD_ID (read/written), API_TIMEOUT, GITEA_API, GITEA_TOKEN, PR
# Returns:   always 0; every failure is logged and retried on a later call.
#
# A failed PATCH (e.g. comment deleted) clears the cache so the next tick
# re-discovers or re-creates it. A failed lookup skips the tick instead of
# creating a comment, so an API hiccup cannot produce a second board.
upsert() {
  local body="$1" post_body resp

  # An empty body has no marker and could never be found again; do not publish it.
  if [ -z "$body" ]; then
    say "empty board body; skipping this tick"
    return 0
  fi
  post_body="$(jq -n --arg b "$body" '{body:$b}')"

  if [ -z "$BOARD_ID" ]; then
    if ! BOARD_ID="$(find_existing)"; then
      BOARD_ID=""
      say "could not list comments; will retry next tick"
      return 0
    fi
  fi

  if [ -n "$BOARD_ID" ]; then
    # shellcheck disable=SC2086
    if ! curl $API_TIMEOUT -fsS -X PATCH \
      -H "Authorization: token ${GITEA_TOKEN}" -H "Content-Type: application/json" \
      "${GITEA_API}/issues/comments/${BOARD_ID}" -d "$post_body" >/dev/null 2>&1; then
      say "patch of comment ${BOARD_ID} failed; will re-discover"
      BOARD_ID=""
    fi
  else
    # shellcheck disable=SC2086
    resp="$(curl $API_TIMEOUT -fsS -X POST \
      -H "Authorization: token ${GITEA_TOKEN}" -H "Content-Type: application/json" \
      "${GITEA_API}/issues/${PR}/comments" -d "$post_body" 2>/dev/null || echo '{}')"
    BOARD_ID="$(echo "$resp" | jq -r '.id // ""' 2>/dev/null)"
    if [ -z "$BOARD_ID" ]; then
      say "creating the board comment failed; will retry next tick"
    fi
  fi
  return 0
}
|
||||||
|
|
||||||
|
# run_status_board — the polling loop: render and upsert the board every $POLL
# seconds until the $DONE_FILE sentinel has been seen, then stop.
#
# Globals: POLL, DONE_FILE, GADFLY_STATUS_DIR (read)
#
# The sentinel is sampled BEFORE each render. Checking it only afterwards lets a
# sentinel that appears between the render and the check end the loop on a board
# rendered from pre-completion state; sampling first guarantees the last render
# happens after the sentinel was observed.
run_status_board() {
  local finishing
  say "starting (poll ${POLL}s, dir ${GADFLY_STATUS_DIR})"
  while :; do
    finishing=0
    [ -f "$DONE_FILE" ] && finishing=1
    upsert "$(render_body)"
    [ "$finishing" -eq 1 ] && break
    # errexit is off: a failing sleep would otherwise turn this into a busy
    # loop hitting the API on every iteration, so stop instead.
    if ! sleep "$POLL"; then
      say "sleep failed; stopping instead of busy-looping"
      break
    fi
  done
  say "done"
}

run_status_board
|
||||||
Reference in New Issue
Block a user