P4c: remaining batteries — checkpoint + schedule + critic #6

Closed
steve wants to merge 5 commits from phase-4c-batteries into phase-4b-skill
Showing only changes of commit 5b5e130cee - Show all commits
+8 -3
View File
@@ -44,7 +44,7 @@ jobs:
# 3 cloud models, one model at a time with its 3 lenses concurrent, 3-lens suite. ~12 min typical (measured when all models ran concurrently; re-measure).
timeout-minutes: 30
steps:
- uses: docker://gitea.stevedudenhoeffer.com/steve/gadfly:sha-6e3a83c
- uses: docker://gitea.stevedudenhoeffer.com/steve/gadfly:sha-d0de034
env:
GITEA_API: ${{ github.server_url }}/api/v1/repos/${{ github.repository }}
GITEA_TOKEN: ${{ secrets.GITEA_TOKEN }}
@@ -53,9 +53,14 @@ jobs:
# on a P2-review measurement they took 26–29 min (with lens timeouts)
# and contributed ZERO real findings — the two cloud models found every
# genuine bug in 6–12 min. Cloud-only is faster AND higher-signal.
# 3 cloud models, one consolidated comment each, all run in parallel.
# 3 cloud models. Concurrency now lives in the LENSES, not the models:
# one model runs at a time (PROVIDER_CONCURRENCY=1) with its 3 lenses
# concurrent (PROVIDER_LENS_CONCURRENCY=3). So the first model's
# comment lands sooner and each model finishes a bit faster, at the
# cost of the other two models' comments arriving in series after it.
GADFLY_MODELS: "minimax-m3:cloud,deepseek-v4-flash:cloud,glm-5.2:cloud"
GADFLY_PROVIDER_CONCURRENCY: "ollama-cloud=3"
GADFLY_PROVIDER_CONCURRENCY: "ollama-cloud=1"
GADFLY_PROVIDER_LENS_CONCURRENCY: "ollama-cloud=3"
# Default => the 3-lens suite (security, correctness, error-handling).
# Set the repo var GADFLY_SPECIALISTS to override (csv / "all" / "auto").
GADFLY_SPECIALISTS: ${{ vars.GADFLY_SPECIALISTS || 'security,correctness,error-handling' }}