fix: address gadfly swarm review of the executus re-platform
Build & push image / build-and-push (pull_request) Successful in 8s
Build & push image / build-and-push (pull_request) Successful in 8s
Folds in the real findings the dogfood swarm raised on PR #20 (21 graded real, 1 false positive): - tools.go: anchor get_diff `path` matching to whole path tokens (foo.go no longer pulls in barfoo.go; a trailing "/" still scopes a directory); split the diff once + cache it; drop the spurious trailing blank line; fix the "truncated after line N" off-by-one wording. Share one tool-list source (allTools) between toolbox() and the executus registry. - executus.go: drop the dead Config.Defaults caps (per-run RunnableAgent always overrides them); shared envBool/reviewTimeout helpers; resolveContextTokens logs a failed lookup and uses a 5s timeout (was 15s); note the budget guard is pass-granular (the wall-clock backstop covers mid-pass). - main.go/recheck.go: shared envBool; fix package-doc drift (the removed finalization fallback, the paginated get_diff). - entrypoint.sh/run.sh: export GADFLY_MAX_DIFF_CHARS directly (run.sh prefers it); guard the watchdog's delayed SIGKILL on a .disarmed marker so it can't catch the consolidation pass. - tests: anchoring test; corrected obsolete env var + truncation-wording asserts. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
+23
-16
@@ -56,6 +56,11 @@ const (
|
|||||||
// compactor defaults; surfaced as gadfly env knobs for tuning.
|
// compactor defaults; surfaced as gadfly env knobs for tuning.
|
||||||
defaultCompactKeepRecent = 8
|
defaultCompactKeepRecent = 8
|
||||||
defaultCompactSummaryWords = 200
|
defaultCompactSummaryWords = 200
|
||||||
|
// contextTokenLookupTimeout bounds the one-shot /api/show call that resolves a
|
||||||
|
// cloud model's context window at executor-build time. Kept short so a slow or
|
||||||
|
// unreachable endpoint adds at most this to startup before degrading to
|
||||||
|
// no-compaction (rather than the provider cache's default 15s).
|
||||||
|
contextTokenLookupTimeout = 5 * time.Second
|
||||||
)
|
)
|
||||||
|
|
||||||
// runSeq mints a unique-per-process RunID suffix for each executor run so audit
|
// runSeq mints a unique-per-process RunID suffix for each executor run so audit
|
||||||
@@ -79,10 +84,7 @@ func (w wrappedTool) BuildLLM(_ exectool.Invocation) llm.Tool { return w.t }
|
|||||||
// returns it along with the tool names for RunnableAgent.LowLevelTools.
|
// returns it along with the tool names for RunnableAgent.LowLevelTools.
|
||||||
func gadflyToolRegistry(fs *repoFS) (exectool.Registry, []string, error) {
|
func gadflyToolRegistry(fs *repoFS) (exectool.Registry, []string, error) {
|
||||||
reg := exectool.NewRegistry()
|
reg := exectool.NewRegistry()
|
||||||
tools := fs.fsTools()
|
tools := fs.allTools()
|
||||||
if fs.worker != nil {
|
|
||||||
tools = append(tools, fs.delegateTool())
|
|
||||||
}
|
|
||||||
names := make([]string, 0, len(tools))
|
names := make([]string, 0, len(tools))
|
||||||
for _, t := range tools {
|
for _, t := range tools {
|
||||||
if err := reg.Register(wrappedTool{t: t}); err != nil {
|
if err := reg.Register(wrappedTool{t: t}); err != nil {
|
||||||
@@ -100,6 +102,11 @@ func gadflyToolRegistry(fs *repoFS) (exectool.Registry, []string, error) {
|
|||||||
// the engine calls addUsage after each pass with run.Result.Usage. A nil
|
// the engine calls addUsage after each pass with run.Result.Usage. A nil
|
||||||
// *gadflyBudget is never installed — caps of 0 mean "unlimited", so the port is
|
// *gadflyBudget is never installed — caps of 0 mean "unlimited", so the port is
|
||||||
// only wired when at least one cap is set.
|
// only wired when at least one cap is set.
|
||||||
|
//
|
||||||
|
// The guard is PASS-granular: Check runs before each pass, so it stops the NEXT
|
||||||
|
// pass once the budget is spent but cannot abort a single runaway pass mid-flight.
|
||||||
|
// The swarm-wide GADFLY_PR_BUDGET_SECS wall-clock backstop (entrypoint.sh) is what
|
||||||
|
// bounds a mid-pass runaway.
|
||||||
type gadflyBudget struct {
|
type gadflyBudget struct {
|
||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
maxTokens int64
|
maxTokens int64
|
||||||
@@ -254,8 +261,10 @@ func newReviewExecutor(fs *repoFS, mdl, summarizer llm.Model, modelSpec string,
|
|||||||
Compactor: compactor,
|
Compactor: compactor,
|
||||||
ContextTokens: ctxTokens,
|
ContextTokens: ctxTokens,
|
||||||
Defaults: exrun.Defaults{
|
Defaults: exrun.Defaults{
|
||||||
MaxIterations: envInt("GADFLY_MAX_STEPS", defaultMaxSteps),
|
// MaxIterations/MaxRuntime are intentionally omitted: every pass sets its
|
||||||
MaxRuntime: time.Duration(envInt("GADFLY_TIMEOUT_SECS", defaultTimeoutSecs)) * time.Second,
|
// own per-run cap on the RunnableAgent below (the review and recheck caps
|
||||||
|
// differ), so a Defaults value here would always be overridden — dead. This
|
||||||
|
// leaves only the cross-pass guards + the compaction ratio.
|
||||||
MaxConsecutiveToolErrors: 4,
|
MaxConsecutiveToolErrors: 4,
|
||||||
MaxSameToolCallRepeats: 4,
|
MaxSameToolCallRepeats: 4,
|
||||||
CompactionThresholdRatio: compactRatio(),
|
CompactionThresholdRatio: compactRatio(),
|
||||||
@@ -282,7 +291,7 @@ func (r *reviewExecutor) run(ctx context.Context, system, task string, maxSteps
|
|||||||
SystemPrompt: system,
|
SystemPrompt: system,
|
||||||
ModelTier: r.modelSpec,
|
ModelTier: r.modelSpec,
|
||||||
MaxIterations: maxSteps,
|
MaxIterations: maxSteps,
|
||||||
MaxRuntime: time.Duration(envInt("GADFLY_TIMEOUT_SECS", defaultTimeoutSecs)) * time.Second,
|
MaxRuntime: reviewTimeout(),
|
||||||
LowLevelTools: r.toolNames,
|
LowLevelTools: r.toolNames,
|
||||||
Critic: exrun.CriticConfig{Enabled: true},
|
Critic: exrun.CriticConfig{Enabled: true},
|
||||||
}, exectool.Invocation{
|
}, exectool.Invocation{
|
||||||
@@ -316,14 +325,7 @@ func prCallerID() string {
|
|||||||
|
|
||||||
// compactionEnabled reports whether context compaction should be wired. On
|
// compactionEnabled reports whether context compaction should be wired. On
|
||||||
// unless GADFLY_COMPACT is explicitly falsey.
|
// unless GADFLY_COMPACT is explicitly falsey.
|
||||||
func compactionEnabled() bool {
|
func compactionEnabled() bool { return envBool("GADFLY_COMPACT", true) }
|
||||||
switch strings.ToLower(strings.TrimSpace(os.Getenv("GADFLY_COMPACT"))) {
|
|
||||||
case "0", "false", "no", "off":
|
|
||||||
return false
|
|
||||||
default:
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// compactRatio is the compaction threshold as a fraction of the model context
|
// compactRatio is the compaction threshold as a fraction of the model context
|
||||||
// window (GADFLY_COMPACT_RATIO), clamped to (0,1]; default defaultCompactRatio.
|
// window (GADFLY_COMPACT_RATIO), clamped to (0,1]; default defaultCompactRatio.
|
||||||
@@ -355,10 +357,15 @@ func resolveContextTokens(modelSpec string) int {
|
|||||||
key = strings.TrimSpace(os.Getenv("OLLAMA_API_KEY"))
|
key = strings.TrimSpace(os.Getenv("OLLAMA_API_KEY"))
|
||||||
}
|
}
|
||||||
cache := model.NewCloudOllamaLimitCache("", key, nil)
|
cache := model.NewCloudOllamaLimitCache("", key, nil)
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
|
ctx, cancel := context.WithTimeout(context.Background(), contextTokenLookupTimeout)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
if n, ok := model.MaxContextTokensResolving(ctx, modelSpec, cache); ok {
|
if n, ok := model.MaxContextTokensResolving(ctx, modelSpec, cache); ok {
|
||||||
return n
|
return n
|
||||||
}
|
}
|
||||||
|
// Unknown model or a failed lookup (e.g. no key / unreachable endpoint): don't
|
||||||
|
// guess — compaction is disabled. Log it so a misconfiguration is debuggable
|
||||||
|
// rather than silently dropping the protection. Set GADFLY_MODEL_CONTEXT_TOKENS
|
||||||
|
// to force a window for an endpoint executus can't introspect.
|
||||||
|
fmt.Fprintf(os.Stderr, "gadfly: no context window resolved for %q; compaction disabled (set GADFLY_MODEL_CONTEXT_TOKENS to enable it)\n", modelSpec)
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
|||||||
+26
-8
@@ -39,10 +39,9 @@
|
|||||||
// GADFLY_TITLE PR title (optional).
|
// GADFLY_TITLE PR title (optional).
|
||||||
// GADFLY_BODY PR description (optional).
|
// GADFLY_BODY PR description (optional).
|
||||||
// GADFLY_MAX_STEPS review-pass step cap (optional, default 24).
|
// GADFLY_MAX_STEPS review-pass step cap (optional, default 24).
|
||||||
// GADFLY_WRAPUP_RESERVE steps before the cap at which the agent is told to
|
// GADFLY_WRAPUP_RESERVE steps before the cap at which the wrap-up critic nudges
|
||||||
// stop investigating and write its answer (optional,
|
// the agent to stop investigating and write its answer
|
||||||
// default 4). Plus a tool-free finalization fallback
|
// (optional, default 4).
|
||||||
// guarantees a step-exhausted pass still emits output.
|
|
||||||
// GADFLY_RECHECK set to 0/false to skip the recheck pass (optional, default on).
|
// GADFLY_RECHECK set to 0/false to skip the recheck pass (optional, default on).
|
||||||
// GADFLY_RECHECK_MAX_STEPS recheck-pass step cap (optional, default 16).
|
// GADFLY_RECHECK_MAX_STEPS recheck-pass step cap (optional, default 16).
|
||||||
// GADFLY_TIMEOUT_SECS overall deadline in seconds, shared by both passes (optional, default 300).
|
// GADFLY_TIMEOUT_SECS overall deadline in seconds, shared by both passes (optional, default 300).
|
||||||
@@ -55,8 +54,8 @@
|
|||||||
// lanes as GADFLY_PROVIDER_CONCURRENCY (e.g.
|
// lanes as GADFLY_PROVIDER_CONCURRENCY (e.g.
|
||||||
// "ollama-cloud=3,m1=1"). Wins over GADFLY_LENS_CONCURRENCY
|
// "ollama-cloud=3,m1=1"). Wins over GADFLY_LENS_CONCURRENCY
|
||||||
// for the model's provider; falls back to it otherwise.
|
// for the model's provider; falls back to it otherwise.
|
||||||
// GADFLY_MAX_DIFF_CHARS diff chars embedded in the prompt (optional, default 60000;
|
// GADFLY_MAX_DIFF_CHARS diff chars embedded in the review prompt (optional, default 60000;
|
||||||
// the full diff is always available via the get_diff tool).
|
// the full diff is reachable via the paginated get_diff tool).
|
||||||
//
|
//
|
||||||
// On success it prints the review to stdout and exits 0. On a usage/config or
|
// On success it prints the review to stdout and exits 0. On a usage/config or
|
||||||
// model error it prints a diagnostic to stderr and exits non-zero; run.sh then
|
// model error it prints a diagnostic to stderr and exits non-zero; run.sh then
|
||||||
@@ -322,8 +321,7 @@ func providerOverride(envName, provider string) (int, bool) {
|
|||||||
// returned bool is true when the review pass failed (rendered as an inline
|
// returned bool is true when the review pass failed (rendered as an inline
|
||||||
// notice — advisory; one lens failing never sinks the others or the job).
|
// notice — advisory; one lens failing never sinks the others or the job).
|
||||||
func reviewWithSpecialist(eng reviewEngine, base string, sp Specialist, task, diff string) (string, bool) {
|
func reviewWithSpecialist(eng reviewEngine, base string, sp Specialist, task, diff string) (string, bool) {
|
||||||
timeout := time.Duration(envInt("GADFLY_TIMEOUT_SECS", defaultTimeoutSecs)) * time.Second
|
ctx, cancel := context.WithTimeout(context.Background(), reviewTimeout())
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), timeout)
|
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
draft, err := eng.runPass(ctx, composeSpecialistPrompt(base, sp), task,
|
draft, err := eng.runPass(ctx, composeSpecialistPrompt(base, sp), task,
|
||||||
@@ -389,3 +387,23 @@ func envInt(name string, def int) int {
|
|||||||
}
|
}
|
||||||
return n
|
return n
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// envBool reads a boolean-ish env var: def when unset, false for an explicit
|
||||||
|
// falsey value (0/false/no/off), true otherwise. The shared spelling for
|
||||||
|
// gadfly's "on unless disabled" opt-out flags (GADFLY_RECHECK, GADFLY_COMPACT).
|
||||||
|
func envBool(name string, def bool) bool {
|
||||||
|
switch strings.ToLower(strings.TrimSpace(os.Getenv(name))) {
|
||||||
|
case "":
|
||||||
|
return def
|
||||||
|
case "0", "false", "no", "off":
|
||||||
|
return false
|
||||||
|
default:
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// reviewTimeout is the per-specialist-lens deadline (GADFLY_TIMEOUT_SECS), shared
|
||||||
|
// across a lens's review+recheck passes and applied as each pass's run cap.
|
||||||
|
func reviewTimeout() time.Duration {
|
||||||
|
return time.Duration(envInt("GADFLY_TIMEOUT_SECS", defaultTimeoutSecs)) * time.Second
|
||||||
|
}
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ package main
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -65,14 +64,7 @@ Output rules:
|
|||||||
|
|
||||||
// recheckEnabled reports whether the verification pass should run. On unless
|
// recheckEnabled reports whether the verification pass should run. On unless
|
||||||
// GADFLY_RECHECK is explicitly a falsey value.
|
// GADFLY_RECHECK is explicitly a falsey value.
|
||||||
func recheckEnabled() bool {
|
func recheckEnabled() bool { return envBool("GADFLY_RECHECK", true) }
|
||||||
switch strings.ToLower(strings.TrimSpace(os.Getenv("GADFLY_RECHECK"))) {
|
|
||||||
case "0", "false", "no", "off":
|
|
||||||
return false
|
|
||||||
default:
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// shouldRecheck decides whether to run the verification pass for a given draft.
|
// shouldRecheck decides whether to run the verification pass for a given draft.
|
||||||
// A clean "no material issues" draft has nothing to verify, so it is skipped
|
// A clean "no material issues" draft has nothing to verify, so it is skipped
|
||||||
|
|||||||
@@ -49,7 +49,7 @@ func TestRecheckEnabled(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestBuildRecheckTask(t *testing.T) {
|
func TestBuildRecheckTask(t *testing.T) {
|
||||||
t.Setenv("GADFLY_MAX_DIFF_CHARS", "")
|
t.Setenv("GADFLY_RECHECK_DIFF_CHARS", "")
|
||||||
draft := "VERDICT: Blocking issues found\n- foo.go:1 broken"
|
draft := "VERDICT: Blocking issues found\n- foo.go:1 broken"
|
||||||
out := buildRecheckTask(draft, "diff --git a/x b/x\n+y\n")
|
out := buildRecheckTask(draft, "diff --git a/x b/x\n+y\n")
|
||||||
if !strings.Contains(out, draft) {
|
if !strings.Contains(out, draft) {
|
||||||
|
|||||||
+83
-29
@@ -9,6 +9,7 @@ import (
|
|||||||
"regexp"
|
"regexp"
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
|
"sync"
|
||||||
|
|
||||||
llm "gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
llm "gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
||||||
)
|
)
|
||||||
@@ -42,6 +43,12 @@ type repoFS struct {
|
|||||||
root string // absolute, symlink-resolved repo root
|
root string // absolute, symlink-resolved repo root
|
||||||
diff string // the full PR unified diff (served by get_diff)
|
diff string // the full PR unified diff (served by get_diff)
|
||||||
worker llm.Model // optional cheap model for delegate_investigation; nil = no delegation
|
worker llm.Model // optional cheap model for delegate_investigation; nil = no delegation
|
||||||
|
|
||||||
|
// diffLines caches the split diff so paging through get_diff doesn't re-split
|
||||||
|
// the whole (possibly large) diff on every call. The diff is immutable, so the
|
||||||
|
// cache is computed once and safe to share across concurrent lenses.
|
||||||
|
diffOnce sync.Once
|
||||||
|
diffLines []string
|
||||||
}
|
}
|
||||||
|
|
||||||
// newRepoFS resolves root to an absolute, symlink-free path.
|
// newRepoFS resolves root to an absolute, symlink-free path.
|
||||||
@@ -110,15 +117,24 @@ func (r *repoFS) fsTools() []llm.Tool {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// toolbox builds the reviewer's toolbox: the read-only repo tools, plus the
|
// allTools is the single source of truth for the reviewer's tool set: the
|
||||||
// delegate_investigation tool when a worker model is configured.
|
// read-only repo tools, plus delegate_investigation when a worker model is
|
||||||
func (r *repoFS) toolbox() (*llm.Toolbox, error) {
|
// configured. Both the executus registry (gadflyToolRegistry, the production
|
||||||
box := llm.NewToolbox("gadfly")
|
// path) and toolbox() build from this list.
|
||||||
|
func (r *repoFS) allTools() []llm.Tool {
|
||||||
tools := r.fsTools()
|
tools := r.fsTools()
|
||||||
if r.worker != nil {
|
if r.worker != nil {
|
||||||
tools = append(tools, r.delegateTool())
|
tools = append(tools, r.delegateTool())
|
||||||
}
|
}
|
||||||
for _, t := range tools {
|
return tools
|
||||||
|
}
|
||||||
|
|
||||||
|
// toolbox builds a majordomo toolbox from allTools(). The production review path
|
||||||
|
// now goes through executus's tool.Registry (see executus.go); this remains for
|
||||||
|
// the toolbox-level tests (the `call` helper).
|
||||||
|
func (r *repoFS) toolbox() (*llm.Toolbox, error) {
|
||||||
|
box := llm.NewToolbox("gadfly")
|
||||||
|
for _, t := range r.allTools() {
|
||||||
if err := box.Add(t); err != nil {
|
if err := box.Add(t); err != nil {
|
||||||
return nil, fmt.Errorf("add tool %q: %w", t.Name, err)
|
return nil, fmt.Errorf("add tool %q: %w", t.Name, err)
|
||||||
}
|
}
|
||||||
@@ -409,30 +425,49 @@ func (r *repoFS) getDiffTool() llm.Tool {
|
|||||||
"get_diff",
|
"get_diff",
|
||||||
"Return the unified diff under review as a numbered, PAGINATED window (like read_file) — not the whole diff at once, so a huge PR can't blow the context window. Pass `path` to fetch just one changed file's hunks, or `start_line`/`limit` to page through. A truncated copy of the diff is also embedded in the task message.",
|
"Return the unified diff under review as a numbered, PAGINATED window (like read_file) — not the whole diff at once, so a huge PR can't blow the context window. Pass `path` to fetch just one changed file's hunks, or `start_line`/`limit` to page through. A truncated copy of the diff is also embedded in the task message.",
|
||||||
func(_ context.Context, args getDiffArgs) (any, error) {
|
func(_ context.Context, args getDiffArgs) (any, error) {
|
||||||
diff := r.diff
|
|
||||||
scope := "the diff"
|
scope := "the diff"
|
||||||
|
var lines []string
|
||||||
if p := strings.TrimSpace(args.Path); p != "" {
|
if p := strings.TrimSpace(args.Path); p != "" {
|
||||||
diff = diffForPath(r.diff, p)
|
lines = diffLinesForPath(r.diff, p)
|
||||||
if strings.TrimSpace(diff) == "" {
|
if len(lines) == 0 {
|
||||||
return fmt.Sprintf("(no diff hunks for path %q; check it against the changed-files list — the path must match a file the diff touches)", p), nil
|
return fmt.Sprintf("(no diff hunks for path %q; check it against the changed-files list — the path must match a file the diff touches)", p), nil
|
||||||
}
|
}
|
||||||
scope = "the diff for " + p
|
scope = "the diff for " + p
|
||||||
}
|
} else {
|
||||||
if strings.TrimSpace(diff) == "" {
|
lines = r.diffAllLines()
|
||||||
|
if len(lines) == 0 {
|
||||||
return "(empty diff)", nil
|
return "(empty diff)", nil
|
||||||
}
|
}
|
||||||
return windowDiff(diff, scope, args.StartLine, args.Limit), nil
|
}
|
||||||
|
return windowDiff(lines, scope, args.StartLine, args.Limit), nil
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
// windowDiff returns a numbered, paginated slice of a unified diff, mirroring
|
// diffAllLines returns the whole diff split into lines, cached so paging never
|
||||||
// read_file's caps (maxGetDiffLines / maxGetDiffBytes / maxLineLen) so a single
|
// re-splits the (possibly large) diff.
|
||||||
// get_diff call can never dump a multi-hundred-KB diff into the transcript — the
|
func (r *repoFS) diffAllLines() []string {
|
||||||
// amplifier behind the large-PR token burn. The full diff stays reachable by
|
r.diffOnce.Do(func() { r.diffLines = splitDiffLines(r.diff) })
|
||||||
// paging with start_line, or scoped per file via the path arg.
|
return r.diffLines
|
||||||
func windowDiff(diff, scope string, start, limit int) string {
|
}
|
||||||
|
|
||||||
|
// splitDiffLines splits a unified diff into lines, dropping the single trailing
|
||||||
|
// empty element a trailing newline produces — otherwise windowDiff would emit a
|
||||||
|
// blank final line and over-count the total by one.
|
||||||
|
func splitDiffLines(diff string) []string {
|
||||||
lines := strings.Split(diff, "\n")
|
lines := strings.Split(diff, "\n")
|
||||||
|
if n := len(lines); n > 0 && lines[n-1] == "" {
|
||||||
|
lines = lines[:n-1]
|
||||||
|
}
|
||||||
|
return lines
|
||||||
|
}
|
||||||
|
|
||||||
|
// windowDiff returns a numbered, paginated slice of pre-split diff lines,
|
||||||
|
// mirroring read_file's caps (maxGetDiffLines / maxGetDiffBytes / maxLineLen) so
|
||||||
|
// a single get_diff call can never dump a multi-hundred-KB diff into the
|
||||||
|
// transcript — the amplifier behind the large-PR token burn. The full diff stays
|
||||||
|
// reachable by paging with start_line, or scoped per file via the path arg.
|
||||||
|
func windowDiff(lines []string, scope string, start, limit int) string {
|
||||||
total := len(lines)
|
total := len(lines)
|
||||||
if start < 1 {
|
if start < 1 {
|
||||||
start = 1
|
start = 1
|
||||||
@@ -458,26 +493,45 @@ func windowDiff(diff, scope string, start, limit int) string {
|
|||||||
emitted++
|
emitted++
|
||||||
}
|
}
|
||||||
if i < total {
|
if i < total {
|
||||||
fmt.Fprintf(&b, "... (%s truncated at line %d of %d; call get_diff again with start_line=%d for more, or pass a `path` to scope to one file)\n", scope, i, total, i+1)
|
// i (0-based) is the first line NOT emitted; line i was the last shown.
|
||||||
|
fmt.Fprintf(&b, "... (%s truncated after line %d of %d; call get_diff again with start_line=%d for the rest, or pass a `path` to scope to one file)\n", scope, i, total, i+1)
|
||||||
}
|
}
|
||||||
return b.String()
|
return b.String()
|
||||||
}
|
}
|
||||||
|
|
||||||
// diffForPath extracts the unified-diff section for one changed file: from the
|
// diffLinesForPath returns the unified-diff lines for one changed file: from the
|
||||||
// `diff --git` header that names the path through to the next `diff --git`
|
// `diff --git` header that names path through to the next header (or end). The
|
||||||
// header (or end). Matching is a substring on the header line, so either side
|
// header names two path tokens (a/<old> b/<new>); a match is on a WHOLE token,
|
||||||
// (a/<path> or b/<path>) hits, including a rename's two paths.
|
// so path "foo.go" does not pull in "barfoo.go" — and a trailing "/" scopes to a
|
||||||
func diffForPath(diff, path string) string {
|
// directory (e.g. "pkg/foo/" matches pkg/foo/bar.go).
|
||||||
var b strings.Builder
|
func diffLinesForPath(diff, path string) []string {
|
||||||
|
want := strings.TrimPrefix(strings.TrimPrefix(strings.TrimSpace(path), "a/"), "b/")
|
||||||
|
var out []string
|
||||||
inSection := false
|
inSection := false
|
||||||
for _, ln := range strings.Split(diff, "\n") {
|
for _, ln := range splitDiffLines(diff) {
|
||||||
if strings.HasPrefix(ln, "diff --git ") {
|
if strings.HasPrefix(ln, "diff --git ") {
|
||||||
inSection = strings.Contains(ln, path)
|
inSection = diffHeaderNames(ln, want)
|
||||||
}
|
}
|
||||||
if inSection {
|
if inSection {
|
||||||
b.WriteString(ln)
|
out = append(out, ln)
|
||||||
b.WriteByte('\n')
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return b.String()
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// diffHeaderNames reports whether a `diff --git a/X b/Y` header names want as one
|
||||||
|
// of its (a/b-stripped) path tokens — exact whole-token match, or a directory
|
||||||
|
// prefix when want ends with "/".
|
||||||
|
func diffHeaderNames(header, want string) bool {
|
||||||
|
fields := strings.Fields(header)
|
||||||
|
if len(fields) < 3 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for _, f := range fields[2:] {
|
||||||
|
p := strings.TrimPrefix(strings.TrimPrefix(f, "a/"), "b/")
|
||||||
|
if p == want || (strings.HasSuffix(want, "/") && strings.HasPrefix(p, want)) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -247,7 +247,7 @@ func TestGetDiffTool_Paginates(t *testing.T) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("get_diff: %v", err)
|
t.Fatalf("get_diff: %v", err)
|
||||||
}
|
}
|
||||||
if !strings.Contains(out, "truncated at line") {
|
if !strings.Contains(out, "truncated after line") {
|
||||||
t.Error("a diff longer than the per-call cap should be truncated with a paging hint")
|
t.Error("a diff longer than the per-call cap should be truncated with a paging hint")
|
||||||
}
|
}
|
||||||
if strings.Contains(out, "801\t") {
|
if strings.Contains(out, "801\t") {
|
||||||
@@ -263,6 +263,30 @@ func TestGetDiffTool_Paginates(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestDiffLinesForPath_Anchored: get_diff path= matches on a WHOLE path token,
|
||||||
|
// so "foo.go" never pulls in "barfoo.go" (the unanchored-substring weakness the
|
||||||
|
// swarm flagged), while a trailing "/" still scopes to a directory.
|
||||||
|
func TestDiffLinesForPath_Anchored(t *testing.T) {
|
||||||
|
diff := "diff --git a/foo.go b/foo.go\n+foo change\n" +
|
||||||
|
"diff --git a/barfoo.go b/barfoo.go\n+barfoo change\n" +
|
||||||
|
"diff --git a/pkg/x.go b/pkg/x.go\n+x change\n"
|
||||||
|
|
||||||
|
joined := strings.Join(diffLinesForPath(diff, "foo.go"), "\n")
|
||||||
|
if !strings.Contains(joined, "foo change") {
|
||||||
|
t.Errorf("foo.go should match its own hunk:\n%s", joined)
|
||||||
|
}
|
||||||
|
if strings.Contains(joined, "barfoo change") {
|
||||||
|
t.Errorf("foo.go must NOT match barfoo.go (unanchored substring regression):\n%s", joined)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !strings.Contains(strings.Join(diffLinesForPath(diff, "pkg/"), "\n"), "x change") {
|
||||||
|
t.Error("a trailing-slash path should scope to the directory (pkg/ -> pkg/x.go)")
|
||||||
|
}
|
||||||
|
if len(diffLinesForPath(diff, "nope.go")) != 0 {
|
||||||
|
t.Error("an unknown path should yield no lines")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestNewRepoFS_BadRoot(t *testing.T) {
|
func TestNewRepoFS_BadRoot(t *testing.T) {
|
||||||
// A file (not a directory) is rejected.
|
// A file (not a directory) is rejected.
|
||||||
f := filepath.Join(t.TempDir(), "afile")
|
f := filepath.Join(t.TempDir(), "afile")
|
||||||
|
|||||||
+15
-5
@@ -205,7 +205,9 @@ if [ "$HUGE_DIFF_BYTES" -gt 0 ] 2>/dev/null; then
|
|||||||
export GADFLY_MAX_STEPS="${GADFLY_HUGE_DIFF_MAX_STEPS:-12}"
|
export GADFLY_MAX_STEPS="${GADFLY_HUGE_DIFF_MAX_STEPS:-12}"
|
||||||
export GADFLY_RECHECK_MAX_STEPS="${GADFLY_HUGE_DIFF_RECHECK_MAX_STEPS:-8}"
|
export GADFLY_RECHECK_MAX_STEPS="${GADFLY_HUGE_DIFF_RECHECK_MAX_STEPS:-8}"
|
||||||
export GADFLY_RECHECK="${GADFLY_HUGE_DIFF_RECHECK:-0}" # skip recheck on huge PRs
|
export GADFLY_RECHECK="${GADFLY_HUGE_DIFF_RECHECK:-0}" # skip recheck on huge PRs
|
||||||
export MAX_DIFF_CHARS="${GADFLY_HUGE_DIFF_MAX_DIFF_CHARS:-20000}" # run.sh -> GADFLY_MAX_DIFF_CHARS
|
# The Go-visible name directly (run.sh prefers GADFLY_MAX_DIFF_CHARS over its
|
||||||
|
# own MAX_DIFF_CHARS), so the cap is honored without relying on run.sh's alias.
|
||||||
|
export GADFLY_MAX_DIFF_CHARS="${GADFLY_HUGE_DIFF_MAX_DIFF_CHARS:-20000}"
|
||||||
# Surfaced on each posted comment so the shallower review is self-explaining.
|
# Surfaced on each posted comment so the shallower review is self-explaining.
|
||||||
export GADFLY_NOTICE="⚠️ Large PR (${PR_DIFF_BYTES} bytes): Gadfly downshifted to a focused, single-model review to stay within budget — coverage is intentionally shallower. Consider splitting the PR for a deeper review."
|
export GADFLY_NOTICE="⚠️ Large PR (${PR_DIFF_BYTES} bytes): Gadfly downshifted to a focused, single-model review to stay within budget — coverage is intentionally shallower. Consider splitting the PR for a deeper review."
|
||||||
fi
|
fi
|
||||||
@@ -333,7 +335,7 @@ fi
|
|||||||
# findings were gathered are still posted and the job never fails (advisory).
|
# findings were gathered are still posted and the job never fails (advisory).
|
||||||
# GADFLY_PR_BUDGET_SECS=0 (default) disables it.
|
# GADFLY_PR_BUDGET_SECS=0 (default) disables it.
|
||||||
KILLER_PID=""
|
KILLER_PID=""
|
||||||
rm -f "${WORKDIR}/.budget_killed" 2>/dev/null || true
|
rm -f "${WORKDIR}/.budget_killed" "${WORKDIR}/.disarmed" 2>/dev/null || true
|
||||||
if [ "${GADFLY_PR_BUDGET_SECS:-0}" -gt 0 ] 2>/dev/null; then
|
if [ "${GADFLY_PR_BUDGET_SECS:-0}" -gt 0 ] 2>/dev/null; then
|
||||||
(
|
(
|
||||||
sleep "${GADFLY_PR_BUDGET_SECS}"
|
sleep "${GADFLY_PR_BUDGET_SECS}"
|
||||||
@@ -342,7 +344,10 @@ if [ "${GADFLY_PR_BUDGET_SECS:-0}" -gt 0 ] 2>/dev/null; then
|
|||||||
pkill -TERM -f '/usr/local/bin/gadfly' 2>/dev/null || true
|
pkill -TERM -f '/usr/local/bin/gadfly' 2>/dev/null || true
|
||||||
pkill -TERM -f "${SCRIPTS_DIR}/run.sh" 2>/dev/null || true
|
pkill -TERM -f "${SCRIPTS_DIR}/run.sh" 2>/dev/null || true
|
||||||
sleep 5
|
sleep 5
|
||||||
pkill -KILL -f '/usr/local/bin/gadfly' 2>/dev/null || true
|
# Guard the delayed SIGKILL on the disarm marker: once the lanes finished and
|
||||||
|
# the watchdog was disarmed, the consolidation gadfly pass runs next, and a
|
||||||
|
# name-based KILL here must NOT catch it.
|
||||||
|
[ -f "${WORKDIR}/.disarmed" ] || pkill -KILL -f '/usr/local/bin/gadfly' 2>/dev/null || true
|
||||||
) &
|
) &
|
||||||
KILLER_PID=$!
|
KILLER_PID=$!
|
||||||
log "PR budget watchdog armed (${GADFLY_PR_BUDGET_SECS}s, pid ${KILLER_PID})"
|
log "PR budget watchdog armed (${GADFLY_PR_BUDGET_SECS}s, pid ${KILLER_PID})"
|
||||||
@@ -360,8 +365,13 @@ done
|
|||||||
[ "${#LANE_PIDS[@]}" -gt 0 ] && wait "${LANE_PIDS[@]}"
|
[ "${#LANE_PIDS[@]}" -gt 0 ] && wait "${LANE_PIDS[@]}"
|
||||||
|
|
||||||
# Reviews finished (or the watchdog killed them): disarm the watchdog so its
|
# Reviews finished (or the watchdog killed them): disarm the watchdog so its
|
||||||
# delayed SIGKILL can't catch the consolidation pass that runs next.
|
# delayed SIGKILL can't catch the consolidation pass that runs next. Drop the
|
||||||
if [ -n "$KILLER_PID" ]; then kill "$KILLER_PID" 2>/dev/null || true; fi
|
# disarm marker FIRST so even a racing watchdog that already reached its KILL line
|
||||||
|
# skips it (the kill below also tears the watchdog subshell down during its sleep).
|
||||||
|
if [ -n "$KILLER_PID" ]; then
|
||||||
|
: > "${WORKDIR}/.disarmed"
|
||||||
|
kill "$KILLER_PID" 2>/dev/null || true
|
||||||
|
fi
|
||||||
|
|
||||||
# If the backstop fired, note it on the consensus comment (per-model comments
|
# If the backstop fired, note it on the consensus comment (per-model comments
|
||||||
# were already posted during the run; a killed model surfaces as a failed lane).
|
# were already posted during the run; a killed model surfaces as a failed lane).
|
||||||
|
|||||||
+1
-1
@@ -177,7 +177,7 @@ case "$PROVIDER" in
|
|||||||
GADFLY_SYSTEM_FILE="${SCRIPT_DIR}/system-prompt.txt" \
|
GADFLY_SYSTEM_FILE="${SCRIPT_DIR}/system-prompt.txt" \
|
||||||
GADFLY_TITLE="$TITLE" \
|
GADFLY_TITLE="$TITLE" \
|
||||||
GADFLY_BODY="$BODY" \
|
GADFLY_BODY="$BODY" \
|
||||||
GADFLY_MAX_DIFF_CHARS="$MAX_DIFF_CHARS" \
|
GADFLY_MAX_DIFF_CHARS="${GADFLY_MAX_DIFF_CHARS:-$MAX_DIFF_CHARS}" \
|
||||||
GADFLY_STATUS_FILE="${GADFLY_STATUS_FILE:-}" \
|
GADFLY_STATUS_FILE="${GADFLY_STATUS_FILE:-}" \
|
||||||
GADFLY_FINDINGS_OUT="${GADFLY_FINDINGS_OUT:-}" \
|
GADFLY_FINDINGS_OUT="${GADFLY_FINDINGS_OUT:-}" \
|
||||||
"$BIN" 2>"$ERR_FILE"
|
"$BIN" 2>"$ERR_FILE"
|
||||||
|
|||||||
Reference in New Issue
Block a user