feat(run): execute multi-phase pipelines (RunnableAgent.Phases)
executus CI / test (pull_request) Successful in 1m49s
Adversarial Review (Gadfly) / review (pull_request) Successful in 13m59s

The kernel carried RunnableAgent.Phases as a DTO but never executed it —
Run always ran a single agent loop with ra.SystemPrompt, so a phased agent
(mort's deepresearch/research) silently ran one loop with the base prompt
instead of its pipeline. This implements the phase loop, ported from mort's
agentexec pipeline but reusing the kernel's own machinery.

- run/phases.go: runPhases / runOnePhase. Phases run sequentially; each is a
  fresh agent loop (or a bare LLM call for IsRunFunc phases) with its own
  template-expanded system prompt ({{.Query}} + {{.<PhaseName>}}), model
  tier, step cap, and tool subset. Outputs thread into later phases; the
  final phase's output is the run output. Optional phases swallow errors and
  substitute FallbackMessage; a non-optional phase that merely exhausts its
  step/tool budget salvages its partial transcript and continues (a hard
  error still aborts); per-phase tier-resolve failures fall back with a WARN.
- run/agent.go: Phase gains IsRunFunc + FallbackMessage (the kernel Phase
  struct previously omitted them).
- run/executor.go: Run factors the shared agent options (tool-error limits,
  step observer, compactor) and branches — single loop (critic's dynamic
  step ceiling) vs the phase runner (fixed per-phase caps; the run-level
  critic's steer + hard deadline still apply across phases). systemPrompt
  now delegates to systemPromptWithBody so each phase keeps the platform
  header. The same step observer feeds audit/steps/critic across all phases.

Tests (run/phases_test.go): sequential output threading + template
expansion, Optional-failure → FallbackMessage continues, hard-error abort,
IsRunFunc bare call, per-phase SystemHeader, filterToolbox subset, template
expansion. Full ./... suite green.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-29 15:14:45 -04:00
parent b25a13ed4f
commit 30b79a330f
4 changed files with 603 additions and 15 deletions
+44 -12
View File
@@ -289,12 +289,10 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
}
}
opts := []agent.Option{
agent.WithToolbox(toolbox),
// Step ceiling: a fixed WithMaxSteps(maxIter) normally, but when a critic is
// active it owns a DYNAMIC ceiling (WithMaxStepsFunc) so it can raise a
// healthy-but-long run's budget mid-flight. Falls back to maxIter.
critic.maxStepsOption(maxIter),
// Shared agent options used by BOTH the single-loop path and every phase: the
// tool-error guards, the step observer, and optional compaction. The toolbox +
// step ceiling are NOT shared (they vary per phase), so they're added per path.
sharedOpts := []agent.Option{
agent.WithToolErrorLimits(e.cfg.Defaults.MaxConsecutiveToolErrors, e.cfg.Defaults.MaxSameToolCallRepeats),
agent.WithStepObserver(stepObserver),
}
@@ -313,11 +311,10 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
})
}
}
opts = append(opts, agent.WithCompactor(e.cfg.Compactor(threshold, onFire)))
sharedOpts = append(sharedOpts, agent.WithCompactor(e.cfg.Compactor(threshold, onFire)))
}
}
ag := agent.New(model, e.systemPrompt(ra), opts...)
// Stage non-image input attachments (audio/PDF/binary) into the host file
// store and fold an [ATTACHED FILES] descriptor into the prompt so the agent
// can reach them by file_id. No-op when Ports.InputFiles is nil or there are
@@ -327,7 +324,35 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
// One WithSteer drains BOTH the session mailbox (a tool's AttachImages) and
// the critic's nudges before each step.
steer := func() []llm.Message { return append(mailbox.drain(), critic.drainSteer()...) }
runRes, runErr := runAgent(runCtx, ag, input, inv.Images, agent.WithSteer(steer))
var runRes *agent.Result
var runErr error
if len(ra.Phases) == 0 {
// Single-loop run: the agent's base prompt + full toolbox, with the
// critic's DYNAMIC step ceiling (WithMaxStepsFunc, so it can raise a
// healthy-but-long run's budget mid-flight; falls back to maxIter).
opts := append([]agent.Option{
agent.WithToolbox(toolbox),
critic.maxStepsOption(maxIter),
}, sharedOpts...)
ag := agent.New(model, e.systemPrompt(ra), opts...)
runRes, runErr = runAgent(runCtx, ag, input, inv.Images, agent.WithSteer(steer))
} else {
// Multi-phase pipeline: each phase runs its own prompt/tier/tools/step-cap
// sequentially, threading outputs through {{.<PhaseName>}} templates. Reuses
// the shared opts so audit/steps/critic-steer accumulate across every phase.
// (Per-phase step caps are fixed — the critic's dynamic ceiling is not
// propagated to phases — but its steer + hard deadline still apply.)
runRes, runErr = e.runPhases(runCtx, ra, phaseDeps{
baseModel: model,
baseTier: tier,
baseToolbox: toolbox,
baseMaxIter: maxIter,
sharedOpts: sharedOpts,
steer: steer,
rec: rec,
}, input, inv.Images)
}
status := statusFor(runCtx, runErr)
if runRes != nil {
@@ -403,13 +428,20 @@ func (e *Executor) finishAudit(ctx context.Context, rec RunRecorder, status stri
}
func (e *Executor) systemPrompt(ra RunnableAgent) string {
return e.systemPromptWithBody(ra.SystemPrompt)
}
// systemPromptWithBody composes the optional platform header with an arbitrary
// body. The single-loop path passes ra.SystemPrompt; the phase runner passes a
// phase's expanded instructions, so each phase keeps the platform header.
func (e *Executor) systemPromptWithBody(body string) string {
if e.cfg.SystemHeader == "" {
return ra.SystemPrompt
return body
}
if ra.SystemPrompt == "" {
if body == "" {
return e.cfg.SystemHeader
}
return e.cfg.SystemHeader + "\n\n" + ra.SystemPrompt
return e.cfg.SystemHeader + "\n\n" + body
}
// compactionThreshold returns the token threshold for the tier's model context