feat(run): execute multi-phase pipelines (RunnableAgent.Phases)

The kernel carried RunnableAgent.Phases as a DTO but never executed it — Run always ran a single agent loop with ra.SystemPrompt, so a phased agent (mort's deepresearch/research) silently ran one loop with the base prompt instead of its pipeline. This implements the phase loop, ported from mort's agentexec pipeline but reusing the kernel's own machinery.

- run/phases.go: runPhases / runOnePhase. Phases run sequentially; each is a fresh agent loop (or a bare LLM call for IsRunFunc phases) with its own template-expanded system prompt ({{.Query}} + {{.<PhaseName>}}), model tier, step cap, and tool subset. Outputs thread into later phases; the final phase's output is the run output. Optional phases swallow errors and substitute FallbackMessage; a non-optional phase that merely exhausts its step/tool budget salvages its partial transcript and continues (a hard error still aborts); per-phase tier-resolve failures fall back with a WARN.
- run/agent.go: Phase gains IsRunFunc + FallbackMessage (the kernel Phase struct previously omitted them).
- run/executor.go: Run factors the shared agent options (tool-error limits, step observer, compactor) and branches — single loop (critic's dynamic step ceiling) vs the phase runner (fixed per-phase caps; the run-level critic's steer + hard deadline still apply across phases). systemPrompt now delegates to systemPromptWithBody so each phase keeps the platform header. The same step observer feeds audit/steps/critic across all phases.

Tests (run/phases_test.go): sequential output threading + template expansion, Optional-failure → FallbackMessage continues, hard-error abort, IsRunFunc bare call, per-phase SystemHeader, filterToolbox subset, template expansion.

Full ./... suite green.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-29 15:14:45 -04:00
parent b25a13ed4f
commit 30b79a330f
4 changed files with 603 additions and 15 deletions
@@ -289,12 +289,10 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
 		}
 	}

-	opts := []agent.Option{
-		agent.WithToolbox(toolbox),
-		// Step ceiling: a fixed WithMaxSteps(maxIter) normally, but when a critic is
-		// active it owns a DYNAMIC ceiling (WithMaxStepsFunc) so it can raise a
-		// healthy-but-long run's budget mid-flight. Falls back to maxIter.
-		critic.maxStepsOption(maxIter),
+	// Shared agent options used by BOTH the single-loop path and every phase: the
+	// tool-error guards, the step observer, and optional compaction. The toolbox +
+	// step ceiling are NOT shared (they vary per phase), so they're added per path.
+	sharedOpts := []agent.Option{
 		agent.WithToolErrorLimits(e.cfg.Defaults.MaxConsecutiveToolErrors, e.cfg.Defaults.MaxSameToolCallRepeats),
 		agent.WithStepObserver(stepObserver),
 	}
@@ -313,11 +311,10 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
 					})
 				}
 			}
-			opts = append(opts, agent.WithCompactor(e.cfg.Compactor(threshold, onFire)))
+			sharedOpts = append(sharedOpts, agent.WithCompactor(e.cfg.Compactor(threshold, onFire)))
 		}
 	}

-	ag := agent.New(model, e.systemPrompt(ra), opts...)
 	// Stage non-image input attachments (audio/PDF/binary) into the host file
 	// store and fold an [ATTACHED FILES] descriptor into the prompt so the agent
 	// can reach them by file_id. No-op when Ports.InputFiles is nil or there are
@@ -327,7 +324,35 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
 	// One WithSteer drains BOTH the session mailbox (a tool's AttachImages) and
 	// the critic's nudges before each step.
 	steer := func() []llm.Message { return append(mailbox.drain(), critic.drainSteer()...) }
-	runRes, runErr := runAgent(runCtx, ag, input, inv.Images, agent.WithSteer(steer))
+
+	var runRes *agent.Result
+	var runErr error
+	if len(ra.Phases) == 0 {
+		// Single-loop run: the agent's base prompt + full toolbox, with the
+		// critic's DYNAMIC step ceiling (WithMaxStepsFunc, so it can raise a
+		// healthy-but-long run's budget mid-flight; falls back to maxIter).
+		opts := append([]agent.Option{
+			agent.WithToolbox(toolbox),
+			critic.maxStepsOption(maxIter),
+		}, sharedOpts...)
+		ag := agent.New(model, e.systemPrompt(ra), opts...)
+		runRes, runErr = runAgent(runCtx, ag, input, inv.Images, agent.WithSteer(steer))
+	} else {
+		// Multi-phase pipeline: each phase runs its own prompt/tier/tools/step-cap
+		// sequentially, threading outputs through {{.<PhaseName>}} templates. Reuses
+		// the shared opts so audit/steps/critic-steer accumulate across every phase.
+		// (Per-phase step caps are fixed — the critic's dynamic ceiling is not
+		// propagated to phases — but its steer + hard deadline still apply.)
+		runRes, runErr = e.runPhases(runCtx, ra, phaseDeps{
+			baseModel:   model,
+			baseTier:    tier,
+			baseToolbox: toolbox,
+			baseMaxIter: maxIter,
+			sharedOpts:  sharedOpts,
+			steer:       steer,
+			rec:         rec,
+		}, input, inv.Images)
+	}

 	status := statusFor(runCtx, runErr)
 	if runRes != nil {
@@ -403,13 +428,20 @@ func (e *Executor) finishAudit(ctx context.Context, rec RunRecorder, status stri
 }

 func (e *Executor) systemPrompt(ra RunnableAgent) string {
+	return e.systemPromptWithBody(ra.SystemPrompt)
+}
+
+// systemPromptWithBody composes the optional platform header with an arbitrary
+// body. The single-loop path passes ra.SystemPrompt; the phase runner passes a
+// phase's expanded instructions, so each phase keeps the platform header.
+func (e *Executor) systemPromptWithBody(body string) string {
 	if e.cfg.SystemHeader == "" {
-		return ra.SystemPrompt
+		return body
 	}
-	if ra.SystemPrompt == "" {
+	if body == "" {
 		return e.cfg.SystemHeader
 	}
-	return e.cfg.SystemHeader + "\n\n" + ra.SystemPrompt
+	return e.cfg.SystemHeader + "\n\n" + body
 }

 // compactionThreshold returns the token threshold for the tier's model context