C0b: wire Critic + Delivery into run.Executor
executus CI / test (pull_request) Failing after 1m0s
Adversarial Review (Gadfly) / review (pull_request) Successful in 5m9s

Continues finishing the executor's run.Ports wiring (after C0's Palette).

Critic (run/critic.go): when Ports.Critic is set and the agent enables it, the
executor calls Monitor at run start, feeds RecordStep/RecordToolStart from the
step observer, drains the critic's Steer messages into the loop via
agent.WithSteer, and binds the run's hard cancellation to the critic's
(extendable) Deadline through a watch goroutine — a healthy-but-slow run gets
room while a hung one is killed. Stop() on run end. Soft timeout from
Defaults.CriticSoftTimeout (default 90s). nil-safe: no critic / not-enabled =
no-op.

Delivery (run/executor.go deliver): after the run, when Ports.Delivery is set
and inv.DeliveryID is non-empty, the executor posts Result.Output (or
DeliverError on failure) to a host-interpreted deliver.Target
{inv.DeliveryKind, inv.DeliveryID}. Empty target = caller reads Result.Output
itself (the synchronous default; the `.agent run` canary). Best-effort +
detached.

tool.Invocation gains DeliveryKind/DeliveryID (host-set egress target).

Tests: critic monitored/fed/steered/stopped when enabled, untouched when not;
delivery posts on a target, skips without one. Deferred: Checkpointer (needs a
majordomo hook to snapshot the running message history).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-27 10:00:05 -04:00
parent 0c80679719
commit 43b2471737
6 changed files with 317 additions and 14 deletions
+43 -11
View File
@@ -10,6 +10,7 @@ import (
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
"gitea.stevedudenhoeffer.com/steve/executus/compact"
"gitea.stevedudenhoeffer.com/steve/executus/deliver"
"gitea.stevedudenhoeffer.com/steve/executus/tool"
)
@@ -27,6 +28,7 @@ type Defaults struct {
MaxConsecutiveToolErrors int // loop guard; default 3
MaxSameToolCallRepeats int // retry-storm guard; default 3
CompactionThresholdRatio float64 // fraction of model context to compact at; default 0.7
CriticSoftTimeout time.Duration // idle window before the critic wakes; default 90s
}
func (d Defaults) withFallbacks() Defaults {
@@ -48,6 +50,9 @@ func (d Defaults) withFallbacks() Defaults {
if d.CompactionThresholdRatio <= 0 {
d.CompactionThresholdRatio = 0.7
}
if d.CriticSoftTimeout <= 0 {
d.CriticSoftTimeout = 90 * time.Second
}
return d
}
@@ -141,19 +146,20 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
// Audit start (optional). The recorder satisfies RunTally; stamp it on the
// invocation so a self-status tool can read live progress.
info := RunInfo{
RunID: inv.RunID,
SubjectID: ra.ID,
Name: ra.Name,
CallerID: inv.CallerID,
ChannelID: inv.ChannelID,
ParentRunID: inv.ParentRunID,
Inputs: inv.SkillInputs,
StartedAt: started,
}
var rec RunRecorder
var stateAcc *RunStateAccessor
if e.cfg.Ports.Audit != nil {
rec = e.cfg.Ports.Audit.StartRun(ctx, RunInfo{
RunID: inv.RunID,
SubjectID: ra.ID,
Name: ra.Name,
CallerID: inv.CallerID,
ChannelID: inv.ChannelID,
ParentRunID: inv.ParentRunID,
Inputs: inv.SkillInputs,
StartedAt: started,
})
rec = e.cfg.Ports.Audit.StartRun(ctx, info)
}
if rec != nil {
stateAcc = NewRunStateAccessor(rec, maxIter, 0, started)
@@ -186,6 +192,12 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
runCtx, mergeCancel := MergeCancellation(runCtx, ctx)
defer mergeCancel()
// Critic (optional): monitors the run for a stall, can nudge/extend/kill via
// its host Escalator. Its hard deadline is bound to runCtx (cancel on pass).
// nil-safe: no-op when no critic is configured or the agent doesn't enable it.
critic, stopCritic := e.startCritic(runCtx, cancel, ra, info)
defer stopCritic()
// Step instrumentation: accumulate Result.Steps + fire inv.OnStep, feed the
// audit recorder, and keep the live iteration counter fresh. majordomo's
// step observer hands us each completed iteration; we zip the model's tool
@@ -200,6 +212,7 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
if rec != nil {
rec.OnStep(s.Index, s.Response)
}
critic.recordStep(s.Index) // keep the critic's activity clock fresh
var calls []llm.ToolCall
if s.Response != nil {
calls = s.Response.ToolCalls
@@ -210,6 +223,7 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
}
for i := 0; i < n; i++ {
call, r := calls[i], s.Results[i]
critic.recordToolStart(call.Name, string(call.Arguments))
emitter.toolStart(runCtx, call.Name, call.Arguments)
emitter.toolEnd(runCtx, call, r.Content, r.IsError)
if rec != nil {
@@ -244,7 +258,7 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
}
ag := agent.New(model, e.systemPrompt(ra), opts...)
runRes, runErr := ag.Run(runCtx, input)
runRes, runErr := ag.Run(runCtx, input, critic.steerOptions()...)
status := statusFor(runErr)
if runRes != nil {
@@ -258,6 +272,7 @@ func (e *Executor) Run(ctx context.Context, ra RunnableAgent, inv tool.Invocatio
if e.cfg.Ports.Budget != nil {
e.cfg.Ports.Budget.Commit(detach(ctx), inv.CallerID, time.Since(started).Seconds())
}
e.deliver(ctx, inv, res, runErr)
return res
}
@@ -316,6 +331,23 @@ func (e *Executor) compactionThreshold(tier string) int {
return int(float64(max) * e.cfg.Defaults.CompactionThresholdRatio)
}
// deliver posts the run's output (or error) via run.Ports.Delivery when both a
// Delivery and a target (inv.DeliveryID) are set. No target = the caller reads
// Result.Output itself (the synchronous default). Best-effort + detached: a
// delivery failure must not change the run's outcome.
func (e *Executor) deliver(ctx context.Context, inv tool.Invocation, res Result, runErr error) {
if e.cfg.Ports.Delivery == nil || inv.DeliveryID == "" {
return
}
target := deliver.Target{Kind: inv.DeliveryKind, ID: inv.DeliveryID}
dctx := detach(ctx)
if runErr != nil {
_ = e.cfg.Ports.Delivery.DeliverError(dctx, target, runErr)
return
}
_, _ = e.cfg.Ports.Delivery.Deliver(dctx, target, res.Output, nil)
}
// detach derives a bounded cleanup context off ctx, detached from its
// cancellation, for post-run writes. The cancel is intentionally not returned;
// CleanupContextTimeout bounds the lifetime.