// Package run is executus's run kernel: the shared run-loop mechanics around
// majordomo's agent loop, plus the host seams (run.Ports / RunnableAgent) that
// let one executor serve every surface — a light host's bounded one-shot run,
// a heavy host's persona agent or saved skill — without the kernel importing a
// battery.
//
// This file holds the genuinely-identical scaffolding both run shapes need:
// context cancellation merging, the detached-cleanup timeout, the per-run
// progress accessor the self-status tool reads, the legacy `submit`
// compatibility tool (submit.go), the ancestor progress bridge (progress.go),
// and the run-finalizer machinery — one source of truth.
//
// The kernel depends only on majordomo + executus/tool + the run.Ports
// interfaces; persistence, audit, the persona/skill nouns, and the critic are
// host-supplied via Ports (see ports.go) so importing the kernel never drags in
// a store or a battery.
package run

import (
	"context"
	"errors"
	"log/slog"
	"sync/atomic"
	"time"

	"gitea.stevedudenhoeffer.com/steve/executus/tool"
)

// ErrShutdown is the cancellation cause set on mort's base lifecycle context
// when the process is shutting down (SIGTERM after the drain window). The
// agent executor uses it to distinguish a run interrupted by shutdown (which
// should be left durable-recoverable) from a run that errored or hit its own
// deadline (terminal).
//
// It is a sentinel error value. NOTE(review): presumably detected with
// errors.Is(context.Cause(ctx), ErrShutdown) — confirm against the executor.
var ErrShutdown = errors.New("mort: shutting down")

// CleanupContextTimeout caps how long a run's post-completion cleanup ops
// (budget commit, audit Close, attachment bookkeeping) may wait on
// storage after detaching from the caller's — possibly already
// cancelled — context. 10s is generous for a single-row UPDATE against
// MySQL; longer suggests a hung connection the run goroutine shouldn't
// keep waiting on. Both executors derive their cleanup contexts as
// context.WithTimeout(context.WithoutCancel(ctx), CleanupContextTimeout).
const CleanupContextTimeout = 10 * time.Second

// Reserved state-react lifecycle event keys, shared so both nouns surface
// the same UX shape. Namespaced with double-underscores to make accidental
// collision with a tool name near-impossible.
const (
	StateReactStart          = "__start__"
	StateReactEnd            = "__end__"
	StateReactError          = "__error__"
	StateReactBudgetExceeded = "__budget_exceeded__"
)

// MergeCancellation returns a context cancelled when EITHER input is
// cancelled, propagating the cancellation Cause from whichever fired. Used
// by the lane preemption path (the lane's per-job ctx.Cause flows into the
// run context) and by the runtime-detach path (process shutdown still
// reaches a run whose deadline was reset after a lane wait).
//
// The returned context is a child of parent, so it inherits parent's values
// and deadline; secondary contributes only its cancellation and cause. If
// secondary is already cancelled when MergeCancellation is called, the
// returned context is cancelled before this function returns, so callers can
// check merged.Err() immediately without racing a watcher goroutine.
//
// Always call the returned cancel to release the watcher goroutine (when one
// was started); the context is also cancelled when either input fires.
// Neither parent nor secondary may be nil.
func MergeCancellation(parent, secondary context.Context) (context.Context, context.CancelFunc) {
	merged, cancel := context.WithCancelCause(parent)
	release := func() { cancel(nil) }

	// Fast path: secondary already fired. Cancel synchronously so the caller
	// never observes a live context that is about to die, and skip the
	// watcher goroutine entirely. If parent was already cancelled too, merged
	// keeps parent's cause because only the first cancellation sets it.
	if secondary.Err() != nil {
		cancel(context.Cause(secondary))
		return merged, release
	}

	go func() {
		select {
		case <-merged.Done():
			// Parent fired or the caller released us; nothing to propagate.
		case <-secondary.Done():
			cancel(context.Cause(secondary))
		}
	}()
	return merged, release
}

// RunFinalizer is invoked at run finish so per-run tool state (open HTTP
// streams, per-run code_exec counters, per-run search budgets) is released
// and the process-lifetime maps keyed by run id don't grow unbounded.
// Both executors fire their registered finalizers via FireFinalizers.
type RunFinalizer interface {
	FinalizeRun(runID string)
}

// FireFinalizers runs every finalizer for runID, isolating each behind a
// panic-recover so one buggy finalizer can't take down the run goroutine
// or skip the others. Safe to call with a nil/empty slice.
func FireFinalizers(fs []RunFinalizer, runID string) { for _, f := range fs { if f == nil { continue } func() { defer func() { if r := recover(); r != nil { slog.Error("runengine: run finalizer panicked", "run_id", runID, "panic", r) } }() f.FinalizeRun(runID) }() } } // RunTally is the narrow live-progress source the RunStateAccessor reads — // the running token and tool-call counts for the in-flight run. The audit // battery's writer satisfies it; this interface is how the run kernel reads // live tallies without importing the audit package (the inversion of mort's // direct *skillaudit.Writer dependency). type RunTally interface { // TokenStats returns the running input, output, and thinking token totals. TokenStats() (in, out, thinking int64) // ToolCallsCount returns the number of tool calls executed so far. ToolCallsCount() int } // RunStateAccessor is the per-run live-progress accessor the executor // stamps on Invocation.RunState before building the toolbox, so the // self-status tool can report iteration / tool-calls / tokens / elapsed for // the in-flight run. Construct with NewRunStateAccessor; the executor's step // observer calls SetIteration each loop. type RunStateAccessor struct { tally RunTally iter atomic.Int32 maxIter int maxCalls int startedAt time.Time } // NewRunStateAccessor builds the accessor. writer supplies the live token // + tool-call tallies; maxIter / maxCalls are the reported caps (0 = // uncapped); startedAt anchors the elapsed clock. func NewRunStateAccessor(tally RunTally, maxIter, maxCalls int, startedAt time.Time) *RunStateAccessor { return &RunStateAccessor{ tally: tally, maxIter: maxIter, maxCalls: maxCalls, startedAt: startedAt, } } // SetIteration records the current agent-loop iteration (called from the // executor's step observer). func (a *RunStateAccessor) SetIteration(iter int) { a.iter.Store(int32(iter)) } // RunState satisfies tool.RunStateAccessor. 
func (a *RunStateAccessor) RunState() tool.RunState { in, out, think := a.tally.TokenStats() return tool.RunState{ Iteration: int(a.iter.Load()), MaxIterations: a.maxIter, ToolCalls: a.tally.ToolCallsCount(), MaxToolCalls: a.maxCalls, InputTokens: in, OutputTokens: out, ThinkingTokens: think, ElapsedSeconds: int(time.Since(a.startedAt).Seconds()), } }