package run

import (
	"context"
	"errors"

	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
)

// Durable-recovery plumbing for the executor. The Checkpointer port (set via
// Ports.Checkpointer, a CheckpointerFactory) persists a run's resumable progress
// during the loop; on boot a host re-dispatches an interrupted run through the
// executor with a ResumeState (the saved transcript / completed phases) so it
// CONTINUES rather than restarting, reusing the SAME durable record via an
// existing Checkpointer. Both are carried into Run via the context (mirrors
// mort's agentexec.WithResumeState / WithExistingCheckpointer).

// ResumeState is the prior progress of a recovered run, handed to Run so the
// run picks up where it left off instead of starting over. The host's recovery
// path attaches it with WithResumeState and the executor reads it back: a
// single-loop run seeds the saved transcript as history, while a multi-phase
// run skips the completed phases and seeds the active one.
type ResumeState struct {
	// History is the single-loop transcript, or the active phase's transcript.
	History []llm.Message
	// CompletedPhases holds the outputs of finished phases, in order
	// (multi-phase runs).
	CompletedPhases []PhaseOutput
	// ActivePhase names the phase that was in flight (multi-phase runs).
	ActivePhase string
}

// resumeStateKey is the private context key under which a *ResumeState is
// stored.
type resumeStateKey struct{}

// WithResumeState returns a child of ctx that carries rs, the prior progress
// of a recovered run, so that Run can read it back.
func WithResumeState(ctx context.Context, rs *ResumeState) context.Context {
	var key resumeStateKey
	return context.WithValue(ctx, key, rs)
}

// resumeStateFromContext returns the *ResumeState stored in ctx by
// WithResumeState, or nil when ctx holds no value of that type under the key.
func resumeStateFromContext(ctx context.Context) *ResumeState {
	if state, ok := ctx.Value(resumeStateKey{}).(*ResumeState); ok {
		return state
	}
	return nil
}

// existingCheckpointerKey is the private context key under which a
// pre-existing Checkpointer is stored.
type existingCheckpointerKey struct{}

// WithExistingCheckpointer carries a pre-existing Checkpointer into Run so a
// recovery re-run reuses the SAME durable record (the executor uses it instead of
// calling Ports.Checkpointer.Begin).
func WithExistingCheckpointer(ctx context.Context, cp Checkpointer) context.Context { return context.WithValue(ctx, existingCheckpointerKey{}, cp) } func existingCheckpointerFromContext(ctx context.Context) Checkpointer { cp, _ := ctx.Value(existingCheckpointerKey{}).(Checkpointer) return cp } // checkpointOutcome is the finalize decision for a durable run. type checkpointOutcome int const ( checkpointComplete checkpointOutcome = iota checkpointLeaveRunning checkpointFail ) // classifyCheckpointOutcome maps (run error, cancellation cause) to the durable // finalize action: success clears the checkpoint (Complete); a shutdown-caused // cancellation leaves the record so boot recovery picks it up (neither // Complete nor Fail); anything else (model error, tool loop, the run's own // deadline, a critic kill, a caller cancel) is terminal (Fail). Mirrors mort's // agentexec.classifyCheckpointOutcome. func classifyCheckpointOutcome(runErr, cause error) checkpointOutcome { switch { case runErr == nil: return checkpointComplete case errors.Is(cause, ErrShutdown): return checkpointLeaveRunning default: return checkpointFail } } // finalizeCheckpoint applies the outcome to the per-run checkpointer (nil-safe). // Runs on a detached context so a cancelled run still records its terminal state. func finalizeCheckpoint(ctx context.Context, cp Checkpointer, runErr error, cause error) { if cp == nil { return } switch classifyCheckpointOutcome(runErr, cause) { case checkpointComplete: _ = cp.Complete(detach(ctx)) case checkpointFail: _ = cp.Fail(detach(ctx), runErr) case checkpointLeaveRunning: // Interrupted by shutdown: leave the record for boot recovery. } }