e856dacc12
Plugs into run.Ports.Checkpointer (the executor's call site is a P2 follow-up;
this provides the seam + impls ahead of it):
- checkpoint.go: CheckpointStore seam + RunCheckpoint{Meta, Messages, Iteration,
ActivePhase} + RunCheckpointMeta (mirrors mort's agentexec types).
- handle.go: New(store, meta, throttle, now) -> run.Checkpointer. Save writes a
throttled snapshot; Complete/Fail delete it (a cleanly finished or terminally
failed run is NOT a recovery candidate; a shutdown-interrupted run never calls
them, so its checkpoint survives ListInterrupted at boot). nil store -> no-op.
- memory.go: NewMemory() default (with the honest caveat that an in-memory store
does not survive the restart it exists to recover from — providing a durable store is mort's job).
Tests: save+complete clears the recovery candidate; throttle skips in-window
saves; nil-store is a clean no-op. Core imports ZERO from checkpoint.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
72 lines
2.2 KiB
Go
72 lines
2.2 KiB
Go
package checkpoint
|
|
|
|
import (
|
|
"context"
|
|
"sync"
|
|
"time"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/executus/run"
|
|
)
|
|
|
|
// handle is a per-run run.Checkpointer bound to one run's id + meta. Save writes
|
|
// a fresh snapshot (throttled), Complete/Fail delete the checkpoint (a cleanly
|
|
// finished or terminally failed run is NOT a recovery candidate). A run
|
|
// interrupted by shutdown never calls Complete/Fail, so its checkpoint survives
|
|
// for ListInterrupted at boot.
|
|
type handle struct {
|
|
store CheckpointStore
|
|
meta RunCheckpointMeta
|
|
throttle time.Duration
|
|
now func() time.Time
|
|
|
|
mu sync.Mutex
|
|
lastSave time.Time
|
|
}
|
|
|
|
var _ run.Checkpointer = (*handle)(nil)
|
|
|
|
// New returns a run.Checkpointer that persists snapshots of the run identified
|
|
// by meta.RunID to store, no more often than throttle (Save calls inside the
|
|
// window are skipped). A nil store yields a no-op Checkpointer. throttle <= 0
|
|
// saves every call; now defaults to time.Now.
|
|
func New(store CheckpointStore, meta RunCheckpointMeta, throttle time.Duration, now func() time.Time) run.Checkpointer {
|
|
if store == nil {
|
|
return noop{}
|
|
}
|
|
if now == nil {
|
|
now = time.Now
|
|
}
|
|
return &handle{store: store, meta: meta, throttle: throttle, now: now}
|
|
}
|
|
|
|
func (h *handle) Save(ctx context.Context, st run.RunCheckpointState) error {
|
|
h.mu.Lock()
|
|
now := h.now()
|
|
if h.throttle > 0 && !h.lastSave.IsZero() && now.Sub(h.lastSave) < h.throttle {
|
|
h.mu.Unlock()
|
|
return nil // throttled — a more recent snapshot will land shortly
|
|
}
|
|
h.lastSave = now
|
|
h.mu.Unlock()
|
|
|
|
return h.store.Save(ctx, RunCheckpoint{
|
|
Meta: h.meta,
|
|
Messages: st.Messages,
|
|
Iteration: st.Iteration,
|
|
UpdatedAt: now,
|
|
})
|
|
}
|
|
|
|
// Complete deletes this run's checkpoint from the store, keyed by meta.RunID,
// and returns the store's error, if any.
func (h *handle) Complete(ctx context.Context) error {
	runID := h.meta.RunID
	return h.store.Delete(ctx, runID)
}
|
|
|
|
// Fail deletes this run's checkpoint from the store, keyed by meta.RunID, and
// returns the store's error, if any. The failure cause passed in is ignored.
func (h *handle) Fail(ctx context.Context, _ error) error {
	runID := h.meta.RunID
	return h.store.Delete(ctx, runID)
}
|
|
|
|
// noop is the nil-store Checkpointer: every method is a successful no-op.
|
|
type noop struct{}
|
|
|
|
var _ run.Checkpointer = noop{}
|
|
|
|
func (noop) Save(context.Context, run.RunCheckpointState) error { return nil }
|
|
func (noop) Complete(context.Context) error { return nil }
|
|
func (noop) Fail(context.Context, error) error { return nil }
|