package checkpoint

import (
	"context"
	"sync"
	"time"

	"gitea.stevedudenhoeffer.com/steve/executus/run"
)

// handle implements run.Checkpointer for exactly one run, identified by the
// meta it was constructed with.
//
// Lifecycle:
//   - Save persists a fresh snapshot, subject to the throttle window.
//   - Complete and Fail both delete the checkpoint: a run that finished
//     cleanly or failed terminally is NOT a recovery candidate.
//   - A run cut short by shutdown reaches neither Complete nor Fail, so its
//     checkpoint stays behind for ListInterrupted to find at boot.
type handle struct {
	store    CheckpointStore
	meta     RunCheckpointMeta
	throttle time.Duration
	now      func() time.Time

	mu       sync.Mutex
	lastSave time.Time // time of the last successful persist; guarded by mu
}

var _ run.Checkpointer = (*handle)(nil)

// New builds a run.Checkpointer that writes snapshots of the run identified by
// meta.RunID into store.
//
// Snapshots are persisted at most once per throttle interval; Save calls that
// arrive inside the window are skipped. A throttle <= 0 disables throttling, so
// every Save is written. When store is nil the returned Checkpointer is a
// no-op. When now is nil, time.Now is used as the clock.
func New(store CheckpointStore, meta RunCheckpointMeta, throttle time.Duration, now func() time.Time) run.Checkpointer {
	if store == nil {
		return noop{}
	}
	h := &handle{store: store, meta: meta, throttle: throttle, now: time.Now}
	if now != nil {
		h.now = now
	}
	return h
}

// Save persists st as the run's latest snapshot unless the call falls inside
// the throttle window, in which case it returns nil without touching the store.
// Any error from the store is returned unchanged.
func (h *handle) Save(ctx context.Context, st run.RunCheckpointState) error {
	stamp, skip := h.stampIfDue()
	if skip {
		return nil // throttled — a more recent snapshot will land shortly
	}

	snapshot := RunCheckpoint{
		Meta:      h.meta,
		Messages:  st.Messages,
		Iteration: st.Iteration,
		UpdatedAt: stamp,
	}
	if err := h.store.Save(ctx, snapshot); err != nil {
		// Leave lastSave untouched on failure so the next Save is not
		// throttled away; otherwise a transient store error would silently
		// drop a snapshot the caller believes was saved.
		return err
	}

	// The throttle clock moves only after the write succeeded. The mutex is
	// not held across the store call; that is safe because a run drives its
	// Save calls from a single goroutine, so this window cannot double-write.
	h.markSaved(stamp)
	return nil
}

// stampIfDue reads the clock under the lock and reports that reading together
// with whether the save should be skipped because the previous successful save
// is still within the throttle window.
func (h *handle) stampIfDue() (time.Time, bool) {
	h.mu.Lock()
	stamp := h.now()
	skip := h.throttle > 0 && !h.lastSave.IsZero() && stamp.Sub(h.lastSave) < h.throttle
	h.mu.Unlock()
	return stamp, skip
}

// markSaved advances lastSave to stamp, never moving it backwards.
func (h *handle) markSaved(stamp time.Time) {
	h.mu.Lock()
	if h.lastSave.Before(stamp) {
		h.lastSave = stamp
	}
	h.mu.Unlock()
}

// Complete deletes the run's checkpoint from the store; a run that finished
// cleanly is not a recovery candidate.
func (h *handle) Complete(ctx context.Context) error {
	return h.discard(ctx)
}

// Fail deletes the run's checkpoint from the store; a terminally failed run is
// not a recovery candidate. The failure cause is ignored.
func (h *handle) Fail(ctx context.Context, _ error) error {
	return h.discard(ctx)
}

// discard asks the store to delete the checkpoint keyed by this run's id.
func (h *handle) discard(ctx context.Context) error {
	return h.store.Delete(ctx, h.meta.RunID)
}

// noop is the Checkpointer handed out when no store is configured: every
// method succeeds without doing anything.
type noop struct{}

var _ run.Checkpointer = noop{}

// Save does nothing and reports success.
func (noop) Save(_ context.Context, _ run.RunCheckpointState) error {
	return nil
}

// Complete does nothing and reports success.
func (noop) Complete(_ context.Context) error {
	return nil
}

// Fail does nothing and reports success.
func (noop) Fail(_ context.Context, _ error) error {
	return nil
}