// executus/run/checkpoint_test.go

package run

import (
	"context"
	"errors"
	"testing"

	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
	"gitea.stevedudenhoeffer.com/steve/majordomo/provider/fake"

	"gitea.stevedudenhoeffer.com/steve/executus/tool"
)

// fakeCheckpointer is a recording test double for a per-run checkpointer. It
// keeps every state it is asked to save and remembers which terminal call
// (Complete or Fail) it received.
type fakeCheckpointer struct {
	// saves holds each state passed to Save, in call order.
	saves []RunCheckpointState

	// completed is set once Complete has been called.
	completed bool

	// failed is set once Fail has been called; failErr is the error it was given.
	failed  bool
	failErr error
}

// Save appends the given state to the recorded history. The context is
// ignored and the call always succeeds.
func (c *fakeCheckpointer) Save(_ context.Context, state RunCheckpointState) error {
	c.saves = append(c.saves, state)
	return nil
}
// Complete records that the checkpoint was completed. The context is ignored
// and the call always succeeds.
func (c *fakeCheckpointer) Complete(context.Context) error {
	c.completed = true
	return nil
}
// Fail records that the checkpoint was failed and keeps the error it was
// given. The context is ignored and the call always succeeds.
func (c *fakeCheckpointer) Fail(_ context.Context, cause error) error {
	c.failed, c.failErr = true, cause
	return nil
}

// fakeCheckpointFactory is a test double for the checkpointer factory port: it
// always hands out the same fakeCheckpointer and remembers the RunInfo it was
// started with.
type fakeCheckpointFactory struct {
	// cp is the checkpointer returned from Begin.
	cp *fakeCheckpointer

	// info is the RunInfo received by the most recent Begin call.
	info RunInfo
}

// Begin stores the RunInfo it was called with and returns the factory's
// fakeCheckpointer. The context is ignored and the call always succeeds.
func (f *fakeCheckpointFactory) Begin(_ context.Context, runInfo RunInfo) (Checkpointer, error) {
	f.info = runInfo
	return f.cp, nil
}

// TestClassifyCheckpointOutcome pins the finalize decision matrix: for each
// (run error, cancellation cause) pair it checks which checkpointOutcome
// classifyCheckpointOutcome returns. In this table a nil error is expected to
// complete, a cancellation caused by ErrShutdown is expected to leave the
// checkpoint running, and every other row is expected to fail.
//
// Each row runs as its own subtest so a failure is reported under the row's
// name and a single row can be selected with -run.
func TestClassifyCheckpointOutcome(t *testing.T) {
	cases := []struct {
		name  string
		err   error
		cause error
		want  checkpointOutcome
	}{
		{"success", nil, nil, checkpointComplete},
		{"shutdown", context.Canceled, ErrShutdown, checkpointLeaveRunning},
		{"critic-kill", context.Canceled, ErrCriticKill, checkpointFail},
		{"deadline", context.DeadlineExceeded, context.DeadlineExceeded, checkpointFail},
		{"model-error", errors.New("boom"), nil, checkpointFail},
		{"caller-cancel", context.Canceled, context.Canceled, checkpointFail},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := classifyCheckpointOutcome(tc.err, tc.cause)
			if got != tc.want {
				t.Errorf("classifyCheckpointOutcome(%v, %v) = %v, want %v", tc.err, tc.cause, got, tc.want)
			}
		})
	}
}

// TestCheckpoint_SingleLoopSaveAndComplete runs a single-loop agent with a
// checkpointer factory wired into the executor's ports and checks the happy
// path end to end: Begin receives a RunInfo carrying the run ID, subject
// (agent) ID, model tier and guild ID; at least one Save happens and the last
// one carries a non-empty transcript; and the run finishes with Complete, not
// Fail.
func TestCheckpoint_SingleLoopSaveAndComplete(t *testing.T) {
	models, _ := phaseProvider(t, fake.Reply("done"))
	recorder := &fakeCheckpointer{}
	factory := &fakeCheckpointFactory{cp: recorder}
	cfg := Config{
		Registry: tool.NewRegistry(),
		Models:   models,
		Ports:    Ports{Checkpointer: factory},
	}
	ex := New(cfg)

	agent := RunnableAgent{ID: "a1", Name: "boss", ModelTier: "test-model"}
	inv := tool.Invocation{
		RunID:       "run-x",
		CallerID:    "steve",
		ChannelID:   "chan",
		GuildID:     "g",
		SkillInputs: map[string]any{"prompt": "go"},
	}
	res := ex.Run(context.Background(), agent, inv, "go")
	if res.Err != nil {
		t.Fatalf("run error: %v", res.Err)
	}

	// Begin must have been handed everything needed to resume this run later.
	info := factory.info
	metaOK := info.RunID == "run-x" &&
		info.SubjectID == "a1" &&
		info.ModelTier == "test-model" &&
		info.GuildID == "g"
	if !metaOK {
		t.Errorf("Begin RunInfo missing resume meta: %+v", info)
	}

	// Only the most recent Save is inspected for a transcript.
	switch n := len(recorder.saves); {
	case n == 0:
		t.Error("expected at least one checkpoint Save during the run")
	case len(recorder.saves[n-1].Messages) == 0:
		t.Error("checkpoint Save should carry the running transcript")
	}

	if !recorder.completed {
		t.Error("a successful run must Complete (clear) its checkpoint")
	}
	if recorder.failed {
		t.Error("a successful run must NOT Fail its checkpoint")
	}
}

// TestCheckpoint_TerminalErrorFails drives a run whose model provider returns
// an error and checks that the run reports that error, that the checkpointer
// saw Fail, and that it did not see Complete.
func TestCheckpoint_TerminalErrorFails(t *testing.T) {
	modelErr := errors.New("model down")
	models, _ := phaseProvider(t, fake.Fail(modelErr))
	recorder := &fakeCheckpointer{}
	ports := Ports{Checkpointer: &fakeCheckpointFactory{cp: recorder}}
	ex := New(Config{Registry: tool.NewRegistry(), Models: models, Ports: ports})

	agent := RunnableAgent{ID: "a1", ModelTier: "test-model"}
	inv := tool.Invocation{
		RunID:       "r",
		CallerID:    "c",
		SkillInputs: map[string]any{"prompt": "go"},
	}
	res := ex.Run(context.Background(), agent, inv, "go")
	if res.Err == nil {
		t.Fatal("expected a run error")
	}

	if !recorder.failed {
		t.Error("a terminal (non-shutdown) error must Fail the checkpoint")
	}
	if recorder.completed {
		t.Error("a failed run must NOT Complete its checkpoint")
	}
}

// TestCheckpoint_ResumeSeedsHistory: a run whose context carries a ResumeState
// with a saved History sends exactly that transcript as the first model
// request's messages (same message count), rather than appending the fresh
// input to it.
func TestCheckpoint_ResumeSeedsHistory(t *testing.T) {
	models, fp := phaseProvider(t, fake.Reply("continued"))
	history := []llm.Message{llm.UserText("prior turn 1"), llm.AssistantText("prior answer 1")}
	ctx := WithResumeState(context.Background(), &ResumeState{History: history})

	ex := New(Config{Registry: tool.NewRegistry(), Models: models})
	res := ex.Run(ctx,
		RunnableAgent{ID: "a1", ModelTier: "test-model"},
		tool.Invocation{RunID: "r", CallerID: "c", SkillInputs: map[string]any{"prompt": "ignored-on-resume"}}, "ignored-on-resume")
	if res.Err != nil {
		t.Fatalf("run error: %v", res.Err)
	}

	// Guard the index below: without it, a run that never reaches the model
	// would panic here instead of failing with a readable message.
	calls := fp.Calls()
	if len(calls) == 0 {
		t.Fatal("resumed run never called the model")
	}
	got := calls[0].Request.Messages
	if len(got) != len(history) {
		t.Fatalf("resume should seed the saved %d-message transcript, got %d messages", len(history), len(got))
	}
}

// TestCheckpoint_PhaseBoundarySavesCompleted runs a two-phase agent with a
// checkpointer wired in and checks that the last Save carrying any completed
// phases lists both of them, and that the run ends with Complete.
func TestCheckpoint_PhaseBoundarySavesCompleted(t *testing.T) {
	models, _ := phaseProvider(t, fake.Reply("out-a"), fake.Reply("out-b"))
	cp := &fakeCheckpointer{}
	ports := Ports{Checkpointer: &fakeCheckpointFactory{cp: cp}}
	ex := New(Config{Registry: tool.NewRegistry(), Models: models, Ports: ports})

	agent := RunnableAgent{
		ID:        "p",
		ModelTier: "test-model",
		Phases: []Phase{
			{Name: "a", SystemPrompt: "A"},
			{Name: "b", SystemPrompt: "B"},
		},
	}
	res := ex.Run(context.Background(), agent, tool.Invocation{RunID: "r", CallerID: "c"}, "Q")
	if res.Err != nil {
		t.Fatalf("run error: %v", res.Err)
	}

	// Walk the saves newest-first; the first one that mentions completed
	// phases is the final phase-boundary Save.
	var lastPhaseSave *RunCheckpointState
	for i := len(cp.saves) - 1; i >= 0; i-- {
		if len(cp.saves[i].CompletedPhases) > 0 {
			lastPhaseSave = &cp.saves[i]
			break
		}
	}
	if lastPhaseSave == nil || len(lastPhaseSave.CompletedPhases) != 2 {
		t.Fatalf("expected a phase-boundary Save listing 2 completed phases; saves=%+v", cp.saves)
	}

	if !cp.completed {
		t.Error("a successful phased run must Complete its checkpoint")
	}
}

// TestCheckpoint_ResumeSkipsCompletedPhases resumes a two-phase agent with
// phase "a" already recorded in ResumeState.CompletedPhases. It checks that
// the model is called exactly once, that the run's output is phase b's reply,
// and that phase b's system prompt template was rendered with phase a's saved
// output.
func TestCheckpoint_ResumeSkipsCompletedPhases(t *testing.T) {
	// A single scripted reply: only phase b is expected to reach the model.
	models, fp := phaseProvider(t, fake.Reply("out-b"))

	resume := &ResumeState{
		CompletedPhases: []PhaseOutput{{Name: "a", Output: "saved-a"}},
	}
	ctx := WithResumeState(context.Background(), resume)
	ex := New(Config{Registry: tool.NewRegistry(), Models: models})

	phases := []Phase{
		{Name: "a", SystemPrompt: "A"},
		{Name: "b", SystemPrompt: "B saw {{.a}}"},
	}
	agent := RunnableAgent{ID: "p", ModelTier: "test-model", Phases: phases}

	res := ex.Run(ctx, agent, tool.Invocation{RunID: "r", CallerID: "c"}, "Q")
	if res.Err != nil {
		t.Fatalf("run error: %v", res.Err)
	}
	if res.Output != "out-b" {
		t.Fatalf("output = %q, want out-b", res.Output)
	}

	calls := fp.Calls()
	if n := len(calls); n != 1 {
		t.Fatalf("only the un-completed phase b should call the model; got %d calls", n)
	}
	if system := calls[0].Request.System; system != "B saw saved-a" {
		t.Errorf("resumed phase b should see the completed phase a's saved output; system = %q", system)
	}
}