package run_test

import (
	"context"
	"sync"
	"testing"
	"time"

	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
	"gitea.stevedudenhoeffer.com/steve/majordomo/provider/fake"

	"gitea.stevedudenhoeffer.com/steve/executus/run"
	"gitea.stevedudenhoeffer.com/steve/executus/tool"
)

// slowToolInvocation builds an Invocation whose session factory adds a "slow"
// tool that sleeps for d (respecting ctx). The model script calls it once, then
// answers — so the run's wall-clock is dominated by d, letting a test set a tiny
// MaxRuntime and observe whether MaxRuntime hard-cancels the run.
//
// runID is stored verbatim in the returned Invocation's RunID. The tool returns
// "ok" once d has elapsed, or ctx.Err() if its context is done first.
func slowToolInvocation(runID string, d time.Duration) tool.Invocation {
	slow := llm.DefineTool("slow", "sleeps for a while", func(ctx context.Context, _ struct{}) (any, error) {
		select {
		case <-time.After(d):
			return "ok", nil
		case <-ctx.Done():
			// Cancellation wins over the sleep: surface the context's error.
			return nil, ctx.Err()
		}
	})

	return tool.Invocation{
		RunID: runID,
		SessionToolFactory: func(_ tool.AgentSession) tool.SessionTools {
			return tool.SessionTools{Tools: []llm.Tool{slow}}
		},
	}
}

// slowModel returns the fake provider's model "m", scripted with two replies:
// first a single tool call to "slow" (ID "c1", empty-object arguments), then the
// plain text "done".
//
// It panics if the fake provider cannot supply the model. Silently returning an
// unusable model would make a run fail for an unrelated reason, and a test that
// only asserts "the run errored" would then pass for the wrong cause.
func slowModel() llm.Model {
	fp := fake.New("fake")
	fp.Enqueue("m",
		fake.ReplyWith(llm.Response{ToolCalls: []llm.ToolCall{{ID: "c1", Name: "slow", Arguments: []byte(`{}`)}}}),
		fake.Reply("done"),
	)

	m, err := fp.Model("m")
	if err != nil {
		panic(`slowModel: fake provider model "m": ` + err.Error())
	}
	return m
}

// TestNoCritic_MaxRuntimeIsHardCap: the legacy contract is preserved — without a
// critic, MaxRuntime is a literal WithTimeout that kills a run whose work outlasts
// it. The slow tool (200ms) outlasts MaxRuntime (20ms), so runCtx cancels mid-tool
// and the run ends in error (timeout).
func TestNoCritic_MaxRuntimeIsHardCap(t *testing.T) { m := slowModel() ex := run.New(run.Config{ Registry: tool.NewRegistry(), Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) { return ctx, m, nil }, }) res := ex.Run(context.Background(), run.RunnableAgent{Name: "x", ModelTier: "m", MaxIterations: 5, MaxRuntime: 20 * time.Millisecond}, slowToolInvocation("r", 200*time.Millisecond), "go") if res.Err == nil { t.Fatalf("non-critic run should hard-timeout at MaxRuntime; got output=%q err=nil", res.Output) } } // TestCriticOwnsDeadline_SurvivesPastMaxRuntime: the fix — when the critic owns the // deadline (Ports.Critic set + Critic.Enabled), MaxRuntime becomes the SOFT trigger // and is NOT a hard cap. The fake critic exposes no hard deadline (Deadline()==zero, // no kill), so the only hard ceiling is CriticAbsoluteMax (10s here). The slow tool // (200ms) outlasts the tiny MaxRuntime (20ms) but the run completes — proving the // old agentexec two-tier semantics are restored. func TestCriticOwnsDeadline_SurvivesPastMaxRuntime(t *testing.T) { m := slowModel() h := &fakeCriticHandle{} // Deadline()==zero → no hard deadline, no kill ex := run.New(run.Config{ Registry: tool.NewRegistry(), Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) { return ctx, m, nil }, Ports: run.Ports{Critic: &fakeCritic{h: h}}, Defaults: run.Defaults{CriticAbsoluteMax: 10 * time.Second}, }) res := ex.Run(context.Background(), run.RunnableAgent{Name: "watched", ModelTier: "m", MaxIterations: 5, MaxRuntime: 20 * time.Millisecond, Critic: run.CriticConfig{Enabled: true}}, slowToolInvocation("r", 200*time.Millisecond), "go") if res.Err != nil { t.Fatalf("critic-owned run must survive past MaxRuntime (soft trigger); got err=%v", res.Err) } if res.Output != "done" { t.Errorf("output = %q, want %q", res.Output, "done") } } // capturingCritic records the soft trigger the executor passes to Monitor. 
type capturingCritic struct { mu sync.Mutex soft time.Duration h run.CriticHandle } func (c *capturingCritic) Monitor(_ context.Context, _ run.RunInfo, soft time.Duration) run.CriticHandle { c.mu.Lock() c.soft = soft c.mu.Unlock() return c.h } // TestCriticSoftTriggerIsMaxRuntime: the soft trigger handed to the host critic is // the run's resolved MaxRuntime (mort's two-tier model — the critic first wakes once // the run exceeds its nominal budget), NOT the global Defaults.CriticSoftTimeout. func TestCriticSoftTriggerIsMaxRuntime(t *testing.T) { fp := fake.New("fake") fp.Enqueue("m", fake.Reply("done")) m, _ := fp.Model("m") cc := &capturingCritic{h: &fakeCriticHandle{}} ex := run.New(run.Config{ Registry: tool.NewRegistry(), Models: func(ctx context.Context, _ string) (context.Context, llm.Model, error) { return ctx, m, nil }, Ports: run.Ports{Critic: cc}, Defaults: run.Defaults{CriticSoftTimeout: 90 * time.Second}, // distinct from MaxRuntime below }) const wantSoft = 7 * time.Minute ex.Run(context.Background(), run.RunnableAgent{Name: "x", ModelTier: "m", MaxRuntime: wantSoft, Critic: run.CriticConfig{Enabled: true}}, tool.Invocation{RunID: "r"}, "go") cc.mu.Lock() got := cc.soft cc.mu.Unlock() if got != wantSoft { t.Errorf("soft trigger = %v, want the agent's MaxRuntime %v (not Defaults.CriticSoftTimeout)", got, wantSoft) } }