0dd2ced717
executus CI / test (pull_request) Successful in 48s
Real findings from the consensus review (37 raw; many devstral dups/noise): - Optional/budget-salvage branches no longer swallow a context cancellation / deadline / critic-kill: such errors return immediately so the run is classified cancelled/timeout/killed, not "ok" with a fallback. (the most serious finding — an Optional final phase could mask a killed run) - IsRunFunc bare phase now feeds the SHARED step observer (not just the audit recorder), so the critic's activity clock + Result.Steps see it — a long synthesize phase no longer looks idle to the critic. - phaseModel returns the resolver's enriched (usage-attribution) context and the phase's calls use it, mirroring the single-loop path (non-base-tier phases were mis-attributed). - salvagePhaseTranscript trims the tail on a rune boundary (was a raw byte slice that could split a UTF-8 rune); maxSalvage is now a named const with rationale. - expandPhaseTemplate logs a WARN on parse/execute failure instead of silently returning the unexpanded template; documented the phase-name identifier requirement + the "Query" shadow. - removed the dead phaseDeps.baseTier field. - extracted multimodalUserMessage, shared by runAgent + the phase runner (was duplicated image-folding). - aggregated phase usage is stamped onto the result even on a hard-error return; TrimSpace computed once; filterToolbox returns the base toolbox as-is for the empty-names (full-palette) case instead of copying; phaseModel WARN no longer prints error=<nil>. New test: Optional phase does not swallow a cancellation. Full ./... green. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
259 lines
9.1 KiB
Go
259 lines
9.1 KiB
Go
package run
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"strings"
|
|
"testing"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
|
"gitea.stevedudenhoeffer.com/steve/majordomo/provider/fake"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/executus/tool"
|
|
)
|
|
|
|
// phaseProvider builds a fake provider scripted with the given per-call steps
|
|
// (consumed in order across every phase's model call) and a resolver over it,
|
|
// returning both so a test can read back each call's request.
|
|
func phaseProvider(t *testing.T, steps ...fake.Step) (ModelResolver, *fake.Provider) {
|
|
t.Helper()
|
|
fp := fake.New("fake")
|
|
fp.Enqueue("test-model", steps...)
|
|
m, err := fp.Model("test-model")
|
|
if err != nil {
|
|
t.Fatalf("fake model: %v", err)
|
|
}
|
|
return func(ctx context.Context, _ string) (context.Context, llm.Model, error) {
|
|
return ctx, m, nil
|
|
}, fp
|
|
}
|
|
|
|
// TestPhases_SequentialThreadsOutputs: phases run in order, each phase's output
|
|
// is threaded into the next via {{.<PhaseName>}}, {{.Query}} reaches a phase, and
|
|
// the final phase's output is the run output.
|
|
func TestPhases_SequentialThreadsOutputs(t *testing.T) {
|
|
models, fp := phaseProvider(t,
|
|
fake.Reply("out-a"),
|
|
fake.Reply("out-b"),
|
|
fake.Reply("out-c"),
|
|
)
|
|
ex := New(Config{Registry: tool.NewRegistry(), Models: models})
|
|
|
|
ra := RunnableAgent{
|
|
Name: "pipeline",
|
|
ModelTier: "test-model",
|
|
Phases: []Phase{
|
|
{Name: "a", SystemPrompt: "Phase A instructions"},
|
|
{Name: "b", SystemPrompt: "B saw: {{.a}}"},
|
|
{Name: "c", SystemPrompt: "C saw: {{.b}} and query {{.Query}}"},
|
|
},
|
|
}
|
|
res := ex.Run(context.Background(), ra, tool.Invocation{RunID: "r", CallerID: "c"}, "QUERY-TEXT")
|
|
if res.Err != nil {
|
|
t.Fatalf("run error: %v", res.Err)
|
|
}
|
|
if res.Output != "out-c" {
|
|
t.Fatalf("final output = %q, want the LAST phase's output out-c", res.Output)
|
|
}
|
|
calls := fp.Calls()
|
|
if len(calls) != 3 {
|
|
t.Fatalf("want 3 model calls (one per phase), got %d", len(calls))
|
|
}
|
|
if got := calls[0].Request.System; got != "Phase A instructions" {
|
|
t.Errorf("phase a system = %q", got)
|
|
}
|
|
if got := calls[1].Request.System; got != "B saw: out-a" {
|
|
t.Errorf("phase b should see phase a's output threaded; system = %q", got)
|
|
}
|
|
if got := calls[2].Request.System; got != "C saw: out-b and query QUERY-TEXT" {
|
|
t.Errorf("phase c should see phase b's output + {{.Query}}; system = %q", got)
|
|
}
|
|
}
|
|
|
|
// TestPhases_OptionalFailureSubstitutesFallback: an Optional phase that errors
|
|
// does not abort the pipeline — its FallbackMessage becomes its output and is
|
|
// threaded into later phases, which still run.
|
|
func TestPhases_OptionalFailureSubstitutesFallback(t *testing.T) {
|
|
models, fp := phaseProvider(t,
|
|
fake.Fail(errors.New("provider exploded")), // phase a fails
|
|
fake.Reply("out-b"), // phase b runs
|
|
)
|
|
ex := New(Config{Registry: tool.NewRegistry(), Models: models})
|
|
|
|
ra := RunnableAgent{
|
|
Name: "pipeline",
|
|
ModelTier: "test-model",
|
|
Phases: []Phase{
|
|
{Name: "a", SystemPrompt: "Phase A", Optional: true, FallbackMessage: "FALLBACK-A"},
|
|
{Name: "b", SystemPrompt: "B saw: {{.a}}"},
|
|
},
|
|
}
|
|
res := ex.Run(context.Background(), ra, tool.Invocation{RunID: "r", CallerID: "c"}, "Q")
|
|
if res.Err != nil {
|
|
t.Fatalf("optional-phase failure must not fail the run: %v", res.Err)
|
|
}
|
|
if res.Output != "out-b" {
|
|
t.Fatalf("final output = %q, want out-b", res.Output)
|
|
}
|
|
calls := fp.Calls()
|
|
if len(calls) != 2 {
|
|
t.Fatalf("want 2 calls (failed phase a + phase b), got %d", len(calls))
|
|
}
|
|
if got := calls[1].Request.System; got != "B saw: FALLBACK-A" {
|
|
t.Errorf("phase b should see the fallback threaded; system = %q", got)
|
|
}
|
|
}
|
|
|
|
// TestPhases_OptionalDoesNotSwallowCancellation: an Optional phase that fails
|
|
// with a context cancellation must NOT be swallowed into its FallbackMessage —
|
|
// the run genuinely ended (cancel/deadline/critic-kill) and must surface the
|
|
// error so the run is classified cancelled/timeout/killed, not "ok".
|
|
func TestPhases_OptionalDoesNotSwallowCancellation(t *testing.T) {
|
|
models, _ := phaseProvider(t, fake.Fail(context.Canceled))
|
|
ex := New(Config{Registry: tool.NewRegistry(), Models: models})
|
|
|
|
ra := RunnableAgent{
|
|
Name: "pipeline",
|
|
ModelTier: "test-model",
|
|
Phases: []Phase{
|
|
// IsRunFunc so the cancellation surfaces directly wrapped (%w).
|
|
{Name: "a", SystemPrompt: "Phase A", IsRunFunc: true, Optional: true, FallbackMessage: "FB"},
|
|
},
|
|
}
|
|
res := ex.Run(context.Background(), ra, tool.Invocation{RunID: "r", CallerID: "c"}, "Q")
|
|
if !errors.Is(res.Err, context.Canceled) {
|
|
t.Fatalf("Optional phase must NOT swallow a cancellation; res.Err = %v", res.Err)
|
|
}
|
|
if res.Output == "FB" {
|
|
t.Error("a cancelled run must not report the fallback message as output")
|
|
}
|
|
}
|
|
|
|
// TestPhases_HardErrorAborts: a NON-optional phase that hits a hard error (not a
|
|
// budget/step exhaustion) aborts the pipeline; later phases do not run.
|
|
func TestPhases_HardErrorAborts(t *testing.T) {
|
|
boom := errors.New("model down")
|
|
models, fp := phaseProvider(t,
|
|
fake.Fail(boom), // phase a (non-optional) fails hard
|
|
fake.Reply("out-b"), // must NOT be consumed
|
|
)
|
|
ex := New(Config{Registry: tool.NewRegistry(), Models: models})
|
|
|
|
ra := RunnableAgent{
|
|
Name: "pipeline",
|
|
ModelTier: "test-model",
|
|
Phases: []Phase{
|
|
{Name: "a", SystemPrompt: "Phase A"},
|
|
{Name: "b", SystemPrompt: "Phase B"},
|
|
},
|
|
}
|
|
res := ex.Run(context.Background(), ra, tool.Invocation{RunID: "r", CallerID: "c"}, "Q")
|
|
if res.Err == nil {
|
|
t.Fatal("a hard non-optional phase error must fail the run")
|
|
}
|
|
if !errors.Is(res.Err, boom) {
|
|
t.Errorf("run error %v should wrap the phase's model error", res.Err)
|
|
}
|
|
if n := len(fp.Calls()); n != 1 {
|
|
t.Errorf("pipeline must abort after phase a; got %d calls (phase b should not run)", n)
|
|
}
|
|
}
|
|
|
|
// TestPhases_IsRunFuncBareCall: an IsRunFunc phase produces output via a bare LLM
|
|
// call and that output threads into a following loop phase.
|
|
func TestPhases_IsRunFuncBareCall(t *testing.T) {
|
|
models, fp := phaseProvider(t,
|
|
fake.Reply("plan-output"), // IsRunFunc phase a
|
|
fake.Reply("final"), // loop phase b
|
|
)
|
|
ex := New(Config{Registry: tool.NewRegistry(), Models: models})
|
|
|
|
ra := RunnableAgent{
|
|
Name: "pipeline",
|
|
ModelTier: "test-model",
|
|
Phases: []Phase{
|
|
{Name: "plan", SystemPrompt: "Make a plan for {{.Query}}", IsRunFunc: true},
|
|
{Name: "exec", SystemPrompt: "Execute: {{.plan}}"},
|
|
},
|
|
}
|
|
res := ex.Run(context.Background(), ra, tool.Invocation{RunID: "r", CallerID: "c"}, "do-thing")
|
|
if res.Err != nil {
|
|
t.Fatalf("run error: %v", res.Err)
|
|
}
|
|
if res.Output != "final" {
|
|
t.Fatalf("output = %q, want final", res.Output)
|
|
}
|
|
calls := fp.Calls()
|
|
if len(calls) != 2 {
|
|
t.Fatalf("want 2 calls, got %d", len(calls))
|
|
}
|
|
if got := calls[0].Request.System; got != "Make a plan for do-thing" {
|
|
t.Errorf("IsRunFunc phase system = %q", got)
|
|
}
|
|
if got := calls[1].Request.System; got != "Execute: plan-output" {
|
|
t.Errorf("exec phase should see the plan output threaded; system = %q", got)
|
|
}
|
|
}
|
|
|
|
// TestPhases_SystemHeaderAppliedPerPhase: the platform SystemHeader is prepended
|
|
// to every phase's prompt (each phase keeps it).
|
|
func TestPhases_SystemHeaderAppliedPerPhase(t *testing.T) {
|
|
models, fp := phaseProvider(t, fake.Reply("a"), fake.Reply("b"))
|
|
ex := New(Config{Registry: tool.NewRegistry(), Models: models, SystemHeader: "PLATFORM"})
|
|
|
|
ra := RunnableAgent{
|
|
Name: "p",
|
|
ModelTier: "test-model",
|
|
Phases: []Phase{{Name: "one", SystemPrompt: "P1"}, {Name: "two", SystemPrompt: "P2"}},
|
|
}
|
|
if res := ex.Run(context.Background(), ra, tool.Invocation{RunID: "r"}, "Q"); res.Err != nil {
|
|
t.Fatalf("run error: %v", res.Err)
|
|
}
|
|
for i, want := range []string{"PLATFORM\n\nP1", "PLATFORM\n\nP2"} {
|
|
if got := fp.Calls()[i].Request.System; got != want {
|
|
t.Errorf("phase %d system = %q, want %q", i, got, want)
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestFilterToolbox: a named subset restricts the toolbox (preserving order);
|
|
// empty names = the full palette; unknown names are skipped.
|
|
func TestFilterToolbox(t *testing.T) {
|
|
box := llm.NewToolbox("base")
|
|
noop := func(context.Context, json.RawMessage) (any, error) { return "", nil }
|
|
for _, name := range []string{"alpha", "beta", "gamma"} {
|
|
if err := box.Add(llm.Tool{Name: name, Description: "d", Handler: noop}); err != nil {
|
|
t.Fatalf("add %s: %v", name, err)
|
|
}
|
|
}
|
|
|
|
full := filterToolbox(box, nil)
|
|
if len(full.Tools()) != 3 {
|
|
t.Errorf("nil names = full palette; got %d tools", len(full.Tools()))
|
|
}
|
|
|
|
sub := filterToolbox(box, []string{"gamma", "alpha", "nonexistent"})
|
|
names := make([]string, 0)
|
|
for _, tl := range sub.Tools() {
|
|
names = append(names, tl.Name)
|
|
}
|
|
if strings.Join(names, ",") != "gamma,alpha" {
|
|
t.Errorf("subset (order-preserving, unknown skipped) = %v, want [gamma alpha]", names)
|
|
}
|
|
}
|
|
|
|
// TestExpandPhaseTemplate: {{.Query}} + prior outputs substitute; a parse error
|
|
// returns the template unchanged (best-effort).
|
|
func TestExpandPhaseTemplate(t *testing.T) {
|
|
got := expandPhaseTemplate("q={{.Query}} a={{.a}}", "QQ", map[string]string{"a": "AA"})
|
|
if got != "q=QQ a=AA" {
|
|
t.Errorf("expand = %q", got)
|
|
}
|
|
// Malformed template → returned unchanged.
|
|
bad := "{{.Unclosed"
|
|
if expandPhaseTemplate(bad, "QQ", nil) != bad {
|
|
t.Errorf("malformed template should pass through unchanged")
|
|
}
|
|
}
|