30b79a330f
The kernel carried RunnableAgent.Phases as a DTO but never executed it —
Run always ran a single agent loop with ra.SystemPrompt, so a phased agent
(mort's deepresearch/research) silently ran one loop with the base prompt
instead of its pipeline. This implements the phase loop, ported from mort's
agentexec pipeline but reusing the kernel's own machinery.
- run/phases.go: runPhases / runOnePhase. Phases run sequentially; each is a
fresh agent loop (or a bare LLM call for IsRunFunc phases) with its own
template-expanded system prompt ({{.Query}} + {{.<PhaseName>}}), model
tier, step cap, and tool subset. Outputs thread into later phases; the
final phase's output is the run output. Optional phases swallow errors and
substitute FallbackMessage; a non-optional phase that merely exhausts its
step/tool budget salvages its partial transcript and continues (a hard
error still aborts); per-phase tier-resolve failures fall back with a WARN.
- run/agent.go: Phase gains IsRunFunc + FallbackMessage (the kernel Phase
struct previously omitted them).
- run/executor.go: Run factors the shared agent options (tool-error limits,
step observer, compactor) and branches — single loop (critic's dynamic
step ceiling) vs the phase runner (fixed per-phase caps; the run-level
critic's steer + hard deadline still apply across phases). systemPrompt
now delegates to systemPromptWithBody so each phase keeps the platform
header. The same step observer feeds audit/steps/critic across all phases.
Tests (run/phases_test.go): sequential output threading + template
expansion, Optional-failure → FallbackMessage continues, hard-error abort,
IsRunFunc bare call, per-phase SystemHeader, filterToolbox subset, template
expansion. Full ./... suite green.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
234 lines
8.0 KiB
Go
234 lines
8.0 KiB
Go
package run
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"strings"
|
|
"testing"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
|
"gitea.stevedudenhoeffer.com/steve/majordomo/provider/fake"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/executus/tool"
|
|
)
|
|
|
|
// phaseProvider builds a fake provider scripted with the given per-call steps
|
|
// (consumed in order across every phase's model call) and a resolver over it,
|
|
// returning both so a test can read back each call's request.
|
|
func phaseProvider(t *testing.T, steps ...fake.Step) (ModelResolver, *fake.Provider) {
|
|
t.Helper()
|
|
fp := fake.New("fake")
|
|
fp.Enqueue("test-model", steps...)
|
|
m, err := fp.Model("test-model")
|
|
if err != nil {
|
|
t.Fatalf("fake model: %v", err)
|
|
}
|
|
return func(ctx context.Context, _ string) (context.Context, llm.Model, error) {
|
|
return ctx, m, nil
|
|
}, fp
|
|
}
|
|
|
|
// TestPhases_SequentialThreadsOutputs: phases run in order, each phase's output
|
|
// is threaded into the next via {{.<PhaseName>}}, {{.Query}} reaches a phase, and
|
|
// the final phase's output is the run output.
|
|
func TestPhases_SequentialThreadsOutputs(t *testing.T) {
|
|
models, fp := phaseProvider(t,
|
|
fake.Reply("out-a"),
|
|
fake.Reply("out-b"),
|
|
fake.Reply("out-c"),
|
|
)
|
|
ex := New(Config{Registry: tool.NewRegistry(), Models: models})
|
|
|
|
ra := RunnableAgent{
|
|
Name: "pipeline",
|
|
ModelTier: "test-model",
|
|
Phases: []Phase{
|
|
{Name: "a", SystemPrompt: "Phase A instructions"},
|
|
{Name: "b", SystemPrompt: "B saw: {{.a}}"},
|
|
{Name: "c", SystemPrompt: "C saw: {{.b}} and query {{.Query}}"},
|
|
},
|
|
}
|
|
res := ex.Run(context.Background(), ra, tool.Invocation{RunID: "r", CallerID: "c"}, "QUERY-TEXT")
|
|
if res.Err != nil {
|
|
t.Fatalf("run error: %v", res.Err)
|
|
}
|
|
if res.Output != "out-c" {
|
|
t.Fatalf("final output = %q, want the LAST phase's output out-c", res.Output)
|
|
}
|
|
calls := fp.Calls()
|
|
if len(calls) != 3 {
|
|
t.Fatalf("want 3 model calls (one per phase), got %d", len(calls))
|
|
}
|
|
if got := calls[0].Request.System; got != "Phase A instructions" {
|
|
t.Errorf("phase a system = %q", got)
|
|
}
|
|
if got := calls[1].Request.System; got != "B saw: out-a" {
|
|
t.Errorf("phase b should see phase a's output threaded; system = %q", got)
|
|
}
|
|
if got := calls[2].Request.System; got != "C saw: out-b and query QUERY-TEXT" {
|
|
t.Errorf("phase c should see phase b's output + {{.Query}}; system = %q", got)
|
|
}
|
|
}
|
|
|
|
// TestPhases_OptionalFailureSubstitutesFallback: an Optional phase that errors
|
|
// does not abort the pipeline — its FallbackMessage becomes its output and is
|
|
// threaded into later phases, which still run.
|
|
func TestPhases_OptionalFailureSubstitutesFallback(t *testing.T) {
|
|
models, fp := phaseProvider(t,
|
|
fake.Fail(errors.New("provider exploded")), // phase a fails
|
|
fake.Reply("out-b"), // phase b runs
|
|
)
|
|
ex := New(Config{Registry: tool.NewRegistry(), Models: models})
|
|
|
|
ra := RunnableAgent{
|
|
Name: "pipeline",
|
|
ModelTier: "test-model",
|
|
Phases: []Phase{
|
|
{Name: "a", SystemPrompt: "Phase A", Optional: true, FallbackMessage: "FALLBACK-A"},
|
|
{Name: "b", SystemPrompt: "B saw: {{.a}}"},
|
|
},
|
|
}
|
|
res := ex.Run(context.Background(), ra, tool.Invocation{RunID: "r", CallerID: "c"}, "Q")
|
|
if res.Err != nil {
|
|
t.Fatalf("optional-phase failure must not fail the run: %v", res.Err)
|
|
}
|
|
if res.Output != "out-b" {
|
|
t.Fatalf("final output = %q, want out-b", res.Output)
|
|
}
|
|
calls := fp.Calls()
|
|
if len(calls) != 2 {
|
|
t.Fatalf("want 2 calls (failed phase a + phase b), got %d", len(calls))
|
|
}
|
|
if got := calls[1].Request.System; got != "B saw: FALLBACK-A" {
|
|
t.Errorf("phase b should see the fallback threaded; system = %q", got)
|
|
}
|
|
}
|
|
|
|
// TestPhases_HardErrorAborts: a NON-optional phase that hits a hard error (not a
|
|
// budget/step exhaustion) aborts the pipeline; later phases do not run.
|
|
func TestPhases_HardErrorAborts(t *testing.T) {
|
|
boom := errors.New("model down")
|
|
models, fp := phaseProvider(t,
|
|
fake.Fail(boom), // phase a (non-optional) fails hard
|
|
fake.Reply("out-b"), // must NOT be consumed
|
|
)
|
|
ex := New(Config{Registry: tool.NewRegistry(), Models: models})
|
|
|
|
ra := RunnableAgent{
|
|
Name: "pipeline",
|
|
ModelTier: "test-model",
|
|
Phases: []Phase{
|
|
{Name: "a", SystemPrompt: "Phase A"},
|
|
{Name: "b", SystemPrompt: "Phase B"},
|
|
},
|
|
}
|
|
res := ex.Run(context.Background(), ra, tool.Invocation{RunID: "r", CallerID: "c"}, "Q")
|
|
if res.Err == nil {
|
|
t.Fatal("a hard non-optional phase error must fail the run")
|
|
}
|
|
if !errors.Is(res.Err, boom) {
|
|
t.Errorf("run error %v should wrap the phase's model error", res.Err)
|
|
}
|
|
if n := len(fp.Calls()); n != 1 {
|
|
t.Errorf("pipeline must abort after phase a; got %d calls (phase b should not run)", n)
|
|
}
|
|
}
|
|
|
|
// TestPhases_IsRunFuncBareCall: an IsRunFunc phase produces output via a bare LLM
|
|
// call and that output threads into a following loop phase.
|
|
func TestPhases_IsRunFuncBareCall(t *testing.T) {
|
|
models, fp := phaseProvider(t,
|
|
fake.Reply("plan-output"), // IsRunFunc phase a
|
|
fake.Reply("final"), // loop phase b
|
|
)
|
|
ex := New(Config{Registry: tool.NewRegistry(), Models: models})
|
|
|
|
ra := RunnableAgent{
|
|
Name: "pipeline",
|
|
ModelTier: "test-model",
|
|
Phases: []Phase{
|
|
{Name: "plan", SystemPrompt: "Make a plan for {{.Query}}", IsRunFunc: true},
|
|
{Name: "exec", SystemPrompt: "Execute: {{.plan}}"},
|
|
},
|
|
}
|
|
res := ex.Run(context.Background(), ra, tool.Invocation{RunID: "r", CallerID: "c"}, "do-thing")
|
|
if res.Err != nil {
|
|
t.Fatalf("run error: %v", res.Err)
|
|
}
|
|
if res.Output != "final" {
|
|
t.Fatalf("output = %q, want final", res.Output)
|
|
}
|
|
calls := fp.Calls()
|
|
if len(calls) != 2 {
|
|
t.Fatalf("want 2 calls, got %d", len(calls))
|
|
}
|
|
if got := calls[0].Request.System; got != "Make a plan for do-thing" {
|
|
t.Errorf("IsRunFunc phase system = %q", got)
|
|
}
|
|
if got := calls[1].Request.System; got != "Execute: plan-output" {
|
|
t.Errorf("exec phase should see the plan output threaded; system = %q", got)
|
|
}
|
|
}
|
|
|
|
// TestPhases_SystemHeaderAppliedPerPhase: the platform SystemHeader is prepended
|
|
// to every phase's prompt (each phase keeps it).
|
|
func TestPhases_SystemHeaderAppliedPerPhase(t *testing.T) {
|
|
models, fp := phaseProvider(t, fake.Reply("a"), fake.Reply("b"))
|
|
ex := New(Config{Registry: tool.NewRegistry(), Models: models, SystemHeader: "PLATFORM"})
|
|
|
|
ra := RunnableAgent{
|
|
Name: "p",
|
|
ModelTier: "test-model",
|
|
Phases: []Phase{{Name: "one", SystemPrompt: "P1"}, {Name: "two", SystemPrompt: "P2"}},
|
|
}
|
|
if res := ex.Run(context.Background(), ra, tool.Invocation{RunID: "r"}, "Q"); res.Err != nil {
|
|
t.Fatalf("run error: %v", res.Err)
|
|
}
|
|
for i, want := range []string{"PLATFORM\n\nP1", "PLATFORM\n\nP2"} {
|
|
if got := fp.Calls()[i].Request.System; got != want {
|
|
t.Errorf("phase %d system = %q, want %q", i, got, want)
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestFilterToolbox: a named subset restricts the toolbox (preserving order);
|
|
// empty names = the full palette; unknown names are skipped.
|
|
func TestFilterToolbox(t *testing.T) {
|
|
box := llm.NewToolbox("base")
|
|
noop := func(context.Context, json.RawMessage) (any, error) { return "", nil }
|
|
for _, name := range []string{"alpha", "beta", "gamma"} {
|
|
if err := box.Add(llm.Tool{Name: name, Description: "d", Handler: noop}); err != nil {
|
|
t.Fatalf("add %s: %v", name, err)
|
|
}
|
|
}
|
|
|
|
full := filterToolbox(box, nil)
|
|
if len(full.Tools()) != 3 {
|
|
t.Errorf("nil names = full palette; got %d tools", len(full.Tools()))
|
|
}
|
|
|
|
sub := filterToolbox(box, []string{"gamma", "alpha", "nonexistent"})
|
|
names := make([]string, 0)
|
|
for _, tl := range sub.Tools() {
|
|
names = append(names, tl.Name)
|
|
}
|
|
if strings.Join(names, ",") != "gamma,alpha" {
|
|
t.Errorf("subset (order-preserving, unknown skipped) = %v, want [gamma alpha]", names)
|
|
}
|
|
}
|
|
|
|
// TestExpandPhaseTemplate: {{.Query}} + prior outputs substitute; a parse error
|
|
// returns the template unchanged (best-effort).
|
|
func TestExpandPhaseTemplate(t *testing.T) {
|
|
got := expandPhaseTemplate("q={{.Query}} a={{.a}}", "QQ", map[string]string{"a": "AA"})
|
|
if got != "q=QQ a=AA" {
|
|
t.Errorf("expand = %q", got)
|
|
}
|
|
// Malformed template → returned unchanged.
|
|
bad := "{{.Unclosed"
|
|
if expandPhaseTemplate(bad, "QQ", nil) != bad {
|
|
t.Errorf("malformed template should pass through unchanged")
|
|
}
|
|
}
|