From 7dab4112ffcd6fb3f044cc77b268f268ce0a3408 Mon Sep 17 00:00:00 2001
From: Steve Dudenhoeffer <steve@stevedudenhoeffer.com>
Date: Wed, 10 Jun 2026 13:10:18 +0200
Subject: [PATCH] feat: agent run loop, Generate[T], reflect-derived schemas

Phase 5:
- agent/: model + system prompt + toolboxes composition; bounded
  tool-dispatch loop (default 10 steps); panic-proof tool execution;
  unknown-tool and duplicate-name handling; history continuation; step
  observers; partial results on ErrMaxSteps/errors (ADR-0012)
- llm.SchemaFor[T]: strict-compatible JSON schemas from Go types
  (nullable pointers, description/enum tags, recursion rejected)
- majordomo.Generate[T]: typed structured output with fence-stripping
  decode and model-naming errors
- README agents/structured-output sections + matrix synced

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 README.md                   |  47 ++++-
 agent/agent.go              | 294 +++++++++++++++++++++++++++++++
 agent/agent_test.go         | 339 ++++++++++++++++++++++++++++++++++++
 docs/adr/0012-agent-loop.md |  53 ++++++
 docs/adr/README.md          |   1 +
 generate.go                 |  71 ++++++++
 generate_test.go            |  89 ++++++++++
 llm/jsonschema.go           | 166 ++++++++++++++++++
 llm/jsonschema_test.go      | 140 +++++++++++++++
 progress.md                 |  18 ++
 10 files changed, 1211 insertions(+), 7 deletions(-)
 create mode 100644 agent/agent.go
 create mode 100644 agent/agent_test.go
 create mode 100644 docs/adr/0012-agent-loop.md
 create mode 100644 generate.go
 create mode 100644 generate_test.go
 create mode 100644 llm/jsonschema.go
 create mode 100644 llm/jsonschema_test.go

diff --git a/README.md b/README.md
index fa776d8..ce5da73 100644
--- a/README.md
+++ b/README.md
@@ -200,13 +200,46 @@ resp, _ := m.Generate(ctx, req, majordomo.WithSchema(schemaJSON, "answer"))
 ```
 
 Maps to OpenAI `response_format: json_schema`, Anthropic
-`output_config.format`, and Ollama `format`. A generic `Generate[T]` helper
-(schema from your struct, unmarshal into it) lands with the agent phase.
+`output_config.format`, Ollama `format`, and Google `responseJsonSchema`.
 
-## Agents & skills *(pending — Phases 5–6)*
+The typed helper derives the schema from your struct (all fields required,
+`additionalProperties:false`, pointers nullable; `description:"..."` and
+`enum:"a,b,c"` tags supported) and unmarshals the result:
 
-Agents = model + system prompt + toolboxes, running a tool-dispatch loop;
-skills = reusable instruction+tool bundles attachable to any agent.
+```go
+type Verdict struct {
+    Guilty bool   `json:"guilty"`
+    Why    string `json:"why" description:"one-sentence rationale"`
+}
+v, err := majordomo.Generate[Verdict](ctx, m, req)
+```
+
+## Agents
+
+An agent is a model + system prompt + toolboxes, run as a tool-dispatch
+loop until the model answers (or `MaxSteps`):
+
+```go
+import "gitea.stevedudenhoeffer.com/steve/majordomo/agent"
+
+a := agent.New(m, "You are a research assistant.",
+    agent.WithToolbox(searchTools),
+    agent.WithMaxSteps(8),
+    agent.WithStepObserver(func(s agent.Step) { log.Printf("step %d", s.Index) }),
+)
+res, err := a.Run(ctx, "What changed in Go 1.26?")
+// res.Output, res.Steps, res.Usage; res.Messages round-trips via
+// agent.WithHistory for conversation continuation.
+```
+
+The loop never panics: tool handler errors and panics become error results
+the model can react to; unknown tools likewise; duplicate tool names across
+toolboxes fail loudly. On `agent.ErrMaxSteps` (and on model errors) the
+partial result with the full transcript is still returned.
+
+## Skills *(pending — Phase 6)*
+
+Skills = reusable instruction+tool bundles attachable to any agent.
 
 ## Feature/provider support matrix
 
@@ -229,8 +262,8 @@ Notes: Ollama has no native tool_choice — `"none"` drops the tools;
 
 Cross-cutting: Parse grammar ✅ · aliases/tiers ✅ · failover chains ✅ ·
 health tracking/backoff ✅ · LLM_* env DSNs ✅ · media pipeline ✅
-(per-target normalization in chains) · agent loop pending · skills pending
-· `Generate[T]` pending.
+(per-target normalization in chains) · agent loop ✅ · `Generate[T]` +
+schema derivation ✅ · skills pending.
 
 ## Development
 
diff --git a/agent/agent.go b/agent/agent.go
new file mode 100644
index 0000000..c2806ec
--- /dev/null
+++ b/agent/agent.go
@@ -0,0 +1,294 @@
+// Package agent runs LLM-backed agents: a Model, a system prompt, and one
+// or more toolboxes, executed as a tool-dispatch loop until the model
+// produces a final answer (or MaxSteps intervenes).
+//
+// The loop never panics: tool handlers run through the panic-recovering
+// executor in llm, unknown tools come back as error results the model can
+// react to, and step observers receive every intermediate step. Skills
+// (package skill) attach additively: their instructions extend the system
+// prompt and their tools extend the merged toolset.
+package agent
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"strings"
+
+	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
+)
+
+// DefaultMaxSteps bounds the tool-dispatch loop when WithMaxSteps is not
+// given.
+const DefaultMaxSteps = 10
+
+// ErrMaxSteps reports that the loop hit its step budget before the model
+// produced a final answer. Run returns it alongside a non-nil *Result
+// carrying the transcript so far.
+var ErrMaxSteps = errors.New("agent: max steps reached without a final answer")
+
+// Skill is the contract skills satisfy (defined here so agent does not
+// depend on the skill package; package skill provides implementations).
+// Instructions are appended to the agent's system prompt; Tools (optional,
+// may be nil) extend the agent's toolset.
+type Skill interface {
+	Name() string
+	Instructions() string
+	Tools() *llm.Toolbox
+}
+
+// Step is one completed iteration of the loop: the model's response and,
+// when it requested tools, the results that were fed back.
+type Step struct {
+	// Index is the 0-based step number.
+	Index int
+	// Response is the model output for this step.
+	Response *llm.Response
+	// Results are the executed tool outcomes (empty on the final step).
+	Results []llm.ToolResult
+}
+
+// Result is the outcome of a Run.
+type Result struct {
+	// Output is the final assistant text.
+	Output string
+	// Messages is the full transcript: prior history, the input, and every
+	// assistant/tool turn. Feed it back via WithHistory to continue the
+	// conversation.
+	Messages []llm.Message
+	// Steps records each loop iteration.
+	Steps []Step
+	// Usage is the token total across all steps.
+	Usage llm.Usage
+}
+
+// Agent is a reusable model + system prompt + toolboxes (+ skills)
+// composition. Configure at construction; AddSkill/AddToolbox may extend
+// it later. Agents are safe to share across goroutines only after
+// configuration is complete.
+type Agent struct {
+	model     llm.Model
+	system    string
+	toolboxes []*llm.Toolbox
+	skills    []Skill
+	maxSteps  int
+	reqOpts   []llm.Option
+	observers []func(Step)
+}
+
+// Option configures an Agent at construction.
+type Option func(*Agent)
+
+// WithToolbox attaches a toolbox.
+func WithToolbox(b *llm.Toolbox) Option {
+	return func(a *Agent) { a.toolboxes = append(a.toolboxes, b) }
+}
+
+// WithTools attaches loose tools (wrapped in an anonymous toolbox).
+func WithTools(tools ...llm.Tool) Option {
+	return func(a *Agent) { a.toolboxes = append(a.toolboxes, llm.NewToolbox("", tools...)) }
+}
+
+// WithSkill attaches a skill at construction (see also AddSkill).
+func WithSkill(s Skill) Option {
+	return func(a *Agent) { a.skills = append(a.skills, s) }
+}
+
+// WithMaxSteps bounds the tool-dispatch loop.
+func WithMaxSteps(n int) Option {
+	return func(a *Agent) { a.maxSteps = n }
+}
+
+// WithRequestOptions sets default request options (temperature, max
+// tokens, ...) applied to every step of every run.
+func WithRequestOptions(opts ...llm.Option) Option {
+	return func(a *Agent) { a.reqOpts = append(a.reqOpts, opts...) }
+}
+
+// WithStepObserver registers a callback invoked after every completed
+// step (intermediate-step streaming for UIs, tracing, usage recording).
+// Observers run synchronously in Run's goroutine.
+func WithStepObserver(fn func(Step)) Option {
+	return func(a *Agent) { a.observers = append(a.observers, fn) }
+}
+
+// New creates an agent from a model and system prompt.
+func New(model llm.Model, system string, opts ...Option) *Agent {
+	a := &Agent{model: model, system: system, maxSteps: DefaultMaxSteps}
+	for _, opt := range opts {
+		opt(a)
+	}
+	return a
+}
+
+// AddSkill attaches a skill to the agent on demand.
+func (a *Agent) AddSkill(s Skill) { a.skills = append(a.skills, s) }
+
+// AddToolbox attaches a toolbox to the agent on demand.
+func (a *Agent) AddToolbox(b *llm.Toolbox) { a.toolboxes = append(a.toolboxes, b) }
+
+// RunOption configures one Run.
+type RunOption func(*runConfig)
+
+type runConfig struct {
+	history []llm.Message
+	reqOpts []llm.Option
+	onStep  []func(Step)
+}
+
+// WithHistory seeds the run with prior conversation messages (e.g. a
+// previous Result.Messages).
+func WithHistory(msgs []llm.Message) RunOption {
+	return func(rc *runConfig) { rc.history = msgs }
+}
+
+// WithRunRequestOptions adds request options for this run only.
+func WithRunRequestOptions(opts ...llm.Option) RunOption {
+	return func(rc *runConfig) { rc.reqOpts = append(rc.reqOpts, opts...) }
+}
+
+// OnStep registers a per-run step callback (in addition to agent-level
+// observers).
+func OnStep(fn func(Step)) RunOption {
+	return func(rc *runConfig) { rc.onStep = append(rc.onStep, fn) }
+}
+
+// systemPrompt composes the agent's system prompt with each skill's
+// instructions, in attachment order.
+func (a *Agent) systemPrompt() string {
+	parts := make([]string, 0, 1+len(a.skills))
+	if a.system != "" {
+		parts = append(parts, a.system)
+	}
+	for _, s := range a.skills {
+		if ins := strings.TrimSpace(s.Instructions()); ins != "" {
+			parts = append(parts, ins)
+		}
+	}
+	return strings.Join(parts, "\n\n")
+}
+
+// mergedTools flattens toolboxes plus skill toolboxes into one toolset.
+// Duplicate tool names are a configuration error and fail loudly — a
+// silently shadowed tool is far harder to debug than this error.
+func (a *Agent) mergedTools() (map[string]llm.Tool, []llm.Tool, error) {
+	byName := make(map[string]llm.Tool)
+	var ordered []llm.Tool
+
+	add := func(origin string, tools []llm.Tool) error {
+		for _, t := range tools {
+			if _, exists := byName[t.Name]; exists {
+				return fmt.Errorf("agent: duplicate tool %q (from %s)", t.Name, origin)
+			}
+			byName[t.Name] = t
+			ordered = append(ordered, t)
+		}
+		return nil
+	}
+
+	for _, b := range a.toolboxes {
+		if err := add("toolbox "+b.Name(), b.Tools()); err != nil {
+			return nil, nil, err
+		}
+	}
+	for _, s := range a.skills {
+		if b := s.Tools(); b != nil {
+			if err := add("skill "+s.Name(), b.Tools()); err != nil {
+				return nil, nil, err
+			}
+		}
+	}
+	return byName, ordered, nil
+}
+
+// Run executes the loop: send the conversation; while the model requests
+// tools, execute them and feed results back; stop on a final answer,
+// MaxSteps, or an unrecoverable model error.
+func (a *Agent) Run(ctx context.Context, input string, opts ...RunOption) (*Result, error) {
+	var rc runConfig
+	for _, opt := range opts {
+		opt(&rc)
+	}
+
+	byName, ordered, err := a.mergedTools()
+	if err != nil {
+		return nil, err
+	}
+
+	msgs := append([]llm.Message(nil), rc.history...)
+	if input != "" {
+		msgs = append(msgs, llm.UserText(input))
+	}
+	if len(msgs) == 0 {
+		return nil, errors.New("agent: empty input and no history")
+	}
+
+	result := &Result{}
+	reqOpts := append(append([]llm.Option(nil), a.reqOpts...), rc.reqOpts...)
+	system := a.systemPrompt()
+
+	for stepIdx := range a.maxSteps {
+		req := llm.Request{System: system, Messages: msgs, Tools: ordered}
+		resp, err := a.model.Generate(ctx, req, reqOpts...)
+		if err != nil {
+			result.Messages = msgs
+			return result, fmt.Errorf("agent: step %d: %w", stepIdx, err)
+		}
+
+		msgs = append(msgs, resp.Message())
+		result.Usage.Add(resp.Usage)
+		step := Step{Index: stepIdx, Response: resp}
+
+		if len(resp.ToolCalls) == 0 {
+			// Final answer.
+			result.Output = resp.Text()
+			result.Steps = append(result.Steps, step)
+			result.Messages = msgs
+			a.notify(rc, step)
+			return result, nil
+		}
+
+		results := make([]llm.ToolResult, 0, len(resp.ToolCalls))
+		for _, call := range resp.ToolCalls {
+			if err := ctx.Err(); err != nil {
+				result.Messages = msgs
+				return result, err
+			}
+			tool, ok := byName[call.Name]
+			if !ok {
+				results = append(results, llm.ToolResult{
+					ID: call.ID, Name: call.Name,
+					Content: fmt.Sprintf("unknown tool %q", call.Name),
+					IsError: true,
+				})
+				continue
+			}
+			// ExecuteTool recovers panics and converts errors to IsError
+			// results — the loop always continues.
+			results = append(results, llm.ExecuteTool(ctx, tool, call))
+		}
+
+		step.Results = results
+		result.Steps = append(result.Steps, step)
+		a.notify(rc, step)
+		msgs = append(msgs, llm.ToolResultsMessage(results...))
+	}
+
+	result.Messages = msgs
+	return result, fmt.Errorf("%w (max %d)", ErrMaxSteps, a.maxSteps)
+}
+
+// notify fans a step out to agent observers and run callbacks; observer
+// panics are swallowed (the loop must never die for a UI callback).
+func (a *Agent) notify(rc runConfig, step Step) {
+	emit := func(fn func(Step)) {
+		defer func() { _ = recover() }()
+		fn(step)
+	}
+	for _, fn := range a.observers {
+		emit(fn)
+	}
+	for _, fn := range rc.onStep {
+		emit(fn)
+	}
+}
diff --git a/agent/agent_test.go b/agent/agent_test.go
new file mode 100644
index 0000000..6f57eee
--- /dev/null
+++ b/agent/agent_test.go
@@ -0,0 +1,339 @@
+package agent
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"strings"
+	"testing"
+
+	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
+	"gitea.stevedudenhoeffer.com/steve/majordomo/provider/fake"
+)
+
+func newModel(t *testing.T, fp *fake.Provider) llm.Model {
+	t.Helper()
+	m, err := fp.Model("test-model")
+	if err != nil {
+		t.Fatalf("Model: %v", err)
+	}
+	return m
+}
+
+// toolCallReply scripts an assistant response requesting one tool call.
+func toolCallReply(id, name, args string) fake.Step {
+	return fake.ReplyWith(llm.Response{
+		ToolCalls:    []llm.ToolCall{{ID: id, Name: name, Arguments: json.RawMessage(args)}},
+		FinishReason: llm.FinishToolCalls,
+		Usage:        llm.Usage{InputTokens: 10, OutputTokens: 5},
+	})
+}
+
+func adderToolbox(t *testing.T) *llm.Toolbox {
+	t.Helper()
+	return llm.NewToolbox("math", llm.Tool{
+		Name:        "add",
+		Description: "Add two integers",
+		Parameters:  json.RawMessage(`{"type":"object","properties":{"a":{"type":"integer"},"b":{"type":"integer"}},"required":["a","b"]}`),
+		Handler: func(_ context.Context, args json.RawMessage) (any, error) {
+			var p struct{ A, B int }
+			if err := json.Unmarshal(args, &p); err != nil {
+				return nil, err
+			}
+			return map[string]int{"sum": p.A + p.B}, nil
+		},
+	})
+}
+
+func TestRunWithoutTools(t *testing.T) {
+	fp := fake.New("fp")
+	fp.Enqueue("test-model", fake.Reply("direct answer"))
+
+	a := New(newModel(t, fp), "You are terse.")
+	res, err := a.Run(context.Background(), "question?")
+	if err != nil {
+		t.Fatalf("Run: %v", err)
+	}
+	if res.Output != "direct answer" {
+		t.Errorf("output = %q", res.Output)
+	}
+	if len(res.Steps) != 1 {
+		t.Errorf("steps = %d", len(res.Steps))
+	}
+	// The system prompt reached the model.
+	calls := fp.Calls()
+	if calls[0].Request.System != "You are terse." {
+		t.Errorf("system = %q", calls[0].Request.System)
+	}
+}
+
+func TestRunToolLoop(t *testing.T) {
+	fp := fake.New("fp")
+	fp.Enqueue("test-model",
+		toolCallReply("c1", "add", `{"a":2,"b":3}`),
+		fake.Reply("the sum is 5"),
+	)
+
+	a := New(newModel(t, fp), "do math", WithToolbox(adderToolbox(t)))
+	res, err := a.Run(context.Background(), "2+3?")
+	if err != nil {
+		t.Fatalf("Run: %v", err)
+	}
+	if res.Output != "the sum is 5" {
+		t.Errorf("output = %q", res.Output)
+	}
+	if len(res.Steps) != 2 {
+		t.Fatalf("steps = %d, want 2", len(res.Steps))
+	}
+	if res.Usage.InputTokens != 11 || res.Usage.OutputTokens != 6 {
+		t.Errorf("usage = %+v (must sum both steps)", res.Usage)
+	}
+
+	// The tool executed and its result went back to the model.
+	step1 := res.Steps[0]
+	if len(step1.Results) != 1 || step1.Results[0].IsError {
+		t.Fatalf("step 1 results = %+v", step1.Results)
+	}
+	if !strings.Contains(step1.Results[0].Content, `"sum":5`) {
+		t.Errorf("tool result = %q", step1.Results[0].Content)
+	}
+
+	// Second model call must carry the tool transcript: user, assistant
+	// (with the call), tool results.
+	second := fp.Calls()[1].Request
+	if len(second.Messages) != 3 {
+		t.Fatalf("second request messages = %d, want 3", len(second.Messages))
+	}
+	if second.Messages[1].Role != llm.RoleAssistant || len(second.Messages[1].ToolCalls) != 1 {
+		t.Errorf("assistant turn = %+v", second.Messages[1])
+	}
+	toolMsg := second.Messages[2]
+	if toolMsg.Role != llm.RoleTool || toolMsg.ToolResults[0].ID != "c1" {
+		t.Errorf("tool turn = %+v", toolMsg)
+	}
+
+	// The tools were offered on every step.
+	for i, c := range fp.Calls() {
+		if len(c.Request.Tools) != 1 || c.Request.Tools[0].Name != "add" {
+			t.Errorf("call %d tools = %+v", i, c.Request.Tools)
+		}
+	}
+
+	// Result.Messages is the full transcript.
+	if len(res.Messages) != 4 {
+		t.Errorf("transcript = %d messages, want 4", len(res.Messages))
+	}
+}
+
+func TestRunUnknownToolContinues(t *testing.T) {
+	fp := fake.New("fp")
+	fp.Enqueue("test-model",
+		toolCallReply("c1", "nonexistent", `{}`),
+		fake.Reply("recovered"),
+	)
+
+	a := New(newModel(t, fp), "", WithToolbox(adderToolbox(t)))
+	res, err := a.Run(context.Background(), "go")
+	if err != nil {
+		t.Fatalf("Run: %v", err)
+	}
+	if res.Output != "recovered" {
+		t.Errorf("output = %q", res.Output)
+	}
+	r := res.Steps[0].Results[0]
+	if !r.IsError || !strings.Contains(r.Content, "nonexistent") {
+		t.Errorf("unknown-tool result = %+v", r)
+	}
+}
+
+func TestRunPanickingToolContinues(t *testing.T) {
+	fp := fake.New("fp")
+	fp.Enqueue("test-model",
+		toolCallReply("c1", "bomb", `{}`),
+		fake.Reply("survived"),
+	)
+	bomb := llm.NewToolbox("danger", llm.Tool{
+		Name:    "bomb",
+		Handler: func(context.Context, json.RawMessage) (any, error) { panic("boom") },
+	})
+
+	a := New(newModel(t, fp), "", WithToolbox(bomb))
+	res, err := a.Run(context.Background(), "go")
+	if err != nil {
+		t.Fatalf("Run: %v", err)
+	}
+	if res.Output != "survived" {
+		t.Errorf("output = %q", res.Output)
+	}
+	r := res.Steps[0].Results[0]
+	if !r.IsError || !strings.Contains(r.Content, "boom") {
+		t.Errorf("panic result = %+v", r)
+	}
+}
+
+func TestRunMaxSteps(t *testing.T) {
+	fp := fake.New("fp", fake.WithDefault(func(string, llm.Request) fake.Step {
+		return toolCallReply("c", "add", `{"a":1,"b":1}`)
+	}))
+
+	a := New(newModel(t, fp), "", WithToolbox(adderToolbox(t)), WithMaxSteps(3))
+	res, err := a.Run(context.Background(), "loop forever")
+	if !errors.Is(err, ErrMaxSteps) {
+		t.Fatalf("err = %v, want ErrMaxSteps", err)
+	}
+	if res == nil || len(res.Steps) != 3 {
+		t.Fatalf("result = %+v, want 3 recorded steps", res)
+	}
+	if len(res.Messages) == 0 {
+		t.Error("transcript must be preserved on ErrMaxSteps")
+	}
+}
+
+func TestDuplicateToolNamesFailLoudly(t *testing.T) {
+	fp := fake.New("fp")
+	box1 := llm.NewToolbox("a", llm.Tool{Name: "dup"})
+	box2 := llm.NewToolbox("b", llm.Tool{Name: "dup"})
+
+	a := New(newModel(t, fp), "", WithToolbox(box1), WithToolbox(box2))
+	_, err := a.Run(context.Background(), "go")
+	if err == nil || !strings.Contains(err.Error(), `duplicate tool "dup"`) {
+		t.Errorf("err = %v, want duplicate-tool error", err)
+	}
+}
+
+func TestRunWithHistory(t *testing.T) {
+	fp := fake.New("fp")
+	fp.Enqueue("test-model", fake.Reply("continued"))
+
+	history := []llm.Message{
+		llm.UserText("first question"),
+		llm.AssistantText("first answer"),
+	}
+	a := New(newModel(t, fp), "")
+	res, err := a.Run(context.Background(), "follow-up", WithHistory(history))
+	if err != nil {
+		t.Fatalf("Run: %v", err)
+	}
+	got := fp.Calls()[0].Request.Messages
+	if len(got) != 3 || got[0].Text() != "first question" || got[2].Text() != "follow-up" {
+		t.Errorf("messages = %+v", got)
+	}
+	if len(res.Messages) != 4 {
+		t.Errorf("transcript = %d, want history+input+answer", len(res.Messages))
+	}
+}
+
+func TestStepObservers(t *testing.T) {
+	fp := fake.New("fp")
+	fp.Enqueue("test-model",
+		toolCallReply("c1", "add", `{"a":1,"b":2}`),
+		fake.Reply("3"),
+	)
+
+	var agentSteps, runSteps []int
+	a := New(newModel(t, fp), "",
+		WithToolbox(adderToolbox(t)),
+		WithStepObserver(func(s Step) { agentSteps = append(agentSteps, s.Index) }),
+	)
+	_, err := a.Run(context.Background(), "1+2?",
+		OnStep(func(s Step) { runSteps = append(runSteps, s.Index) }),
+	)
+	if err != nil {
+		t.Fatalf("Run: %v", err)
+	}
+	if fmt.Sprint(agentSteps) != "[0 1]" || fmt.Sprint(runSteps) != "[0 1]" {
+		t.Errorf("agentSteps=%v runSteps=%v", agentSteps, runSteps)
+	}
+}
+
+func TestObserverPanicIsSwallowed(t *testing.T) {
+	fp := fake.New("fp")
+	fp.Enqueue("test-model", fake.Reply("fine"))
+
+	a := New(newModel(t, fp), "", WithStepObserver(func(Step) { panic("ui bug") }))
+	res, err := a.Run(context.Background(), "go")
+	if err != nil || res.Output != "fine" {
+		t.Errorf("res=%+v err=%v — observer panic must not kill the run", res, err)
+	}
+}
+
+func TestSkillComposition(t *testing.T) {
+	fp := fake.New("fp")
+	fp.Enqueue("test-model", fake.Reply("ok"))
+
+	sk := stubSkill{
+		name:         "haiku",
+		instructions: "Answer in haiku.",
+		tools: llm.NewToolbox("haiku-tools", llm.Tool{
+			Name:    "count_syllables",
+			Handler: func(context.Context, json.RawMessage) (any, error) { return 5, nil },
+		}),
+	}
+	a := New(newModel(t, fp), "Base prompt.", WithSkill(sk))
+	if _, err := a.Run(context.Background(), "hello"); err != nil {
+		t.Fatalf("Run: %v", err)
+	}
+
+	req := fp.Calls()[0].Request
+	if req.System != "Base prompt.\n\nAnswer in haiku." {
+		t.Errorf("system = %q", req.System)
+	}
+	if len(req.Tools) != 1 || req.Tools[0].Name != "count_syllables" {
+		t.Errorf("tools = %+v", req.Tools)
+	}
+}
+
+func TestAddSkillOnDemand(t *testing.T) {
+	fp := fake.New("fp")
+	fp.Enqueue("test-model", fake.Reply("a"), fake.Reply("b"))
+
+	a := New(newModel(t, fp), "Base.")
+	if _, err := a.Run(context.Background(), "one"); err != nil {
+		t.Fatalf("Run: %v", err)
+	}
+	a.AddSkill(stubSkill{name: "later", instructions: "Later skill."})
+	if _, err := a.Run(context.Background(), "two"); err != nil {
+		t.Fatalf("Run: %v", err)
+	}
+
+	calls := fp.Calls()
+	if calls[0].Request.System != "Base." {
+		t.Errorf("first system = %q", calls[0].Request.System)
+	}
+	if calls[1].Request.System != "Base.\n\nLater skill." {
+		t.Errorf("second system = %q", calls[1].Request.System)
+	}
+}
+
+func TestRunErrorPreservesTranscript(t *testing.T) {
+	fp := fake.New("fp")
+	fp.Enqueue("test-model", fake.Fail(errors.New("model down")))
+
+	a := New(newModel(t, fp), "")
+	res, err := a.Run(context.Background(), "go")
+	if err == nil || !strings.Contains(err.Error(), "model down") {
+		t.Fatalf("err = %v", err)
+	}
+	if res == nil || len(res.Messages) != 1 {
+		t.Errorf("result = %+v, want transcript with the input", res)
+	}
+}
+
+func TestEmptyInputNeedsHistory(t *testing.T) {
+	fp := fake.New("fp")
+	a := New(newModel(t, fp), "")
+	if _, err := a.Run(context.Background(), ""); err == nil {
+		t.Error("empty input with no history must error")
+	}
+}
+
+type stubSkill struct {
+	name         string
+	instructions string
+	tools        *llm.Toolbox
+}
+
+func (s stubSkill) Name() string         { return s.name }
+func (s stubSkill) Instructions() string { return s.instructions }
+func (s stubSkill) Tools() *llm.Toolbox  { return s.tools }
diff --git a/docs/adr/0012-agent-loop.md b/docs/adr/0012-agent-loop.md
new file mode 100644
index 0000000..456126d
--- /dev/null
+++ b/docs/adr/0012-agent-loop.md
@@ -0,0 +1,53 @@
+# ADR-0012: Agent run loop
+
+**Status:** Accepted — 2026-06-10
+
+## Context
+
+Agents are the consumer-facing composition: Model + system prompt +
+toolboxes (+ skills), run as a tool-dispatch loop. mort's agents need
+multi-step tool use, bounded loops, per-step observation (tracing/usage
+recording), conversation continuation, and a loop that survives every tool
+failure.
+
+## Decision
+
+- `agent.New(model, system, opts...)` with options WithToolbox, WithTools,
+  WithSkill, WithMaxSteps (default 10), WithRequestOptions (per-step
+  generation knobs), WithStepObserver. `AddSkill`/`AddToolbox` extend an
+  agent after construction (configure-then-share concurrency contract).
+- `Run(ctx, input, opts...)`: history seeds via WithHistory (a previous
+  `Result.Messages` round-trips), per-run request options, per-run OnStep
+  callbacks. The loop: Generate with the merged toolset → no tool calls =
+  final answer → otherwise execute calls sequentially, append the
+  assistant turn + one RoleTool message, repeat.
+- **Never panics, never stalls on tool failure:** handlers run through
+  `llm.ExecuteTool` (panic-recovering, errors → IsError results); unknown
+  tool names come back as IsError results the model can react to; observer
+  panics are swallowed. Only model errors (already retried/failed-over by
+  the chain) and ctx cancellation abort the run — and even then the
+  partial `Result` (transcript, steps, usage) is returned alongside the
+  error, as it is on ErrMaxSteps.
+- **Duplicate tool names across toolboxes/skills fail loudly at Run** —
+  a silently shadowed tool is a debugging trap.
+- Skills compose additively (the `agent.Skill` interface lives in this
+  package so agent does not import skill): `Instructions()` append to the
+  system prompt in attachment order; `Tools()` join the merged toolset.
+- **Intermediate-step streaming = step observers** (agent-level and
+  per-run), invoked synchronously after every step with the response and
+  tool results. Token-level streaming inside the loop was deliberately
+  deferred: tool-heavy steps end on buffered tool calls anyway, and mort's
+  observers (trace/usage recorders) want completed steps. `Model.Stream`
+  remains available for direct streaming outside the loop.
+- `Generate[T]` + `llm.SchemaFor[T]` give typed structured output:
+  reflect-derived schemas (strict-compatible: all properties required,
+  additionalProperties:false, pointers → nullable anyOf), markdown-fence
+  stripping as a decode fallback, errors that name the serving model.
+
+## Consequences
+
+- An agent over a failover chain inherits retry/backoff/media
+  normalization transparently — no agent-level error handling for
+  transient provider trouble.
+- Sequential tool execution is deterministic and trace-friendly; parallel
+  dispatch can be added later behind an option without API change.
diff --git a/docs/adr/README.md b/docs/adr/README.md
index 1320a97..6c8eaa6 100644
--- a/docs/adr/README.md
+++ b/docs/adr/README.md
@@ -15,3 +15,4 @@ One decision per file, append-only; supersede rather than rewrite.
 | [0009](0009-multimodal-strategy.md) | Multimodal strategy — normalize per target, enforce at provider | Accepted |
 | [0010](0010-tools-structured-output-mapping.md) | Tools and structured output — canonical shape, native mappings | Accepted |
 | [0011](0011-google-provider.md) | Google provider on the official Gen AI SDK | Accepted |
+| [0012](0012-agent-loop.md) | Agent run loop | Accepted |
diff --git a/generate.go b/generate.go
new file mode 100644
index 0000000..b3f39fd
--- /dev/null
+++ b/generate.go
@@ -0,0 +1,71 @@
+package majordomo
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"reflect"
+	"strings"
+
+	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
+)
+
+// SchemaFor re-exports llm.SchemaFor: a JSON Schema derived from a Go type,
+// suitable for WithSchema and tool parameters.
+func SchemaFor[T any]() (json.RawMessage, error) { return llm.SchemaFor[T]() }
+
+// Generate performs a structured-output request and unmarshals the result
+// into T: the schema is derived from T (llm.SchemaFor), injected via the
+// provider's native structured-output mechanism, and the response text is
+// decoded into a T value.
+//
+//	type Verdict struct {
+//	    Guilty bool   `json:"guilty"`
+//	    Why    string `json:"why" description:"one-sentence rationale"`
+//	}
+//	v, err := majordomo.Generate[Verdict](ctx, m, req)
+func Generate[T any](ctx context.Context, m Model, req Request, opts ...Option) (T, error) {
+	var zero T
+	schema, err := llm.SchemaFor[T]()
+	if err != nil {
+		return zero, err
+	}
+
+	name := "response"
+	if t := reflect.TypeFor[T](); t.Name() != "" {
+		name = strings.ToLower(t.Name())
+	}
+
+	resp, err := m.Generate(ctx, req, append(opts, llm.WithSchema(schema, name))...)
+	if err != nil {
+		return zero, err
+	}
+
+	text := strings.TrimSpace(resp.Text())
+	if text == "" {
+		return zero, fmt.Errorf("majordomo: structured response from %s is empty (finish: %s)", resp.Model, resp.FinishReason)
+	}
+	// Defensive: some models wrap JSON in a markdown fence despite the
+	// schema constraint.
+	text = stripFence(text)
+
+	var out T
+	if err := json.Unmarshal([]byte(text), &out); err != nil {
+		return zero, fmt.Errorf("majordomo: decode structured response from %s: %w (text: %.200s)", resp.Model, err, text)
+	}
+	return out, nil
+}
+
+// stripFence removes a surrounding markdown code fence, if present.
+func stripFence(s string) string {
+	if !strings.HasPrefix(s, "```") {
+		return s
+	}
+	s = strings.TrimPrefix(s, "```")
+	// Drop an optional language tag on the opening fence line.
+	if i := strings.IndexByte(s, '\n'); i >= 0 {
+		s = s[i+1:]
+	}
+	s = strings.TrimSuffix(strings.TrimSpace(s), "```")
+	return strings.TrimSpace(s)
+}
diff --git a/generate_test.go b/generate_test.go
new file mode 100644
index 0000000..2b123ac
--- /dev/null
+++ b/generate_test.go
@@ -0,0 +1,89 @@
+package majordomo
+
+import (
+	"context"
+	"encoding/json"
+	"strings"
+	"testing"
+
+	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
+	"gitea.stevedudenhoeffer.com/steve/majordomo/provider/fake"
+)
+
+type verdict struct {
+	Guilty bool   `json:"guilty"`
+	Why    string `json:"why"`
+}
+
+func TestGenerateTyped(t *testing.T) {
+	r := newTestRegistry(t)
+	fp := fake.New("fp")
+	r.RegisterProvider(fp)
+	fp.Enqueue("judge", fake.Reply(`{"guilty":true,"why":"caught red-handed"}`))
+
+	m, _ := r.Parse("fp/judge")
+	v, err := Generate[verdict](context.Background(), m, Request{Messages: []Message{UserText("verdict?")}})
+	if err != nil {
+		t.Fatalf("Generate: %v", err)
+	}
+	if !v.Guilty || v.Why != "caught red-handed" {
+		t.Errorf("verdict = %+v", v)
+	}
+
+	// The schema derived from the struct reached the provider, named after
+	// the type.
+	req := fp.Calls()[0].Request
+	if req.SchemaName != "verdict" {
+		t.Errorf("schema name = %q", req.SchemaName)
+	}
+	var schema map[string]any
+	if err := json.Unmarshal(req.Schema, &schema); err != nil {
+		t.Fatalf("schema: %v", err)
+	}
+	props := schema["properties"].(map[string]any)
+	if _, ok := props["guilty"]; !ok {
+		t.Errorf("schema = %v", schema)
+	}
+}
+
+func TestGenerateStripsMarkdownFence(t *testing.T) {
+	r := newTestRegistry(t)
+	fp := fake.New("fp")
+	r.RegisterProvider(fp)
+	fp.Enqueue("judge", fake.Reply("```json\n{\"guilty\":false,\"why\":\"alibi\"}\n```"))
+
+	m, _ := r.Parse("fp/judge")
+	v, err := Generate[verdict](context.Background(), m, Request{Messages: []Message{UserText("verdict?")}})
+	if err != nil {
+		t.Fatalf("Generate: %v", err)
+	}
+	if v.Guilty || v.Why != "alibi" {
+		t.Errorf("verdict = %+v", v)
+	}
+}
+
+func TestGenerateDecodeErrorNamesModel(t *testing.T) {
+	r := newTestRegistry(t)
+	fp := fake.New("fp")
+	r.RegisterProvider(fp)
+	fp.Enqueue("judge", fake.Reply("not json at all"))
+
+	m, _ := r.Parse("fp/judge")
+	_, err := Generate[verdict](context.Background(), m, Request{Messages: []Message{UserText("verdict?")}})
+	if err == nil || !strings.Contains(err.Error(), "fp/judge") {
+		t.Errorf("err = %v, want decode error naming the model", err)
+	}
+}
+
+func TestGenerateEmptyResponse(t *testing.T) {
+	r := newTestRegistry(t)
+	fp := fake.New("fp")
+	r.RegisterProvider(fp)
+	fp.Enqueue("judge", fake.ReplyWith(llm.Response{FinishReason: llm.FinishLength}))
+
+	m, _ := r.Parse("fp/judge")
+	_, err := Generate[verdict](context.Background(), m, Request{Messages: []Message{UserText("verdict?")}})
+	if err == nil || !strings.Contains(err.Error(), "empty") {
+		t.Errorf("err = %v, want empty-response error", err)
+	}
+}
diff --git a/llm/jsonschema.go b/llm/jsonschema.go
new file mode 100644
index 0000000..6ea2731
--- /dev/null
+++ b/llm/jsonschema.go
@@ -0,0 +1,166 @@
+package llm
+
+import (
+	"encoding/json"
+	"fmt"
+	"reflect"
+	"slices"
+	"strings"
+	"time"
+)
+
+// SchemaFor derives a JSON Schema from Go type T for structured output.
+//
+// The emitted schema is strict-mode-compatible across providers: every
+// object lists all its properties as required and sets
+// additionalProperties:false; optionality is expressed by pointer fields,
+// which become nullable (anyOf with null) and may simply be returned as
+// null by the model.
+//
+// Field tags: `json:"name"` / `json:"-"` control naming and omission;
+// `description:"..."` documents a field for the model; `enum:"a,b,c"`
+// constrains a string field to fixed values.
+//
+// Supported kinds: strings, bools, integer and float kinds, structs
+// (nested), slices/arrays, map[string]V, pointers (nullable), time.Time
+// (string, date-time), json.RawMessage and interface{} (any). Recursive
+// types are rejected — no provider's structured-output mode accepts them.
+func SchemaFor[T any]() (json.RawMessage, error) {
+	t := reflect.TypeFor[T]()
+	schema, err := schemaOf(t, "", "", nil)
+	if err != nil {
+		return nil, fmt.Errorf("llm: SchemaFor[%s]: %w", t, err)
+	}
+	out, err := json.Marshal(schema)
+	if err != nil {
+		return nil, fmt.Errorf("llm: SchemaFor[%s]: encode: %w", t, err)
+	}
+	return out, nil
+}
+
+var (
+	timeType = reflect.TypeFor[time.Time]()
+	rawType  = reflect.TypeFor[json.RawMessage]()
+)
+
+// schemaOf builds the schema map for one type. visiting tracks the struct
+// types on the current path for cycle detection.
+func schemaOf(t reflect.Type, description, enum string, visiting []reflect.Type) (map[string]any, error) {
+	s := make(map[string]any)
+	if description != "" {
+		s["description"] = description
+	}
+
+	switch {
+	case t == timeType:
+		s["type"] = "string"
+		s["format"] = "date-time"
+		return s, nil
+	case t == rawType:
+		// Any JSON value.
+		return s, nil
+	}
+
+	switch t.Kind() {
+	case reflect.Pointer:
+		inner, err := schemaOf(t.Elem(), "", enum, visiting)
+		if err != nil {
+			return nil, err
+		}
+		s["anyOf"] = []any{inner, map[string]any{"type": "null"}}
+		return s, nil
+
+	case reflect.String:
+		s["type"] = "string"
+		if enum != "" {
+			var vals []any
+			for v := range strings.SplitSeq(enum, ",") {
+				vals = append(vals, strings.TrimSpace(v))
+			}
+			s["enum"] = vals
+		}
+		return s, nil
+
+	case reflect.Bool:
+		s["type"] = "boolean"
+		return s, nil
+
+	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
+		reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
+		s["type"] = "integer"
+		return s, nil
+
+	case reflect.Float32, reflect.Float64:
+		s["type"] = "number"
+		return s, nil
+
+	case reflect.Slice, reflect.Array:
+		if t.Elem().Kind() == reflect.Uint8 {
+			// []byte: base64 text on the wire.
+			s["type"] = "string"
+			return s, nil
+		}
+		items, err := schemaOf(t.Elem(), "", "", visiting)
+		if err != nil {
+			return nil, err
+		}
+		s["type"] = "array"
+		s["items"] = items
+		return s, nil
+
+	case reflect.Map:
+		if t.Key().Kind() != reflect.String {
+			return nil, fmt.Errorf("map key type %s unsupported (only string keys)", t.Key())
+		}
+		values, err := schemaOf(t.Elem(), "", "", visiting)
+		if err != nil {
+			return nil, err
+		}
+		s["type"] = "object"
+		s["additionalProperties"] = values
+		return s, nil
+
+	case reflect.Interface:
+		// Any JSON value.
+		return s, nil
+
+	case reflect.Struct:
+		if slices.Contains(visiting, t) {
+			return nil, fmt.Errorf("recursive type %s (structured-output schemas cannot recurse)", t)
+		}
+		visiting = append(visiting, t)
+
+		properties := make(map[string]any)
+		required := []string{}
+		for i := range t.NumField() {
+			f := t.Field(i)
+			if !f.IsExported() {
+				continue
+			}
+			name := f.Name
+			if tag, ok := f.Tag.Lookup("json"); ok {
+				base, _, _ := strings.Cut(tag, ",")
+				if base == "-" {
+					continue
+				}
+				if base != "" {
+					name = base
+				}
+			}
+			fs, err := schemaOf(f.Type, f.Tag.Get("description"), f.Tag.Get("enum"), visiting)
+			if err != nil {
+				return nil, fmt.Errorf("field %s: %w", f.Name, err)
+			}
+			properties[name] = fs
+			required = append(required, name)
+		}
+		s["type"] = "object"
+		s["properties"] = properties
+		s["required"] = required
+		s["additionalProperties"] = false
+		return s, nil
+
+	default:
+		return nil, fmt.Errorf("kind %s unsupported in structured-output schemas", t.Kind())
+	}
+}
diff --git a/llm/jsonschema_test.go b/llm/jsonschema_test.go
new file mode 100644
index 0000000..ba8d448
--- /dev/null
+++ b/llm/jsonschema_test.go
@@ -0,0 +1,140 @@
+package llm
+
+import (
+	"encoding/json"
+	"strings"
+	"testing"
+	"time"
+)
+
+func mustSchema[T any](t *testing.T) map[string]any {
+	t.Helper()
+	raw, err := SchemaFor[T]()
+	if err != nil {
+		t.Fatalf("SchemaFor: %v", err)
+	}
+	var m map[string]any
+	if err := json.Unmarshal(raw, &m); err != nil {
+		t.Fatalf("schema is not valid JSON: %v", err)
+	}
+	return m
+}
+
+func TestSchemaForBasicStruct(t *testing.T) {
+	type Person struct {
+		Name    string  `json:"name" description:"full name"`
+		Age     int     `json:"age"`
+		Score   float64 `json:"score"`
+		Active  bool    `json:"active"`
+		Ignored string  `json:"-"`
+		hidden  string  //nolint:unused
+	}
+	s := mustSchema[Person](t)
+	if s["type"] != "object" || s["additionalProperties"] != false {
+		t.Errorf("root = %v", s)
+	}
+	props := s["properties"].(map[string]any)
+	if len(props) != 4 {
+		t.Errorf("properties = %v", props)
+	}
+	if props["name"].(map[string]any)["description"] != "full name" {
+		t.Errorf("name = %v", props["name"])
+	}
+	if props["age"].(map[string]any)["type"] != "integer" ||
+		props["score"].(map[string]any)["type"] != "number" ||
+		props["active"].(map[string]any)["type"] != "boolean" {
+		t.Errorf("props = %v", props)
+	}
+	req := s["required"].([]any)
+	if len(req) != 4 {
+		t.Errorf("required = %v (all fields must be required for strict mode)", req)
+	}
+}
+
+func TestSchemaForNestedAndCollections(t *testing.T) {
+	type Inner struct {
+		V string `json:"v"`
+	}
+	type Outer struct {
+		Items  []Inner           `json:"items"`
+		Labels map[string]string `json:"labels"`
+		Blob   []byte            `json:"blob"`
+		When   time.Time         `json:"when"`
+		Extra  json.RawMessage   `json:"extra"`
+	}
+	s := mustSchema[Outer](t)
+	props := s["properties"].(map[string]any)
+
+	items := props["items"].(map[string]any)
+	if items["type"] != "array" {
+		t.Errorf("items = %v", items)
+	}
+	inner := items["items"].(map[string]any)
+	if inner["type"] != "object" || inner["additionalProperties"] != false {
+		t.Errorf("inner = %v", inner)
+	}
+	labels := props["labels"].(map[string]any)
+	if labels["type"] != "object" || labels["additionalProperties"].(map[string]any)["type"] != "string" {
+		t.Errorf("labels = %v", labels)
+	}
+	if props["blob"].(map[string]any)["type"] != "string" {
+		t.Errorf("blob = %v", props["blob"])
+	}
+	when := props["when"].(map[string]any)
+	if when["type"] != "string" || when["format"] != "date-time" {
+		t.Errorf("when = %v", when)
+	}
+	if len(props["extra"].(map[string]any)) != 0 {
+		t.Errorf("extra (RawMessage) should be the any-schema, got %v", props["extra"])
+	}
+}
+
+func TestSchemaForPointerIsNullable(t *testing.T) {
+	type Opt struct {
+		Note *string `json:"note"`
+	}
+	s := mustSchema[Opt](t)
+	note := s["properties"].(map[string]any)["note"].(map[string]any)
+	anyOf := note["anyOf"].([]any)
+	if len(anyOf) != 2 {
+		t.Fatalf("anyOf = %v", anyOf)
+	}
+	if anyOf[0].(map[string]any)["type"] != "string" || anyOf[1].(map[string]any)["type"] != "null" {
+		t.Errorf("anyOf = %v", anyOf)
+	}
+	// Still required (strict-mode style: optional == nullable).
+	if req := s["required"].([]any); len(req) != 1 {
+		t.Errorf("required = %v", req)
+	}
+}
+
+func TestSchemaForEnum(t *testing.T) {
+	type Choice struct {
+		Mood string `json:"mood" enum:"happy,sad, ambivalent"`
+	}
+	s := mustSchema[Choice](t)
+	mood := s["properties"].(map[string]any)["mood"].(map[string]any)
+	enum := mood["enum"].([]any)
+	if len(enum) != 3 || enum[2] != "ambivalent" {
+		t.Errorf("enum = %v (values must be trimmed)", enum)
+	}
+}
+
+func TestSchemaForRecursionRejected(t *testing.T) {
+	type Node struct {
+		Children []*Node `json:"children"`
+	}
+	_, err := SchemaFor[Node]()
+	if err == nil || !strings.Contains(err.Error(), "recursive") {
+		t.Errorf("err = %v, want recursion rejection", err)
+	}
+}
+
+func TestSchemaForUnsupportedKind(t *testing.T) {
+	type Bad struct {
+		Ch chan int `json:"ch"`
+	}
+	if _, err := SchemaFor[Bad](); err == nil {
+		t.Error("chan field must be rejected")
+	}
+}
diff --git a/progress.md b/progress.md
index 6ddaec1..cbdbd2d 100644
--- a/progress.md
+++ b/progress.md
@@ -1,5 +1,23 @@
 # progress
 
+## 2026-06-10 — Phase 5: agent loop, Generate[T], schema derivation
+
+**Landed:** `agent/` (ADR-0012): New(model, system, opts) with toolboxes,
+max steps (default 10), per-step request options, agent-level observers +
+per-run OnStep, WithHistory continuation (Result.Messages round-trips),
+sequential tool dispatch through panic-recovering ExecuteTool, unknown
+tools → IsError results, duplicate tool names fail loudly, partial Result
+preserved on ErrMaxSteps/model errors/cancellation. The agent.Skill
+interface ships here (instructions + tools composition is tested with a
+stub); the skill package with real implementations is Phase 6.
+`llm.SchemaFor[T]` reflect-derived strict-compatible JSON schemas
+(pointers→nullable anyOf, description/enum tags, maps/slices/time/RawMessage,
+recursion rejected) and root `majordomo.Generate[T]` (schema injection,
+fence-stripping decode, model-naming errors). 15 agent tests + schema +
+Generate suites, all hermetic.
+
+**Next:** Phase 6 — skill package + two example skills.
+
 ## 2026-06-10 — Phase 4: Google provider (official genai SDK)
 
 **Landed:** `provider/google` on google.golang.org/genai v1.59.0 (ADR-0011):