From 7dab4112ffcd6fb3f044cc77b268f268ce0a3408 Mon Sep 17 00:00:00 2001 From: Steve Dudenhoeffer Date: Wed, 10 Jun 2026 13:10:18 +0200 Subject: [PATCH] feat: agent run loop, Generate[T], reflect-derived schemas Phase 5: - agent/: model + system prompt + toolboxes composition; bounded tool-dispatch loop (default 10 steps); panic-proof tool execution; unknown-tool and duplicate-name handling; history continuation; step observers; partial results on ErrMaxSteps/errors (ADR-0012) - llm.SchemaFor[T]: strict-compatible JSON schemas from Go types (nullable pointers, description/enum tags, recursion rejected) - majordomo.Generate[T]: typed structured output with fence-stripping decode and model-naming errors - README agents/structured-output sections + matrix synced Co-Authored-By: Claude Fable 5 --- README.md | 47 ++++- agent/agent.go | 294 +++++++++++++++++++++++++++++++ agent/agent_test.go | 339 ++++++++++++++++++++++++++++++++++++ docs/adr/0012-agent-loop.md | 53 ++++++ docs/adr/README.md | 1 + generate.go | 71 ++++++++ generate_test.go | 89 ++++++++++ llm/jsonschema.go | 166 ++++++++++++++++++ llm/jsonschema_test.go | 140 +++++++++++++++ progress.md | 18 ++ 10 files changed, 1211 insertions(+), 7 deletions(-) create mode 100644 agent/agent.go create mode 100644 agent/agent_test.go create mode 100644 docs/adr/0012-agent-loop.md create mode 100644 generate.go create mode 100644 generate_test.go create mode 100644 llm/jsonschema.go create mode 100644 llm/jsonschema_test.go diff --git a/README.md b/README.md index fa776d8..ce5da73 100644 --- a/README.md +++ b/README.md @@ -200,13 +200,46 @@ resp, _ := m.Generate(ctx, req, majordomo.WithSchema(schemaJSON, "answer")) ``` Maps to OpenAI `response_format: json_schema`, Anthropic -`output_config.format`, and Ollama `format`. A generic `Generate[T]` helper -(schema from your struct, unmarshal into it) lands with the agent phase. +`output_config.format`, Ollama `format`, and Google `responseJsonSchema`. -## Agents & skills *(pending — Phases 5–6)* +The typed helper derives the schema from your struct (all fields required, +`additionalProperties:false`, pointers nullable; `description:"..."` and +`enum:"a,b,c"` tags supported) and unmarshals the result: -Agents = model + system prompt + toolboxes, running a tool-dispatch loop; -skills = reusable instruction+tool bundles attachable to any agent. +```go +type Verdict struct { + Guilty bool `json:"guilty"` + Why string `json:"why" description:"one-sentence rationale"` +} +v, err := majordomo.Generate[Verdict](ctx, m, req) +``` + +## Agents + +An agent is a model + system prompt + toolboxes, run as a tool-dispatch +loop until the model answers (or `MaxSteps`): + +```go +import "gitea.stevedudenhoeffer.com/steve/majordomo/agent" + +a := agent.New(m, "You are a research assistant.", + agent.WithToolbox(searchTools), + agent.WithMaxSteps(8), + agent.WithStepObserver(func(s agent.Step) { log.Printf("step %d", s.Index) }), +) +res, err := a.Run(ctx, "What changed in Go 1.26?") +// res.Output, res.Steps, res.Usage; res.Messages round-trips via +// agent.WithHistory for conversation continuation. +``` + +The loop never panics: tool handler errors and panics become error results +the model can react to; unknown tools likewise; duplicate tool names across +toolboxes fail loudly. On `agent.ErrMaxSteps` (and on model errors) the +partial result with the full transcript is still returned. + +## Skills *(pending — Phase 6)* + +Skills = reusable instruction+tool bundles attachable to any agent. ## Feature/provider support matrix @@ -229,8 +262,8 @@ Notes: Ollama has no native tool_choice — `"none"` drops the tools; Cross-cutting: Parse grammar ✅ · aliases/tiers ✅ · failover chains ✅ · health tracking/backoff ✅ · LLM_* env DSNs ✅ · media pipeline ✅ -(per-target normalization in chains) · agent loop pending · skills pending -· `Generate[T]` pending. +(per-target normalization in chains) · agent loop ✅ · `Generate[T]` + +schema derivation ✅ · skills pending. ## Development diff --git a/agent/agent.go b/agent/agent.go new file mode 100644 index 0000000..c2806ec --- /dev/null +++ b/agent/agent.go @@ -0,0 +1,294 @@ +// Package agent runs LLM-backed agents: a Model, a system prompt, and one +// or more toolboxes, executed as a tool-dispatch loop until the model +// produces a final answer (or MaxSteps intervenes). +// +// The loop never panics: tool handlers run through the panic-recovering +// executor in llm, unknown tools come back as error results the model can +// react to, and step observers receive every intermediate step. Skills +// (package skill) attach additively: their instructions extend the system +// prompt and their tools extend the merged toolset. +package agent + +import ( + "context" + "errors" + "fmt" + "strings" + + "gitea.stevedudenhoeffer.com/steve/majordomo/llm" +) + +// DefaultMaxSteps bounds the tool-dispatch loop when WithMaxSteps is not +// given. +const DefaultMaxSteps = 10 + +// ErrMaxSteps reports that the loop hit its step budget before the model +// produced a final answer. Run returns it alongside a non-nil *Result +// carrying the transcript so far. +var ErrMaxSteps = errors.New("agent: max steps reached without a final answer") + +// Skill is the contract skills satisfy (defined here so agent does not +// depend on the skill package; package skill provides implementations). +// Instructions are appended to the agent's system prompt; Tools (optional, +// may be nil) extend the agent's toolset. +type Skill interface { + Name() string + Instructions() string + Tools() *llm.Toolbox +} + +// Step is one completed iteration of the loop: the model's response and, +// when it requested tools, the results that were fed back. +type Step struct { + // Index is the 0-based step number. + Index int + // Response is the model output for this step. + Response *llm.Response + // Results are the executed tool outcomes (empty on the final step). + Results []llm.ToolResult +} + +// Result is the outcome of a Run. +type Result struct { + // Output is the final assistant text. + Output string + // Messages is the full transcript: prior history, the input, and every + // assistant/tool turn. Feed it back via WithHistory to continue the + // conversation. + Messages []llm.Message + // Steps records each loop iteration. + Steps []Step + // Usage is the token total across all steps. + Usage llm.Usage +} + +// Agent is a reusable model + system prompt + toolboxes (+ skills) +// composition. Configure at construction; AddSkill/AddToolbox may extend +// it later. Agents are safe to share across goroutines only after +// configuration is complete. +type Agent struct { + model llm.Model + system string + toolboxes []*llm.Toolbox + skills []Skill + maxSteps int + reqOpts []llm.Option + observers []func(Step) +} + +// Option configures an Agent at construction. +type Option func(*Agent) + +// WithToolbox attaches a toolbox. +func WithToolbox(b *llm.Toolbox) Option { + return func(a *Agent) { a.toolboxes = append(a.toolboxes, b) } +} + +// WithTools attaches loose tools (wrapped in an anonymous toolbox). +func WithTools(tools ...llm.Tool) Option { + return func(a *Agent) { a.toolboxes = append(a.toolboxes, llm.NewToolbox("", tools...)) } +} + +// WithSkill attaches a skill at construction (see also AddSkill). +func WithSkill(s Skill) Option { + return func(a *Agent) { a.skills = append(a.skills, s) } +} + +// WithMaxSteps bounds the tool-dispatch loop. +func WithMaxSteps(n int) Option { + return func(a *Agent) { a.maxSteps = n } +} + +// WithRequestOptions sets default request options (temperature, max +// tokens, ...) applied to every step of every run. +func WithRequestOptions(opts ...llm.Option) Option { + return func(a *Agent) { a.reqOpts = append(a.reqOpts, opts...) } +} + +// WithStepObserver registers a callback invoked after every completed +// step (intermediate-step streaming for UIs, tracing, usage recording). +// Observers run synchronously in Run's goroutine. +func WithStepObserver(fn func(Step)) Option { + return func(a *Agent) { a.observers = append(a.observers, fn) } +} + +// New creates an agent from a model and system prompt. +func New(model llm.Model, system string, opts ...Option) *Agent { + a := &Agent{model: model, system: system, maxSteps: DefaultMaxSteps} + for _, opt := range opts { + opt(a) + } + return a +} + +// AddSkill attaches a skill to the agent on demand. +func (a *Agent) AddSkill(s Skill) { a.skills = append(a.skills, s) } + +// AddToolbox attaches a toolbox to the agent on demand. +func (a *Agent) AddToolbox(b *llm.Toolbox) { a.toolboxes = append(a.toolboxes, b) } + +// RunOption configures one Run. +type RunOption func(*runConfig) + +type runConfig struct { + history []llm.Message + reqOpts []llm.Option + onStep []func(Step) +} + +// WithHistory seeds the run with prior conversation messages (e.g. a +// previous Result.Messages). +func WithHistory(msgs []llm.Message) RunOption { + return func(rc *runConfig) { rc.history = msgs } +} + +// WithRunRequestOptions adds request options for this run only. +func WithRunRequestOptions(opts ...llm.Option) RunOption { + return func(rc *runConfig) { rc.reqOpts = append(rc.reqOpts, opts...) } +} + +// OnStep registers a per-run step callback (in addition to agent-level +// observers). +func OnStep(fn func(Step)) RunOption { + return func(rc *runConfig) { rc.onStep = append(rc.onStep, fn) } +} + +// systemPrompt composes the agent's system prompt with each skill's +// instructions, in attachment order. +func (a *Agent) systemPrompt() string { + parts := make([]string, 0, 1+len(a.skills)) + if a.system != "" { + parts = append(parts, a.system) + } + for _, s := range a.skills { + if ins := strings.TrimSpace(s.Instructions()); ins != "" { + parts = append(parts, ins) + } + } + return strings.Join(parts, "\n\n") +} + +// mergedTools flattens toolboxes plus skill toolboxes into one toolset. +// Duplicate tool names are a configuration error and fail loudly — a +// silently shadowed tool is far harder to debug than this error. +func (a *Agent) mergedTools() (map[string]llm.Tool, []llm.Tool, error) { + byName := make(map[string]llm.Tool) + var ordered []llm.Tool + + add := func(origin string, tools []llm.Tool) error { + for _, t := range tools { + if _, exists := byName[t.Name]; exists { + return fmt.Errorf("agent: duplicate tool %q (from %s)", t.Name, origin) + } + byName[t.Name] = t + ordered = append(ordered, t) + } + return nil + } + + for _, b := range a.toolboxes { + if err := add("toolbox "+b.Name(), b.Tools()); err != nil { + return nil, nil, err + } + } + for _, s := range a.skills { + if b := s.Tools(); b != nil { + if err := add("skill "+s.Name(), b.Tools()); err != nil { + return nil, nil, err + } + } + } + return byName, ordered, nil +} + +// Run executes the loop: send the conversation; while the model requests +// tools, execute them and feed results back; stop on a final answer, +// MaxSteps, or an unrecoverable model error. +func (a *Agent) Run(ctx context.Context, input string, opts ...RunOption) (*Result, error) { + var rc runConfig + for _, opt := range opts { + opt(&rc) + } + + byName, ordered, err := a.mergedTools() + if err != nil { + return nil, err + } + + msgs := append([]llm.Message(nil), rc.history...) + if input != "" { + msgs = append(msgs, llm.UserText(input)) + } + if len(msgs) == 0 { + return nil, errors.New("agent: empty input and no history") + } + + result := &Result{} + reqOpts := append(append([]llm.Option(nil), a.reqOpts...), rc.reqOpts...) + system := a.systemPrompt() + + for stepIdx := range a.maxSteps { + req := llm.Request{System: system, Messages: msgs, Tools: ordered} + resp, err := a.model.Generate(ctx, req, reqOpts...) + if err != nil { + result.Messages = msgs + return result, fmt.Errorf("agent: step %d: %w", stepIdx, err) + } + + msgs = append(msgs, resp.Message()) + result.Usage.Add(resp.Usage) + step := Step{Index: stepIdx, Response: resp} + + if len(resp.ToolCalls) == 0 { + // Final answer. + result.Output = resp.Text() + result.Steps = append(result.Steps, step) + result.Messages = msgs + a.notify(rc, step) + return result, nil + } + + results := make([]llm.ToolResult, 0, len(resp.ToolCalls)) + for _, call := range resp.ToolCalls { + if err := ctx.Err(); err != nil { + result.Messages = msgs + return result, err + } + tool, ok := byName[call.Name] + if !ok { + results = append(results, llm.ToolResult{ + ID: call.ID, Name: call.Name, + Content: fmt.Sprintf("unknown tool %q", call.Name), + IsError: true, + }) + continue + } + // ExecuteTool recovers panics and converts errors to IsError + // results — the loop always continues. + results = append(results, llm.ExecuteTool(ctx, tool, call)) + } + + step.Results = results + result.Steps = append(result.Steps, step) + a.notify(rc, step) + msgs = append(msgs, llm.ToolResultsMessage(results...)) + } + + result.Messages = msgs + return result, fmt.Errorf("%w (max %d)", ErrMaxSteps, a.maxSteps) +} + +// notify fans a step out to agent observers and run callbacks; observer +// panics are swallowed (the loop must never die for a UI callback). +func (a *Agent) notify(rc runConfig, step Step) { + emit := func(fn func(Step)) { + defer func() { _ = recover() }() + fn(step) + } + for _, fn := range a.observers { + emit(fn) + } + for _, fn := range rc.onStep { + emit(fn) + } +} diff --git a/agent/agent_test.go b/agent/agent_test.go new file mode 100644 index 0000000..6f57eee --- /dev/null +++ b/agent/agent_test.go @@ -0,0 +1,339 @@ +package agent + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "strings" + "testing" + + "gitea.stevedudenhoeffer.com/steve/majordomo/llm" + "gitea.stevedudenhoeffer.com/steve/majordomo/provider/fake" +) + +func newModel(t *testing.T, fp *fake.Provider) llm.Model { + t.Helper() + m, err := fp.Model("test-model") + if err != nil { + t.Fatalf("Model: %v", err) + } + return m +} + +// toolCallReply scripts an assistant response requesting one tool call. +func toolCallReply(id, name, args string) fake.Step { + return fake.ReplyWith(llm.Response{ + ToolCalls: []llm.ToolCall{{ID: id, Name: name, Arguments: json.RawMessage(args)}}, + FinishReason: llm.FinishToolCalls, + Usage: llm.Usage{InputTokens: 10, OutputTokens: 5}, + }) +} + +func adderToolbox(t *testing.T) *llm.Toolbox { + t.Helper() + return llm.NewToolbox("math", llm.Tool{ + Name: "add", + Description: "Add two integers", + Parameters: json.RawMessage(`{"type":"object","properties":{"a":{"type":"integer"},"b":{"type":"integer"}},"required":["a","b"]}`), + Handler: func(_ context.Context, args json.RawMessage) (any, error) { + var p struct{ A, B int } + if err := json.Unmarshal(args, &p); err != nil { + return nil, err + } + return map[string]int{"sum": p.A + p.B}, nil + }, + }) +} + +func TestRunWithoutTools(t *testing.T) { + fp := fake.New("fp") + fp.Enqueue("test-model", fake.Reply("direct answer")) + + a := New(newModel(t, fp), "You are terse.") + res, err := a.Run(context.Background(), "question?") + if err != nil { + t.Fatalf("Run: %v", err) + } + if res.Output != "direct answer" { + t.Errorf("output = %q", res.Output) + } + if len(res.Steps) != 1 { + t.Errorf("steps = %d", len(res.Steps)) + } + // The system prompt reached the model. + calls := fp.Calls() + if calls[0].Request.System != "You are terse." { + t.Errorf("system = %q", calls[0].Request.System) + } +} + +func TestRunToolLoop(t *testing.T) { + fp := fake.New("fp") + fp.Enqueue("test-model", + toolCallReply("c1", "add", `{"a":2,"b":3}`), + fake.Reply("the sum is 5"), + ) + + a := New(newModel(t, fp), "do math", WithToolbox(adderToolbox(t))) + res, err := a.Run(context.Background(), "2+3?") + if err != nil { + t.Fatalf("Run: %v", err) + } + if res.Output != "the sum is 5" { + t.Errorf("output = %q", res.Output) + } + if len(res.Steps) != 2 { + t.Fatalf("steps = %d, want 2", len(res.Steps)) + } + if res.Usage.InputTokens != 11 || res.Usage.OutputTokens != 6 { + t.Errorf("usage = %+v (must sum both steps)", res.Usage) + } + + // The tool executed and its result went back to the model. + step1 := res.Steps[0] + if len(step1.Results) != 1 || step1.Results[0].IsError { + t.Fatalf("step 1 results = %+v", step1.Results) + } + if !strings.Contains(step1.Results[0].Content, `"sum":5`) { + t.Errorf("tool result = %q", step1.Results[0].Content) + } + + // Second model call must carry the tool transcript: user, assistant + // (with the call), tool results. + second := fp.Calls()[1].Request + if len(second.Messages) != 3 { + t.Fatalf("second request messages = %d, want 3", len(second.Messages)) + } + if second.Messages[1].Role != llm.RoleAssistant || len(second.Messages[1].ToolCalls) != 1 { + t.Errorf("assistant turn = %+v", second.Messages[1]) + } + toolMsg := second.Messages[2] + if toolMsg.Role != llm.RoleTool || toolMsg.ToolResults[0].ID != "c1" { + t.Errorf("tool turn = %+v", toolMsg) + } + + // The tools were offered on every step. + for i, c := range fp.Calls() { + if len(c.Request.Tools) != 1 || c.Request.Tools[0].Name != "add" { + t.Errorf("call %d tools = %+v", i, c.Request.Tools) + } + } + + // Result.Messages is the full transcript. + if len(res.Messages) != 4 { + t.Errorf("transcript = %d messages, want 4", len(res.Messages)) + } +} + +func TestRunUnknownToolContinues(t *testing.T) { + fp := fake.New("fp") + fp.Enqueue("test-model", + toolCallReply("c1", "nonexistent", `{}`), + fake.Reply("recovered"), + ) + + a := New(newModel(t, fp), "", WithToolbox(adderToolbox(t))) + res, err := a.Run(context.Background(), "go") + if err != nil { + t.Fatalf("Run: %v", err) + } + if res.Output != "recovered" { + t.Errorf("output = %q", res.Output) + } + r := res.Steps[0].Results[0] + if !r.IsError || !strings.Contains(r.Content, "nonexistent") { + t.Errorf("unknown-tool result = %+v", r) + } +} + +func TestRunPanickingToolContinues(t *testing.T) { + fp := fake.New("fp") + fp.Enqueue("test-model", + toolCallReply("c1", "bomb", `{}`), + fake.Reply("survived"), + ) + bomb := llm.NewToolbox("danger", llm.Tool{ + Name: "bomb", + Handler: func(context.Context, json.RawMessage) (any, error) { panic("boom") }, + }) + + a := New(newModel(t, fp), "", WithToolbox(bomb)) + res, err := a.Run(context.Background(), "go") + if err != nil { + t.Fatalf("Run: %v", err) + } + if res.Output != "survived" { + t.Errorf("output = %q", res.Output) + } + r := res.Steps[0].Results[0] + if !r.IsError || !strings.Contains(r.Content, "boom") { + t.Errorf("panic result = %+v", r) + } +} + +func TestRunMaxSteps(t *testing.T) { + fp := fake.New("fp", fake.WithDefault(func(string, llm.Request) fake.Step { + return toolCallReply("c", "add", `{"a":1,"b":1}`) + })) + + a := New(newModel(t, fp), "", WithToolbox(adderToolbox(t)), WithMaxSteps(3)) + res, err := a.Run(context.Background(), "loop forever") + if !errors.Is(err, ErrMaxSteps) { + t.Fatalf("err = %v, want ErrMaxSteps", err) + } + if res == nil || len(res.Steps) != 3 { + t.Fatalf("result = %+v, want 3 recorded steps", res) + } + if len(res.Messages) == 0 { + t.Error("transcript must be preserved on ErrMaxSteps") + } +} + +func TestDuplicateToolNamesFailLoudly(t *testing.T) { + fp := fake.New("fp") + box1 := llm.NewToolbox("a", llm.Tool{Name: "dup"}) + box2 := llm.NewToolbox("b", llm.Tool{Name: "dup"}) + + a := New(newModel(t, fp), "", WithToolbox(box1), WithToolbox(box2)) + _, err := a.Run(context.Background(), "go") + if err == nil || !strings.Contains(err.Error(), `duplicate tool "dup"`) { + t.Errorf("err = %v, want duplicate-tool error", err) + } +} + +func TestRunWithHistory(t *testing.T) { + fp := fake.New("fp") + fp.Enqueue("test-model", fake.Reply("continued")) + + history := []llm.Message{ + llm.UserText("first question"), + llm.AssistantText("first answer"), + } + a := New(newModel(t, fp), "") + res, err := a.Run(context.Background(), "follow-up", WithHistory(history)) + if err != nil { + t.Fatalf("Run: %v", err) + } + got := fp.Calls()[0].Request.Messages + if len(got) != 3 || got[0].Text() != "first question" || got[2].Text() != "follow-up" { + t.Errorf("messages = %+v", got) + } + if len(res.Messages) != 4 { + t.Errorf("transcript = %d, want history+input+answer", len(res.Messages)) + } +} + +func TestStepObservers(t *testing.T) { + fp := fake.New("fp") + fp.Enqueue("test-model", + toolCallReply("c1", "add", `{"a":1,"b":2}`), + fake.Reply("3"), + ) + + var agentSteps, runSteps []int + a := New(newModel(t, fp), "", + WithToolbox(adderToolbox(t)), + WithStepObserver(func(s Step) { agentSteps = append(agentSteps, s.Index) }), + ) + _, err := a.Run(context.Background(), "1+2?", + OnStep(func(s Step) { runSteps = append(runSteps, s.Index) }), + ) + if err != nil { + t.Fatalf("Run: %v", err) + } + if fmt.Sprint(agentSteps) != "[0 1]" || fmt.Sprint(runSteps) != "[0 1]" { + t.Errorf("agentSteps=%v runSteps=%v", agentSteps, runSteps) + } +} + +func TestObserverPanicIsSwallowed(t *testing.T) { + fp := fake.New("fp") + fp.Enqueue("test-model", fake.Reply("fine")) + + a := New(newModel(t, fp), "", WithStepObserver(func(Step) { panic("ui bug") })) + res, err := a.Run(context.Background(), "go") + if err != nil || res.Output != "fine" { + t.Errorf("res=%+v err=%v — observer panic must not kill the run", res, err) + } +} + +func TestSkillComposition(t *testing.T) { + fp := fake.New("fp") + fp.Enqueue("test-model", fake.Reply("ok")) + + sk := stubSkill{ + name: "haiku", + instructions: "Answer in haiku.", + tools: llm.NewToolbox("haiku-tools", llm.Tool{ + Name: "count_syllables", + Handler: func(context.Context, json.RawMessage) (any, error) { return 5, nil }, + }), + } + a := New(newModel(t, fp), "Base prompt.", WithSkill(sk)) + if _, err := a.Run(context.Background(), "hello"); err != nil { + t.Fatalf("Run: %v", err) + } + + req := fp.Calls()[0].Request + if req.System != "Base prompt.\n\nAnswer in haiku." { + t.Errorf("system = %q", req.System) + } + if len(req.Tools) != 1 || req.Tools[0].Name != "count_syllables" { + t.Errorf("tools = %+v", req.Tools) + } +} + +func TestAddSkillOnDemand(t *testing.T) { + fp := fake.New("fp") + fp.Enqueue("test-model", fake.Reply("a"), fake.Reply("b")) + + a := New(newModel(t, fp), "Base.") + if _, err := a.Run(context.Background(), "one"); err != nil { + t.Fatalf("Run: %v", err) + } + a.AddSkill(stubSkill{name: "later", instructions: "Later skill."}) + if _, err := a.Run(context.Background(), "two"); err != nil { + t.Fatalf("Run: %v", err) + } + + calls := fp.Calls() + if calls[0].Request.System != "Base." { + t.Errorf("first system = %q", calls[0].Request.System) + } + if calls[1].Request.System != "Base.\n\nLater skill." { + t.Errorf("second system = %q", calls[1].Request.System) + } +} + +func TestRunErrorPreservesTranscript(t *testing.T) { + fp := fake.New("fp") + fp.Enqueue("test-model", fake.Fail(errors.New("model down"))) + + a := New(newModel(t, fp), "") + res, err := a.Run(context.Background(), "go") + if err == nil || !strings.Contains(err.Error(), "model down") { + t.Fatalf("err = %v", err) + } + if res == nil || len(res.Messages) != 1 { + t.Errorf("result = %+v, want transcript with the input", res) + } +} + +func TestEmptyInputNeedsHistory(t *testing.T) { + fp := fake.New("fp") + a := New(newModel(t, fp), "") + if _, err := a.Run(context.Background(), ""); err == nil { + t.Error("empty input with no history must error") + } +} + +type stubSkill struct { + name string + instructions string + tools *llm.Toolbox +} + +func (s stubSkill) Name() string { return s.name } +func (s stubSkill) Instructions() string { return s.instructions } +func (s stubSkill) Tools() *llm.Toolbox { return s.tools } diff --git a/docs/adr/0012-agent-loop.md b/docs/adr/0012-agent-loop.md new file mode 100644 index 0000000..456126d --- /dev/null +++ b/docs/adr/0012-agent-loop.md @@ -0,0 +1,53 @@ +# ADR-0012: Agent run loop + +**Status:** Accepted — 2026-06-10 + +## Context + +Agents are the consumer-facing composition: Model + system prompt + +toolboxes (+ skills), run as a tool-dispatch loop. mort's agents need +multi-step tool use, bounded loops, per-step observation (tracing/usage +recording), conversation continuation, and a loop that survives every tool +failure. + +## Decision + +- `agent.New(model, system, opts...)` with options WithToolbox, WithTools, + WithSkill, WithMaxSteps (default 10), WithRequestOptions (per-step + generation knobs), WithStepObserver. `AddSkill`/`AddToolbox` extend an + agent after construction (configure-then-share concurrency contract). +- `Run(ctx, input, opts...)`: history seeds via WithHistory (a previous + `Result.Messages` round-trips), per-run request options, per-run OnStep + callbacks. The loop: Generate with the merged toolset → no tool calls = + final answer → otherwise execute calls sequentially, append the + assistant turn + one RoleTool message, repeat. +- **Never panics, never stalls on tool failure:** handlers run through + `llm.ExecuteTool` (panic-recovering, errors → IsError results); unknown + tool names come back as IsError results the model can react to; observer + panics are swallowed. Only model errors (already retried/failed-over by + the chain) and ctx cancellation abort the run — and even then the + partial `Result` (transcript, steps, usage) is returned alongside the + error, as it is on ErrMaxSteps. +- **Duplicate tool names across toolboxes/skills fail loudly at Run** — + a silently shadowed tool is a debugging trap. +- Skills compose additively (the `agent.Skill` interface lives in this + package so agent does not import skill): `Instructions()` append to the + system prompt in attachment order; `Tools()` join the merged toolset. +- **Intermediate-step streaming = step observers** (agent-level and + per-run), invoked synchronously after every step with the response and + tool results. Token-level streaming inside the loop was deliberately + deferred: tool-heavy steps end on buffered tool calls anyway, and mort's + observers (trace/usage recorders) want completed steps. `Model.Stream` + remains available for direct streaming outside the loop. +- `Generate[T]` + `llm.SchemaFor[T]` give typed structured output: + reflect-derived schemas (strict-compatible: all properties required, + additionalProperties:false, pointers → nullable anyOf), markdown-fence + stripping as a decode fallback, errors that name the serving model. + +## Consequences + +- An agent over a failover chain inherits retry/backoff/media + normalization transparently — no agent-level error handling for + transient provider trouble. +- Sequential tool execution is deterministic and trace-friendly; parallel + dispatch can be added later behind an option without API change. diff --git a/docs/adr/README.md b/docs/adr/README.md index 1320a97..6c8eaa6 100644 --- a/docs/adr/README.md +++ b/docs/adr/README.md @@ -15,3 +15,4 @@ One decision per file, append-only; supersede rather than rewrite. | [0009](0009-multimodal-strategy.md) | Multimodal strategy — normalize per target, enforce at provider | Accepted | | [0010](0010-tools-structured-output-mapping.md) | Tools and structured output — canonical shape, native mappings | Accepted | | [0011](0011-google-provider.md) | Google provider on the official Gen AI SDK | Accepted | +| [0012](0012-agent-loop.md) | Agent run loop | Accepted | diff --git a/generate.go b/generate.go new file mode 100644 index 0000000..b3f39fd --- /dev/null +++ b/generate.go @@ -0,0 +1,71 @@ +package majordomo + +import ( + "context" + "encoding/json" + "fmt" + "reflect" + "strings" + + "gitea.stevedudenhoeffer.com/steve/majordomo/llm" +) + +// SchemaFor re-exports llm.SchemaFor: a JSON Schema derived from a Go type, +// suitable for WithSchema and tool parameters. +func SchemaFor[T any]() (json.RawMessage, error) { return llm.SchemaFor[T]() } + +// Generate performs a structured-output request and unmarshals the result +// into T: the schema is derived from T (llm.SchemaFor), injected via the +// provider's native structured-output mechanism, and the response text is +// decoded into a T value. +// +// type Verdict struct { +// Guilty bool `json:"guilty"` +// Why string `json:"why" description:"one-sentence rationale"` +// } +// v, err := majordomo.Generate[Verdict](ctx, m, req) +func Generate[T any](ctx context.Context, m Model, req Request, opts ...Option) (T, error) { + var zero T + schema, err := llm.SchemaFor[T]() + if err != nil { + return zero, err + } + + name := "response" + if t := reflect.TypeFor[T](); t.Name() != "" { + name = strings.ToLower(t.Name()) + } + + resp, err := m.Generate(ctx, req, append(opts, llm.WithSchema(schema, name))...) + if err != nil { + return zero, err + } + + text := strings.TrimSpace(resp.Text()) + if text == "" { + return zero, fmt.Errorf("majordomo: structured response from %s is empty (finish: %s)", resp.Model, resp.FinishReason) + } + // Defensive: some models wrap JSON in a markdown fence despite the + // schema constraint. + text = stripFence(text) + + var out T + if err := json.Unmarshal([]byte(text), &out); err != nil { + return zero, fmt.Errorf("majordomo: decode structured response from %s: %w (text: %.200s)", resp.Model, err, text) + } + return out, nil +} + +// stripFence removes a surrounding markdown code fence, if present. +func stripFence(s string) string { + if !strings.HasPrefix(s, "```") { + return s + } + s = strings.TrimPrefix(s, "```") + // Drop an optional language tag on the opening fence line. + if i := strings.IndexByte(s, '\n'); i >= 0 { + s = s[i+1:] + } + s = strings.TrimSuffix(strings.TrimSpace(s), "```") + return strings.TrimSpace(s) +} diff --git a/generate_test.go b/generate_test.go new file mode 100644 index 0000000..2b123ac --- /dev/null +++ b/generate_test.go @@ -0,0 +1,89 @@ +package majordomo + +import ( + "context" + "encoding/json" + "strings" + "testing" + + "gitea.stevedudenhoeffer.com/steve/majordomo/llm" + "gitea.stevedudenhoeffer.com/steve/majordomo/provider/fake" +) + +type verdict struct { + Guilty bool `json:"guilty"` + Why string `json:"why"` +} + +func TestGenerateTyped(t *testing.T) { + r := newTestRegistry(t) + fp := fake.New("fp") + r.RegisterProvider(fp) + fp.Enqueue("judge", fake.Reply(`{"guilty":true,"why":"caught red-handed"}`)) + + m, _ := r.Parse("fp/judge") + v, err := Generate[verdict](context.Background(), m, Request{Messages: []Message{UserText("verdict?")}}) + if err != nil { + t.Fatalf("Generate: %v", err) + } + if !v.Guilty || v.Why != "caught red-handed" { + t.Errorf("verdict = %+v", v) + } + + // The schema derived from the struct reached the provider, named after + // the type. + req := fp.Calls()[0].Request + if req.SchemaName != "verdict" { + t.Errorf("schema name = %q", req.SchemaName) + } + var schema map[string]any + if err := json.Unmarshal(req.Schema, &schema); err != nil { + t.Fatalf("schema: %v", err) + } + props := schema["properties"].(map[string]any) + if _, ok := props["guilty"]; !ok { + t.Errorf("schema = %v", schema) + } +} + +func TestGenerateStripsMarkdownFence(t *testing.T) { + r := newTestRegistry(t) + fp := fake.New("fp") + r.RegisterProvider(fp) + fp.Enqueue("judge", fake.Reply("```json\n{\"guilty\":false,\"why\":\"alibi\"}\n```")) + + m, _ := r.Parse("fp/judge") + v, err := Generate[verdict](context.Background(), m, Request{Messages: []Message{UserText("verdict?")}}) + if err != nil { + t.Fatalf("Generate: %v", err) + } + if v.Guilty || v.Why != "alibi" { + t.Errorf("verdict = %+v", v) + } +} + +func TestGenerateDecodeErrorNamesModel(t *testing.T) { + r := newTestRegistry(t) + fp := fake.New("fp") + r.RegisterProvider(fp) + fp.Enqueue("judge", fake.Reply("not json at all")) + + m, _ := r.Parse("fp/judge") + _, err := Generate[verdict](context.Background(), m, Request{Messages: []Message{UserText("verdict?")}}) + if err == nil || !strings.Contains(err.Error(), "fp/judge") { + t.Errorf("err = %v, want decode error naming the model", err) + } +} + +func TestGenerateEmptyResponse(t *testing.T) { + r := newTestRegistry(t) + fp := fake.New("fp") + r.RegisterProvider(fp) + fp.Enqueue("judge", fake.ReplyWith(llm.Response{FinishReason: llm.FinishLength})) + + m, _ := r.Parse("fp/judge") + _, err := Generate[verdict](context.Background(), m, Request{Messages: []Message{UserText("verdict?")}}) + if err == nil || !strings.Contains(err.Error(), "empty") { + t.Errorf("err = %v, want empty-response error", err) + } +} diff --git a/llm/jsonschema.go b/llm/jsonschema.go new file mode 100644 index 0000000..6ea2731 --- /dev/null +++ b/llm/jsonschema.go @@ -0,0 +1,166 @@ +package llm + +import ( + "encoding/json" + "fmt" + "reflect" + "slices" + "strings" + "time" +) + +// SchemaFor derives a JSON Schema from Go type T for structured output. +// +// The emitted schema is strict-mode-compatible across providers: every +// object lists all its properties as required and sets +// additionalProperties:false; optionality is expressed by pointer fields, +// which become nullable (anyOf with null) and may simply be returned as +// null by the model. +// +// Field tags: `json:"name"` / `json:"-"` control naming and omission; +// `description:"..."` documents a field for the model; `enum:"a,b,c"` +// constrains a string field to fixed values. +// +// Supported kinds: strings, bools, integer and float kinds, structs +// (nested), slices/arrays, map[string]V, pointers (nullable), time.Time +// (string, date-time), json.RawMessage and interface{} (any). Recursive +// types are rejected — no provider's structured-output mode accepts them. +func SchemaFor[T any]() (json.RawMessage, error) { + t := reflect.TypeFor[T]() + schema, err := schemaOf(t, "", "", nil) + if err != nil { + return nil, fmt.Errorf("llm: SchemaFor[%s]: %w", t, err) + } + out, err := json.Marshal(schema) + if err != nil { + return nil, fmt.Errorf("llm: SchemaFor[%s]: encode: %w", t, err) + } + return out, nil +} + +var ( + timeType = reflect.TypeFor[time.Time]() + rawType = reflect.TypeFor[json.RawMessage]() +) + +// schemaOf builds the schema map for one type. visiting tracks the struct +// types on the current path for cycle detection. +func schemaOf(t reflect.Type, description, enum string, visiting []reflect.Type) (map[string]any, error) { + s := make(map[string]any) + if description != "" { + s["description"] = description + } + + switch { + case t == timeType: + s["type"] = "string" + s["format"] = "date-time" + return s, nil + case t == rawType: + // Any JSON value. + return s, nil + } + + switch t.Kind() { + case reflect.Pointer: + inner, err := schemaOf(t.Elem(), "", enum, visiting) + if err != nil { + return nil, err + } + s["anyOf"] = []any{inner, map[string]any{"type": "null"}} + return s, nil + + case reflect.String: + s["type"] = "string" + if enum != "" { + var vals []any + for v := range strings.SplitSeq(enum, ",") { + vals = append(vals, strings.TrimSpace(v)) + } + s["enum"] = vals + } + return s, nil + + case reflect.Bool: + s["type"] = "boolean" + return s, nil + + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, + reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: + s["type"] = "integer" + return s, nil + + case reflect.Float32, reflect.Float64: + s["type"] = "number" + return s, nil + + case reflect.Slice, reflect.Array: + if t.Elem().Kind() == reflect.Uint8 { + // []byte: base64 text on the wire. + s["type"] = "string" + return s, nil + } + items, err := schemaOf(t.Elem(), "", "", visiting) + if err != nil { + return nil, err + } + s["type"] = "array" + s["items"] = items + return s, nil + + case reflect.Map: + if t.Key().Kind() != reflect.String { + return nil, fmt.Errorf("map key type %s unsupported (only string keys)", t.Key()) + } + values, err := schemaOf(t.Elem(), "", "", visiting) + if err != nil { + return nil, err + } + s["type"] = "object" + s["additionalProperties"] = values + return s, nil + + case reflect.Interface: + // Any JSON value. + return s, nil + + case reflect.Struct: + if slices.Contains(visiting, t) { + return nil, fmt.Errorf("recursive type %s (structured-output schemas cannot recurse)", t) + } + visiting = append(visiting, t) + + properties := make(map[string]any) + required := []string{} + for i := range t.NumField() { + f := t.Field(i) + if !f.IsExported() { + continue + } + name := f.Name + if tag, ok := f.Tag.Lookup("json"); ok { + base, _, _ := strings.Cut(tag, ",") + if base == "-" { + continue + } + if base != "" { + name = base + } + } + fs, err := schemaOf(f.Type, f.Tag.Get("description"), f.Tag.Get("enum"), visiting) + if err != nil { + return nil, fmt.Errorf("field %s: %w", f.Name, err) + } + properties[name] = fs + required = append(required, name) + } + s["type"] = "object" + s["properties"] = properties + s["required"] = required + s["additionalProperties"] = false + return s, nil + + default: + return nil, fmt.Errorf("kind %s unsupported in structured-output schemas", t.Kind()) + } +} diff --git a/llm/jsonschema_test.go b/llm/jsonschema_test.go new file mode 100644 index 0000000..ba8d448 --- /dev/null +++ b/llm/jsonschema_test.go @@ -0,0 +1,140 @@ +package llm + +import ( + "encoding/json" + "strings" + "testing" + "time" +) + +func mustSchema[T any](t *testing.T) map[string]any { + t.Helper() + raw, err := SchemaFor[T]() + if err != nil { + t.Fatalf("SchemaFor: %v", err) + } + var m map[string]any + if err := json.Unmarshal(raw, &m); err != nil { + t.Fatalf("schema is not valid JSON: %v", err) + } + return m +} + +func TestSchemaForBasicStruct(t *testing.T) { + type Person struct { + Name string `json:"name" description:"full name"` + Age int `json:"age"` + Score float64 `json:"score"` + Active bool `json:"active"` + Ignored string `json:"-"` + hidden string //nolint:unused + } + s := mustSchema[Person](t) + if s["type"] != "object" || s["additionalProperties"] != false { + t.Errorf("root = %v", s) + } + props := s["properties"].(map[string]any) + if len(props) != 4 { + t.Errorf("properties = %v", props) + } + if props["name"].(map[string]any)["description"] != "full name" { + t.Errorf("name = %v", props["name"]) + } + if props["age"].(map[string]any)["type"] != "integer" || + props["score"].(map[string]any)["type"] != "number" || + props["active"].(map[string]any)["type"] != "boolean" { + t.Errorf("props = %v", props) + } + req := s["required"].([]any) + if len(req) != 4 { + t.Errorf("required = %v (all fields must be required for strict mode)", req) + } +} + +func TestSchemaForNestedAndCollections(t *testing.T) { + type Inner struct { + V string `json:"v"` + } + type Outer struct { + Items []Inner `json:"items"` + Labels map[string]string `json:"labels"` + Blob []byte `json:"blob"` + When time.Time `json:"when"` + Extra json.RawMessage `json:"extra"` + } + s := mustSchema[Outer](t) + props := s["properties"].(map[string]any) + + items := props["items"].(map[string]any) + if items["type"] != "array" { + t.Errorf("items = %v", items) + } + inner := items["items"].(map[string]any) + if inner["type"] != "object" || inner["additionalProperties"] != false { + t.Errorf("inner = %v", inner) + } + labels := props["labels"].(map[string]any) + if labels["type"] != "object" || labels["additionalProperties"].(map[string]any)["type"] != "string" { + t.Errorf("labels = %v", labels) + } + if props["blob"].(map[string]any)["type"] != "string" { + t.Errorf("blob = %v", props["blob"]) + } + when := props["when"].(map[string]any) + if when["type"] != "string" || when["format"] != "date-time" { + t.Errorf("when = %v", when) + } + if len(props["extra"].(map[string]any)) != 0 { + t.Errorf("extra (RawMessage) should be the any-schema, got %v", props["extra"]) + } +} + +func TestSchemaForPointerIsNullable(t *testing.T) { + type Opt struct { + Note *string `json:"note"` + } + s := mustSchema[Opt](t) + note := s["properties"].(map[string]any)["note"].(map[string]any) + anyOf := note["anyOf"].([]any) + if len(anyOf) != 2 { + t.Fatalf("anyOf = %v", anyOf) + } + if anyOf[0].(map[string]any)["type"] != "string" || anyOf[1].(map[string]any)["type"] != "null" { + t.Errorf("anyOf = %v", anyOf) + } + // Still required (strict-mode style: optional == nullable). + if req := s["required"].([]any); len(req) != 1 { + t.Errorf("required = %v", req) + } +} + +func TestSchemaForEnum(t *testing.T) { + type Choice struct { + Mood string `json:"mood" enum:"happy,sad, ambivalent"` + } + s := mustSchema[Choice](t) + mood := s["properties"].(map[string]any)["mood"].(map[string]any) + enum := mood["enum"].([]any) + if len(enum) != 3 || enum[2] != "ambivalent" { + t.Errorf("enum = %v (values must be trimmed)", enum) + } +} + +func TestSchemaForRecursionRejected(t *testing.T) { + type Node struct { + Children []*Node `json:"children"` + } + _, err := SchemaFor[Node]() + if err == nil || !strings.Contains(err.Error(), "recursive") { + t.Errorf("err = %v, want recursion rejection", err) + } +} + +func TestSchemaForUnsupportedKind(t *testing.T) { + type Bad struct { + Ch chan int `json:"ch"` + } + if _, err := SchemaFor[Bad](); err == nil { + t.Error("chan field must be rejected") + } +} diff --git a/progress.md b/progress.md index 6ddaec1..cbdbd2d 100644 --- a/progress.md +++ b/progress.md @@ -1,5 +1,23 @@ # progress +## 2026-06-10 — Phase 5: agent loop, Generate[T], schema derivation + +**Landed:** `agent/` (ADR-0012): New(model, system, opts) with toolboxes, +max steps (default 10), per-step request options, agent-level observers + +per-run OnStep, WithHistory continuation (Result.Messages round-trips), +sequential tool dispatch through panic-recovering ExecuteTool, unknown +tools → IsError results, duplicate tool names fail loudly, partial Result +preserved on ErrMaxSteps/model errors/cancellation. The agent.Skill +interface ships here (instructions + tools composition is tested with a +stub); the skill package with real implementations is Phase 6. +`llm.SchemaFor[T]` reflect-derived strict-compatible JSON schemas +(pointers→nullable anyOf, description/enum tags, maps/slices/time/RawMessage, +recursion rejected) and root `majordomo.Generate[T]` (schema injection, +fence-stripping decode, model-naming errors). 15 agent tests + schema + +Generate suites, all hermetic. + +**Next:** Phase 6 — skill package + two example skills. + ## 2026-06-10 — Phase 4: Google provider (official genai SDK) **Landed:** `provider/google` on google.golang.org/genai v1.59.0 (ADR-0011):