feat: conversion-driven extensions — resolvers, DefineTool, hooks, ops controls
CI / Tidy (push) Successful in 9m31s
CI / Build & Test (push) Successful in 10m13s

Phase 9a (ADR-0014): Registry.RegisterResolver for dynamic tiers;
DefineTool[Args] typed tools; Usage cache/reasoning detail fields wired
through anthropic/openai/google; WithPromptCaching (Anthropic
cache_control); agent supervision hooks (WithMaxStepsFunc, WithSteer,
WithCompactor, WithToolErrorLimits + ErrToolLoop); health
Bench/Unbench/Snapshot; ChainConfig.Observer failover events.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
2026-06-10 13:30:06 +02:00
parent 04b21fdad2
commit 0147a79d18
21 changed files with 767 additions and 29 deletions
+114 -10
View File
@@ -27,6 +27,11 @@ const DefaultMaxSteps = 10
// carrying the transcript so far.
var ErrMaxSteps = errors.New("agent: max steps reached without a final answer")
// ErrToolLoop is the sentinel error Run wraps when a tool-error guard
// installed with WithToolErrorLimits trips: either too many consecutive
// steps in which every tool result was an error, or one tool call (same
// name and arguments) repeated past its limit. Run returns it together
// with the partial *Result; match it with errors.Is, since the returned
// error carries extra detail about which guard tripped.
var ErrToolLoop = errors.New("agent: tool-error guard tripped")
// Skill is the contract skills satisfy (defined here so agent does not
// depend on the skill package; package skill provides implementations).
// Instructions are appended to the agent's system prompt; Tools (optional,
@@ -67,13 +72,17 @@ type Result struct {
// it later. Agents are safe to share across goroutines only after
// configuration is complete.
type Agent struct {
// NOTE(review): this view appears to interleave the field list from before
// the change (the first seven fields) with the list after it (the remaining
// fields), which is why several names occur twice — confirm against the
// real file before relying on this layout.
model llm.Model
system string
toolboxes []*llm.Toolbox
skills []Skill
maxSteps int
reqOpts []llm.Option
observers []func(Step)
model llm.Model
system string
toolboxes []*llm.Toolbox
skills []Skill
// maxSteps is the static step ceiling (set via WithMaxSteps).
maxSteps int
// maxStepsFunc, when non-nil, supplies the step ceiling dynamically; Run
// falls back to maxSteps whenever it returns a non-positive value.
maxStepsFunc func() int
// compactor, when non-nil, receives the full message slice before each
// model call; a non-nil result returned without error is sent in its place.
compactor func(ctx context.Context, msgs []llm.Message) ([]llm.Message, error)
// maxConsecutiveToolErrors bounds successive steps whose tool results were
// all errors; the guard is only active for values greater than zero.
maxConsecutiveToolErrors int
// maxSameCallRepeats bounds identical (name + arguments) tool calls within
// one run; the guard is only active for values greater than zero.
maxSameCallRepeats int
reqOpts []llm.Option
observers []func(Step)
}
// Option configures an Agent at construction.
@@ -99,6 +108,34 @@ func WithMaxSteps(n int) Option {
return func(a *Agent) { a.maxSteps = n }
}
// WithMaxStepsFunc installs a dynamic step ceiling. Run re-evaluates fn
// ahead of each step, which lets a supervisor grow or shrink the budget of
// an agent that is already running. While fn is non-nil it takes precedence
// over WithMaxSteps; whenever fn returns zero or a negative number, the
// static WithMaxSteps value applies instead.
func WithMaxStepsFunc(fn func() int) Option {
	return func(agent *Agent) {
		agent.maxStepsFunc = fn
	}
}
// WithCompactor registers fn as the context-compaction hook. Ahead of each
// model call Run hands fn the complete message slice and sends fn's result
// in its place — for instance a transcript whose middle section has been
// summarized. Compaction is best-effort: when fn returns an error or a nil
// slice, the uncompacted messages are sent.
func WithCompactor(fn func(ctx context.Context, msgs []llm.Message) ([]llm.Message, error)) Option {
	return func(agent *Agent) {
		agent.compactor = fn
	}
}
// WithToolErrorLimits configures the two loop guards of a run.
//
// maxConsecutiveErrors caps how many steps in a row may finish with every
// one of their tool results being an error. maxSameCallRepeats caps how
// often the same tool call (identical name and arguments) may occur within
// a single run. When either cap is exceeded, Run stops and returns the
// partial result together with ErrToolLoop. Passing zero for a limit turns
// that guard off.
func WithToolErrorLimits(maxConsecutiveErrors, maxSameCallRepeats int) Option {
	return func(agent *Agent) {
		agent.maxConsecutiveToolErrors, agent.maxSameCallRepeats = maxConsecutiveErrors, maxSameCallRepeats
	}
}
// WithRequestOptions sets default request options (temperature, max
// tokens, ...) applied to every step of every run.
func WithRequestOptions(opts ...llm.Option) Option {
@@ -134,6 +171,7 @@ type runConfig struct {
history []llm.Message
reqOpts []llm.Option
onStep []func(Step)
steer func() []llm.Message
}
// WithHistory seeds the run with prior conversation messages (e.g. a
@@ -153,6 +191,15 @@ func OnStep(fn func(Step)) RunOption {
return func(rc *runConfig) { rc.onStep = append(rc.onStep, fn) }
}
// WithSteer attaches a steering source to a single run. Ahead of each step
// Run calls fn and appends whatever messages it returns to the
// conversation, which is how a supervisor can nudge an agent mid-run
// ("wrap up", "focus on X"). fn is invoked on the goroutine executing Run
// and must take care of its own synchronization.
func WithSteer(fn func() []llm.Message) RunOption {
	return func(cfg *runConfig) {
		cfg.steer = fn
	}
}
// systemPrompt composes the agent's system prompt with each skill's
// instructions, in attachment order.
func (a *Agent) systemPrompt() string {
@@ -227,8 +274,34 @@ func (a *Agent) Run(ctx context.Context, input string, opts ...RunOption) (*Resu
reqOpts := append(append([]llm.Option(nil), a.reqOpts...), rc.reqOpts...)
system := a.systemPrompt()
for stepIdx := range a.maxSteps {
req := llm.Request{System: system, Messages: msgs, Tools: ordered}
// Loop-guard state (WithToolErrorLimits).
consecutiveErrorSteps := 0
callCounts := make(map[string]int)
maxSteps := func() int {
if a.maxStepsFunc != nil {
if n := a.maxStepsFunc(); n > 0 {
return n
}
}
return a.maxSteps
}
for stepIdx := 0; stepIdx < maxSteps(); stepIdx++ {
// Steering: drain supervisor nudges into the conversation.
if rc.steer != nil {
msgs = append(msgs, rc.steer()...)
}
sendMsgs := msgs
if a.compactor != nil {
// Compaction failures are non-fatal: send the original.
if compacted, err := a.compactor(ctx, msgs); err == nil && compacted != nil {
sendMsgs = compacted
}
}
req := llm.Request{System: system, Messages: sendMsgs, Tools: ordered}
resp, err := a.model.Generate(ctx, req, reqOpts...)
if err != nil {
result.Messages = msgs
@@ -249,11 +322,19 @@ func (a *Agent) Run(ctx context.Context, input string, opts ...RunOption) (*Resu
}
results := make([]llm.ToolResult, 0, len(resp.ToolCalls))
repeatTripped := ""
for _, call := range resp.ToolCalls {
if err := ctx.Err(); err != nil {
result.Messages = msgs
return result, err
}
if a.maxSameCallRepeats > 0 {
sig := call.Name + "\x00" + string(call.Arguments)
callCounts[sig]++
if callCounts[sig] > a.maxSameCallRepeats {
repeatTripped = call.Name
}
}
tool, ok := byName[call.Name]
if !ok {
results = append(results, llm.ToolResult{
@@ -272,10 +353,33 @@ func (a *Agent) Run(ctx context.Context, input string, opts ...RunOption) (*Resu
result.Steps = append(result.Steps, step)
a.notify(rc, step)
msgs = append(msgs, llm.ToolResultsMessage(results...))
if repeatTripped != "" {
result.Messages = msgs
return result, fmt.Errorf("%w: %q called identically more than %d times",
ErrToolLoop, repeatTripped, a.maxSameCallRepeats)
}
allErrors := len(results) > 0
for _, r := range results {
if !r.IsError {
allErrors = false
break
}
}
if allErrors {
consecutiveErrorSteps++
if a.maxConsecutiveToolErrors > 0 && consecutiveErrorSteps >= a.maxConsecutiveToolErrors {
result.Messages = msgs
return result, fmt.Errorf("%w: %d consecutive steps with only failing tool calls",
ErrToolLoop, consecutiveErrorSteps)
}
} else {
consecutiveErrorSteps = 0
}
}
result.Messages = msgs
return result, fmt.Errorf("%w (max %d)", ErrMaxSteps, a.maxSteps)
return result, fmt.Errorf("%w (max %d)", ErrMaxSteps, maxSteps())
}
// notify fans a step out to agent observers and run callbacks; observer
+175
View File
@@ -0,0 +1,175 @@
package agent
import (
"context"
"encoding/json"
"errors"
"strings"
"sync/atomic"
"testing"
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
"gitea.stevedudenhoeffer.com/steve/majordomo/provider/fake"
)
// TestMaxStepsFuncExtendsBudget checks that a ceiling raised mid-run by a
// supervisor lets the loop continue beyond the static WithMaxSteps budget.
func TestMaxStepsFuncExtendsBudget(t *testing.T) {
	provider := fake.New("fp")
	provider.Enqueue("test-model",
		toolCallReply("c1", "add", `{"a":1,"b":1}`),
		toolCallReply("c2", "add", `{"a":2,"b":2}`),
		toolCallReply("c3", "add", `{"a":3,"b":3}`),
		fake.Reply("done"),
	)

	// The dynamic ceiling starts out equal to the static one.
	var budget atomic.Int64
	budget.Store(2)
	currentBudget := func() int { return int(budget.Load()) }

	// Plays the "critic": once the step with index 1 is observed, the
	// ceiling is raised to 10.
	raiseAtIndexOne := func(s Step) {
		if s.Index != 1 {
			return
		}
		budget.Store(10)
	}

	ag := New(newModel(t, provider), "",
		WithToolbox(adderToolbox(t)),
		WithMaxSteps(2),
		WithMaxStepsFunc(currentBudget),
		WithStepObserver(raiseAtIndexOne),
	)

	res, err := ag.Run(context.Background(), "go")
	if err != nil {
		t.Fatalf("Run: %v (budget should have been extended)", err)
	}
	if gotSteps := len(res.Steps); res.Output != "done" || gotSteps != 4 {
		t.Errorf("output=%q steps=%d", res.Output, gotSteps)
	}
}
// TestSteerInjectsMessages verifies that messages produced by the steering
// source reach the conversation before the next model call, and that a
// drained message is not injected a second time.
func TestSteerInjectsMessages(t *testing.T) {
	fp := fake.New("fp")
	fp.Enqueue("test-model",
		toolCallReply("c1", "add", `{"a":1,"b":1}`),
		fake.Reply("ok"),
	)

	// One queued nudge; the steer function hands it out once and then
	// returns nothing on later drains.
	pending := []llm.Message{llm.UserText("SUPERVISOR: wrap it up")}

	a := New(newModel(t, fp), "", WithToolbox(adderToolbox(t)))
	_, err := a.Run(context.Background(), "go", WithSteer(func() []llm.Message {
		out := pending
		pending = nil
		return out
	}))
	if err != nil {
		t.Fatalf("Run: %v", err)
	}

	// Guard the indexing below: a regression that makes fewer model calls
	// should fail this test, not panic on an out-of-range index.
	calls := fp.Calls()
	if len(calls) < 2 {
		t.Fatalf("model called %d times, want at least 2", len(calls))
	}

	first := calls[0].Request.Messages
	if len(first) != 2 || !strings.Contains(first[1].Text(), "SUPERVISOR") {
		t.Errorf("first call messages = %+v, want steered message", first)
	}

	// Drained: second call must not duplicate it.
	count := 0
	for _, m := range calls[1].Request.Messages {
		if strings.Contains(m.Text(), "SUPERVISOR") {
			count++
		}
	}
	if count != 1 {
		t.Errorf("steer message appears %d times in second call, want 1", count)
	}
}
// TestCompactorShrinksOutboundContext checks both sides of compaction: the
// request sent to the model carries the compacted view, while the
// transcript returned by Run still holds every message.
func TestCompactorShrinksOutboundContext(t *testing.T) {
	provider := fake.New("fp")
	provider.Enqueue("test-model", fake.Reply("answer"))

	prior := []llm.Message{
		llm.UserText("old 1"), llm.AssistantText("old reply 1"),
		llm.UserText("old 2"), llm.AssistantText("old reply 2"),
	}

	// Retain only the newest message, preceded by a synthetic summary.
	keepNewest := func(_ context.Context, msgs []llm.Message) ([]llm.Message, error) {
		summary := llm.UserText("[summary of earlier conversation]")
		newest := msgs[len(msgs)-1]
		return []llm.Message{summary, newest}, nil
	}

	ag := New(newModel(t, provider), "", WithCompactor(keepNewest))
	res, err := ag.Run(context.Background(), "new question", WithHistory(prior))
	if err != nil {
		t.Fatalf("Run: %v", err)
	}

	outbound := provider.Calls()[0].Request.Messages
	if len(outbound) != 2 || !strings.Contains(outbound[0].Text(), "summary") {
		t.Errorf("sent = %+v, want compacted view", outbound)
	}
	if total := len(res.Messages); total != 6 {
		t.Errorf("transcript = %d messages, want full uncompacted history", total)
	}
}
// TestCompactorErrorIsNonFatal verifies that a compactor returning an error
// does not fail the run and that the original, uncompacted messages are
// what the model receives.
func TestCompactorErrorIsNonFatal(t *testing.T) {
	fp := fake.New("fp")
	fp.Enqueue("test-model", fake.Reply("fine"))

	a := New(newModel(t, fp), "", WithCompactor(func(context.Context, []llm.Message) ([]llm.Message, error) {
		return nil, errors.New("summarizer down")
	}))

	res, err := a.Run(context.Background(), "go")
	if err != nil || res.Output != "fine" {
		// Stop here: the checks below assume the run succeeded, so
		// carrying on would only add noise or panic.
		t.Fatalf("res=%v err=%v", res, err)
	}

	// Guard the index so a missing model call is a test failure, not a
	// panic.
	calls := fp.Calls()
	if len(calls) == 0 {
		t.Fatal("model was never called")
	}
	if len(calls[0].Request.Messages) != 1 {
		t.Error("original messages must be sent when compaction fails")
	}
}
// TestConsecutiveToolErrorGuard: steps whose tools ALL fail trip the guard.
func TestConsecutiveToolErrorGuard(t *testing.T) {
fp := fake.New("fp", fake.WithDefault(func(string, llm.Request) fake.Step {
return toolCallReply("c", "bomb", `{}`)
}))
bomb := llm.NewToolbox("danger", llm.Tool{
Name: "bomb",
Handler: func(context.Context, json.RawMessage) (any, error) { return nil, errors.New("always fails") },
})
a := New(newModel(t, fp), "", WithToolbox(bomb), WithToolErrorLimits(2, 0), WithMaxSteps(10))
res, err := a.Run(context.Background(), "go")
if !errors.Is(err, ErrToolLoop) {
t.Fatalf("err = %v, want ErrToolLoop", err)
}
if len(res.Steps) != 2 {
t.Errorf("steps = %d, want guard to trip after 2", len(res.Steps))
}
}
// TestSameCallRepeatGuard covers the repeat guard from both directions: a
// model that keeps issuing the same call (name and arguments) past the
// limit ends the run with ErrToolLoop, while calls whose arguments differ
// leave the guard untouched.
func TestSameCallRepeatGuard(t *testing.T) {
	ctx := context.Background()

	// Same call on every turn: the guard must trip and name the tool.
	stuck := fake.New("fp", fake.WithDefault(func(string, llm.Request) fake.Step {
		return toolCallReply("c", "add", `{"a":1,"b":1}`)
	}))
	stuckAgent := New(newModel(t, stuck), "",
		WithToolbox(adderToolbox(t)),
		WithToolErrorLimits(0, 3),
		WithMaxSteps(10),
	)
	_, err := stuckAgent.Run(ctx, "go")
	if !(errors.Is(err, ErrToolLoop) && strings.Contains(err.Error(), `"add"`)) {
		t.Fatalf("err = %v, want repeat-guard ErrToolLoop naming add", err)
	}

	// Four calls with a different "b" each time, then a final answer.
	turn := 0
	varied := fake.New("fp", fake.WithDefault(func(string, llm.Request) fake.Step {
		turn++
		if turn <= 4 {
			return toolCallReply("c", "add", `{"a":1,"b":`+string(rune('0'+turn))+`}`)
		}
		return fake.Reply("done")
	}))
	variedAgent := New(newModel(t, varied), "",
		WithToolbox(adderToolbox(t)),
		WithToolErrorLimits(0, 3),
		WithMaxSteps(10),
	)
	if _, err := variedAgent.Run(ctx, "go"); err != nil {
		t.Errorf("varied calls must not trip the guard: %v", err)
	}
}