1fd7109a42
The agent loop took the final answer only from the terminal (no-tool-call)
turn. Models that "front-load" their answer into an earlier turn that also
calls a tool — then close with a trivial pointer like "(Already answered
above.)" — had their real answer discarded and the pointer delivered. This
recurs across several open-weight models (glm-5.2, etc.); well-behaved models
(Claude/GPT) defer their answer to the terminal turn and are unaffected.
finalOutput() now falls back to the last substantive assistant content in the
transcript when the terminal text is weak (empty, or a short back-reference).
The predicate is narrow and back-reference-gated so short-but-correct answers
("42", "It's down, restarting now.") are never overridden; recovery only picks
a prior turn that reads like a real answer, not a preamble. Zero extra model
calls. Terminal-answer behavior for normal runs is unchanged.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
401 lines
13 KiB
Go
401 lines
13 KiB
Go
// Package agent runs LLM-backed agents: a Model, a system prompt, and one
// or more toolboxes, executed as a tool-dispatch loop until the model
// produces a final answer (or MaxSteps intervenes).
//
// The loop never panics: tool handlers run through the panic-recovering
// executor in llm, unknown tools come back as error results the model can
// react to, and step observers receive every intermediate step. Skills
// (package skill) attach additively: their instructions extend the system
// prompt and their tools extend the merged toolset.
package agent
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"strings"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
|
)
|
|
|
|
// DefaultMaxSteps is the step budget New installs for the tool-dispatch
// loop; WithMaxSteps replaces it.
const DefaultMaxSteps = 10

// Sentinel errors reported by Run. Each is returned together with a non-nil
// *Result holding the transcript gathered so far; match them with errors.Is
// because Run wraps them with extra detail.
var (
	// ErrMaxSteps means the step budget ran out before the model produced
	// a final answer.
	ErrMaxSteps = errors.New("agent: max steps reached without a final answer")

	// ErrToolLoop means a tool-error guard fired: either too many
	// consecutive all-error steps or too many identical repeated calls
	// (see WithToolErrorLimits).
	ErrToolLoop = errors.New("agent: tool-error guard tripped")
)
|
|
|
|
// Skill is the contract skills satisfy (defined here so agent does not
|
|
// depend on the skill package; package skill provides implementations).
|
|
// Instructions are appended to the agent's system prompt; Tools (optional,
|
|
// may be nil) extend the agent's toolset.
|
|
type Skill interface {
|
|
Name() string
|
|
Instructions() string
|
|
Tools() *llm.Toolbox
|
|
}
|
|
|
|
// Step is one completed iteration of the loop: the model's response and,
|
|
// when it requested tools, the results that were fed back.
|
|
type Step struct {
|
|
// Index is the 0-based step number.
|
|
Index int
|
|
// Response is the model output for this step.
|
|
Response *llm.Response
|
|
// Results are the executed tool outcomes (empty on the final step).
|
|
Results []llm.ToolResult
|
|
}
|
|
|
|
// Result is the outcome of a Run.
|
|
type Result struct {
|
|
// Output is the final assistant text.
|
|
Output string
|
|
// Messages is the full transcript: prior history, the input, and every
|
|
// assistant/tool turn. Feed it back via WithHistory to continue the
|
|
// conversation.
|
|
Messages []llm.Message
|
|
// Steps records each loop iteration.
|
|
Steps []Step
|
|
// Usage is the token total across all steps.
|
|
Usage llm.Usage
|
|
}
|
|
|
|
// Agent is a reusable model + system prompt + toolboxes (+ skills)
|
|
// composition. Configure at construction; AddSkill/AddToolbox may extend
|
|
// it later. Agents are safe to share across goroutines only after
|
|
// configuration is complete.
|
|
type Agent struct {
|
|
model llm.Model
|
|
system string
|
|
toolboxes []*llm.Toolbox
|
|
skills []Skill
|
|
maxSteps int
|
|
maxStepsFunc func() int
|
|
compactor func(ctx context.Context, msgs []llm.Message) ([]llm.Message, error)
|
|
maxConsecutiveToolErrors int
|
|
maxSameCallRepeats int
|
|
reqOpts []llm.Option
|
|
observers []func(Step)
|
|
}
|
|
|
|
// Option configures an Agent at construction.
|
|
type Option func(*Agent)
|
|
|
|
// WithToolbox attaches a toolbox.
|
|
func WithToolbox(b *llm.Toolbox) Option {
|
|
return func(a *Agent) { a.toolboxes = append(a.toolboxes, b) }
|
|
}
|
|
|
|
// WithTools attaches loose tools (wrapped in an anonymous toolbox).
|
|
func WithTools(tools ...llm.Tool) Option {
|
|
return func(a *Agent) { a.toolboxes = append(a.toolboxes, llm.NewToolbox("", tools...)) }
|
|
}
|
|
|
|
// WithSkill attaches a skill at construction (see also AddSkill).
|
|
func WithSkill(s Skill) Option {
|
|
return func(a *Agent) { a.skills = append(a.skills, s) }
|
|
}
|
|
|
|
// WithMaxSteps bounds the tool-dispatch loop.
|
|
func WithMaxSteps(n int) Option {
|
|
return func(a *Agent) { a.maxSteps = n }
|
|
}
|
|
|
|
// WithMaxStepsFunc makes the step ceiling dynamic: the function is
|
|
// consulted before every step, so a supervisor can extend (or shrink) a
|
|
// running agent's budget. It overrides WithMaxSteps while non-nil; a
|
|
// non-positive return falls back to the static value.
|
|
func WithMaxStepsFunc(fn func() int) Option {
|
|
return func(a *Agent) { a.maxStepsFunc = fn }
|
|
}
|
|
|
|
// WithCompactor installs a context-compaction hook, called with the full
|
|
// message slice before every model call; whatever it returns is sent
|
|
// instead (e.g. summarize the middle of a long transcript). A compactor
|
|
// error is non-fatal: the original messages are used.
|
|
func WithCompactor(fn func(ctx context.Context, msgs []llm.Message) ([]llm.Message, error)) Option {
|
|
return func(a *Agent) { a.compactor = fn }
|
|
}
|
|
|
|
// WithToolErrorLimits installs loop guards: maxConsecutiveErrors bounds
|
|
// successive steps whose tool results were ALL errors, and
|
|
// maxSameCallRepeats bounds identical (name + arguments) tool calls within
|
|
// one run. Either guard tripping ends the run with ErrToolLoop and the
|
|
// partial result. Zero disables a guard.
|
|
func WithToolErrorLimits(maxConsecutiveErrors, maxSameCallRepeats int) Option {
|
|
return func(a *Agent) {
|
|
a.maxConsecutiveToolErrors = maxConsecutiveErrors
|
|
a.maxSameCallRepeats = maxSameCallRepeats
|
|
}
|
|
}
|
|
|
|
// WithRequestOptions sets default request options (temperature, max
|
|
// tokens, ...) applied to every step of every run.
|
|
func WithRequestOptions(opts ...llm.Option) Option {
|
|
return func(a *Agent) { a.reqOpts = append(a.reqOpts, opts...) }
|
|
}
|
|
|
|
// WithStepObserver registers a callback invoked after every completed
|
|
// step (intermediate-step streaming for UIs, tracing, usage recording).
|
|
// Observers run synchronously in Run's goroutine.
|
|
func WithStepObserver(fn func(Step)) Option {
|
|
return func(a *Agent) { a.observers = append(a.observers, fn) }
|
|
}
|
|
|
|
// New creates an agent from a model and system prompt.
|
|
func New(model llm.Model, system string, opts ...Option) *Agent {
|
|
a := &Agent{model: model, system: system, maxSteps: DefaultMaxSteps}
|
|
for _, opt := range opts {
|
|
opt(a)
|
|
}
|
|
return a
|
|
}
|
|
|
|
// AddSkill attaches a skill to the agent on demand.
|
|
func (a *Agent) AddSkill(s Skill) { a.skills = append(a.skills, s) }
|
|
|
|
// AddToolbox attaches a toolbox to the agent on demand.
|
|
func (a *Agent) AddToolbox(b *llm.Toolbox) { a.toolboxes = append(a.toolboxes, b) }
|
|
|
|
// RunOption configures one Run.
|
|
type RunOption func(*runConfig)
|
|
|
|
type runConfig struct {
|
|
history []llm.Message
|
|
reqOpts []llm.Option
|
|
onStep []func(Step)
|
|
steer func() []llm.Message
|
|
}
|
|
|
|
// WithHistory seeds the run with prior conversation messages (e.g. a
|
|
// previous Result.Messages).
|
|
func WithHistory(msgs []llm.Message) RunOption {
|
|
return func(rc *runConfig) { rc.history = msgs }
|
|
}
|
|
|
|
// WithRunRequestOptions adds request options for this run only.
|
|
func WithRunRequestOptions(opts ...llm.Option) RunOption {
|
|
return func(rc *runConfig) { rc.reqOpts = append(rc.reqOpts, opts...) }
|
|
}
|
|
|
|
// OnStep registers a per-run step callback (in addition to agent-level
|
|
// observers).
|
|
func OnStep(fn func(Step)) RunOption {
|
|
return func(rc *runConfig) { rc.onStep = append(rc.onStep, fn) }
|
|
}
|
|
|
|
// WithSteer installs a steering source for this run: the function is
|
|
// drained before every step and any returned messages are appended to the
|
|
// conversation — the mechanism for a supervisor nudging a running agent
|
|
// ("wrap up", "focus on X"). It is called from Run's goroutine; the
|
|
// function owns its own synchronization.
|
|
func WithSteer(fn func() []llm.Message) RunOption {
|
|
return func(rc *runConfig) { rc.steer = fn }
|
|
}
|
|
|
|
// systemPrompt composes the agent's system prompt with each skill's
|
|
// instructions, in attachment order.
|
|
func (a *Agent) systemPrompt() string {
|
|
parts := make([]string, 0, 1+len(a.skills))
|
|
if a.system != "" {
|
|
parts = append(parts, a.system)
|
|
}
|
|
for _, s := range a.skills {
|
|
if ins := strings.TrimSpace(s.Instructions()); ins != "" {
|
|
parts = append(parts, ins)
|
|
}
|
|
}
|
|
return strings.Join(parts, "\n\n")
|
|
}
|
|
|
|
// mergedTools flattens toolboxes plus skill toolboxes into one toolset.
|
|
// Duplicate tool names are a configuration error and fail loudly — a
|
|
// silently shadowed tool is far harder to debug than this error.
|
|
func (a *Agent) mergedTools() (map[string]llm.Tool, []llm.Tool, error) {
|
|
byName := make(map[string]llm.Tool)
|
|
var ordered []llm.Tool
|
|
|
|
add := func(origin string, tools []llm.Tool) error {
|
|
for _, t := range tools {
|
|
if _, exists := byName[t.Name]; exists {
|
|
return fmt.Errorf("agent: duplicate tool %q (from %s)", t.Name, origin)
|
|
}
|
|
byName[t.Name] = t
|
|
ordered = append(ordered, t)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
for _, b := range a.toolboxes {
|
|
if err := add("toolbox "+b.Name(), b.Tools()); err != nil {
|
|
return nil, nil, err
|
|
}
|
|
}
|
|
for _, s := range a.skills {
|
|
if b := s.Tools(); b != nil {
|
|
if err := add("skill "+s.Name(), b.Tools()); err != nil {
|
|
return nil, nil, err
|
|
}
|
|
}
|
|
}
|
|
return byName, ordered, nil
|
|
}
|
|
|
|
// Run executes the loop: send the conversation; while the model requests
|
|
// tools, execute them and feed results back; stop on a final answer,
|
|
// MaxSteps, or an unrecoverable model error.
|
|
func (a *Agent) Run(ctx context.Context, input string, opts ...RunOption) (*Result, error) {
|
|
var rc runConfig
|
|
for _, opt := range opts {
|
|
opt(&rc)
|
|
}
|
|
|
|
byName, ordered, err := a.mergedTools()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
msgs := append([]llm.Message(nil), rc.history...)
|
|
if input != "" {
|
|
msgs = append(msgs, llm.UserText(input))
|
|
}
|
|
if len(msgs) == 0 {
|
|
return nil, errors.New("agent: empty input and no history")
|
|
}
|
|
|
|
result := &Result{}
|
|
reqOpts := append(append([]llm.Option(nil), a.reqOpts...), rc.reqOpts...)
|
|
system := a.systemPrompt()
|
|
|
|
// Loop-guard state (WithToolErrorLimits).
|
|
consecutiveErrorSteps := 0
|
|
callCounts := make(map[string]int)
|
|
|
|
maxSteps := func() int {
|
|
if a.maxStepsFunc != nil {
|
|
if n := a.maxStepsFunc(); n > 0 {
|
|
return n
|
|
}
|
|
}
|
|
return a.maxSteps
|
|
}
|
|
|
|
for stepIdx := 0; stepIdx < maxSteps(); stepIdx++ {
|
|
// Steering: drain supervisor nudges into the conversation.
|
|
if rc.steer != nil {
|
|
msgs = append(msgs, rc.steer()...)
|
|
}
|
|
|
|
sendMsgs := msgs
|
|
if a.compactor != nil {
|
|
// Compaction failures are non-fatal: send the original.
|
|
if compacted, err := a.compactor(ctx, msgs); err == nil && compacted != nil {
|
|
sendMsgs = compacted
|
|
}
|
|
}
|
|
|
|
req := llm.Request{System: system, Messages: sendMsgs, Tools: ordered}
|
|
resp, err := a.model.Generate(ctx, req, reqOpts...)
|
|
if err != nil {
|
|
result.Messages = msgs
|
|
return result, fmt.Errorf("agent: step %d: %w", stepIdx, err)
|
|
}
|
|
|
|
msgs = append(msgs, resp.Message())
|
|
result.Usage.Add(resp.Usage)
|
|
step := Step{Index: stepIdx, Response: resp}
|
|
|
|
if len(resp.ToolCalls) == 0 {
|
|
// Final answer. Usually this terminal turn's text; but if the model
|
|
// front-loaded its answer into an earlier tool-call turn and closed
|
|
// with a trivial pointer, recover that earlier content instead.
|
|
result.Output = finalOutput(msgs, resp.Text())
|
|
result.Steps = append(result.Steps, step)
|
|
result.Messages = msgs
|
|
a.notify(rc, step)
|
|
return result, nil
|
|
}
|
|
|
|
results := make([]llm.ToolResult, 0, len(resp.ToolCalls))
|
|
repeatTripped := ""
|
|
for _, call := range resp.ToolCalls {
|
|
if err := ctx.Err(); err != nil {
|
|
result.Messages = msgs
|
|
return result, err
|
|
}
|
|
if a.maxSameCallRepeats > 0 {
|
|
sig := call.Name + "\x00" + string(call.Arguments)
|
|
callCounts[sig]++
|
|
if callCounts[sig] > a.maxSameCallRepeats {
|
|
repeatTripped = call.Name
|
|
}
|
|
}
|
|
tool, ok := byName[call.Name]
|
|
if !ok {
|
|
results = append(results, llm.ToolResult{
|
|
ID: call.ID, Name: call.Name,
|
|
Content: fmt.Sprintf("unknown tool %q", call.Name),
|
|
IsError: true,
|
|
})
|
|
continue
|
|
}
|
|
// ExecuteTool recovers panics and converts errors to IsError
|
|
// results — the loop always continues.
|
|
results = append(results, llm.ExecuteTool(ctx, tool, call))
|
|
}
|
|
|
|
step.Results = results
|
|
result.Steps = append(result.Steps, step)
|
|
a.notify(rc, step)
|
|
msgs = append(msgs, llm.ToolResultsMessage(results...))
|
|
|
|
if repeatTripped != "" {
|
|
result.Messages = msgs
|
|
return result, fmt.Errorf("%w: %q called identically more than %d times",
|
|
ErrToolLoop, repeatTripped, a.maxSameCallRepeats)
|
|
}
|
|
allErrors := len(results) > 0
|
|
for _, r := range results {
|
|
if !r.IsError {
|
|
allErrors = false
|
|
break
|
|
}
|
|
}
|
|
if allErrors {
|
|
consecutiveErrorSteps++
|
|
if a.maxConsecutiveToolErrors > 0 && consecutiveErrorSteps >= a.maxConsecutiveToolErrors {
|
|
result.Messages = msgs
|
|
return result, fmt.Errorf("%w: %d consecutive steps with only failing tool calls",
|
|
ErrToolLoop, consecutiveErrorSteps)
|
|
}
|
|
} else {
|
|
consecutiveErrorSteps = 0
|
|
}
|
|
}
|
|
|
|
result.Messages = msgs
|
|
return result, fmt.Errorf("%w (max %d)", ErrMaxSteps, maxSteps())
|
|
}
|
|
|
|
// notify fans a step out to agent observers and run callbacks; observer
|
|
// panics are swallowed (the loop must never die for a UI callback).
|
|
func (a *Agent) notify(rc runConfig, step Step) {
|
|
emit := func(fn func(Step)) {
|
|
defer func() { _ = recover() }()
|
|
fn(step)
|
|
}
|
|
for _, fn := range a.observers {
|
|
emit(fn)
|
|
}
|
|
for _, fn := range rc.onStep {
|
|
emit(fn)
|
|
}
|
|
}
|