majordomo/agent/agent.go

// Package agent runs LLM-backed agents: a Model, a system prompt, and one
// or more toolboxes, executed as a tool-dispatch loop until the model
// produces a final answer (or MaxSteps intervenes).
//
// The loop never panics: tool handlers run through the panic-recovering
// executor in llm, unknown tools come back as error results the model can
// react to, and step observers receive every intermediate step. Skills
// (package skill) attach additively: their instructions extend the system
// prompt and their tools extend the merged toolset.
package agent

import (
	"context"
	"errors"
	"fmt"
	"strings"

	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
)

// DefaultMaxSteps is the step budget New installs on every agent; it
// bounds the tool-dispatch loop when WithMaxSteps is not given.
const DefaultMaxSteps = 10

// ErrMaxSteps reports that the loop hit its step budget before the model
// produced a final answer. Run returns it alongside a non-nil *Result
// carrying the transcript so far. Run wraps the sentinel together with the
// step limit, so match it with errors.Is rather than ==.
var ErrMaxSteps = errors.New("agent: max steps reached without a final answer")

// Skill is the contract skills satisfy (defined here so agent does not
// depend on the skill package; package skill provides implementations).
// Instructions are appended to the agent's system prompt; Tools (optional,
// may be nil) extend the agent's toolset.
type Skill interface {
	// Name identifies the skill; the agent uses it to label the origin of
	// a duplicate tool in configuration errors.
	Name() string
	// Instructions returns the skill's prompt text. The agent trims
	// surrounding whitespace and omits the skill from the prompt when
	// nothing remains.
	Instructions() string
	// Tools returns the skill's toolbox, or nil when it contributes no
	// tools.
	Tools() *llm.Toolbox
}

// Step is one completed iteration of the loop: the model's response and,
// when it requested tools, the results that were fed back. Steps are
// handed to step observers and collected in Result.Steps.
type Step struct {
	// Index is the 0-based step number.
	Index int
	// Response is the model output for this step.
	Response *llm.Response
	// Results are the executed tool outcomes, one per requested tool call
	// in request order (empty on the final step).
	Results []llm.ToolResult
}

// Result is the outcome of a Run. When Run fails after the conversation
// has started, the Result is still returned with whatever was gathered up
// to that point.
type Result struct {
	// Output is the final assistant text. It stays empty when Run returns
	// an error, because it is only set once the model answers without
	// requesting tools.
	Output string
	// Messages is the full transcript: prior history, the input, and every
	// assistant/tool turn. Feed it back via WithHistory to continue the
	// conversation.
	Messages []llm.Message
	// Steps records each completed loop iteration, in order.
	Steps []Step
	// Usage is the token total across all steps.
	Usage llm.Usage
}

// Agent is a reusable model + system prompt + toolboxes (+ skills)
// composition. Configure at construction; AddSkill/AddToolbox may extend
// it later. Agents are safe to share across goroutines only after
// configuration is complete.
type Agent struct {
	model     llm.Model      // backend invoked once per loop step
	system    string         // base system prompt, before skill instructions
	toolboxes []*llm.Toolbox // toolboxes from WithToolbox, WithTools and AddToolbox
	skills    []Skill        // skills from WithSkill and AddSkill, in attachment order
	maxSteps  int            // step budget for Run's loop
	reqOpts   []llm.Option   // request options applied to every step of every run
	observers []func(Step)   // agent-level step callbacks
}

// Option configures an Agent at construction. New applies options in the
// order they are passed, after installing the default step budget.
type Option func(*Agent)

// WithToolbox returns an Option that appends b to the agent's toolboxes.
// Its tools are merged with all other toolboxes and skill tools when Run
// starts.
func WithToolbox(b *llm.Toolbox) Option {
	attach := func(ag *Agent) {
		ag.toolboxes = append(ag.toolboxes, b)
	}
	return attach
}

// WithTools returns an Option that attaches loose tools by wrapping them
// in a toolbox with an empty name. The wrapper is built when the option
// is applied, not when WithTools is called.
func WithTools(tools ...llm.Tool) Option {
	return func(ag *Agent) {
		anonymous := llm.NewToolbox("", tools...)
		ag.toolboxes = append(ag.toolboxes, anonymous)
	}
}

// WithSkill returns an Option that appends s to the agent's skills at
// construction time (AddSkill does the same on an existing agent).
func WithSkill(s Skill) Option {
	attach := func(ag *Agent) {
		ag.skills = append(ag.skills, s)
	}
	return attach
}

// WithMaxSteps returns an Option that sets the agent's step budget to n,
// replacing the DefaultMaxSteps value New installs.
func WithMaxSteps(n int) Option {
	setBudget := func(ag *Agent) {
		ag.maxSteps = n
	}
	return setBudget
}

// WithRequestOptions returns an Option that appends default request
// options (temperature, max tokens, ...) to the agent. Run passes them to
// the model on every step, ahead of any per-run request options.
func WithRequestOptions(opts ...llm.Option) Option {
	extend := func(ag *Agent) {
		ag.reqOpts = append(ag.reqOpts, opts...)
	}
	return extend
}

// WithStepObserver returns an Option that registers fn as an agent-level
// step callback (intermediate-step streaming for UIs, tracing, usage
// recording). Observers are invoked after every completed step,
// synchronously in Run's goroutine, before any per-run OnStep callbacks.
func WithStepObserver(fn func(Step)) Option {
	register := func(ag *Agent) {
		ag.observers = append(ag.observers, fn)
	}
	return register
}

// New builds an agent around model and the base system prompt. The step
// budget starts at DefaultMaxSteps; opts are then applied in the order
// given, so a later option overrides an earlier one for the same setting.
func New(model llm.Model, system string, opts ...Option) *Agent {
	ag := new(Agent)
	ag.model = model
	ag.system = system
	ag.maxSteps = DefaultMaxSteps

	for i := range opts {
		opts[i](ag)
	}
	return ag
}

// AddSkill appends s to the agent's skills after construction. It takes
// effect on the next Run.
func (a *Agent) AddSkill(s Skill) {
	a.skills = append(a.skills, s)
}

// AddToolbox appends b to the agent's toolboxes after construction. It
// takes effect on the next Run.
func (a *Agent) AddToolbox(b *llm.Toolbox) {
	a.toolboxes = append(a.toolboxes, b)
}

// RunOption configures one Run. Options are applied in the order they are
// passed and affect only that call.
type RunOption func(*runConfig)

// runConfig collects the per-run settings produced by RunOptions.
type runConfig struct {
	history []llm.Message // prior conversation placed ahead of the input
	reqOpts []llm.Option  // request options added after the agent-level ones
	onStep  []func(Step)  // per-run step callbacks, run after agent observers
}

// WithHistory returns a RunOption that seeds the run with prior
// conversation messages (e.g. a previous Result.Messages). A later
// WithHistory in the same Run call replaces an earlier one. Run copies the
// messages before appending to them.
func WithHistory(msgs []llm.Message) RunOption {
	seed := func(cfg *runConfig) {
		cfg.history = msgs
	}
	return seed
}

// WithRunRequestOptions returns a RunOption that appends request options
// for this run only; they are passed to the model after the agent-level
// defaults.
func WithRunRequestOptions(opts ...llm.Option) RunOption {
	extend := func(cfg *runConfig) {
		cfg.reqOpts = append(cfg.reqOpts, opts...)
	}
	return extend
}

// OnStep returns a RunOption that registers fn as a step callback for
// this run only. Per-run callbacks fire after the agent-level observers.
func OnStep(fn func(Step)) RunOption {
	register := func(cfg *runConfig) {
		cfg.onStep = append(cfg.onStep, fn)
	}
	return register
}

// systemPrompt composes the agent's system prompt with each skill's
// instructions, in attachment order. Sections are separated by a blank
// line. An empty base prompt and skills whose instructions are blank after
// trimming contribute nothing, so the result may be the empty string.
//
// Nil skills are skipped: a nil Skill can reach the agent through
// WithSkill or AddSkill, and calling Instructions on it would panic.
func (a *Agent) systemPrompt() string {
	parts := make([]string, 0, 1+len(a.skills))
	if a.system != "" {
		parts = append(parts, a.system)
	}
	for _, s := range a.skills {
		if s == nil {
			continue
		}
		if ins := strings.TrimSpace(s.Instructions()); ins != "" {
			parts = append(parts, ins)
		}
	}
	return strings.Join(parts, "\n\n")
}

// mergedTools flattens toolboxes plus skill toolboxes into one toolset.
// Duplicate tool names are a configuration error and fail loudly — a
// silently shadowed tool is far harder to debug than this error.
//
// It returns a by-name lookup map and the same tools as a slice in
// attachment order (agent toolboxes first, then skill toolboxes). On a
// duplicate both are nil and the error names the tool and the origin that
// introduced the second definition.
//
// Nil toolboxes and nil skills are skipped, the same way a skill with a
// nil toolbox is: they contribute no tools, and dereferencing them here
// would risk a panic.
func (a *Agent) mergedTools() (map[string]llm.Tool, []llm.Tool, error) {
	byName := make(map[string]llm.Tool)
	var ordered []llm.Tool

	// add registers tools under origin, rejecting any name already taken.
	add := func(origin string, tools []llm.Tool) error {
		for _, t := range tools {
			if _, exists := byName[t.Name]; exists {
				return fmt.Errorf("agent: duplicate tool %q (from %s)", t.Name, origin)
			}
			byName[t.Name] = t
			ordered = append(ordered, t)
		}
		return nil
	}

	for _, b := range a.toolboxes {
		if b == nil {
			continue
		}
		if err := add("toolbox "+b.Name(), b.Tools()); err != nil {
			return nil, nil, err
		}
	}
	for _, s := range a.skills {
		if s == nil {
			continue
		}
		if b := s.Tools(); b != nil {
			if err := add("skill "+s.Name(), b.Tools()); err != nil {
				return nil, nil, err
			}
		}
	}
	return byName, ordered, nil
}

// Run executes the loop: send the conversation; while the model requests
// tools, execute them and feed results back; stop on a final answer,
// MaxSteps, or an unrecoverable model error.
//
// input is appended to any history as a user message; an empty input is
// accepted when WithHistory supplies at least one message. A non-positive
// step budget falls back to DefaultMaxSteps, so the model is always
// consulted at least once instead of failing with ErrMaxSteps up front.
//
// A tool-configuration error or an empty conversation returns a nil
// *Result. Every later failure — a model error, a nil model response,
// context cancellation between tool calls, or ErrMaxSteps — returns a
// non-nil *Result whose Messages holds the transcript so far.
func (a *Agent) Run(ctx context.Context, input string, opts ...RunOption) (*Result, error) {
	var rc runConfig
	for _, opt := range opts {
		opt(&rc)
	}

	byName, ordered, err := a.mergedTools()
	if err != nil {
		return nil, err
	}

	// Copy the history so later appends never write into the caller's
	// backing array.
	msgs := append([]llm.Message(nil), rc.history...)
	if input != "" {
		msgs = append(msgs, llm.UserText(input))
	}
	if len(msgs) == 0 {
		return nil, errors.New("agent: empty input and no history")
	}

	maxSteps := a.maxSteps
	if maxSteps <= 0 {
		// A zero or negative budget would skip the loop entirely.
		maxSteps = DefaultMaxSteps
	}

	result := &Result{}
	// Agent-level options come first, then the per-run ones.
	reqOpts := append(append([]llm.Option(nil), a.reqOpts...), rc.reqOpts...)
	system := a.systemPrompt()

	for stepIdx := range maxSteps {
		req := llm.Request{System: system, Messages: msgs, Tools: ordered}
		resp, err := a.model.Generate(ctx, req, reqOpts...)
		if err != nil {
			result.Messages = msgs
			return result, fmt.Errorf("agent: step %d: %w", stepIdx, err)
		}
		if resp == nil {
			// A model that reports success without a response would
			// otherwise crash the loop on the first dereference.
			result.Messages = msgs
			return result, fmt.Errorf("agent: step %d: model returned a nil response", stepIdx)
		}

		msgs = append(msgs, resp.Message())
		result.Usage.Add(resp.Usage)
		step := Step{Index: stepIdx, Response: resp}

		if len(resp.ToolCalls) == 0 {
			// Final answer.
			result.Output = resp.Text()
			result.Steps = append(result.Steps, step)
			result.Messages = msgs
			a.notify(rc, step)
			return result, nil
		}

		results := make([]llm.ToolResult, 0, len(resp.ToolCalls))
		for _, call := range resp.ToolCalls {
			// Stop between tool calls once the context is done. The
			// transcript returned here ends with the assistant's tool-call
			// turn and no tool results, and the interrupted step is not
			// recorded in Steps.
			if err := ctx.Err(); err != nil {
				result.Messages = msgs
				return result, err
			}
			tool, ok := byName[call.Name]
			if !ok {
				// Report the unknown tool back to the model as an error
				// result rather than aborting the run.
				results = append(results, llm.ToolResult{
					ID: call.ID, Name: call.Name,
					Content: fmt.Sprintf("unknown tool %q", call.Name),
					IsError: true,
				})
				continue
			}
			// ExecuteTool recovers panics and converts errors to IsError
			// results — the loop always continues.
			results = append(results, llm.ExecuteTool(ctx, tool, call))
		}

		step.Results = results
		result.Steps = append(result.Steps, step)
		a.notify(rc, step)
		msgs = append(msgs, llm.ToolResultsMessage(results...))
	}

	result.Messages = msgs
	return result, fmt.Errorf("%w (max %d)", ErrMaxSteps, maxSteps)
}

// notify delivers step to every agent-level observer and then to every
// per-run callback. Each callback runs behind its own recover, so a
// panicking callback is swallowed and the remaining ones still run (the
// loop must never die for a UI callback).
func (a *Agent) notify(rc runConfig, step Step) {
	groups := [...][]func(Step){a.observers, rc.onStep}
	for _, callbacks := range groups {
		for _, cb := range callbacks {
			func() {
				defer func() { _ = recover() }()
				cb(step)
			}()
		}
	}
}