feat: foundations — canonical types, Parse grammar, env DSNs, health, chains

Phase 1 of the majordomo build:
- llm/ canonical contract (messages, parts, tools, capabilities, streaming,
  Model/Provider, error classification)
- health/ clock-injected tracker (threshold bench, exponential capped
  cooldown, reset-on-success)
- root Registry + Parse (verbatim model ids, inline recursive alias
  expansion with cycle detection, chain dedup), LLM_* env-DSN providers
  (go-llm parity: lazy fallback + eager LoadEnv), health-aware chain
  executor behind the Model interface
- provider/fake scriptable test provider; hermetic test suite incl. the
  trailing-thinking chain and foreman:// env loading
- ADRs 0001-0008, CLAUDE.md, README (honest matrix), CI workflow,
  docs/phase-1-design.md

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
2026-06-10 12:35:23 +02:00
parent 3025044817
commit dcd004289f
42 changed files with 3863 additions and 0 deletions
+45
View File
@@ -0,0 +1,45 @@
package llm
import "slices"
// Capabilities describes the features a model (or provider) offers and the
// limits it enforces. A provider supplies defaults and a single model may
// override them. The media pipeline normalizes image inputs against these
// values before a request is serialized.
//
// How the zero value of each field reads:
//   - MaxImagesPerReq == 0: image input is NOT supported.
//   - MaxImageBytes, MaxImageDimension, ContextWindow == 0: no declared
//     limit (not "a limit of zero").
//   - AllowedImageMIME empty: every MIME type is acceptable; this only
//     matters when images are supported in the first place.
type Capabilities struct {
	// MaxImageBytes is the byte-size ceiling for one image payload.
	MaxImageBytes int
	// MaxImageDimension is the pixel ceiling for an image's width or height.
	MaxImageDimension int
	// AllowedImageMIME enumerates the accepted image content types, such as
	// "image/jpeg" or "image/png".
	AllowedImageMIME []string
	// MaxImagesPerReq is how many images a single request may include;
	// 0 means images are unsupported.
	MaxImagesPerReq int
	// SupportsTools declares tool (function) calling support.
	SupportsTools bool
	// SupportsStructured declares structured-output support.
	SupportsStructured bool
	// SupportsStreaming declares streaming support.
	SupportsStreaming bool
	// ContextWindow is the context size in tokens, when it is known.
	ContextWindow int
}

// SupportsImages reports whether image input is accepted, i.e. whether at
// least one image per request is allowed.
func (c Capabilities) SupportsImages() bool {
	return c.MaxImagesPerReq >= 1
}

// MIMEAllowed reports whether mime is an acceptable image content type. An
// empty allow-list accepts everything; otherwise mime must match one entry
// exactly.
func (c Capabilities) MIMEAllowed(mime string) bool {
	allowed := c.AllowedImageMIME
	return len(allowed) == 0 || slices.Contains(allowed, mime)
}
+39
View File
@@ -0,0 +1,39 @@
package llm
// Part is a single piece of message content: text, an image, or a media kind
// added later. The unexported marker method seals the interface, so only
// types in this package can implement it and providers can switch
// exhaustively over the content kinds.
//
// Why: providers serialize content into their own wire formats and need a
// finite, known vocabulary; with an open interface, unknown content would be
// dropped silently.
type Part interface {
	isPart()
}

// TextPart holds plain text content.
type TextPart struct {
	Text string
}

// isPart marks TextPart as a Part.
func (TextPart) isPart() {}

// ImagePart holds an image as raw bytes together with its MIME type.
//
// Why there is no URL form: the media pipeline has to inspect, downscale and
// re-encode every image to fit the target's capabilities, which needs the
// bytes. A caller holding a URL fetches it first; majordomo does not
// download remote content on a caller's behalf.
type ImagePart struct {
	// MIME is the image content type, for example "image/png" or
	// "image/jpeg".
	MIME string
	// Data is the raw image bytes without any encoding applied; providers
	// base64-encode as needed.
	Data []byte
}

// isPart marks ImagePart as a Part.
func (ImagePart) isPart() {}

// Text wraps s in a text content part.
func Text(s string) Part {
	return TextPart{Text: s}
}

// Image wraps raw image bytes and their MIME type in an image content part.
// The data slice is stored as given, not copied.
func Image(mime string, data []byte) Part {
	img := ImagePart{MIME: mime}
	img.Data = data
	return img
}
+119
View File
@@ -0,0 +1,119 @@
package llm
import (
"context"
"errors"
"fmt"
"net"
"net/http"
"strings"
"syscall"
)
// ErrorClass buckets errors for retry/failover decisions. Classify maps an
// error to one of the classes below.
//
// The zero value is ClassTransient.
type ErrorClass int

const (
	// ClassTransient errors may succeed on retry or on another target:
	// rate limits, server errors, timeouts, connection failures.
	ClassTransient ErrorClass = iota
	// ClassPermanent errors will not improve on retry of the same request:
	// malformed requests, auth failures, model-not-found.
	ClassPermanent
)
// ErrModelNotFound marks a permanent "this target does not know this model"
// condition. Chains advance past it without penalizing the target's health.
var ErrModelNotFound = errors.New("model not found")

// APIError is a structured provider error carrying enough context to
// classify it and to debug it.
//
// An APIError with Status 404 always matches ErrModelNotFound under
// errors.Is, whether or not it also wraps an underlying cause in Err.
type APIError struct {
	// Provider identifies the backend that failed.
	Provider string
	// Model identifies the model id that failed.
	Model string
	// Status is the HTTP status code, or 0 when the failure was not an HTTP
	// response (connection error, decode error, ...).
	Status int
	// Code is the provider-specific error code, when one was supplied.
	Code string
	// Message is the provider's human-readable error message.
	Message string
	// Err is the wrapped underlying cause, if any.
	Err error
}

// Error renders "provider/model" followed by whichever of the status, code,
// message and cause are set, in that order.
func (e *APIError) Error() string {
	var b strings.Builder
	fmt.Fprintf(&b, "%s/%s", e.Provider, e.Model)
	if e.Status != 0 {
		fmt.Fprintf(&b, ": HTTP %d", e.Status)
	}
	if e.Code != "" {
		fmt.Fprintf(&b, " [%s]", e.Code)
	}
	if e.Message != "" {
		fmt.Fprintf(&b, ": %s", e.Message)
	}
	if e.Err != nil {
		fmt.Fprintf(&b, ": %v", e.Err)
	}
	return b.String()
}

// Unwrap returns the wrapped cause when there is one. Without a cause, a 404
// unwraps to ErrModelNotFound and every other error unwraps to nil.
func (e *APIError) Unwrap() error {
	if e.Err != nil {
		return e.Err
	}
	if e.Status == http.StatusNotFound {
		return ErrModelNotFound
	}
	return nil
}

// Is makes a 404 match ErrModelNotFound under errors.Is even when Err is
// set. Unwrap can expose only one error, so without this method a 404 that
// carries a cause would stop matching the sentinel.
func (e *APIError) Is(target error) bool {
	return target == ErrModelNotFound && e.Status == http.StatusNotFound
}
// Classify sorts err into the transient or permanent bucket.
//
// Default policy (overridable via health configuration), checked in this
// order:
//   - nil is reported as transient.
//   - context.Canceled is permanent: the caller gave up, and retrying would
//     defy their intent. context.DeadlineExceeded is transient.
//   - ErrModelNotFound is permanent.
//   - Refused or reset connections and any net.Error (timeouts, DNS
//     failures) are transient "high demand" conditions.
//   - For an APIError with an HTTP status: 408, 429 and every 5xx are
//     transient; any other status of 400 or above (400/401/403/404/405/422,
//     ...) is permanent.
//   - Everything else is transient: when in doubt, failing over to the next
//     target in a chain can only help availability.
func Classify(err error) ErrorClass {
	switch {
	case err == nil:
		return ClassTransient
	case errors.Is(err, context.Canceled):
		return ClassPermanent
	case errors.Is(err, context.DeadlineExceeded):
		return ClassTransient
	case errors.Is(err, ErrModelNotFound):
		return ClassPermanent
	case errors.Is(err, syscall.ECONNREFUSED), errors.Is(err, syscall.ECONNRESET):
		return ClassTransient
	}

	if _, isNet := errors.AsType[net.Error](err); isNet {
		return ClassTransient
	}

	failure, isAPI := errors.AsType[*APIError](err)
	if !isAPI || failure.Status == 0 {
		// Not an HTTP-level failure we can read a status from.
		return ClassTransient
	}

	status := failure.Status
	retryable := status == http.StatusRequestTimeout || // 408
		status == http.StatusTooManyRequests || // 429
		status >= 500
	if !retryable && status >= 400 {
		return ClassPermanent
	}
	return ClassTransient
}
+84
View File
@@ -0,0 +1,84 @@
package llm
import (
"context"
"errors"
"fmt"
"net"
"strings"
"syscall"
"testing"
)
// fakeNetErr is a minimal net.Error used to exercise Classify's network-error
// branch. Only the Timeout result is configurable.
type fakeNetErr struct {
	timeout bool
}

// Compile-time proof that fakeNetErr satisfies net.Error.
var _ net.Error = fakeNetErr{}

// Error returns a fixed message.
func (f fakeNetErr) Error() string {
	return "fake net error"
}

// Timeout reports the configured timeout flag.
func (f fakeNetErr) Timeout() bool {
	return f.timeout
}

// Temporary always reports true; it exists because net.Error requires it.
func (f fakeNetErr) Temporary() bool {
	return true
}
// TestClassify pins the default classification policy for cancellation,
// network failures, HTTP statuses and unrecognized errors. Each table row
// runs as its own subtest so a failure names the case and a single case can
// be selected with -run.
func TestClassify(t *testing.T) {
	cases := []struct {
		name string
		err  error
		want ErrorClass
	}{
		{"canceled is permanent", context.Canceled, ClassPermanent},
		{"deadline is transient", context.DeadlineExceeded, ClassTransient},
		{"wrapped canceled", fmt.Errorf("call: %w", context.Canceled), ClassPermanent},
		{"model not found", fmt.Errorf("x: %w", ErrModelNotFound), ClassPermanent},
		{"conn refused", syscall.ECONNREFUSED, ClassTransient},
		{"conn reset", fmt.Errorf("write: %w", syscall.ECONNRESET), ClassTransient},
		{"net timeout", fakeNetErr{timeout: true}, ClassTransient},
		{"http 429", &APIError{Status: 429}, ClassTransient},
		{"http 408", &APIError{Status: 408}, ClassTransient},
		{"http 500", &APIError{Status: 500}, ClassTransient},
		{"http 503", &APIError{Status: 503}, ClassTransient},
		{"http 529", &APIError{Status: 529}, ClassTransient},
		{"http 400", &APIError{Status: 400}, ClassPermanent},
		{"http 401", &APIError{Status: 401}, ClassPermanent},
		{"http 403", &APIError{Status: 403}, ClassPermanent},
		{"http 404", &APIError{Status: 404}, ClassPermanent},
		{"http 422", &APIError{Status: 422}, ClassPermanent},
		{"wrapped api error", fmt.Errorf("call: %w", &APIError{Status: 503}), ClassTransient},
		{"unknown defaults transient", errors.New("mystery"), ClassTransient},
		{"non-http api error defaults transient", &APIError{Message: "decode failed"}, ClassTransient},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if got := Classify(tc.err); got != tc.want {
				t.Errorf("Classify = %v, want %v", got, tc.want)
			}
		})
	}
}
// TestAPIError404UnwrapsToModelNotFound checks that a bare 404 APIError
// matches ErrModelNotFound and that a 500 does not.
func TestAPIError404UnwrapsToModelNotFound(t *testing.T) {
	notFound := &APIError{Provider: "openai", Model: "nope", Status: 404}
	serverErr := &APIError{Status: 500}

	if matched := errors.Is(notFound, ErrModelNotFound); !matched {
		t.Error("404 APIError should match ErrModelNotFound")
	}
	if matched := errors.Is(serverErr, ErrModelNotFound); matched {
		t.Error("500 APIError must not match ErrModelNotFound")
	}
}
// TestAPIErrorMessage checks that the rendered error string mentions the
// target, the HTTP status, the provider code and the provider message.
func TestAPIErrorMessage(t *testing.T) {
	apiErr := &APIError{
		Provider: "anthropic", Model: "opus-4.8",
		Status: 429, Code: "rate_limit_error", Message: "slow down",
	}
	rendered := apiErr.Error()
	fragments := []string{"anthropic/opus-4.8", "429", "rate_limit_error", "slow down"}
	for i := range fragments {
		if strings.Contains(rendered, fragments[i]) {
			continue
		}
		t.Errorf("error string %q missing %q", rendered, fragments[i])
	}
}
// TestAPIErrorUnwrapsCause checks that errors.Is sees through an APIError to
// the error stored in its Err field.
func TestAPIErrorUnwrapsCause(t *testing.T) {
	root := errors.New("boom")
	wrapped := &APIError{Provider: "p", Model: "m", Err: root}
	if errors.Is(wrapped, root) {
		return
	}
	t.Error("APIError should unwrap to its cause")
}
+12
View File
@@ -0,0 +1,12 @@
// Package llm defines majordomo's canonical, provider-agnostic contract:
// messages and content parts, requests and responses, tools, capabilities,
// streaming, and the Model/Provider interfaces every backend implements.
//
// Why: provider implementations (openai, anthropic, google, ollama, foreman,
// and any client-defined backend) must share one vocabulary without importing
// each other or the root package. This package is the dependency leaf — it
// imports nothing else in the module, and everything else imports it.
//
// Most consumers never import this package directly: the root majordomo
// package re-exports every type here via type aliases.
package llm
+71
View File
@@ -0,0 +1,71 @@
package llm
import "strings"
// Role identifies the author of a message.
type Role string

// The roles a Message can carry.
const (
	// RoleSystem marks a system message.
	RoleSystem Role = "system"
	// RoleUser marks a message written by the user.
	RoleUser Role = "user"
	// RoleAssistant marks a message produced by the model; it is the only
	// role for which Message.ToolCalls is meaningful.
	RoleAssistant Role = "assistant"
	// RoleTool marks a message carrying tool results; it is the only role
	// for which Message.ToolResults is meaningful.
	RoleTool Role = "tool"
)
// Message is a single conversation turn.
//
// Which fields are filled in depends on the role: system and user messages
// use Parts, assistant messages use Parts and/or ToolCalls, and tool messages
// use ToolResults. Providers convert this canonical shape to and from their
// wire formats.
type Message struct {
	// Role says who authored the message.
	Role Role
	// Parts holds the message content (text, images, ...).
	Parts []Part
	// ToolCalls lists the tool invocations the assistant asked for; it only
	// has meaning when Role == RoleAssistant.
	ToolCalls []ToolCall
	// ToolResults holds the outcomes of earlier ToolCalls; it only has
	// meaning when Role == RoleTool.
	ToolResults []ToolResult
}

// Text joins the text of every TextPart in the message, in order, with no
// separator. Non-text parts are skipped.
func (m Message) Text() string {
	var sb strings.Builder
	for _, part := range m.Parts {
		text, isText := part.(TextPart)
		if !isText {
			continue
		}
		sb.WriteString(text.Text)
	}
	return sb.String()
}
// SystemText builds a system message whose only content is the text s.
func SystemText(s string) Message {
	msg := Message{Role: RoleSystem}
	msg.Parts = []Part{Text(s)}
	return msg
}
// UserText builds a user message whose only content is the text s.
func UserText(s string) Message {
	msg := Message{Role: RoleUser}
	msg.Parts = []Part{Text(s)}
	return msg
}
// UserParts builds a user message out of any mix of content parts, for
// example text together with images. The parts slice is stored as given.
func UserParts(parts ...Part) Message {
	var msg Message
	msg.Role = RoleUser
	msg.Parts = parts
	return msg
}
// AssistantText builds an assistant message whose only content is the text s.
func AssistantText(s string) Message {
	msg := Message{Role: RoleAssistant}
	msg.Parts = []Part{Text(s)}
	return msg
}
// ToolResultsMessage builds a tool message that carries the given results.
// The results slice is stored as given.
func ToolResultsMessage(results ...ToolResult) Message {
	var msg Message
	msg.Role = RoleTool
	msg.ToolResults = results
	return msg
}
+62
View File
@@ -0,0 +1,62 @@
package llm
import "testing"
// TestMessageText checks that Message.Text joins text parts in order and
// skips the image part between them.
func TestMessageText(t *testing.T) {
	const want = "a b"
	msg := UserParts(Text("a "), Image("image/png", []byte{1}), Text("b"))
	got := msg.Text()
	if got != want {
		t.Errorf("Text = %q, want %q", got, want)
	}
}
// TestConstructors checks that each message constructor sets the expected
// role and stores the supplied content.
func TestConstructors(t *testing.T) {
	sys := SystemText("s")
	if !(sys.Role == RoleSystem && sys.Text() == "s") {
		t.Errorf("SystemText = %+v", sys)
	}

	usr := UserText("u")
	if !(usr.Role == RoleUser && usr.Text() == "u") {
		t.Errorf("UserText = %+v", usr)
	}

	asst := AssistantText("a")
	if !(asst.Role == RoleAssistant && asst.Text() == "a") {
		t.Errorf("AssistantText = %+v", asst)
	}

	toolMsg := ToolResultsMessage(ToolResult{ID: "1", Content: "ok"})
	if !(toolMsg.Role == RoleTool && len(toolMsg.ToolResults) == 1) {
		t.Errorf("ToolResultsMessage = %+v", toolMsg)
	}
}
// TestResponseTextAndMessage checks that Response.Text joins the text parts
// and that Response.Message yields an assistant message with the same
// content and tool calls.
func TestResponseTextAndMessage(t *testing.T) {
	resp := &Response{
		Parts:     []Part{Text("hello "), Text("world")},
		ToolCalls: []ToolCall{{ID: "1", Name: "t"}},
	}

	text := resp.Text()
	if text != "hello world" {
		t.Errorf("Text = %q", text)
	}

	msg := resp.Message()
	if !(msg.Role == RoleAssistant && msg.Text() == "hello world" && len(msg.ToolCalls) == 1) {
		t.Errorf("Message = %+v", msg)
	}
}
// TestUsageAccumulation checks that Usage.Add sums both counters in place
// and that Total reflects the sum.
func TestUsageAccumulation(t *testing.T) {
	total := Usage{InputTokens: 10, OutputTokens: 5}
	step := Usage{InputTokens: 1, OutputTokens: 2}
	total.Add(step)

	if !(total.InputTokens == 11 && total.OutputTokens == 7 && total.Total() == 18) {
		t.Errorf("usage = %+v", total)
	}
}
// TestCapabilitiesHelpers checks the zero-value semantics of Capabilities
// and that a non-empty MIME allow-list is enforced.
func TestCapabilitiesHelpers(t *testing.T) {
	var zero Capabilities
	if zero.SupportsImages() {
		t.Error("zero MaxImagesPerReq must mean images unsupported")
	}
	if allowed := zero.MIMEAllowed("image/png"); !allowed {
		t.Error("empty AllowedImageMIME must allow any type")
	}

	jpegOnly := Capabilities{MaxImagesPerReq: 2, AllowedImageMIME: []string{"image/jpeg"}}
	if !(jpegOnly.SupportsImages() && !jpegOnly.MIMEAllowed("image/png") && jpegOnly.MIMEAllowed("image/jpeg")) {
		t.Errorf("capabilities helpers misbehave: %+v", jpegOnly)
	}
}
+58
View File
@@ -0,0 +1,58 @@
package llm
import "context"
// Model is the canonical generation interface. A Model may be a single
// provider-bound target or a failover chain — the two are interchangeable
// and callers never branch on which they got.
//
// Both Generate and Stream accept per-call Options, which mutate the
// Request before it is sent.
type Model interface {
	// Generate performs one request/response round trip.
	Generate(ctx context.Context, req Request, opts ...Option) (*Response, error)
	// Stream performs one request with incremental delivery. The returned
	// Stream yields events through Next and is released with Close.
	Stream(ctx context.Context, req Request, opts ...Option) (Stream, error)
	// Capabilities reports what this model supports. For chains this is the
	// head element's capabilities (the preferred target); per-attempt media
	// normalization always uses the actual target's capabilities.
	Capabilities() Capabilities
}
// ModelOption configures a Model at construction time (Provider.Model). It
// does so by mutating the ModelConfig it is handed; ApplyModelOptions folds
// a list of options into one config.
type ModelOption func(*ModelConfig)
// ModelConfig carries per-model construction settings shared by all
// providers. The zero value means "no overrides".
type ModelConfig struct {
	// Capabilities, when non-nil, overrides the provider's default
	// capabilities for this model. WithCapabilities sets it.
	Capabilities *Capabilities
}
// ApplyModelOptions folds options into a config. It starts from the zero
// ModelConfig and applies each option in order, so a later option wins over
// an earlier one that sets the same field.
//
// Nil options are skipped rather than called, so an optional, conditionally
// built option that ended up nil does not panic model construction.
func ApplyModelOptions(opts []ModelOption) ModelConfig {
	var cfg ModelConfig
	for _, opt := range opts {
		if opt == nil {
			continue
		}
		opt(&cfg)
	}
	return cfg
}
// WithCapabilities replaces the provider's default capabilities for a single
// model, e.g. for a vision-capable tag on an otherwise text-only provider.
// The option stores a pointer to its own copy of caps.
func WithCapabilities(caps Capabilities) ModelOption {
	override := &caps
	return func(cfg *ModelConfig) {
		cfg.Capabilities = override
	}
}
// Provider mints Models bound to one backend. Implementations translate the
// canonical Request/Response to and from their wire format and enforce their
// declared Capabilities.
type Provider interface {
	// Name is the registry identifier used in "provider/model" specs.
	Name() string
	// Model returns a Model bound to the given id. The id is whatever the
	// backend accepts — majordomo passes it through verbatim and never
	// validates it against a catalog. opts carry per-model construction
	// settings such as a capabilities override (see ModelConfig).
	Model(id string, opts ...ModelOption) (Model, error)
}
+98
View File
@@ -0,0 +1,98 @@
package llm
import "encoding/json"
// Request is the canonical generation request. Providers translate it to
// their wire format and enforce their declared Capabilities against it.
//
// Request is passed by value, but its slice fields still share backing
// arrays between copies.
type Request struct {
	// System is the system prompt. Providers map it to their native system
	// mechanism (top-level system field, system message, SystemInstruction).
	// Any RoleSystem messages in Messages are folded in after this field.
	System string
	// Messages is the conversation so far, oldest first.
	Messages []Message
	// Tools the model may call.
	Tools []Tool
	// ToolChoice constrains tool use: "" or "auto" lets the model decide,
	// "none" forbids tool calls, "required" forces some tool call, and any
	// other value names the one tool the model must call.
	ToolChoice string
	// Schema, when non-nil, is a JSON Schema object the response must
	// conform to (structured output). Providers map it to their native
	// mechanism.
	Schema json.RawMessage
	// SchemaName names the schema for providers that require one.
	SchemaName string
	// Temperature is the sampling temperature. It is a pointer so that
	// "unset" (nil, provider default) differs from an explicit zero.
	Temperature *float64
	// TopP is the nucleus-sampling top-p. Like Temperature, nil means
	// "unset" (provider default).
	TopP *float64
	// MaxTokens caps the response length; 0 means provider default.
	MaxTokens int
	// StopSequences halt generation when emitted.
	StopSequences []string
}
// Option mutates a Request before it is sent. An Option itself edits the
// Request it is handed in place; the copy is made by Request.Apply, which
// takes the request by value. Options passed to Generate or Stream are
// applied to such a copy, so a Request value can be safely reused across
// calls.
type Option func(*Request)
// WithSystem replaces the request's system prompt with s.
func WithSystem(s string) Option {
	return func(r *Request) {
		r.System = s
	}
}
// WithTools appends tools to the request, after any tools already present.
//
// The append always lands in a fresh backing array. Options run against a
// by-value copy of a Request, and such a copy still shares its Tools backing
// array with the original; appending into spare capacity would let two
// copies of one base request overwrite each other's tools.
func WithTools(tools ...Tool) Option {
	return func(r *Request) {
		// The full slice expression caps capacity at length, which forces
		// append to reallocate instead of writing into shared storage.
		r.Tools = append(r.Tools[:len(r.Tools):len(r.Tools)], tools...)
	}
}
// WithToolbox appends every tool in the toolbox to the request, in the
// order the toolbox returns them. A nil toolbox contributes no tools.
//
// The append always lands in a fresh backing array. Options run against a
// by-value copy of a Request, and such a copy still shares its Tools backing
// array with the original; appending into spare capacity would let two
// copies of one base request overwrite each other's tools.
func WithToolbox(b *Toolbox) Option {
	return func(r *Request) {
		if b == nil {
			// Nothing to add; avoids calling a method on a nil toolbox.
			return
		}
		// The full slice expression caps capacity at length, which forces
		// append to reallocate instead of writing into shared storage.
		r.Tools = append(r.Tools[:len(r.Tools):len(r.Tools)], b.Tools()...)
	}
}
// WithToolChoice selects the tool-choice policy: "auto", "none", "required",
// or the name of the one tool the model must call.
func WithToolChoice(choice string) Option {
	setChoice := func(r *Request) {
		r.ToolChoice = choice
	}
	return setChoice
}
// WithSchema asks for structured output that conforms to the given JSON
// Schema. The name may be left empty; providers that need a schema name then
// fall back to "response".
func WithSchema(schema json.RawMessage, name string) Option {
	return func(r *Request) {
		r.Schema, r.SchemaName = schema, name
	}
}
// WithTemperature sets an explicit sampling temperature. The request ends up
// pointing at the option's own copy of t.
func WithTemperature(t float64) Option {
	temperature := &t
	return func(r *Request) {
		r.Temperature = temperature
	}
}
// WithTopP sets an explicit nucleus-sampling top-p. The request ends up
// pointing at the option's own copy of p.
func WithTopP(p float64) Option {
	topP := &p
	return func(r *Request) {
		r.TopP = topP
	}
}
// WithMaxTokens limits the response length to n tokens.
func WithMaxTokens(n int) Option {
	return func(r *Request) {
		r.MaxTokens = n
	}
}
// WithStopSequences replaces the request's stop sequences with stops. The
// slice is stored as given.
func WithStopSequences(stops ...string) Option {
	setStops := func(r *Request) {
		r.StopSequences = stops
	}
	return setStops
}
// Apply returns a copy of the request with all options applied, in order.
// Providers and wrappers call this once at the top of Generate/Stream.
//
// The receiver is a by-value copy, but a plain struct copy still shares the
// backing arrays of its slice fields with the caller's Request. Before any
// option runs, Messages, Tools and StopSequences are therefore capped at
// their current length, so an option that appends gets a fresh array instead
// of writing into spare capacity that the caller, or another Apply on the
// same base request, can also see. Options that overwrite existing elements
// in place still affect the shared storage.
//
// Nil options are skipped.
func (r Request) Apply(opts ...Option) Request {
	r.Messages = r.Messages[:len(r.Messages):len(r.Messages)]
	r.Tools = r.Tools[:len(r.Tools):len(r.Tools)]
	r.StopSequences = r.StopSequences[:len(r.StopSequences):len(r.StopSequences)]

	for _, opt := range opts {
		if opt == nil {
			continue
		}
		opt(&r)
	}
	return r
}
+73
View File
@@ -0,0 +1,73 @@
package llm
import "strings"
// FinishReason explains why generation stopped. Providers map their native
// stop reasons onto the constants below.
type FinishReason string

const (
	// FinishStop: the model completed its answer (or hit a stop sequence).
	FinishStop FinishReason = "stop"
	// FinishLength: the MaxTokens (or context) limit was hit.
	FinishLength FinishReason = "length"
	// FinishToolCalls: the model stopped to request tool invocations.
	FinishToolCalls FinishReason = "tool_calls"
	// FinishContentFilter: the provider suppressed content.
	FinishContentFilter FinishReason = "content_filter"
	// FinishOther: any provider-specific reason not mapped above.
	FinishOther FinishReason = "other"
)
// Usage is the token accounting for a single request.
type Usage struct {
	// InputTokens counts the tokens sent to the model.
	InputTokens int
	// OutputTokens counts the tokens the model produced.
	OutputTokens int
}

// Total is the sum of input and output tokens.
func (u Usage) Total() int {
	sum := u.InputTokens
	sum += u.OutputTokens
	return sum
}

// Add folds another usage record into u in place; agents use it to sum the
// usage of their steps.
func (u *Usage) Add(o Usage) {
	u.InputTokens, u.OutputTokens = u.InputTokens+o.InputTokens, u.OutputTokens+o.OutputTokens
}
// Response is the canonical result of one generation.
type Response struct {
	// Parts holds the response content: text, plus other media for models
	// with multimodal output.
	Parts []Part
	// ToolCalls lists the tool invocations the model asked for, if any.
	ToolCalls []ToolCall
	// FinishReason says why generation stopped.
	FinishReason FinishReason
	// Usage is the token accounting for the request.
	Usage Usage
	// Model names the resolved target that produced the response, written
	// as "provider/model-id". For a failover chain it is the element that
	// actually served the request.
	Model string
	// Raw is the provider-native response object, kept as an escape hatch
	// for provider-specific fields. It may be nil and is never needed for
	// normal use.
	Raw any
}

// Text joins the text of every TextPart in the response, in order, with no
// separator. Non-text parts are skipped.
func (r *Response) Text() string {
	var sb strings.Builder
	for _, part := range r.Parts {
		text, isText := part.(TextPart)
		if !isText {
			continue
		}
		sb.WriteString(text.Text)
	}
	return sb.String()
}

// Message turns the response into an assistant message that can be appended
// to a conversation history. The message reuses the response's Parts and
// ToolCalls slices rather than copying them.
func (r *Response) Message() Message {
	var msg Message
	msg.Role = RoleAssistant
	msg.Parts = r.Parts
	msg.ToolCalls = r.ToolCalls
	return msg
}
+28
View File
@@ -0,0 +1,28 @@
package llm
// StreamEvent is one increment of a streaming response.
//
// Exactly one field group is meaningful per event: a text delta, a completed
// tool call, or the final response. Tool-call arguments are buffered by the
// provider until complete — consumers never see partial JSON.
type StreamEvent struct {
	// TextDelta is a fragment of assistant text.
	TextDelta string
	// ToolCall, when non-nil, is a fully-assembled tool call.
	ToolCall *ToolCall
	// Response, when non-nil, is the final accumulated response (content,
	// tool calls, finish reason, usage). It is always the last event.
	Response *Response
}
// Stream delivers a response incrementally as a sequence of StreamEvents.
//
// Next returns io.EOF after the final event (the one carrying Response).
// Close releases the underlying connection and is safe to call at any time,
// including after io.EOF or concurrently with Next returning.
type Stream interface {
	// Next returns the next event, or io.EOF once the final event has been
	// delivered.
	Next() (StreamEvent, error)
	// Close releases the underlying connection.
	Close() error
}
+165
View File
@@ -0,0 +1,165 @@
package llm
import (
"context"
"encoding/json"
"fmt"
)
// Tool is a callable capability exposed to a model: a name, a description,
// JSON-Schema parameters, and a Go handler. Providers map this one canonical
// shape onto their native function-calling formats.
type Tool struct {
	// Name identifies the tool. A Toolbox rejects tools whose name is empty
	// or already registered.
	Name string
	// Description tells the model what the tool does.
	Description string
	// Parameters is a JSON Schema object describing the tool's arguments.
	// nil means the tool takes no arguments.
	Parameters json.RawMessage
	// Handler executes the tool. args is the raw JSON arguments object the
	// model supplied; ExecuteTool passes "{}" when the call carries none.
	// The returned value is JSON-encoded into the ToolResult, except that
	// string and json.RawMessage values are passed through verbatim. A nil
	// Handler makes ExecuteTool report an error result.
	Handler func(ctx context.Context, args json.RawMessage) (any, error)
}
// ToolCall is a model's request to invoke a tool.
type ToolCall struct {
	// ID is the provider-assigned call id; majordomo synthesizes one for
	// providers that do not supply ids. ToolResult.ID must echo it.
	ID string
	// Name is the name of the tool to invoke; Toolbox.Execute looks the
	// tool up by it.
	Name string
	// Arguments is the raw JSON arguments object. When it is empty,
	// ExecuteTool hands the handler "{}" instead.
	Arguments json.RawMessage
}
// ToolResult is the outcome of executing a ToolCall, sent back to the model.
type ToolResult struct {
	// ID matches the originating ToolCall.ID.
	ID string
	// Name is the name of the tool that was called, copied from the
	// ToolCall.
	Name string
	// Content is the result serialized as text (JSON for structured values).
	Content string
	// IsError marks the result as a failure; the content then describes the
	// error so the model can react (retry, apologize, try another tool).
	IsError bool
}
// Toolbox is a named, ordered set of tools.
//
// Why: agents compose their available tools from several sources (multiple
// toolboxes plus skills); a small named container with duplicate detection
// keeps that merge explicit and debuggable.
//
// The zero value is an empty, unnamed toolbox that is ready to use.
type Toolbox struct {
	// name labels the toolbox in error messages.
	name string
	// order records tool names in insertion order.
	order []string
	// tools indexes the registered tools by name. It is allocated on the
	// first Add, so a zero-value Toolbox works.
	tools map[string]Tool
}

// NewToolbox creates a toolbox with the given name and initial tools.
// An empty or duplicate tool name panics: toolboxes are assembled at
// startup, and a silently shadowed tool is a programming error worth failing
// loudly on. Use Add to get an error instead.
func NewToolbox(name string, tools ...Tool) *Toolbox {
	b := &Toolbox{name: name, tools: make(map[string]Tool, len(tools))}
	for _, t := range tools {
		if err := b.Add(t); err != nil {
			panic(err)
		}
	}
	return b
}

// Name returns the toolbox name.
func (b *Toolbox) Name() string { return b.name }

// Add registers a tool, rejecting empty or duplicate names with an error.
// On error the toolbox is left unchanged.
func (b *Toolbox) Add(t Tool) error {
	if t.Name == "" {
		return fmt.Errorf("toolbox %q: tool with empty name", b.name)
	}
	if _, exists := b.tools[t.Name]; exists {
		return fmt.Errorf("toolbox %q: duplicate tool %q", b.name, t.Name)
	}
	if b.tools == nil {
		// A Toolbox not built by NewToolbox has a nil map; writing to it
		// would panic.
		b.tools = make(map[string]Tool)
	}
	b.tools[t.Name] = t
	b.order = append(b.order, t.Name)
	return nil
}

// Tools returns the tools in insertion order. The returned slice is freshly
// allocated on every call.
func (b *Toolbox) Tools() []Tool {
	out := make([]Tool, 0, len(b.order))
	for _, name := range b.order {
		out = append(out, b.tools[name])
	}
	return out
}

// Get returns the named tool and whether it is registered.
func (b *Toolbox) Get(name string) (Tool, bool) {
	t, ok := b.tools[name]
	return t, ok
}

// Execute runs the named tool for the given call and packages the outcome as
// a ToolResult. It never panics and never returns an error: an unknown tool
// name yields an IsError result here, and handler errors and panics become
// IsError results in ExecuteTool, so an agent loop can always continue.
func (b *Toolbox) Execute(ctx context.Context, call ToolCall) ToolResult {
	t, ok := b.tools[call.Name]
	if !ok {
		return ToolResult{
			ID: call.ID, Name: call.Name,
			Content: fmt.Sprintf("unknown tool %q", call.Name),
			IsError: true,
		}
	}
	return ExecuteTool(ctx, t, call)
}
// ExecuteTool invokes one tool for the given call and always hands back a
// ToolResult whose ID and Name echo the call. A missing handler, a handler
// error, a handler panic and an unencodable return value are all reported as
// IsError results rather than propagated.
//
// The handler receives the call's arguments, or "{}" when there are none.
// Its return value becomes the result content: nil is written as "null",
// string and json.RawMessage values are used verbatim, and anything else is
// JSON-encoded.
func ExecuteTool(ctx context.Context, t Tool, call ToolCall) (res ToolResult) {
	res.ID, res.Name = call.ID, call.Name

	// fail turns the named result into an error result carrying msg.
	fail := func(msg string) {
		res.Content, res.IsError = msg, true
	}

	// Convert a panic anywhere below into an error result. The result is
	// named so that this deferred function can still change it.
	defer func() {
		if p := recover(); p != nil {
			fail(fmt.Sprintf("tool %q panicked: %v", call.Name, p))
		}
	}()

	if t.Handler == nil {
		fail(fmt.Sprintf("tool %q has no handler", call.Name))
		return res
	}

	payload := call.Arguments
	if len(payload) == 0 {
		// Handlers can always decode an object, even for argument-less calls.
		payload = json.RawMessage("{}")
	}

	value, handlerErr := t.Handler(ctx, payload)
	if handlerErr != nil {
		fail(handlerErr.Error())
		return res
	}

	if value == nil {
		res.Content = "null"
		return res
	}
	if text, isString := value.(string); isString {
		res.Content = text
		return res
	}
	if raw, isRaw := value.(json.RawMessage); isRaw {
		res.Content = string(raw)
		return res
	}

	encoded, encErr := json.Marshal(value)
	if encErr != nil {
		fail(fmt.Sprintf("tool %q returned unencodable value: %v", call.Name, encErr))
		return res
	}
	res.Content = string(encoded)
	return res
}
+98
View File
@@ -0,0 +1,98 @@
package llm
import (
"context"
"encoding/json"
"errors"
"strings"
"testing"
)
// TestToolboxAddRejectsDuplicatesAndEmptyNames checks that Add accepts a
// fresh name and returns an error for a repeated name and for an empty one.
func TestToolboxAddRejectsDuplicatesAndEmptyNames(t *testing.T) {
	box := NewToolbox("box")

	firstErr := box.Add(Tool{Name: "a"})
	if firstErr != nil {
		t.Fatalf("Add: %v", firstErr)
	}

	dupErr := box.Add(Tool{Name: "a"})
	if dupErr == nil {
		t.Error("duplicate name should error")
	}

	emptyErr := box.Add(Tool{})
	if emptyErr == nil {
		t.Error("empty name should error")
	}
}
// TestToolboxOrderPreserved checks that Tools returns the tools in the order
// they were added, not sorted.
func TestToolboxOrderPreserved(t *testing.T) {
	box := NewToolbox("box", Tool{Name: "z"}, Tool{Name: "a"}, Tool{Name: "m"})

	var names []string
	listed := box.Tools()
	for i := range listed {
		names = append(names, listed[i].Name)
	}

	const want = "z,a,m"
	got := strings.Join(names, ",")
	if got != want {
		t.Errorf("order = %s, want %s", got, want)
	}
}
// TestExecuteUnknownTool checks that executing a call for an unregistered
// tool yields an error result that names the tool.
func TestExecuteUnknownTool(t *testing.T) {
	box := NewToolbox("box")
	call := ToolCall{ID: "1", Name: "missing"}

	result := box.Execute(context.Background(), call)
	if !(result.IsError && strings.Contains(result.Content, "missing")) {
		t.Errorf("result = %+v, want unknown-tool error", result)
	}
}
// TestExecuteHandlerOutcomes pins how ExecuteTool turns each kind of handler
// outcome into a ToolResult. Each table row runs as its own subtest so a
// failure names the case and a single case can be selected with -run.
func TestExecuteHandlerOutcomes(t *testing.T) {
	// echo builds a tool whose handler returns the given value and error.
	echo := func(v any, err error) Tool {
		return Tool{Name: "t", Handler: func(context.Context, json.RawMessage) (any, error) { return v, err }}
	}
	cases := []struct {
		name        string
		tool        Tool
		wantContent string
		wantErr     bool
	}{
		{"string passthrough", echo("plain", nil), "plain", false},
		{"struct json-encoded", echo(struct {
			N int `json:"n"`
		}{4}, nil), `{"n":4}`, false},
		{"raw message passthrough", echo(json.RawMessage(`{"k":1}`), nil), `{"k":1}`, false},
		{"nil becomes null", echo(nil, nil), "null", false},
		{"handler error", echo(nil, errors.New("boom")), "boom", true},
		{"unencodable value", echo(func() {}, nil), "unencodable", true},
		{"no handler", Tool{Name: "t"}, "no handler", true},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			res := ExecuteTool(context.Background(), tc.tool, ToolCall{ID: "c1", Name: "t"})
			if res.IsError != tc.wantErr {
				t.Errorf("IsError = %v, want %v (%+v)", res.IsError, tc.wantErr, res)
			}
			if !strings.Contains(res.Content, tc.wantContent) {
				t.Errorf("content = %q, want it to contain %q", res.Content, tc.wantContent)
			}
			if res.ID != "c1" {
				t.Errorf("result ID = %q, want c1", res.ID)
			}
		})
	}
}
// TestExecuteRecoversPanic checks that a panicking handler is turned into an
// error result that carries the panic value.
func TestExecuteRecoversPanic(t *testing.T) {
	explode := func(context.Context, json.RawMessage) (any, error) {
		panic("kaboom")
	}
	panicky := Tool{Name: "t", Handler: explode}

	result := ExecuteTool(context.Background(), panicky, ToolCall{ID: "1", Name: "t"})
	if !(result.IsError && strings.Contains(result.Content, "kaboom")) {
		t.Errorf("result = %+v, want recovered panic error", result)
	}
}
// TestExecuteEmptyArgsBecomeEmptyObject checks that a call without arguments
// reaches the handler as the empty JSON object.
func TestExecuteEmptyArgsBecomeEmptyObject(t *testing.T) {
	var seen json.RawMessage
	capture := func(_ context.Context, args json.RawMessage) (any, error) {
		seen = args
		return "ok", nil
	}
	recorder := Tool{Name: "t", Handler: capture}

	ExecuteTool(context.Background(), recorder, ToolCall{ID: "1", Name: "t"})
	if string(seen) != "{}" {
		t.Errorf("args = %q, want {}", seen)
	}
}