feat: agent run loop, Generate[T], reflect-derived schemas
Phase 5: - agent/: model + system prompt + toolboxes composition; bounded tool-dispatch loop (default 10 steps); panic-proof tool execution; unknown-tool and duplicate-name handling; history continuation; step observers; partial results on ErrMaxSteps/errors (ADR-0012) - llm.SchemaFor[T]: strict-compatible JSON schemas from Go types (nullable pointers, description/enum tags, recursion rejected) - majordomo.Generate[T]: typed structured output with fence-stripping decode and model-naming errors - README agents/structured-output sections + matrix synced Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
+294
@@ -0,0 +1,294 @@
|
||||
// Package agent runs LLM-backed agents: a Model, a system prompt, and one
|
||||
// or more toolboxes, executed as a tool-dispatch loop until the model
|
||||
// produces a final answer (or MaxSteps intervenes).
|
||||
//
|
||||
// The loop never panics: tool handlers run through the panic-recovering
|
||||
// executor in llm, unknown tools come back as error results the model can
|
||||
// react to, and step observers receive every intermediate step. Skills
|
||||
// (package skill) attach additively: their instructions extend the system
|
||||
// prompt and their tools extend the merged toolset.
|
||||
package agent
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
||||
)
|
||||
|
||||
// DefaultMaxSteps bounds the tool-dispatch loop when WithMaxSteps is not
|
||||
// given.
|
||||
const DefaultMaxSteps = 10 // one step = one model call plus the tool calls it requests
|
||||
|
||||
// ErrMaxSteps reports that the loop hit its step budget before the model
|
||||
// produced a final answer. Run returns it alongside a non-nil *Result
|
||||
// carrying the transcript so far.
|
||||
var ErrMaxSteps = errors.New("agent: max steps reached without a final answer") // Run wraps it with the limit; match with errors.Is
|
||||
|
||||
// Skill is the contract skills satisfy (defined here so agent does not
|
||||
// depend on the skill package; package skill provides implementations).
|
||||
// Instructions are appended to the agent's system prompt; Tools (optional,
|
||||
// may be nil) extend the agent's toolset.
|
||||
type Skill interface {
|
||||
// Name labels the skill; the agent uses it to identify the origin of a
// duplicate tool in configuration errors.
Name() string
|
||||
// Instructions is text added to the system prompt; surrounding
// whitespace is trimmed and an empty result is skipped.
Instructions() string
|
||||
// Tools returns the skill's toolbox, or nil when it contributes no tools.
Tools() *llm.Toolbox
|
||||
}
|
||||
|
||||
// Step is one completed iteration of the loop: the model's response and,
|
||||
// when it requested tools, the results that were fed back.
|
||||
type Step struct {
|
||||
// Index is the 0-based step number within the current Run.
|
||||
Index int
|
||||
// Response is the model output for this step.
|
||||
Response *llm.Response
|
||||
// Results are the tool outcomes, one per requested call in request order, including error results for unknown tools (empty on the final step).
|
||||
Results []llm.ToolResult
|
||||
}
|
||||
|
||||
// Result is the outcome of a Run.
|
||||
type Result struct {
|
||||
// Output is the final assistant text; it stays empty when Run returns an error.
|
||||
Output string
|
||||
// Messages is the full transcript: prior history, the input, and every
|
||||
// assistant/tool turn. Feed it back via WithHistory to continue the
|
||||
// conversation. It is also populated on partial (error) results.
|
||||
Messages []llm.Message
|
||||
// Steps records each completed loop iteration; a step cut short by an error is not recorded.
|
||||
Steps []Step
|
||||
// Usage is the token total across all successful model calls of the run.
|
||||
Usage llm.Usage
|
||||
}
|
||||
|
||||
// Agent is a reusable model + system prompt + toolboxes (+ skills)
|
||||
// composition. Configure at construction; AddSkill/AddToolbox may extend
|
||||
// it later. Agents are safe to share across goroutines only after
|
||||
// configuration is complete.
|
||||
type Agent struct {
|
||||
model llm.Model // generates the response for every step
|
||||
system string // base system prompt; skill instructions are appended to it
|
||||
toolboxes []*llm.Toolbox // merged in attachment order, ahead of skill toolboxes
|
||||
skills []Skill // contribute instructions and, optionally, tools
|
||||
maxSteps int // step budget per Run; New seeds it with DefaultMaxSteps
|
||||
reqOpts []llm.Option // request options sent with every model call, before per-run options
|
||||
observers []func(Step) // called after each completed step, before per-run callbacks
|
||||
}
|
||||
|
||||
// Option configures an Agent at construction.
|
||||
type Option func(*Agent) // New applies options in the order given
|
||||
|
||||
// WithToolbox attaches a toolbox.
|
||||
func WithToolbox(b *llm.Toolbox) Option {
|
||||
return func(a *Agent) { a.toolboxes = append(a.toolboxes, b) }
|
||||
}
|
||||
|
||||
// WithTools attaches loose tools (wrapped in an anonymous toolbox).
|
||||
func WithTools(tools ...llm.Tool) Option {
|
||||
return func(a *Agent) { a.toolboxes = append(a.toolboxes, llm.NewToolbox("", tools...)) }
|
||||
}
|
||||
|
||||
// WithSkill attaches a skill at construction (see also AddSkill).
|
||||
func WithSkill(s Skill) Option {
|
||||
return func(a *Agent) { a.skills = append(a.skills, s) }
|
||||
}
|
||||
|
||||
// WithMaxSteps bounds the tool-dispatch loop.
|
||||
func WithMaxSteps(n int) Option {
|
||||
return func(a *Agent) { a.maxSteps = n }
|
||||
}
|
||||
|
||||
// WithRequestOptions sets default request options (temperature, max
|
||||
// tokens, ...) applied to every step of every run.
|
||||
func WithRequestOptions(opts ...llm.Option) Option {
|
||||
return func(a *Agent) { a.reqOpts = append(a.reqOpts, opts...) }
|
||||
}
|
||||
|
||||
// WithStepObserver registers a callback invoked after every completed
|
||||
// step (intermediate-step streaming for UIs, tracing, usage recording).
|
||||
// Observers run synchronously in Run's goroutine.
|
||||
func WithStepObserver(fn func(Step)) Option {
|
||||
return func(a *Agent) { a.observers = append(a.observers, fn) }
|
||||
}
|
||||
|
||||
// New creates an agent from a model and system prompt.
|
||||
func New(model llm.Model, system string, opts ...Option) *Agent {
|
||||
a := &Agent{model: model, system: system, maxSteps: DefaultMaxSteps}
|
||||
for _, opt := range opts {
|
||||
opt(a)
|
||||
}
|
||||
return a
|
||||
}
|
||||
|
||||
// AddSkill attaches a skill to the agent on demand.
|
||||
func (a *Agent) AddSkill(s Skill) { a.skills = append(a.skills, s) }
|
||||
|
||||
// AddToolbox attaches a toolbox to the agent on demand.
|
||||
func (a *Agent) AddToolbox(b *llm.Toolbox) { a.toolboxes = append(a.toolboxes, b) }
|
||||
|
||||
// RunOption configures one Run.
|
||||
type RunOption func(*runConfig) // Run applies them in order before the first model call
|
||||
|
||||
type runConfig struct {
|
||||
history []llm.Message // prior conversation; Run copies it before appending
|
||||
reqOpts []llm.Option // per-run request options, passed after the agent-level ones
|
||||
onStep []func(Step) // per-run step callbacks, called after the agent-level observers
|
||||
}
|
||||
|
||||
// WithHistory seeds the run with prior conversation messages (e.g. a
|
||||
// previous Result.Messages).
|
||||
func WithHistory(msgs []llm.Message) RunOption {
|
||||
return func(rc *runConfig) { rc.history = msgs }
|
||||
}
|
||||
|
||||
// WithRunRequestOptions adds request options for this run only.
|
||||
func WithRunRequestOptions(opts ...llm.Option) RunOption {
|
||||
return func(rc *runConfig) { rc.reqOpts = append(rc.reqOpts, opts...) }
|
||||
}
|
||||
|
||||
// OnStep registers a per-run step callback (in addition to agent-level
|
||||
// observers).
|
||||
func OnStep(fn func(Step)) RunOption {
|
||||
return func(rc *runConfig) { rc.onStep = append(rc.onStep, fn) }
|
||||
}
|
||||
|
||||
// systemPrompt composes the agent's system prompt with each skill's
|
||||
// instructions, in attachment order.
|
||||
func (a *Agent) systemPrompt() string {
|
||||
parts := make([]string, 0, 1+len(a.skills))
|
||||
if a.system != "" {
|
||||
parts = append(parts, a.system)
|
||||
}
|
||||
for _, s := range a.skills {
|
||||
if ins := strings.TrimSpace(s.Instructions()); ins != "" {
|
||||
parts = append(parts, ins)
|
||||
}
|
||||
}
|
||||
return strings.Join(parts, "\n\n")
|
||||
}
|
||||
|
||||
// mergedTools flattens toolboxes plus skill toolboxes into one toolset.
|
||||
// Duplicate tool names are a configuration error and fail loudly — a
|
||||
// silently shadowed tool is far harder to debug than this error.
|
||||
func (a *Agent) mergedTools() (map[string]llm.Tool, []llm.Tool, error) {
|
||||
byName := make(map[string]llm.Tool)
|
||||
var ordered []llm.Tool
|
||||
|
||||
add := func(origin string, tools []llm.Tool) error {
|
||||
for _, t := range tools {
|
||||
if _, exists := byName[t.Name]; exists {
|
||||
return fmt.Errorf("agent: duplicate tool %q (from %s)", t.Name, origin)
|
||||
}
|
||||
byName[t.Name] = t
|
||||
ordered = append(ordered, t)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, b := range a.toolboxes {
|
||||
if err := add("toolbox "+b.Name(), b.Tools()); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
}
|
||||
for _, s := range a.skills {
|
||||
if b := s.Tools(); b != nil {
|
||||
if err := add("skill "+s.Name(), b.Tools()); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
return byName, ordered, nil
|
||||
}
|
||||
|
||||
// Run executes the loop: send the conversation; while the model requests
|
||||
// tools, execute them and feed results back; stop on a final answer,
|
||||
// MaxSteps, or an unrecoverable model error.
|
||||
func (a *Agent) Run(ctx context.Context, input string, opts ...RunOption) (*Result, error) {
|
||||
var rc runConfig
|
||||
for _, opt := range opts {
|
||||
opt(&rc)
|
||||
}
|
||||
|
||||
byName, ordered, err := a.mergedTools()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
msgs := append([]llm.Message(nil), rc.history...)
|
||||
if input != "" {
|
||||
msgs = append(msgs, llm.UserText(input))
|
||||
}
|
||||
if len(msgs) == 0 {
|
||||
return nil, errors.New("agent: empty input and no history")
|
||||
}
|
||||
|
||||
result := &Result{}
|
||||
reqOpts := append(append([]llm.Option(nil), a.reqOpts...), rc.reqOpts...)
|
||||
system := a.systemPrompt()
|
||||
|
||||
for stepIdx := range a.maxSteps {
|
||||
req := llm.Request{System: system, Messages: msgs, Tools: ordered}
|
||||
resp, err := a.model.Generate(ctx, req, reqOpts...)
|
||||
if err != nil {
|
||||
result.Messages = msgs
|
||||
return result, fmt.Errorf("agent: step %d: %w", stepIdx, err)
|
||||
}
|
||||
|
||||
msgs = append(msgs, resp.Message())
|
||||
result.Usage.Add(resp.Usage)
|
||||
step := Step{Index: stepIdx, Response: resp}
|
||||
|
||||
if len(resp.ToolCalls) == 0 {
|
||||
// Final answer.
|
||||
result.Output = resp.Text()
|
||||
result.Steps = append(result.Steps, step)
|
||||
result.Messages = msgs
|
||||
a.notify(rc, step)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
results := make([]llm.ToolResult, 0, len(resp.ToolCalls))
|
||||
for _, call := range resp.ToolCalls {
|
||||
if err := ctx.Err(); err != nil {
|
||||
result.Messages = msgs
|
||||
return result, err
|
||||
}
|
||||
tool, ok := byName[call.Name]
|
||||
if !ok {
|
||||
results = append(results, llm.ToolResult{
|
||||
ID: call.ID, Name: call.Name,
|
||||
Content: fmt.Sprintf("unknown tool %q", call.Name),
|
||||
IsError: true,
|
||||
})
|
||||
continue
|
||||
}
|
||||
// ExecuteTool recovers panics and converts errors to IsError
|
||||
// results — the loop always continues.
|
||||
results = append(results, llm.ExecuteTool(ctx, tool, call))
|
||||
}
|
||||
|
||||
step.Results = results
|
||||
result.Steps = append(result.Steps, step)
|
||||
a.notify(rc, step)
|
||||
msgs = append(msgs, llm.ToolResultsMessage(results...))
|
||||
}
|
||||
|
||||
result.Messages = msgs
|
||||
return result, fmt.Errorf("%w (max %d)", ErrMaxSteps, a.maxSteps)
|
||||
}
|
||||
|
||||
// notify fans a step out to agent observers and run callbacks; observer
|
||||
// panics are swallowed (the loop must never die for a UI callback).
|
||||
func (a *Agent) notify(rc runConfig, step Step) {
|
||||
emit := func(fn func(Step)) {
|
||||
defer func() { _ = recover() }()
|
||||
fn(step)
|
||||
}
|
||||
for _, fn := range a.observers {
|
||||
emit(fn)
|
||||
}
|
||||
for _, fn := range rc.onStep {
|
||||
emit(fn)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,339 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
||||
"gitea.stevedudenhoeffer.com/steve/majordomo/provider/fake"
|
||||
)
|
||||
|
||||
func newModel(t *testing.T, fp *fake.Provider) llm.Model {
|
||||
t.Helper()
|
||||
m, err := fp.Model("test-model")
|
||||
if err != nil {
|
||||
t.Fatalf("Model: %v", err)
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
// toolCallReply scripts an assistant response requesting one tool call.
|
||||
func toolCallReply(id, name, args string) fake.Step {
|
||||
return fake.ReplyWith(llm.Response{
|
||||
ToolCalls: []llm.ToolCall{{ID: id, Name: name, Arguments: json.RawMessage(args)}},
|
||||
FinishReason: llm.FinishToolCalls,
|
||||
Usage: llm.Usage{InputTokens: 10, OutputTokens: 5},
|
||||
})
|
||||
}
|
||||
|
||||
func adderToolbox(t *testing.T) *llm.Toolbox {
|
||||
t.Helper()
|
||||
return llm.NewToolbox("math", llm.Tool{
|
||||
Name: "add",
|
||||
Description: "Add two integers",
|
||||
Parameters: json.RawMessage(`{"type":"object","properties":{"a":{"type":"integer"},"b":{"type":"integer"}},"required":["a","b"]}`),
|
||||
Handler: func(_ context.Context, args json.RawMessage) (any, error) {
|
||||
var p struct{ A, B int }
|
||||
if err := json.Unmarshal(args, &p); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return map[string]int{"sum": p.A + p.B}, nil
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
func TestRunWithoutTools(t *testing.T) {
|
||||
fp := fake.New("fp")
|
||||
fp.Enqueue("test-model", fake.Reply("direct answer"))
|
||||
|
||||
a := New(newModel(t, fp), "You are terse.")
|
||||
res, err := a.Run(context.Background(), "question?")
|
||||
if err != nil {
|
||||
t.Fatalf("Run: %v", err)
|
||||
}
|
||||
if res.Output != "direct answer" {
|
||||
t.Errorf("output = %q", res.Output)
|
||||
}
|
||||
if len(res.Steps) != 1 {
|
||||
t.Errorf("steps = %d", len(res.Steps))
|
||||
}
|
||||
// The system prompt reached the model.
|
||||
calls := fp.Calls()
|
||||
if calls[0].Request.System != "You are terse." {
|
||||
t.Errorf("system = %q", calls[0].Request.System)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunToolLoop(t *testing.T) {
|
||||
fp := fake.New("fp")
|
||||
fp.Enqueue("test-model",
|
||||
toolCallReply("c1", "add", `{"a":2,"b":3}`),
|
||||
fake.Reply("the sum is 5"),
|
||||
)
|
||||
|
||||
a := New(newModel(t, fp), "do math", WithToolbox(adderToolbox(t)))
|
||||
res, err := a.Run(context.Background(), "2+3?")
|
||||
if err != nil {
|
||||
t.Fatalf("Run: %v", err)
|
||||
}
|
||||
if res.Output != "the sum is 5" {
|
||||
t.Errorf("output = %q", res.Output)
|
||||
}
|
||||
if len(res.Steps) != 2 {
|
||||
t.Fatalf("steps = %d, want 2", len(res.Steps))
|
||||
}
|
||||
if res.Usage.InputTokens != 11 || res.Usage.OutputTokens != 6 {
|
||||
t.Errorf("usage = %+v (must sum both steps)", res.Usage)
|
||||
}
|
||||
|
||||
// The tool executed and its result went back to the model.
|
||||
step1 := res.Steps[0]
|
||||
if len(step1.Results) != 1 || step1.Results[0].IsError {
|
||||
t.Fatalf("step 1 results = %+v", step1.Results)
|
||||
}
|
||||
if !strings.Contains(step1.Results[0].Content, `"sum":5`) {
|
||||
t.Errorf("tool result = %q", step1.Results[0].Content)
|
||||
}
|
||||
|
||||
// Second model call must carry the tool transcript: user, assistant
|
||||
// (with the call), tool results.
|
||||
second := fp.Calls()[1].Request
|
||||
if len(second.Messages) != 3 {
|
||||
t.Fatalf("second request messages = %d, want 3", len(second.Messages))
|
||||
}
|
||||
if second.Messages[1].Role != llm.RoleAssistant || len(second.Messages[1].ToolCalls) != 1 {
|
||||
t.Errorf("assistant turn = %+v", second.Messages[1])
|
||||
}
|
||||
toolMsg := second.Messages[2]
|
||||
if toolMsg.Role != llm.RoleTool || toolMsg.ToolResults[0].ID != "c1" {
|
||||
t.Errorf("tool turn = %+v", toolMsg)
|
||||
}
|
||||
|
||||
// The tools were offered on every step.
|
||||
for i, c := range fp.Calls() {
|
||||
if len(c.Request.Tools) != 1 || c.Request.Tools[0].Name != "add" {
|
||||
t.Errorf("call %d tools = %+v", i, c.Request.Tools)
|
||||
}
|
||||
}
|
||||
|
||||
// Result.Messages is the full transcript.
|
||||
if len(res.Messages) != 4 {
|
||||
t.Errorf("transcript = %d messages, want 4", len(res.Messages))
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunUnknownToolContinues(t *testing.T) {
|
||||
fp := fake.New("fp")
|
||||
fp.Enqueue("test-model",
|
||||
toolCallReply("c1", "nonexistent", `{}`),
|
||||
fake.Reply("recovered"),
|
||||
)
|
||||
|
||||
a := New(newModel(t, fp), "", WithToolbox(adderToolbox(t)))
|
||||
res, err := a.Run(context.Background(), "go")
|
||||
if err != nil {
|
||||
t.Fatalf("Run: %v", err)
|
||||
}
|
||||
if res.Output != "recovered" {
|
||||
t.Errorf("output = %q", res.Output)
|
||||
}
|
||||
r := res.Steps[0].Results[0]
|
||||
if !r.IsError || !strings.Contains(r.Content, "nonexistent") {
|
||||
t.Errorf("unknown-tool result = %+v", r)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunPanickingToolContinues(t *testing.T) {
|
||||
fp := fake.New("fp")
|
||||
fp.Enqueue("test-model",
|
||||
toolCallReply("c1", "bomb", `{}`),
|
||||
fake.Reply("survived"),
|
||||
)
|
||||
bomb := llm.NewToolbox("danger", llm.Tool{
|
||||
Name: "bomb",
|
||||
Handler: func(context.Context, json.RawMessage) (any, error) { panic("boom") },
|
||||
})
|
||||
|
||||
a := New(newModel(t, fp), "", WithToolbox(bomb))
|
||||
res, err := a.Run(context.Background(), "go")
|
||||
if err != nil {
|
||||
t.Fatalf("Run: %v", err)
|
||||
}
|
||||
if res.Output != "survived" {
|
||||
t.Errorf("output = %q", res.Output)
|
||||
}
|
||||
r := res.Steps[0].Results[0]
|
||||
if !r.IsError || !strings.Contains(r.Content, "boom") {
|
||||
t.Errorf("panic result = %+v", r)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunMaxSteps(t *testing.T) {
|
||||
fp := fake.New("fp", fake.WithDefault(func(string, llm.Request) fake.Step {
|
||||
return toolCallReply("c", "add", `{"a":1,"b":1}`)
|
||||
}))
|
||||
|
||||
a := New(newModel(t, fp), "", WithToolbox(adderToolbox(t)), WithMaxSteps(3))
|
||||
res, err := a.Run(context.Background(), "loop forever")
|
||||
if !errors.Is(err, ErrMaxSteps) {
|
||||
t.Fatalf("err = %v, want ErrMaxSteps", err)
|
||||
}
|
||||
if res == nil || len(res.Steps) != 3 {
|
||||
t.Fatalf("result = %+v, want 3 recorded steps", res)
|
||||
}
|
||||
if len(res.Messages) == 0 {
|
||||
t.Error("transcript must be preserved on ErrMaxSteps")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDuplicateToolNamesFailLoudly(t *testing.T) {
|
||||
fp := fake.New("fp")
|
||||
box1 := llm.NewToolbox("a", llm.Tool{Name: "dup"})
|
||||
box2 := llm.NewToolbox("b", llm.Tool{Name: "dup"})
|
||||
|
||||
a := New(newModel(t, fp), "", WithToolbox(box1), WithToolbox(box2))
|
||||
_, err := a.Run(context.Background(), "go")
|
||||
if err == nil || !strings.Contains(err.Error(), `duplicate tool "dup"`) {
|
||||
t.Errorf("err = %v, want duplicate-tool error", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunWithHistory(t *testing.T) {
|
||||
fp := fake.New("fp")
|
||||
fp.Enqueue("test-model", fake.Reply("continued"))
|
||||
|
||||
history := []llm.Message{
|
||||
llm.UserText("first question"),
|
||||
llm.AssistantText("first answer"),
|
||||
}
|
||||
a := New(newModel(t, fp), "")
|
||||
res, err := a.Run(context.Background(), "follow-up", WithHistory(history))
|
||||
if err != nil {
|
||||
t.Fatalf("Run: %v", err)
|
||||
}
|
||||
got := fp.Calls()[0].Request.Messages
|
||||
if len(got) != 3 || got[0].Text() != "first question" || got[2].Text() != "follow-up" {
|
||||
t.Errorf("messages = %+v", got)
|
||||
}
|
||||
if len(res.Messages) != 4 {
|
||||
t.Errorf("transcript = %d, want history+input+answer", len(res.Messages))
|
||||
}
|
||||
}
|
||||
|
||||
func TestStepObservers(t *testing.T) {
|
||||
fp := fake.New("fp")
|
||||
fp.Enqueue("test-model",
|
||||
toolCallReply("c1", "add", `{"a":1,"b":2}`),
|
||||
fake.Reply("3"),
|
||||
)
|
||||
|
||||
var agentSteps, runSteps []int
|
||||
a := New(newModel(t, fp), "",
|
||||
WithToolbox(adderToolbox(t)),
|
||||
WithStepObserver(func(s Step) { agentSteps = append(agentSteps, s.Index) }),
|
||||
)
|
||||
_, err := a.Run(context.Background(), "1+2?",
|
||||
OnStep(func(s Step) { runSteps = append(runSteps, s.Index) }),
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("Run: %v", err)
|
||||
}
|
||||
if fmt.Sprint(agentSteps) != "[0 1]" || fmt.Sprint(runSteps) != "[0 1]" {
|
||||
t.Errorf("agentSteps=%v runSteps=%v", agentSteps, runSteps)
|
||||
}
|
||||
}
|
||||
|
||||
func TestObserverPanicIsSwallowed(t *testing.T) {
|
||||
fp := fake.New("fp")
|
||||
fp.Enqueue("test-model", fake.Reply("fine"))
|
||||
|
||||
a := New(newModel(t, fp), "", WithStepObserver(func(Step) { panic("ui bug") }))
|
||||
res, err := a.Run(context.Background(), "go")
|
||||
if err != nil || res.Output != "fine" {
|
||||
t.Errorf("res=%+v err=%v — observer panic must not kill the run", res, err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSkillComposition(t *testing.T) {
|
||||
fp := fake.New("fp")
|
||||
fp.Enqueue("test-model", fake.Reply("ok"))
|
||||
|
||||
sk := stubSkill{
|
||||
name: "haiku",
|
||||
instructions: "Answer in haiku.",
|
||||
tools: llm.NewToolbox("haiku-tools", llm.Tool{
|
||||
Name: "count_syllables",
|
||||
Handler: func(context.Context, json.RawMessage) (any, error) { return 5, nil },
|
||||
}),
|
||||
}
|
||||
a := New(newModel(t, fp), "Base prompt.", WithSkill(sk))
|
||||
if _, err := a.Run(context.Background(), "hello"); err != nil {
|
||||
t.Fatalf("Run: %v", err)
|
||||
}
|
||||
|
||||
req := fp.Calls()[0].Request
|
||||
if req.System != "Base prompt.\n\nAnswer in haiku." {
|
||||
t.Errorf("system = %q", req.System)
|
||||
}
|
||||
if len(req.Tools) != 1 || req.Tools[0].Name != "count_syllables" {
|
||||
t.Errorf("tools = %+v", req.Tools)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAddSkillOnDemand(t *testing.T) {
|
||||
fp := fake.New("fp")
|
||||
fp.Enqueue("test-model", fake.Reply("a"), fake.Reply("b"))
|
||||
|
||||
a := New(newModel(t, fp), "Base.")
|
||||
if _, err := a.Run(context.Background(), "one"); err != nil {
|
||||
t.Fatalf("Run: %v", err)
|
||||
}
|
||||
a.AddSkill(stubSkill{name: "later", instructions: "Later skill."})
|
||||
if _, err := a.Run(context.Background(), "two"); err != nil {
|
||||
t.Fatalf("Run: %v", err)
|
||||
}
|
||||
|
||||
calls := fp.Calls()
|
||||
if calls[0].Request.System != "Base." {
|
||||
t.Errorf("first system = %q", calls[0].Request.System)
|
||||
}
|
||||
if calls[1].Request.System != "Base.\n\nLater skill." {
|
||||
t.Errorf("second system = %q", calls[1].Request.System)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunErrorPreservesTranscript(t *testing.T) {
|
||||
fp := fake.New("fp")
|
||||
fp.Enqueue("test-model", fake.Fail(errors.New("model down")))
|
||||
|
||||
a := New(newModel(t, fp), "")
|
||||
res, err := a.Run(context.Background(), "go")
|
||||
if err == nil || !strings.Contains(err.Error(), "model down") {
|
||||
t.Fatalf("err = %v", err)
|
||||
}
|
||||
if res == nil || len(res.Messages) != 1 {
|
||||
t.Errorf("result = %+v, want transcript with the input", res)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEmptyInputNeedsHistory(t *testing.T) {
|
||||
fp := fake.New("fp")
|
||||
a := New(newModel(t, fp), "")
|
||||
if _, err := a.Run(context.Background(), ""); err == nil {
|
||||
t.Error("empty input with no history must error")
|
||||
}
|
||||
}
|
||||
|
||||
type stubSkill struct {
|
||||
name string
|
||||
instructions string
|
||||
tools *llm.Toolbox
|
||||
}
|
||||
|
||||
func (s stubSkill) Name() string { return s.name }
|
||||
func (s stubSkill) Instructions() string { return s.instructions }
|
||||
func (s stubSkill) Tools() *llm.Toolbox { return s.tools }
|
||||
Reference in New Issue
Block a user