package agent

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"strings"
	"testing"

	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
	"gitea.stevedudenhoeffer.com/steve/majordomo/provider/fake"
)

// newModel resolves the "test-model" model from the fake provider, failing
// the test immediately if the provider returns an error.
func newModel(t *testing.T, fp *fake.Provider) llm.Model {
	t.Helper()
	m, err := fp.Model("test-model")
	if err != nil {
		t.Fatalf("Model: %v", err)
	}
	return m
}

// toolCallReply scripts an assistant response requesting one tool call.
// The response reports a usage of 10 input tokens and 5 output tokens.
func toolCallReply(id, name, args string) fake.Step {
	return fake.ReplyWith(llm.Response{
		ToolCalls:    []llm.ToolCall{{ID: id, Name: name, Arguments: json.RawMessage(args)}},
		FinishReason: llm.FinishToolCalls,
		Usage:        llm.Usage{InputTokens: 10, OutputTokens: 5},
	})
}

// adderToolbox returns a "math" toolbox holding a single "add" tool whose
// handler decodes integers a and b and returns {"sum": a+b}.
func adderToolbox(t *testing.T) *llm.Toolbox {
	t.Helper()
	return llm.NewToolbox("math", llm.Tool{
		Name:        "add",
		Description: "Add two integers",
		Parameters:  json.RawMessage(`{"type":"object","properties":{"a":{"type":"integer"},"b":{"type":"integer"}},"required":["a","b"]}`),
		Handler: func(_ context.Context, args json.RawMessage) (any, error) {
			// encoding/json matches field names case-insensitively, so the
			// lowercase "a"/"b" keys populate A and B.
			var p struct{ A, B int }
			if err := json.Unmarshal(args, &p); err != nil {
				return nil, err
			}
			return map[string]int{"sum": p.A + p.B}, nil
		},
	})
}

// TestRunWithoutTools checks that a plain text reply is returned as the
// output in a single step and that the system prompt reaches the model.
func TestRunWithoutTools(t *testing.T) {
	fp := fake.New("fp")
	fp.Enqueue("test-model", fake.Reply("direct answer"))

	a := New(newModel(t, fp), "You are terse.")
	res, err := a.Run(context.Background(), "question?")
	if err != nil {
		t.Fatalf("Run: %v", err)
	}
	if res.Output != "direct answer" {
		t.Errorf("output = %q", res.Output)
	}
	if len(res.Steps) != 1 {
		t.Errorf("steps = %d", len(res.Steps))
	}

	// The system prompt reached the model.
	calls := fp.Calls()
	if len(calls) == 0 {
		// Fail readably instead of panicking on calls[0].
		t.Fatal("no model calls recorded")
	}
	if calls[0].Request.System != "You are terse." {
		t.Errorf("system = %q", calls[0].Request.System)
	}
}

// TestRunToolLoop drives one tool call followed by a final answer and checks
// the output, step count, summed usage, tool result, the transcript sent on
// the second request, and that tools are offered on every call.
func TestRunToolLoop(t *testing.T) {
	fp := fake.New("fp")
	fp.Enqueue("test-model",
		toolCallReply("c1", "add", `{"a":2,"b":3}`),
		fake.Reply("the sum is 5"),
	)

	a := New(newModel(t, fp), "do math", WithToolbox(adderToolbox(t)))
	res, err := a.Run(context.Background(), "2+3?")
	if err != nil {
		t.Fatalf("Run: %v", err)
	}
	if res.Output != "the sum is 5" {
		t.Errorf("output = %q", res.Output)
	}
	if len(res.Steps) != 2 {
		t.Fatalf("steps = %d, want 2", len(res.Steps))
	}
	// toolCallReply contributes 10/5; the 11/6 total presumably relies on
	// fake.Reply reporting 1/1 — verify against the fake provider.
	if res.Usage.InputTokens != 11 || res.Usage.OutputTokens != 6 {
		t.Errorf("usage = %+v (must sum both steps)", res.Usage)
	}

	// The tool executed and its result went back to the model.
	step1 := res.Steps[0]
	if len(step1.Results) != 1 || step1.Results[0].IsError {
		t.Fatalf("step 1 results = %+v", step1.Results)
	}
	if !strings.Contains(step1.Results[0].Content, `"sum":5`) {
		t.Errorf("tool result = %q", step1.Results[0].Content)
	}

	// Second model call must carry the tool transcript: user, assistant
	// (with the call), tool results.
	calls := fp.Calls()
	if len(calls) < 2 {
		// Fail readably instead of panicking on calls[1].
		t.Fatalf("model calls = %d, want at least 2", len(calls))
	}
	second := calls[1].Request
	if len(second.Messages) != 3 {
		t.Fatalf("second request messages = %d, want 3", len(second.Messages))
	}
	if second.Messages[1].Role != llm.RoleAssistant || len(second.Messages[1].ToolCalls) != 1 {
		t.Errorf("assistant turn = %+v", second.Messages[1])
	}
	toolMsg := second.Messages[2]
	// The length check keeps an empty ToolResults a reported failure rather
	// than an index-out-of-range panic.
	if toolMsg.Role != llm.RoleTool || len(toolMsg.ToolResults) == 0 || toolMsg.ToolResults[0].ID != "c1" {
		t.Errorf("tool turn = %+v", toolMsg)
	}

	// The tools were offered on every step.
	for i, c := range calls {
		if len(c.Request.Tools) != 1 || c.Request.Tools[0].Name != "add" {
			t.Errorf("call %d tools = %+v", i, c.Request.Tools)
		}
	}

	// Result.Messages is the full transcript.
	if len(res.Messages) != 4 {
		t.Errorf("transcript = %d messages, want 4", len(res.Messages))
	}
}

// TestRunUnknownToolContinues checks that a call to a tool that is not in the
// toolbox yields an error result naming the tool, and that the run still
// reaches the next scripted reply.
func TestRunUnknownToolContinues(t *testing.T) {
	fp := fake.New("fp")
	fp.Enqueue("test-model",
		toolCallReply("c1", "nonexistent", `{}`),
		fake.Reply("recovered"),
	)

	a := New(newModel(t, fp), "", WithToolbox(adderToolbox(t)))
	res, err := a.Run(context.Background(), "go")
	if err != nil {
		t.Fatalf("Run: %v", err)
	}
	if res.Output != "recovered" {
		t.Errorf("output = %q", res.Output)
	}
	// Guard the double index so a missing step/result is a clear failure.
	if len(res.Steps) == 0 || len(res.Steps[0].Results) == 0 {
		t.Fatalf("steps = %+v, want a tool result on step 0", res.Steps)
	}
	r := res.Steps[0].Results[0]
	if !r.IsError || !strings.Contains(r.Content, "nonexistent") {
		t.Errorf("unknown-tool result = %+v", r)
	}
}

// TestRunPanickingToolContinues checks that a tool handler that panics is
// reported as an error result containing the panic value, and that the run
// still reaches the next scripted reply.
func TestRunPanickingToolContinues(t *testing.T) {
	fp := fake.New("fp")
	fp.Enqueue("test-model",
		toolCallReply("c1", "bomb", `{}`),
		fake.Reply("survived"),
	)
	bomb := llm.NewToolbox("danger", llm.Tool{
		Name: "bomb",
		Handler: func(context.Context, json.RawMessage) (any, error) {
			panic("boom")
		},
	})

	a := New(newModel(t, fp), "", WithToolbox(bomb))
	res, err := a.Run(context.Background(), "go")
	if err != nil {
		t.Fatalf("Run: %v", err)
	}
	if res.Output != "survived" {
		t.Errorf("output = %q", res.Output)
	}
	// Guard the double index so a missing step/result is a clear failure.
	if len(res.Steps) == 0 || len(res.Steps[0].Results) == 0 {
		t.Fatalf("steps = %+v, want a tool result on step 0", res.Steps)
	}
	r := res.Steps[0].Results[0]
	if !r.IsError || !strings.Contains(r.Content, "boom") {
		t.Errorf("panic result = %+v", r)
	}
}

// TestRunMaxSteps scripts a model that always asks for another tool call and
// checks that Run stops with ErrMaxSteps after the configured three steps
// while still returning the recorded steps and transcript.
func TestRunMaxSteps(t *testing.T) {
	fp := fake.New("fp", fake.WithDefault(func(string, llm.Request) fake.Step {
		return toolCallReply("c", "add", `{"a":1,"b":1}`)
	}))

	a := New(newModel(t, fp), "", WithToolbox(adderToolbox(t)), WithMaxSteps(3))
	res, err := a.Run(context.Background(), "loop forever")
	if !errors.Is(err, ErrMaxSteps) {
		t.Fatalf("err = %v, want ErrMaxSteps", err)
	}
	if res == nil || len(res.Steps) != 3 {
		t.Fatalf("result = %+v, want 3 recorded steps", res)
	}
	if len(res.Messages) == 0 {
		t.Error("transcript must be preserved on ErrMaxSteps")
	}
}

// TestDuplicateToolNamesFailLoudly checks that two toolboxes exposing the
// same tool name make Run return a duplicate-tool error.
func TestDuplicateToolNamesFailLoudly(t *testing.T) {
	fp := fake.New("fp")
	box1 := llm.NewToolbox("a", llm.Tool{Name: "dup"})
	box2 := llm.NewToolbox("b", llm.Tool{Name: "dup"})

	a := New(newModel(t, fp), "", WithToolbox(box1), WithToolbox(box2))
	_, err := a.Run(context.Background(), "go")
	if err == nil || !strings.Contains(err.Error(), `duplicate tool "dup"`) {
		t.Errorf("err = %v, want duplicate-tool error", err)
	}
}

// TestRunWithHistory checks that messages supplied via WithHistory precede
// the new input in the request and are included in the returned transcript.
func TestRunWithHistory(t *testing.T) {
	fp := fake.New("fp")
	fp.Enqueue("test-model", fake.Reply("continued"))

	history := []llm.Message{
		llm.UserText("first question"),
		llm.AssistantText("first answer"),
	}
	a := New(newModel(t, fp), "")
	res, err := a.Run(context.Background(), "follow-up", WithHistory(history))
	if err != nil {
		t.Fatalf("Run: %v", err)
	}

	calls := fp.Calls()
	if len(calls) == 0 {
		// Fail readably instead of panicking on calls[0].
		t.Fatal("no model calls recorded")
	}
	got := calls[0].Request.Messages
	if len(got) != 3 || got[0].Text() != "first question" || got[2].Text() != "follow-up" {
		t.Errorf("messages = %+v", got)
	}
	if len(res.Messages) != 4 {
		t.Errorf("transcript = %d, want history+input+answer", len(res.Messages))
	}
}

// TestStepObservers checks that both the agent-level observer and the
// per-run observer are invoked with step indices 0 and 1, in order.
func TestStepObservers(t *testing.T) {
	fp := fake.New("fp")
	fp.Enqueue("test-model",
		toolCallReply("c1", "add", `{"a":1,"b":2}`),
		fake.Reply("3"),
	)

	var agentSteps, runSteps []int
	a := New(newModel(t, fp), "",
		WithToolbox(adderToolbox(t)),
		WithStepObserver(func(s Step) { agentSteps = append(agentSteps, s.Index) }),
	)
	_, err := a.Run(context.Background(), "1+2?",
		OnStep(func(s Step) { runSteps = append(runSteps, s.Index) }),
	)
	if err != nil {
		t.Fatalf("Run: %v", err)
	}
	if fmt.Sprint(agentSteps) != "[0 1]" || fmt.Sprint(runSteps) != "[0 1]" {
		t.Errorf("agentSteps=%v runSteps=%v", agentSteps, runSteps)
	}
}

// TestObserverPanicIsSwallowed checks that a panicking step observer does not
// prevent Run from returning the model's answer.
func TestObserverPanicIsSwallowed(t *testing.T) {
	fp := fake.New("fp")
	fp.Enqueue("test-model", fake.Reply("fine"))

	a := New(newModel(t, fp), "", WithStepObserver(func(Step) { panic("ui bug") }))
	res, err := a.Run(context.Background(), "go")
	// The nil check keeps a nil result a reported failure rather than a
	// nil-pointer dereference.
	if err != nil || res == nil || res.Output != "fine" {
		t.Errorf("res=%+v err=%v — observer panic must not kill the run", res, err)
	}
}

// TestSkillComposition checks that a skill passed via WithSkill appends its
// instructions to the base prompt (separated by a blank line) and exposes its
// tools on the request.
func TestSkillComposition(t *testing.T) {
	fp := fake.New("fp")
	fp.Enqueue("test-model", fake.Reply("ok"))

	sk := stubSkill{
		name:         "haiku",
		instructions: "Answer in haiku.",
		tools: llm.NewToolbox("haiku-tools", llm.Tool{
			Name:    "count_syllables",
			Handler: func(context.Context, json.RawMessage) (any, error) { return 5, nil },
		}),
	}
	a := New(newModel(t, fp), "Base prompt.", WithSkill(sk))
	if _, err := a.Run(context.Background(), "hello"); err != nil {
		t.Fatalf("Run: %v", err)
	}

	calls := fp.Calls()
	if len(calls) == 0 {
		// Fail readably instead of panicking on calls[0].
		t.Fatal("no model calls recorded")
	}
	req := calls[0].Request
	if req.System != "Base prompt.\n\nAnswer in haiku." {
		t.Errorf("system = %q", req.System)
	}
	if len(req.Tools) != 1 || req.Tools[0].Name != "count_syllables" {
		t.Errorf("tools = %+v", req.Tools)
	}
}

// TestAddSkillOnDemand checks that a skill added with AddSkill between runs
// affects only the system prompt of the later run.
func TestAddSkillOnDemand(t *testing.T) {
	fp := fake.New("fp")
	fp.Enqueue("test-model", fake.Reply("a"), fake.Reply("b"))

	a := New(newModel(t, fp), "Base.")
	if _, err := a.Run(context.Background(), "one"); err != nil {
		t.Fatalf("Run: %v", err)
	}
	a.AddSkill(stubSkill{name: "later", instructions: "Later skill."})
	if _, err := a.Run(context.Background(), "two"); err != nil {
		t.Fatalf("Run: %v", err)
	}

	calls := fp.Calls()
	if len(calls) < 2 {
		// Fail readably instead of panicking on calls[1].
		t.Fatalf("model calls = %d, want at least 2", len(calls))
	}
	if calls[0].Request.System != "Base." {
		t.Errorf("first system = %q", calls[0].Request.System)
	}
	if calls[1].Request.System != "Base.\n\nLater skill." {
		t.Errorf("second system = %q", calls[1].Request.System)
	}
}

// TestRunErrorPreservesTranscript checks that when the model call fails, Run
// returns the error together with a result whose transcript holds the input.
func TestRunErrorPreservesTranscript(t *testing.T) {
	fp := fake.New("fp")
	fp.Enqueue("test-model", fake.Fail(errors.New("model down")))

	a := New(newModel(t, fp), "")
	res, err := a.Run(context.Background(), "go")
	if err == nil || !strings.Contains(err.Error(), "model down") {
		t.Fatalf("err = %v", err)
	}
	if res == nil || len(res.Messages) != 1 {
		t.Errorf("result = %+v, want transcript with the input", res)
	}
}

// TestEmptyInputNeedsHistory checks that Run rejects an empty input when no
// history is supplied.
func TestEmptyInputNeedsHistory(t *testing.T) {
	fp := fake.New("fp")
	a := New(newModel(t, fp), "")
	if _, err := a.Run(context.Background(), ""); err == nil {
		t.Error("empty input with no history must error")
	}
}

// stubSkill is a fixed-value skill used by the tests; each accessor returns
// the corresponding field unchanged.
type stubSkill struct {
	name         string
	instructions string
	tools        *llm.Toolbox
}

// Name returns the skill's name.
func (s stubSkill) Name() string { return s.name }

// Instructions returns the prompt text the skill contributes.
func (s stubSkill) Instructions() string { return s.instructions }

// Tools returns the skill's toolbox, which may be nil.
func (s stubSkill) Tools() *llm.Toolbox { return s.tools }