package agent import ( "context" "encoding/json" "errors" "strings" "sync/atomic" "testing" "gitea.stevedudenhoeffer.com/steve/majordomo/llm" "gitea.stevedudenhoeffer.com/steve/majordomo/provider/fake" ) // TestMaxStepsFuncExtendsBudget: a supervisor raising the ceiling mid-run // lets the loop continue past the static budget. func TestMaxStepsFuncExtendsBudget(t *testing.T) { fp := fake.New("fp") fp.Enqueue("test-model", toolCallReply("c1", "add", `{"a":1,"b":1}`), toolCallReply("c2", "add", `{"a":2,"b":2}`), toolCallReply("c3", "add", `{"a":3,"b":3}`), fake.Reply("done"), ) var ceiling atomic.Int64 ceiling.Store(2) a := New(newModel(t, fp), "", WithToolbox(adderToolbox(t)), WithMaxSteps(2), WithMaxStepsFunc(func() int { return int(ceiling.Load()) }), WithStepObserver(func(s Step) { if s.Index == 1 { ceiling.Store(10) // the "critic" extends the budget } }), ) res, err := a.Run(context.Background(), "go") if err != nil { t.Fatalf("Run: %v (budget should have been extended)", err) } if res.Output != "done" || len(res.Steps) != 4 { t.Errorf("output=%q steps=%d", res.Output, len(res.Steps)) } } // TestSteerInjectsMessages: steering messages appear in the conversation // before the next model call. func TestSteerInjectsMessages(t *testing.T) { fp := fake.New("fp") fp.Enqueue("test-model", toolCallReply("c1", "add", `{"a":1,"b":1}`), fake.Reply("ok"), ) var pending []llm.Message pending = append(pending, llm.UserText("SUPERVISOR: wrap it up")) a := New(newModel(t, fp), "", WithToolbox(adderToolbox(t))) _, err := a.Run(context.Background(), "go", WithSteer(func() []llm.Message { out := pending pending = nil return out })) if err != nil { t.Fatalf("Run: %v", err) } first := fp.Calls()[0].Request.Messages if len(first) != 2 || !strings.Contains(first[1].Text(), "SUPERVISOR") { t.Errorf("first call messages = %+v, want steered message", first) } // Drained: second call must not duplicate it. second := fp.Calls()[1].Request.Messages count := 0 for _, m := range second { if strings.Contains(m.Text(), "SUPERVISOR") { count++ } } if count != 1 { t.Errorf("steer message appears %d times in second call, want 1", count) } } // TestCompactorShrinksOutboundContext: the model sees the compacted view; // the canonical transcript keeps everything. func TestCompactorShrinksOutboundContext(t *testing.T) { fp := fake.New("fp") fp.Enqueue("test-model", fake.Reply("answer")) history := []llm.Message{ llm.UserText("old 1"), llm.AssistantText("old reply 1"), llm.UserText("old 2"), llm.AssistantText("old reply 2"), } a := New(newModel(t, fp), "", WithCompactor(func(_ context.Context, msgs []llm.Message) ([]llm.Message, error) { // Keep only the last message, prefixed by a synthetic summary. return append([]llm.Message{llm.UserText("[summary of earlier conversation]")}, msgs[len(msgs)-1]), nil })) res, err := a.Run(context.Background(), "new question", WithHistory(history)) if err != nil { t.Fatalf("Run: %v", err) } sent := fp.Calls()[0].Request.Messages if len(sent) != 2 || !strings.Contains(sent[0].Text(), "summary") { t.Errorf("sent = %+v, want compacted view", sent) } if len(res.Messages) != 6 { t.Errorf("transcript = %d messages, want full uncompacted history", len(res.Messages)) } } // TestCompactorErrorIsNonFatal: a failing compactor falls back to the // original messages. func TestCompactorErrorIsNonFatal(t *testing.T) { fp := fake.New("fp") fp.Enqueue("test-model", fake.Reply("fine")) a := New(newModel(t, fp), "", WithCompactor(func(context.Context, []llm.Message) ([]llm.Message, error) { return nil, errors.New("summarizer down") })) res, err := a.Run(context.Background(), "go") if err != nil || res.Output != "fine" { t.Errorf("res=%v err=%v", res, err) } if len(fp.Calls()[0].Request.Messages) != 1 { t.Error("original messages must be sent when compaction fails") } } // TestConsecutiveToolErrorGuard: steps whose tools ALL fail trip the guard. func TestConsecutiveToolErrorGuard(t *testing.T) { fp := fake.New("fp", fake.WithDefault(func(string, llm.Request) fake.Step { return toolCallReply("c", "bomb", `{}`) })) bomb := llm.NewToolbox("danger", llm.Tool{ Name: "bomb", Handler: func(context.Context, json.RawMessage) (any, error) { return nil, errors.New("always fails") }, }) a := New(newModel(t, fp), "", WithToolbox(bomb), WithToolErrorLimits(2, 0), WithMaxSteps(10)) res, err := a.Run(context.Background(), "go") if !errors.Is(err, ErrToolLoop) { t.Fatalf("err = %v, want ErrToolLoop", err) } if len(res.Steps) != 2 { t.Errorf("steps = %d, want guard to trip after 2", len(res.Steps)) } } // TestSameCallRepeatGuard: identical (name+args) calls beyond the limit // trip the guard; varied calls do not. func TestSameCallRepeatGuard(t *testing.T) { fp := fake.New("fp", fake.WithDefault(func(string, llm.Request) fake.Step { return toolCallReply("c", "add", `{"a":1,"b":1}`) })) a := New(newModel(t, fp), "", WithToolbox(adderToolbox(t)), WithToolErrorLimits(0, 3), WithMaxSteps(10)) _, err := a.Run(context.Background(), "go") if !errors.Is(err, ErrToolLoop) || !strings.Contains(err.Error(), `"add"`) { t.Fatalf("err = %v, want repeat-guard ErrToolLoop naming add", err) } // Varied arguments never trip it. n := 0 fp2 := fake.New("fp", fake.WithDefault(func(string, llm.Request) fake.Step { n++ if n > 4 { return fake.Reply("done") } return toolCallReply("c", "add", `{"a":1,"b":`+string(rune('0'+n))+`}`) })) a2 := New(newModel(t, fp2), "", WithToolbox(adderToolbox(t)), WithToolErrorLimits(0, 3), WithMaxSteps(10)) if _, err := a2.Run(context.Background(), "go"); err != nil { t.Errorf("varied calls must not trip the guard: %v", err) } }