package agent

import (
	"context"
	"encoding/json"
	"strings"
	"testing"

	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
	"gitea.stevedudenhoeffer.com/steve/majordomo/provider/fake"
)

// TestIsWeakFinal table-tests isWeakFinal. Empty, whitespace-only and short
// back-reference texts are expected to be reported as weak; crisp short
// answers, and a long answer that merely contains "as I said", are not.
func TestIsWeakFinal(t *testing.T) {
	// Longer than 120 characters while still containing "as i said".
	longWithPhrase := strings.Repeat("As I said, this is the full answer. ", 6)

	table := []struct {
		label string
		text  string
		weak  bool
	}{
		{label: "empty", text: "", weak: true},
		{label: "whitespace", text: " \n\t ", weak: true},
		{label: "already-answered", text: "(Already answered above.)", weak: true},
		{label: "see-above", text: "see above", weak: true},
		{label: "as-i-said-short", text: "As I said, it's 60 minutes.", weak: true},
		{label: "crisp-number", text: "42", weak: false},
		{label: "crisp-yes", text: "Yes.", weak: false},
		{label: "crisp-status", text: "It's down, restarting now.", weak: false},
		// >120 chars: expected not to count as weak despite the phrase.
		{label: "long-with-as-i-said", text: longWithPhrase, weak: false},
	}

	for i := range table {
		row := table[i]
		t.Run(row.label, func(t *testing.T) {
			got := isWeakFinal(row.text)
			if got == row.weak {
				return
			}
			t.Errorf("isWeakFinal(%q) = %v, want %v", row.text, got, row.weak)
		})
	}
}

// asst builds an assistant-role message. A non-empty text becomes the
// message's single text part; an empty text leaves Parts unset. Any tool calls
// passed in are attached as-is.
func asst(text string, tools ...llm.ToolCall) llm.Message {
	msg := llm.Message{Role: llm.RoleAssistant, ToolCalls: tools}
	if text == "" {
		return msg
	}
	msg.Parts = []llm.Part{llm.Text(text)}
	return msg
}

// TestFinalOutput table-tests finalOutput against transcripts that all share
// one shape: a user question, an assistant turn that also calls the cite tool,
// the tool result, and a closing assistant turn. Each case pins which text is
// expected back for the given terminal text.
func TestFinalOutput(t *testing.T) {
	citeCall := []llm.ToolCall{{ID: "c1", Name: "cite", Arguments: json.RawMessage(`{}`)}}

	// >200 characters once the trailing space is trimmed.
	longAnswer := strings.TrimSpace(strings.Repeat("Free group calls are capped at sixty minutes. ", 6))

	// dialogue assembles the four-message transcript used by every case.
	dialogue := func(question, toolTurn, closer string) []llm.Message {
		return []llm.Message{
			llm.UserText(question),
			asst(toolTurn, citeCall...),
			llm.ToolResultsMessage(llm.ToolResult{ID: "c1", Name: "cite", Content: "ok"}),
			asst(closer),
		}
	}

	scenarios := []struct {
		name     string
		msgs     []llm.Message
		terminal string
		want     string
	}{
		{
			name:     "front-loaded answer recovered over back-ref closer",
			msgs:     dialogue("q?", longAnswer, "(Already answered above.)"),
			terminal: "(Already answered above.)",
			want:     longAnswer,
		},
		{
			name:     "empty terminal recovers prior substantive answer",
			msgs:     dialogue("q?", longAnswer, ""),
			terminal: "",
			want:     longAnswer,
		},
		{
			name:     "healthy terminal answer is unchanged",
			msgs:     dialogue("q?", "Let me check.", longAnswer),
			terminal: longAnswer,
			want:     longAnswer,
		},
		{
			// The terminal text is not weak, so it is expected back as-is.
			name:     "short crisp answer not overridden by a short preamble prior",
			msgs:     dialogue("is it up?", "Let me check the server status.", "It's down, restarting now."),
			terminal: "It's down, restarting now.",
			want:     "It's down, restarting now.",
		},
		{
			// The only prior turn is a preamble, so the expectation is a
			// fall back to the terminal text.
			name:     "weak terminal but only a preamble prior: no recovery",
			msgs:     dialogue("q?", "Let me look that up for you.", "(see above)"),
			terminal: "(see above)",
			want:     "(see above)",
		},
	}

	for i := range scenarios {
		sc := scenarios[i]
		t.Run(sc.name, func(t *testing.T) {
			got := finalOutput(sc.msgs, sc.terminal)
			if got == sc.want {
				return
			}
			t.Errorf("finalOutput = %q, want %q", got, sc.want)
		})
	}
}

// citeToolbox returns a toolbox named "sources" that holds a single "cite"
// tool. The tool's parameter schema is an object with no properties, and its
// handler ignores its inputs and always returns {"ok": true} with a nil error.
func citeToolbox(t *testing.T) *llm.Toolbox {
	t.Helper()

	citeTool := llm.Tool{
		Name:        "cite",
		Description: "Record a citation.",
		Parameters:  json.RawMessage(`{"type":"object","properties":{}}`),
		Handler: func(context.Context, json.RawMessage) (any, error) {
			return map[string]bool{"ok": true}, nil
		},
	}
	return llm.NewToolbox("sources", citeTool)
}

// TestRun_RecoversFrontLoadedAnswer exercises the glm-5.2 shape end-to-end:
// the first model reply carries the full answer text together with a tool
// call, and the second reply is a degenerate back-reference. Run must return
// the front-loaded answer, and the fake provider must have been called exactly
// twice, i.e. the recovery comes from the transcript without an extra model
// call.
func TestRun_RecoversFrontLoadedAnswer(t *testing.T) {
	longAnswer := strings.TrimSpace(strings.Repeat("Free group calls are capped at sixty minutes. ", 6))

	// First reply: answer text and a cite tool call in the same turn.
	frontLoaded := llm.Response{
		Parts:        []llm.Part{llm.Text(longAnswer)},
		ToolCalls:    []llm.ToolCall{{ID: "c1", Name: "cite", Arguments: json.RawMessage(`{}`)}},
		FinishReason: llm.FinishToolCalls,
		Usage:        llm.Usage{InputTokens: 10, OutputTokens: 5},
	}

	provider := fake.New("fp")
	provider.Enqueue("test-model",
		fake.ReplyWith(frontLoaded),
		fake.Reply("(Already answered above.)"),
	)

	ag := New(newModel(t, provider), "sys", WithToolbox(citeToolbox(t)))
	result, err := ag.Run(context.Background(), "is there a meet time limit?")
	if err != nil {
		t.Fatalf("Run: %v", err)
	}

	if result.Output != longAnswer {
		t.Errorf("Output = %q, want recovered front-loaded answer", result.Output)
	}
	calls := len(provider.Calls())
	if calls != 2 {
		t.Errorf("model calls = %d, want 2 (no extra nudge turn)", calls)
	}
}

// TestRun_HealthyTerminalUnchanged protects the normal case from regressing:
// when the first reply is only a tool call and the answer arrives in the
// terminal turn, Run must deliver that terminal text verbatim.
func TestRun_HealthyTerminalUnchanged(t *testing.T) {
	const terminalAnswer = "The limit is 60 minutes for free group calls."

	provider := fake.New("fp")
	provider.Enqueue("test-model",
		toolCallReply("c1", "cite", `{}`),
		fake.Reply(terminalAnswer),
	)

	ag := New(newModel(t, provider), "sys", WithToolbox(citeToolbox(t)))
	result, err := ag.Run(context.Background(), "q?")
	if err != nil {
		t.Fatalf("Run: %v", err)
	}

	if result.Output != terminalAnswer {
		t.Errorf("Output = %q, want terminal answer unchanged", result.Output)
	}
}