package llmmeta import ( "context" "errors" "strings" "sync" "testing" llm "gitea.stevedudenhoeffer.com/steve/majordomo/llm" ) // fakeStorage records every MetaCall handed to RecordMetaCall and // makes them available to tests via the captured slice. type fakeStorage struct { mu sync.Mutex calls []MetaCall err error } func (f *fakeStorage) RecordMetaCall(_ context.Context, call MetaCall) error { f.mu.Lock() defer f.mu.Unlock() f.calls = append(f.calls, call) return f.err } func (f *fakeStorage) snapshot() []MetaCall { f.mu.Lock() defer f.mu.Unlock() out := make([]MetaCall, len(f.calls)) copy(out, f.calls) return out } // TestCall_TierNotAllowed: a tier not in the allowlist returns the // rejection without recording a ledger row — the call did not happen. func TestCall_TierNotAllowed(t *testing.T) { store := &fakeStorage{} convars := ConvarReaderFunc(func(_ context.Context) []string { return []string{"fast"} }) h := New(store, convars) res, err := h.Call(context.Background(), CallSpec{ Tier: "thinking", UserPrompt: "hello", ToolName: "summarize", }) if err != nil { t.Fatalf("unexpected err: %v", err) } if res.Success { t.Errorf("expected Success=false") } if res.ErrorKind != ErrorKindTierNotAllowed { t.Errorf("ErrorKind = %q, want %q", res.ErrorKind, ErrorKindTierNotAllowed) } if len(store.snapshot()) != 0 { t.Errorf("expected NO ledger row for tier_not_allowed, got %d", len(store.snapshot())) } } // TestCall_TierAllowedHappyText: a permitted tier yields a successful // text call AND records a ledger row. func TestCall_TierAllowedHappyText(t *testing.T) { store := &fakeStorage{} convars := ConvarReaderFunc(func(_ context.Context) []string { return []string{"fast"} }) h := New(store, convars) restore := SetCompleteForTest(func(_ context.Context, _ llm.Model, _, _ string, _ []llm.Option) (string, Tokens, error) { return "summary text here", Tokens{InputTokens: 50, OutputTokens: 12}, nil }) defer restore() res, err := h.Call(context.Background(), CallSpec{ Tier: "fast", UserPrompt: "summarise the following ...", ToolName: "summarize", ResponseFormat: "text", RunID: "run-1", SkillID: "sk-1", }) if err != nil { t.Fatalf("unexpected err: %v", err) } if !res.Success { t.Errorf("expected Success=true; got ErrorKind=%q", res.ErrorKind) } if res.Text != "summary text here" { t.Errorf("Text = %q, want %q", res.Text, "summary text here") } if res.InputTokens != 50 || res.OutputTokens != 12 { t.Errorf("token counts wrong: in=%d out=%d", res.InputTokens, res.OutputTokens) } if got := len(store.snapshot()); got != 1 { t.Fatalf("expected 1 ledger row, got %d", got) } row := store.snapshot()[0] if !row.Success { t.Errorf("ledger Success = false, want true") } if row.ToolName != "summarize" { t.Errorf("ledger ToolName = %q", row.ToolName) } if row.RunID != "run-1" { t.Errorf("ledger RunID = %q", row.RunID) } if row.InputTokens != 50 || row.OutputTokens != 12 { t.Errorf("ledger token counts wrong: in=%d out=%d", row.InputTokens, row.OutputTokens) } } // TestCall_JSONFirstAttemptParses: JSON-format request, response is // valid JSON on first try; result.Parsed populated. func TestCall_JSONFirstAttemptParses(t *testing.T) { store := &fakeStorage{} h := New(store, nil) restore := SetCompleteForTest(func(_ context.Context, _ llm.Model, _, _ string, _ []llm.Option) (string, Tokens, error) { return `{"foo":"bar","n":42}`, Tokens{InputTokens: 10, OutputTokens: 5}, nil }) defer restore() res, _ := h.Call(context.Background(), CallSpec{ UserPrompt: "extract entities", ToolName: "extract_entities", ResponseFormat: "json", RetryOnMalformedJSON: true, SkillID: "sk-2", }) if !res.Success || res.ErrorKind != "" { t.Fatalf("expected success, got %+v", res) } m, ok := res.Parsed.(map[string]any) if !ok { t.Fatalf("Parsed not a map: %T %v", res.Parsed, res.Parsed) } if m["foo"] != "bar" { t.Errorf("Parsed[foo] = %v", m["foo"]) } } // TestCall_JSONRetryPath: first response is malformed JSON; second // response (after stricter prompt) parses cleanly. func TestCall_JSONRetryPath(t *testing.T) { store := &fakeStorage{} h := New(store, nil) calls := 0 restore := SetCompleteForTest(func(_ context.Context, _ llm.Model, _, prompt string, _ []llm.Option) (string, Tokens, error) { calls++ if calls == 1 { return "Here is your JSON: {oh no I forgot to format it", Tokens{InputTokens: 8, OutputTokens: 12}, nil } // Verify stricter prompt prefix appeared on retry. if !strings.Contains(prompt, "Return ONLY valid JSON") { t.Errorf("retry prompt missing stricter prefix: %q", prompt) } return `{"key":"value"}`, Tokens{InputTokens: 14, OutputTokens: 6}, nil }) defer restore() res, _ := h.Call(context.Background(), CallSpec{ UserPrompt: "extract", ToolName: "extract_entities", ResponseFormat: "json", RetryOnMalformedJSON: true, }) if !res.Success || res.ErrorKind != "" { t.Fatalf("expected success, got %+v", res) } if calls != 2 { t.Errorf("expected 2 LLM calls, got %d", calls) } m, _ := res.Parsed.(map[string]any) if m["key"] != "value" { t.Errorf("Parsed = %v", res.Parsed) } // Token counts should reflect both attempts. if res.InputTokens != 22 || res.OutputTokens != 18 { t.Errorf("combined tokens wrong: in=%d out=%d", res.InputTokens, res.OutputTokens) } } // TestCall_JSONRetryFailsTwice: second attempt also fails to parse. // Surfaces ErrorKind=malformed_json AND keeps Success=true so the // caller can fall back to result.Text. func TestCall_JSONRetryFailsTwice(t *testing.T) { store := &fakeStorage{} h := New(store, nil) restore := SetCompleteForTest(func(_ context.Context, _ llm.Model, _, _ string, _ []llm.Option) (string, Tokens, error) { return "still not JSON", Tokens{InputTokens: 10, OutputTokens: 4}, nil }) defer restore() res, _ := h.Call(context.Background(), CallSpec{ UserPrompt: "extract", ToolName: "extract_entities", ResponseFormat: "json", RetryOnMalformedJSON: true, }) if !res.Success { t.Errorf("expected Success=true (fall-back-to-text), got Success=false") } if res.ErrorKind != ErrorKindMalformedJSON { t.Errorf("ErrorKind = %q, want %q", res.ErrorKind, ErrorKindMalformedJSON) } if res.Parsed != nil { t.Errorf("Parsed = %v, want nil after failed retry", res.Parsed) } rows := store.snapshot() if len(rows) != 1 { t.Fatalf("expected 1 ledger row, got %d", len(rows)) } if !rows[0].Success || rows[0].ErrorKind != ErrorKindMalformedJSON { t.Errorf("ledger row mismatch: %+v", rows[0]) } } // TestCall_LLMUnavailable: transport error from the model.Generate // call is surfaced as ErrorKind=llm_unavailable AND records a ledger // row. func TestCall_LLMUnavailable(t *testing.T) { store := &fakeStorage{} h := New(store, nil) restore := SetCompleteForTest(func(_ context.Context, _ llm.Model, _, _ string, _ []llm.Option) (string, Tokens, error) { return "", Tokens{}, errors.New("network error") }) defer restore() res, _ := h.Call(context.Background(), CallSpec{ UserPrompt: "hi", ToolName: "summarize", }) if res.Success { t.Errorf("expected Success=false") } if res.ErrorKind != ErrorKindLLMUnavailable { t.Errorf("ErrorKind = %q, want %q", res.ErrorKind, ErrorKindLLMUnavailable) } rows := store.snapshot() if len(rows) != 1 { t.Fatalf("expected 1 ledger row, got %d", len(rows)) } } // TestCall_EmptyUserPromptErrors: programmer-error guard. func TestCall_EmptyUserPromptErrors(t *testing.T) { h := New(&fakeStorage{}, nil) _, err := h.Call(context.Background(), CallSpec{ToolName: "summarize"}) if err == nil { t.Fatal("expected error for empty user_prompt") } } // TestCall_JSONWithCodeFenceParses: tolerance for the first-attempt // response wrapped in a ```json ... ``` fence. The retry path uses a // stricter prompt; this test pins the first-attempt tolerance so // callers don't waste a round-trip on a benign formatting wrapper. func TestCall_JSONWithCodeFenceParses(t *testing.T) { store := &fakeStorage{} h := New(store, nil) restore := SetCompleteForTest(func(_ context.Context, _ llm.Model, _, _ string, _ []llm.Option) (string, Tokens, error) { return "```json\n{\"x\":1}\n```", Tokens{InputTokens: 5, OutputTokens: 4}, nil }) defer restore() res, _ := h.Call(context.Background(), CallSpec{ UserPrompt: "extract", ToolName: "extract_entities", ResponseFormat: "json", RetryOnMalformedJSON: true, }) if res.ErrorKind != "" { t.Errorf("unexpected ErrorKind %q (fenced JSON should parse on first attempt)", res.ErrorKind) } m, _ := res.Parsed.(map[string]any) if m["x"] != float64(1) { t.Errorf("Parsed[x] = %v, want 1", m["x"]) } }