// executus/llmmeta/helper_test.go

package llmmeta

import (
	"context"
	"errors"
	"strings"
	"sync"
	"testing"

	llm "gitea.stevedudenhoeffer.com/steve/majordomo/llm"
)

// fakeStorage is an in-memory test double: its RecordMetaCall method
// appends each MetaCall it is given, and tests read the accumulated
// rows back through snapshot.
type fakeStorage struct {
	// err is returned from every RecordMetaCall invocation; leaving it
	// nil makes recording report success.
	err error

	// mu is held by RecordMetaCall and snapshot while they touch calls.
	mu sync.Mutex

	// calls holds the recorded rows in arrival order.
	calls []MetaCall
}

// RecordMetaCall appends call to the captured rows and then returns the
// fake's configured err. The row is stored even when err is non-nil.
// The context argument is ignored.
func (f *fakeStorage) RecordMetaCall(_ context.Context, call MetaCall) error {
	f.mu.Lock()
	f.calls = append(f.calls, call)
	result := f.err
	f.mu.Unlock()
	return result
}

// snapshot returns a copy of the rows recorded so far, taken under the
// lock, so the caller can inspect it without holding f.mu. The returned
// slice is always non-nil and does not share storage with f.calls.
func (f *fakeStorage) snapshot() []MetaCall {
	f.mu.Lock()
	defer f.mu.Unlock()
	rows := make([]MetaCall, 0, len(f.calls))
	return append(rows, f.calls...)
}

// TestCall_TierNotAllowed: when the requested tier is absent from the
// allowlist supplied by the convar reader, Call returns a nil error and
// a result with Success=false and ErrorKind=ErrorKindTierNotAllowed,
// and no ledger row is recorded — the call did not happen.
func TestCall_TierNotAllowed(t *testing.T) {
	ledger := &fakeStorage{}
	allowFastOnly := ConvarReaderFunc(func(_ context.Context) []string {
		return []string{"fast"}
	})
	// "thinking" is deliberately not in the allowlist above.
	spec := CallSpec{
		Tier:       "thinking",
		UserPrompt: "hello",
		ToolName:   "summarize",
	}

	res, err := New(ledger, allowFastOnly).Call(context.Background(), spec)
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	if res.Success {
		t.Errorf("expected Success=false")
	}
	if res.ErrorKind != ErrorKindTierNotAllowed {
		t.Errorf("ErrorKind = %q, want %q", res.ErrorKind, ErrorKindTierNotAllowed)
	}
	if n := len(ledger.snapshot()); n != 0 {
		t.Errorf("expected NO ledger row for tier_not_allowed, got %d", n)
	}
}

// TestCall_TierAllowedHappyText: success path for a tier that is on the
// allowlist. The stubbed completion text and token counts must surface
// on the result, and exactly one ledger row carrying the same tool name,
// run ID and token counts must be recorded.
func TestCall_TierAllowedHappyText(t *testing.T) {
	const wantText = "summary text here"

	ledger := &fakeStorage{}
	allowFastOnly := ConvarReaderFunc(func(_ context.Context) []string {
		return []string{"fast"}
	})
	helper := New(ledger, allowFastOnly)
	// Swap in a canned completion; undo puts the previous one back.
	undo := SetCompleteForTest(func(_ context.Context, _ llm.Model, _, _ string, _ []llm.Option) (string, Tokens, error) {
		return wantText, Tokens{InputTokens: 50, OutputTokens: 12}, nil
	})
	defer undo()

	spec := CallSpec{
		Tier:           "fast",
		UserPrompt:     "summarise the following ...",
		ToolName:       "summarize",
		ResponseFormat: "text",
		RunID:          "run-1",
		SkillID:        "sk-1",
	}
	res, err := helper.Call(context.Background(), spec)
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	// Result-side assertions.
	if !res.Success {
		t.Errorf("expected Success=true; got ErrorKind=%q", res.ErrorKind)
	}
	if res.Text != wantText {
		t.Errorf("Text = %q, want %q", res.Text, wantText)
	}
	if !(res.InputTokens == 50 && res.OutputTokens == 12) {
		t.Errorf("token counts wrong: in=%d out=%d", res.InputTokens, res.OutputTokens)
	}

	// Ledger-side assertions: one row, mirroring the call.
	rows := ledger.snapshot()
	if len(rows) != 1 {
		t.Fatalf("expected 1 ledger row, got %d", len(rows))
	}
	row := rows[0]
	if !row.Success {
		t.Errorf("ledger Success = false, want true")
	}
	if row.ToolName != "summarize" {
		t.Errorf("ledger ToolName = %q", row.ToolName)
	}
	if row.RunID != "run-1" {
		t.Errorf("ledger RunID = %q", row.RunID)
	}
	if !(row.InputTokens == 50 && row.OutputTokens == 12) {
		t.Errorf("ledger token counts wrong: in=%d out=%d",
			row.InputTokens, row.OutputTokens)
	}
}

// TestCall_JSONFirstAttemptParses: JSON-format request, response is
// valid JSON on first try; result.Parsed is populated with the decoded
// object and no error kind is set.
func TestCall_JSONFirstAttemptParses(t *testing.T) {
	store := &fakeStorage{}
	h := New(store, nil)
	restore := SetCompleteForTest(func(_ context.Context, _ llm.Model, _, _ string, _ []llm.Option) (string, Tokens, error) {
		return `{"foo":"bar","n":42}`, Tokens{InputTokens: 10, OutputTokens: 5}, nil
	})
	defer restore()

	res, err := h.Call(context.Background(), CallSpec{
		UserPrompt:           "extract entities",
		ToolName:             "extract_entities",
		ResponseFormat:       "json",
		RetryOnMalformedJSON: true,
		SkillID:              "sk-2",
	})
	// Check the error before touching res so an unexpected failure is
	// reported as such rather than as a confusing field mismatch.
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	if !res.Success || res.ErrorKind != "" {
		t.Fatalf("expected success, got %+v", res)
	}
	m, ok := res.Parsed.(map[string]any)
	if !ok {
		t.Fatalf("Parsed not a map: %T %v", res.Parsed, res.Parsed)
	}
	if m["foo"] != "bar" {
		t.Errorf("Parsed[foo] = %v", m["foo"])
	}
	// Numbers are compared as float64, matching the sibling fenced-JSON
	// test's expectation for decoded numeric values.
	if m["n"] != float64(42) {
		t.Errorf("Parsed[n] = %v, want 42", m["n"])
	}
}

// TestCall_JSONRetryPath: first response is malformed JSON; second
// response (after stricter prompt) parses cleanly. The result must
// report success, carry the parsed object from the second attempt, and
// sum the token counts of both attempts.
func TestCall_JSONRetryPath(t *testing.T) {
	store := &fakeStorage{}
	h := New(store, nil)
	calls := 0
	restore := SetCompleteForTest(func(_ context.Context, _ llm.Model, _, prompt string, _ []llm.Option) (string, Tokens, error) {
		calls++
		if calls == 1 {
			return "Here is your JSON: {oh no I forgot to format it", Tokens{InputTokens: 8, OutputTokens: 12}, nil
		}
		// Verify stricter prompt prefix appeared on retry.
		if !strings.Contains(prompt, "Return ONLY valid JSON") {
			t.Errorf("retry prompt missing stricter prefix: %q", prompt)
		}
		return `{"key":"value"}`, Tokens{InputTokens: 14, OutputTokens: 6}, nil
	})
	defer restore()

	res, err := h.Call(context.Background(), CallSpec{
		UserPrompt:           "extract",
		ToolName:             "extract_entities",
		ResponseFormat:       "json",
		RetryOnMalformedJSON: true,
	})
	// Check the error before touching res so an unexpected failure is
	// reported as such rather than as a confusing field mismatch.
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	if !res.Success || res.ErrorKind != "" {
		t.Fatalf("expected success, got %+v", res)
	}
	if calls != 2 {
		t.Errorf("expected 2 LLM calls, got %d", calls)
	}
	// Report a wrong dynamic type explicitly instead of letting a nil map
	// lookup mask it.
	m, ok := res.Parsed.(map[string]any)
	if !ok {
		t.Fatalf("Parsed not a map: %T %v", res.Parsed, res.Parsed)
	}
	if m["key"] != "value" {
		t.Errorf("Parsed = %v", res.Parsed)
	}
	// Token counts should reflect both attempts (8+14 in, 12+6 out).
	if res.InputTokens != 22 || res.OutputTokens != 18 {
		t.Errorf("combined tokens wrong: in=%d out=%d", res.InputTokens, res.OutputTokens)
	}
}

// TestCall_JSONRetryFailsTwice: second attempt also fails to parse.
// Surfaces ErrorKind=malformed_json AND keeps Success=true so the
// caller can fall back to result.Text. The single ledger row must
// carry the same Success/ErrorKind pair.
func TestCall_JSONRetryFailsTwice(t *testing.T) {
	store := &fakeStorage{}
	h := New(store, nil)
	// Every attempt gets the same unparseable reply.
	restore := SetCompleteForTest(func(_ context.Context, _ llm.Model, _, _ string, _ []llm.Option) (string, Tokens, error) {
		return "still not JSON", Tokens{InputTokens: 10, OutputTokens: 4}, nil
	})
	defer restore()

	res, err := h.Call(context.Background(), CallSpec{
		UserPrompt:           "extract",
		ToolName:             "extract_entities",
		ResponseFormat:       "json",
		RetryOnMalformedJSON: true,
	})
	// Malformed JSON is expected to be reported through ErrorKind, not
	// through the error return, so a non-nil error here is a failure.
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	if !res.Success {
		t.Errorf("expected Success=true (fall-back-to-text), got Success=false")
	}
	if res.ErrorKind != ErrorKindMalformedJSON {
		t.Errorf("ErrorKind = %q, want %q", res.ErrorKind, ErrorKindMalformedJSON)
	}
	if res.Parsed != nil {
		t.Errorf("Parsed = %v, want nil after failed retry", res.Parsed)
	}
	rows := store.snapshot()
	if len(rows) != 1 {
		t.Fatalf("expected 1 ledger row, got %d", len(rows))
	}
	if !rows[0].Success || rows[0].ErrorKind != ErrorKindMalformedJSON {
		t.Errorf("ledger row mismatch: %+v", rows[0])
	}
}

// TestCall_LLMUnavailable: when the stubbed completion function returns
// an error, the result must have Success=false and
// ErrorKind=ErrorKindLLMUnavailable, and one ledger row must still be
// recorded.
func TestCall_LLMUnavailable(t *testing.T) {
	ledger := &fakeStorage{}
	helper := New(ledger, nil)
	undo := SetCompleteForTest(func(_ context.Context, _ llm.Model, _, _ string, _ []llm.Option) (string, Tokens, error) {
		return "", Tokens{}, errors.New("network error")
	})
	defer undo()

	spec := CallSpec{
		UserPrompt: "hi",
		ToolName:   "summarize",
	}
	// The error return is not inspected: this test only pins the result
	// fields and the ledger row count.
	res, _ := helper.Call(context.Background(), spec)
	if res.Success {
		t.Errorf("expected Success=false")
	}
	if res.ErrorKind != ErrorKindLLMUnavailable {
		t.Errorf("ErrorKind = %q, want %q", res.ErrorKind, ErrorKindLLMUnavailable)
	}
	if n := len(ledger.snapshot()); n != 1 {
		t.Fatalf("expected 1 ledger row, got %d", n)
	}
}

// TestCall_EmptyUserPromptErrors: programmer-error guard.
func TestCall_EmptyUserPromptErrors(t *testing.T) {
	h := New(&fakeStorage{}, nil)
	_, err := h.Call(context.Background(), CallSpec{ToolName: "summarize"})
	if err == nil {
		t.Fatal("expected error for empty user_prompt")
	}
}

// TestCall_JSONWithCodeFenceParses: tolerance for the first-attempt
// response wrapped in a ```json ... ``` fence. The retry path uses a
// stricter prompt; this test pins the first-attempt tolerance so
// callers don't waste a round-trip on a benign formatting wrapper.
// The stub counts its invocations so the "no extra round-trip" claim is
// asserted directly rather than inferred from the absence of an error
// kind.
func TestCall_JSONWithCodeFenceParses(t *testing.T) {
	store := &fakeStorage{}
	h := New(store, nil)
	calls := 0
	restore := SetCompleteForTest(func(_ context.Context, _ llm.Model, _, _ string, _ []llm.Option) (string, Tokens, error) {
		calls++
		return "```json\n{\"x\":1}\n```", Tokens{InputTokens: 5, OutputTokens: 4}, nil
	})
	defer restore()

	res, err := h.Call(context.Background(), CallSpec{
		UserPrompt:           "extract",
		ToolName:             "extract_entities",
		ResponseFormat:       "json",
		RetryOnMalformedJSON: true,
	})
	// Check the error before touching res so an unexpected failure is
	// reported as such rather than as a confusing field mismatch.
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	if res.ErrorKind != "" {
		t.Errorf("unexpected ErrorKind %q (fenced JSON should parse on first attempt)", res.ErrorKind)
	}
	// A second invocation would mean the fenced reply triggered a retry.
	if calls != 1 {
		t.Errorf("expected 1 LLM call (no retry for fenced JSON), got %d", calls)
	}
	// Report a wrong dynamic type explicitly instead of letting a nil map
	// lookup mask it.
	m, ok := res.Parsed.(map[string]any)
	if !ok {
		t.Fatalf("Parsed not a map: %T %v", res.Parsed, res.Parsed)
	}
	if m["x"] != float64(1) {
		t.Errorf("Parsed[x] = %v, want 1", m["x"])
	}
}