P1: model layer (convar->config inversion) + llmmeta

Lifts mort's pkg/logic/llms into executus/model, decoupled from mort:

- tiers.go: the tier resolver now reads a host-supplied config.Source under "model.tier.<name>" with host-supplied fallbacks (Configure(cfg, defaults, ttl)), instead of convar.Manager. Tier NAMES + specs are host config; the resolution mechanism (cache, reasoning-suffix dialect, chain validation) is generic. No tier names hard-coded in the harness.
- sink.go: usage/trace recording inverted off mort's llmusage/llmtrace into UsageSink / TraceSink seams + a model-owned Span, with nil-safe context attribution helpers (WithModel/WithTraceID/WithUsageTool/WithUsageUser). Both sinks optional (nil = off) so a light host records nothing.
- lane decoration repointed to executus/lane; utils.Errorf -> fmt.Errorf.
- call.go keeps GenerateWith[T] (instrumented structured output) — this is the structured-output primitive; no separate structured/ package.
- llmmeta moved over model/ (the meta-LLM helper: tier allowlist + JSON retry + ledger). Its tests configure a minimal tier table via TestMain.

New tests cover the inversion: config overrides fallback, tier registration, reasoning-suffix survival, nested-tier rejection, nil-sink no-ops.

Full module: go build/vet/test -race green; core go.sum still free of gorm/redis/discordgo/sqlite.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-26 19:47:13 -04:00
parent 741d7816ed
commit b424261aca
17 changed files with 3698 additions and 3 deletions
@@ -0,0 +1,282 @@
+package llmmeta
+
+import (
+	"context"
+	"errors"
+	"strings"
+	"sync"
+	"testing"
+
+	llm "gitea.stevedudenhoeffer.com/steve/majordomo/llm"
+)
+
+// fakeStorage is a test double that appends every MetaCall passed to
+// RecordMetaCall to calls; tests read them back through snapshot.
+type fakeStorage struct {
+	mu    sync.Mutex // guards calls
+	calls []MetaCall // every MetaCall received, in arrival order
+	err   error      // returned verbatim from RecordMetaCall
+}
+
+// RecordMetaCall appends call to the captured list while holding the
+// mutex and returns the err field (nil unless a test has set it). The
+// context argument is ignored.
+func (f *fakeStorage) RecordMetaCall(_ context.Context, call MetaCall) error {
+	f.mu.Lock()
+	f.calls = append(f.calls, call)
+	result := f.err
+	f.mu.Unlock()
+	return result
+}
+
+// snapshot returns a copy of the calls recorded so far. The copy is
+// made while holding the mutex, so the caller can inspect it without
+// holding the lock.
+func (f *fakeStorage) snapshot() []MetaCall {
+	f.mu.Lock()
+	defer f.mu.Unlock()
+	return append(make([]MetaCall, 0, len(f.calls)), f.calls...)
+}
+
+// TestCall_TierNotAllowed requests the "thinking" tier while the
+// allowlist reader yields only "fast". Call must return a nil error
+// and a result with Success=false and
+// ErrorKind=ErrorKindTierNotAllowed, and the ledger must stay empty —
+// the call did not happen.
+func TestCall_TierNotAllowed(t *testing.T) {
+	ledger := &fakeStorage{}
+	allow := ConvarReaderFunc(func(context.Context) []string {
+		return []string{"fast"}
+	})
+	h := New(ledger, allow)
+	spec := CallSpec{
+		Tier:       "thinking",
+		UserPrompt: "hello",
+		ToolName:   "summarize",
+	}
+
+	res, err := h.Call(context.Background(), spec)
+	if err != nil {
+		t.Fatalf("unexpected err: %v", err)
+	}
+	if res.Success {
+		t.Errorf("expected Success=false")
+	}
+	if got := res.ErrorKind; got != ErrorKindTierNotAllowed {
+		t.Errorf("ErrorKind = %q, want %q", got, ErrorKindTierNotAllowed)
+	}
+	if n := len(ledger.snapshot()); n != 0 {
+		t.Errorf("expected NO ledger row for tier_not_allowed, got %d", n)
+	}
+}
+
+// TestCall_TierAllowedHappyText calls the allowlisted "fast" tier
+// with a stubbed completion. It expects a successful text result
+// carrying the stub's text and token counts, and exactly one ledger
+// row mirroring the tool name, run ID and token counts.
+func TestCall_TierAllowedHappyText(t *testing.T) {
+	ledger := &fakeStorage{}
+	h := New(ledger, ConvarReaderFunc(func(context.Context) []string {
+		return []string{"fast"}
+	}))
+	// Install the stub now; the returned restore func runs at test exit.
+	defer SetCompleteForTest(func(context.Context, llm.Model, string, string, []llm.Option) (string, Tokens, error) {
+		return "summary text here", Tokens{InputTokens: 50, OutputTokens: 12}, nil
+	})()
+
+	spec := CallSpec{
+		Tier:           "fast",
+		UserPrompt:     "summarise the following ...",
+		ToolName:       "summarize",
+		ResponseFormat: "text",
+		RunID:          "run-1",
+		SkillID:        "sk-1",
+	}
+	res, err := h.Call(context.Background(), spec)
+	if err != nil {
+		t.Fatalf("unexpected err: %v", err)
+	}
+
+	// Returned result.
+	if !res.Success {
+		t.Errorf("expected Success=true; got ErrorKind=%q", res.ErrorKind)
+	}
+	if got := res.Text; got != "summary text here" {
+		t.Errorf("Text = %q, want %q", got, "summary text here")
+	}
+	if !(res.InputTokens == 50 && res.OutputTokens == 12) {
+		t.Errorf("token counts wrong: in=%d out=%d", res.InputTokens, res.OutputTokens)
+	}
+
+	// Ledger row.
+	rows := ledger.snapshot()
+	if n := len(rows); n != 1 {
+		t.Fatalf("expected 1 ledger row, got %d", n)
+	}
+	row := rows[0]
+	if !row.Success {
+		t.Errorf("ledger Success = false, want true")
+	}
+	if row.ToolName != "summarize" {
+		t.Errorf("ledger ToolName = %q", row.ToolName)
+	}
+	if row.RunID != "run-1" {
+		t.Errorf("ledger RunID = %q", row.RunID)
+	}
+	if !(row.InputTokens == 50 && row.OutputTokens == 12) {
+		t.Errorf("ledger token counts wrong: in=%d out=%d",
+			row.InputTokens, row.OutputTokens)
+	}
+}
+
+// TestCall_JSONFirstAttemptParses: JSON-format request whose stubbed
+// response is valid JSON on the first try. Call must return a nil
+// error and a successful result with an empty ErrorKind, and
+// result.Parsed must be a map holding the decoded "foo" value.
+func TestCall_JSONFirstAttemptParses(t *testing.T) {
+	store := &fakeStorage{}
+	h := New(store, nil)
+	restore := SetCompleteForTest(func(_ context.Context, _ llm.Model, _, _ string, _ []llm.Option) (string, Tokens, error) {
+		return `{"foo":"bar","n":42}`, Tokens{InputTokens: 10, OutputTokens: 5}, nil
+	})
+	defer restore()
+
+	res, err := h.Call(context.Background(), CallSpec{
+		UserPrompt:           "extract entities",
+		ToolName:             "extract_entities",
+		ResponseFormat:       "json",
+		RetryOnMalformedJSON: true,
+		SkillID:              "sk-2",
+	})
+	// Check the error first so an unexpected failure is reported as
+	// such instead of surfacing through the result assertions below.
+	if err != nil {
+		t.Fatalf("unexpected err: %v", err)
+	}
+	if !res.Success || res.ErrorKind != "" {
+		t.Fatalf("expected success, got %+v", res)
+	}
+	m, ok := res.Parsed.(map[string]any)
+	if !ok {
+		t.Fatalf("Parsed not a map: %T %v", res.Parsed, res.Parsed)
+	}
+	if m["foo"] != "bar" {
+		t.Errorf("Parsed[foo] = %v", m["foo"])
+	}
+}
+
+// TestCall_JSONRetryPath: the first stubbed response is malformed
+// JSON; the second (which must see the stricter "Return ONLY valid
+// JSON" prompt) parses cleanly. The test expects a nil error, a
+// successful result, exactly two completion calls, the parsed map
+// from the second response, and token counts summed over both
+// attempts.
+func TestCall_JSONRetryPath(t *testing.T) {
+	store := &fakeStorage{}
+	h := New(store, nil)
+	calls := 0
+	restore := SetCompleteForTest(func(_ context.Context, _ llm.Model, _, prompt string, _ []llm.Option) (string, Tokens, error) {
+		calls++
+		if calls == 1 {
+			return "Here is your JSON: {oh no I forgot to format it", Tokens{InputTokens: 8, OutputTokens: 12}, nil
+		}
+		// Verify stricter prompt prefix appeared on retry.
+		if !strings.Contains(prompt, "Return ONLY valid JSON") {
+			t.Errorf("retry prompt missing stricter prefix: %q", prompt)
+		}
+		return `{"key":"value"}`, Tokens{InputTokens: 14, OutputTokens: 6}, nil
+	})
+	defer restore()
+
+	res, err := h.Call(context.Background(), CallSpec{
+		UserPrompt:           "extract",
+		ToolName:             "extract_entities",
+		ResponseFormat:       "json",
+		RetryOnMalformedJSON: true,
+	})
+	// Check the error first so an unexpected failure is reported as
+	// such instead of surfacing through the result assertions below.
+	if err != nil {
+		t.Fatalf("unexpected err: %v", err)
+	}
+	if !res.Success || res.ErrorKind != "" {
+		t.Fatalf("expected success, got %+v", res)
+	}
+	if calls != 2 {
+		t.Errorf("expected 2 LLM calls, got %d", calls)
+	}
+	// Report a wrong Parsed type explicitly rather than as a missing key.
+	m, ok := res.Parsed.(map[string]any)
+	if !ok {
+		t.Fatalf("Parsed not a map: %T %v", res.Parsed, res.Parsed)
+	}
+	if m["key"] != "value" {
+		t.Errorf("Parsed = %v", res.Parsed)
+	}
+	// Token counts should reflect both attempts (8+14 in, 12+6 out).
+	if res.InputTokens != 22 || res.OutputTokens != 18 {
+		t.Errorf("combined tokens wrong: in=%d out=%d", res.InputTokens, res.OutputTokens)
+	}
+}
+
+// TestCall_JSONRetryFailsTwice: the stub never returns JSON, so the
+// retry fails to parse as well. The result must carry
+// ErrorKind=malformed_json while keeping Success=true (so the caller
+// can fall back to result.Text), Parsed must be nil, and the single
+// ledger row must show the same Success/ErrorKind pair.
+func TestCall_JSONRetryFailsTwice(t *testing.T) {
+	ledger := &fakeStorage{}
+	h := New(ledger, nil)
+	// Install the stub now; the returned restore func runs at test exit.
+	defer SetCompleteForTest(func(context.Context, llm.Model, string, string, []llm.Option) (string, Tokens, error) {
+		return "still not JSON", Tokens{InputTokens: 10, OutputTokens: 4}, nil
+	})()
+
+	spec := CallSpec{
+		UserPrompt:           "extract",
+		ToolName:             "extract_entities",
+		ResponseFormat:       "json",
+		RetryOnMalformedJSON: true,
+	}
+	// The error return is not inspected; the outcome is asserted on res.
+	res, _ := h.Call(context.Background(), spec)
+
+	if !res.Success {
+		t.Errorf("expected Success=true (fall-back-to-text), got Success=false")
+	}
+	if got := res.ErrorKind; got != ErrorKindMalformedJSON {
+		t.Errorf("ErrorKind = %q, want %q", got, ErrorKindMalformedJSON)
+	}
+	if res.Parsed != nil {
+		t.Errorf("Parsed = %v, want nil after failed retry", res.Parsed)
+	}
+
+	rows := ledger.snapshot()
+	if n := len(rows); n != 1 {
+		t.Fatalf("expected 1 ledger row, got %d", n)
+	}
+	if row := rows[0]; !(row.Success && row.ErrorKind == ErrorKindMalformedJSON) {
+		t.Errorf("ledger row mismatch: %+v", row)
+	}
+}
+
+// TestCall_LLMUnavailable: the stubbed completion returns an error.
+// The result must have Success=false and
+// ErrorKind=ErrorKindLLMUnavailable, and exactly one ledger row must
+// be recorded.
+func TestCall_LLMUnavailable(t *testing.T) {
+	ledger := &fakeStorage{}
+	h := New(ledger, nil)
+	// Install the stub now; the returned restore func runs at test exit.
+	defer SetCompleteForTest(func(context.Context, llm.Model, string, string, []llm.Option) (string, Tokens, error) {
+		return "", Tokens{}, errors.New("network error")
+	})()
+
+	spec := CallSpec{
+		UserPrompt: "hi",
+		ToolName:   "summarize",
+	}
+	// The error return is not inspected; the outcome is asserted on res.
+	res, _ := h.Call(context.Background(), spec)
+
+	if res.Success {
+		t.Errorf("expected Success=false")
+	}
+	if got := res.ErrorKind; got != ErrorKindLLMUnavailable {
+		t.Errorf("ErrorKind = %q, want %q", got, ErrorKindLLMUnavailable)
+	}
+	if n := len(ledger.snapshot()); n != 1 {
+		t.Fatalf("expected 1 ledger row, got %d", n)
+	}
+}
+
+// TestCall_EmptyUserPromptErrors: programmer-error guard — a CallSpec
+// with no UserPrompt must make Call return a non-nil error.
+func TestCall_EmptyUserPromptErrors(t *testing.T) {
+	h := New(&fakeStorage{}, nil)
+	spec := CallSpec{ToolName: "summarize"}
+	if _, err := h.Call(context.Background(), spec); err == nil {
+		t.Fatal("expected error for empty user_prompt")
+	}
+}
+
+// TestCall_JSONWithCodeFenceParses: tolerance for the first-attempt
+// response wrapped in a ```json ... ``` fence. The retry path uses a
+// stricter prompt; this test pins the first-attempt tolerance so
+// callers don't waste a round-trip on a benign formatting wrapper.
+// It expects a nil error, an empty ErrorKind, and Parsed to be a map
+// whose "x" entry decodes to float64(1).
+func TestCall_JSONWithCodeFenceParses(t *testing.T) {
+	store := &fakeStorage{}
+	h := New(store, nil)
+	restore := SetCompleteForTest(func(_ context.Context, _ llm.Model, _, _ string, _ []llm.Option) (string, Tokens, error) {
+		return "```json\n{\"x\":1}\n```", Tokens{InputTokens: 5, OutputTokens: 4}, nil
+	})
+	defer restore()
+
+	res, err := h.Call(context.Background(), CallSpec{
+		UserPrompt:           "extract",
+		ToolName:             "extract_entities",
+		ResponseFormat:       "json",
+		RetryOnMalformedJSON: true,
+	})
+	// Check the error first so an unexpected failure is reported as
+	// such instead of surfacing through the result assertions below.
+	if err != nil {
+		t.Fatalf("unexpected err: %v", err)
+	}
+	if res.ErrorKind != "" {
+		t.Errorf("unexpected ErrorKind %q (fenced JSON should parse on first attempt)", res.ErrorKind)
+	}
+	// Report a wrong Parsed type explicitly rather than as a missing key.
+	m, ok := res.Parsed.(map[string]any)
+	if !ok {
+		t.Fatalf("Parsed not a map: %T %v", res.Parsed, res.Parsed)
+	}
+	if m["x"] != float64(1) {
+		t.Errorf("Parsed[x] = %v, want 1", m["x"])
+	}
+}