P1: model layer (convar->config inversion) + llmmeta
Lifts mort's pkg/logic/llms into executus/model, decoupled from mort:

- tiers.go: the tier resolver now reads a host-supplied config.Source under "model.tier.<name>" with host-supplied fallbacks (Configure(cfg, defaults, ttl)), instead of convar.Manager. Tier NAMES + specs are host config; the resolution mechanism (cache, reasoning-suffix dialect, chain validation) is generic. No tier names hard-coded in the harness.
- sink.go: usage/trace recording inverted off mort's llmusage/llmtrace into UsageSink / TraceSink seams + a model-owned Span, with nil-safe context attribution helpers (WithModel/WithTraceID/WithUsageTool/WithUsageUser). Both sinks optional (nil = off) so a light host records nothing.
- lane decoration repointed to executus/lane; utils.Errorf -> fmt.Errorf.
- call.go keeps GenerateWith[T] (instrumented structured output) — this is the structured-output primitive; no separate structured/ package.
- llmmeta moved over model/ (the meta-LLM helper: tier allowlist + JSON retry + ledger). Its tests configure a minimal tier table via TestMain.

New tests cover the inversion: config overrides fallback, tier registration, reasoning-suffix survival, nested-tier rejection, nil-sink no-ops.

Full module: go build/vet/test -race green; core go.sum still free of gorm/redis/discordgo/sqlite.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit was merged in pull request #1.
This commit is contained in:
@@ -0,0 +1,282 @@
|
||||
package llmmeta
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
|
||||
llm "gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
||||
)
|
||||
|
||||
// fakeStorage records every MetaCall handed to RecordMetaCall and
|
||||
// makes them available to tests via the captured slice.
|
||||
type fakeStorage struct {
|
||||
mu sync.Mutex
|
||||
calls []MetaCall
|
||||
err error
|
||||
}
|
||||
|
||||
func (f *fakeStorage) RecordMetaCall(_ context.Context, call MetaCall) error {
|
||||
f.mu.Lock()
|
||||
defer f.mu.Unlock()
|
||||
f.calls = append(f.calls, call)
|
||||
return f.err
|
||||
}
|
||||
|
||||
func (f *fakeStorage) snapshot() []MetaCall {
|
||||
f.mu.Lock()
|
||||
defer f.mu.Unlock()
|
||||
out := make([]MetaCall, len(f.calls))
|
||||
copy(out, f.calls)
|
||||
return out
|
||||
}
|
||||
|
||||
// TestCall_TierNotAllowed: a tier not in the allowlist returns the
|
||||
// rejection without recording a ledger row — the call did not happen.
|
||||
func TestCall_TierNotAllowed(t *testing.T) {
|
||||
store := &fakeStorage{}
|
||||
convars := ConvarReaderFunc(func(_ context.Context) []string {
|
||||
return []string{"fast"}
|
||||
})
|
||||
h := New(store, convars)
|
||||
|
||||
res, err := h.Call(context.Background(), CallSpec{
|
||||
Tier: "thinking",
|
||||
UserPrompt: "hello",
|
||||
ToolName: "summarize",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected err: %v", err)
|
||||
}
|
||||
if res.Success {
|
||||
t.Errorf("expected Success=false")
|
||||
}
|
||||
if res.ErrorKind != ErrorKindTierNotAllowed {
|
||||
t.Errorf("ErrorKind = %q, want %q", res.ErrorKind, ErrorKindTierNotAllowed)
|
||||
}
|
||||
if len(store.snapshot()) != 0 {
|
||||
t.Errorf("expected NO ledger row for tier_not_allowed, got %d", len(store.snapshot()))
|
||||
}
|
||||
}
|
||||
|
||||
// TestCall_TierAllowedHappyText: a permitted tier yields a successful
|
||||
// text call AND records a ledger row.
|
||||
func TestCall_TierAllowedHappyText(t *testing.T) {
|
||||
store := &fakeStorage{}
|
||||
convars := ConvarReaderFunc(func(_ context.Context) []string {
|
||||
return []string{"fast"}
|
||||
})
|
||||
h := New(store, convars)
|
||||
restore := SetCompleteForTest(func(_ context.Context, _ llm.Model, _, _ string, _ []llm.Option) (string, Tokens, error) {
|
||||
return "summary text here", Tokens{InputTokens: 50, OutputTokens: 12}, nil
|
||||
})
|
||||
defer restore()
|
||||
|
||||
res, err := h.Call(context.Background(), CallSpec{
|
||||
Tier: "fast",
|
||||
UserPrompt: "summarise the following ...",
|
||||
ToolName: "summarize",
|
||||
ResponseFormat: "text",
|
||||
RunID: "run-1",
|
||||
SkillID: "sk-1",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected err: %v", err)
|
||||
}
|
||||
if !res.Success {
|
||||
t.Errorf("expected Success=true; got ErrorKind=%q", res.ErrorKind)
|
||||
}
|
||||
if res.Text != "summary text here" {
|
||||
t.Errorf("Text = %q, want %q", res.Text, "summary text here")
|
||||
}
|
||||
if res.InputTokens != 50 || res.OutputTokens != 12 {
|
||||
t.Errorf("token counts wrong: in=%d out=%d", res.InputTokens, res.OutputTokens)
|
||||
}
|
||||
if got := len(store.snapshot()); got != 1 {
|
||||
t.Fatalf("expected 1 ledger row, got %d", got)
|
||||
}
|
||||
row := store.snapshot()[0]
|
||||
if !row.Success {
|
||||
t.Errorf("ledger Success = false, want true")
|
||||
}
|
||||
if row.ToolName != "summarize" {
|
||||
t.Errorf("ledger ToolName = %q", row.ToolName)
|
||||
}
|
||||
if row.RunID != "run-1" {
|
||||
t.Errorf("ledger RunID = %q", row.RunID)
|
||||
}
|
||||
if row.InputTokens != 50 || row.OutputTokens != 12 {
|
||||
t.Errorf("ledger token counts wrong: in=%d out=%d",
|
||||
row.InputTokens, row.OutputTokens)
|
||||
}
|
||||
}
|
||||
|
||||
// TestCall_JSONFirstAttemptParses: JSON-format request, response is
|
||||
// valid JSON on first try; result.Parsed populated.
|
||||
func TestCall_JSONFirstAttemptParses(t *testing.T) {
|
||||
store := &fakeStorage{}
|
||||
h := New(store, nil)
|
||||
restore := SetCompleteForTest(func(_ context.Context, _ llm.Model, _, _ string, _ []llm.Option) (string, Tokens, error) {
|
||||
return `{"foo":"bar","n":42}`, Tokens{InputTokens: 10, OutputTokens: 5}, nil
|
||||
})
|
||||
defer restore()
|
||||
|
||||
res, _ := h.Call(context.Background(), CallSpec{
|
||||
UserPrompt: "extract entities",
|
||||
ToolName: "extract_entities",
|
||||
ResponseFormat: "json",
|
||||
RetryOnMalformedJSON: true,
|
||||
SkillID: "sk-2",
|
||||
})
|
||||
if !res.Success || res.ErrorKind != "" {
|
||||
t.Fatalf("expected success, got %+v", res)
|
||||
}
|
||||
m, ok := res.Parsed.(map[string]any)
|
||||
if !ok {
|
||||
t.Fatalf("Parsed not a map: %T %v", res.Parsed, res.Parsed)
|
||||
}
|
||||
if m["foo"] != "bar" {
|
||||
t.Errorf("Parsed[foo] = %v", m["foo"])
|
||||
}
|
||||
}
|
||||
|
||||
// TestCall_JSONRetryPath: first response is malformed JSON; second
|
||||
// response (after stricter prompt) parses cleanly.
|
||||
func TestCall_JSONRetryPath(t *testing.T) {
|
||||
store := &fakeStorage{}
|
||||
h := New(store, nil)
|
||||
calls := 0
|
||||
restore := SetCompleteForTest(func(_ context.Context, _ llm.Model, _, prompt string, _ []llm.Option) (string, Tokens, error) {
|
||||
calls++
|
||||
if calls == 1 {
|
||||
return "Here is your JSON: {oh no I forgot to format it", Tokens{InputTokens: 8, OutputTokens: 12}, nil
|
||||
}
|
||||
// Verify stricter prompt prefix appeared on retry.
|
||||
if !strings.Contains(prompt, "Return ONLY valid JSON") {
|
||||
t.Errorf("retry prompt missing stricter prefix: %q", prompt)
|
||||
}
|
||||
return `{"key":"value"}`, Tokens{InputTokens: 14, OutputTokens: 6}, nil
|
||||
})
|
||||
defer restore()
|
||||
|
||||
res, _ := h.Call(context.Background(), CallSpec{
|
||||
UserPrompt: "extract",
|
||||
ToolName: "extract_entities",
|
||||
ResponseFormat: "json",
|
||||
RetryOnMalformedJSON: true,
|
||||
})
|
||||
if !res.Success || res.ErrorKind != "" {
|
||||
t.Fatalf("expected success, got %+v", res)
|
||||
}
|
||||
if calls != 2 {
|
||||
t.Errorf("expected 2 LLM calls, got %d", calls)
|
||||
}
|
||||
m, _ := res.Parsed.(map[string]any)
|
||||
if m["key"] != "value" {
|
||||
t.Errorf("Parsed = %v", res.Parsed)
|
||||
}
|
||||
// Token counts should reflect both attempts.
|
||||
if res.InputTokens != 22 || res.OutputTokens != 18 {
|
||||
t.Errorf("combined tokens wrong: in=%d out=%d", res.InputTokens, res.OutputTokens)
|
||||
}
|
||||
}
|
||||
|
||||
// TestCall_JSONRetryFailsTwice: second attempt also fails to parse.
|
||||
// Surfaces ErrorKind=malformed_json AND keeps Success=true so the
|
||||
// caller can fall back to result.Text.
|
||||
func TestCall_JSONRetryFailsTwice(t *testing.T) {
|
||||
store := &fakeStorage{}
|
||||
h := New(store, nil)
|
||||
restore := SetCompleteForTest(func(_ context.Context, _ llm.Model, _, _ string, _ []llm.Option) (string, Tokens, error) {
|
||||
return "still not JSON", Tokens{InputTokens: 10, OutputTokens: 4}, nil
|
||||
})
|
||||
defer restore()
|
||||
|
||||
res, _ := h.Call(context.Background(), CallSpec{
|
||||
UserPrompt: "extract",
|
||||
ToolName: "extract_entities",
|
||||
ResponseFormat: "json",
|
||||
RetryOnMalformedJSON: true,
|
||||
})
|
||||
if !res.Success {
|
||||
t.Errorf("expected Success=true (fall-back-to-text), got Success=false")
|
||||
}
|
||||
if res.ErrorKind != ErrorKindMalformedJSON {
|
||||
t.Errorf("ErrorKind = %q, want %q", res.ErrorKind, ErrorKindMalformedJSON)
|
||||
}
|
||||
if res.Parsed != nil {
|
||||
t.Errorf("Parsed = %v, want nil after failed retry", res.Parsed)
|
||||
}
|
||||
rows := store.snapshot()
|
||||
if len(rows) != 1 {
|
||||
t.Fatalf("expected 1 ledger row, got %d", len(rows))
|
||||
}
|
||||
if !rows[0].Success || rows[0].ErrorKind != ErrorKindMalformedJSON {
|
||||
t.Errorf("ledger row mismatch: %+v", rows[0])
|
||||
}
|
||||
}
|
||||
|
||||
// TestCall_LLMUnavailable: transport error from the model.Generate
|
||||
// call is surfaced as ErrorKind=llm_unavailable AND records a ledger
|
||||
// row.
|
||||
func TestCall_LLMUnavailable(t *testing.T) {
|
||||
store := &fakeStorage{}
|
||||
h := New(store, nil)
|
||||
restore := SetCompleteForTest(func(_ context.Context, _ llm.Model, _, _ string, _ []llm.Option) (string, Tokens, error) {
|
||||
return "", Tokens{}, errors.New("network error")
|
||||
})
|
||||
defer restore()
|
||||
|
||||
res, _ := h.Call(context.Background(), CallSpec{
|
||||
UserPrompt: "hi",
|
||||
ToolName: "summarize",
|
||||
})
|
||||
if res.Success {
|
||||
t.Errorf("expected Success=false")
|
||||
}
|
||||
if res.ErrorKind != ErrorKindLLMUnavailable {
|
||||
t.Errorf("ErrorKind = %q, want %q", res.ErrorKind, ErrorKindLLMUnavailable)
|
||||
}
|
||||
rows := store.snapshot()
|
||||
if len(rows) != 1 {
|
||||
t.Fatalf("expected 1 ledger row, got %d", len(rows))
|
||||
}
|
||||
}
|
||||
|
||||
// TestCall_EmptyUserPromptErrors: programmer-error guard.
|
||||
func TestCall_EmptyUserPromptErrors(t *testing.T) {
|
||||
h := New(&fakeStorage{}, nil)
|
||||
_, err := h.Call(context.Background(), CallSpec{ToolName: "summarize"})
|
||||
if err == nil {
|
||||
t.Fatal("expected error for empty user_prompt")
|
||||
}
|
||||
}
|
||||
|
||||
// TestCall_JSONWithCodeFenceParses: tolerance for the first-attempt
|
||||
// response wrapped in a ```json ... ``` fence. The retry path uses a
|
||||
// stricter prompt; this test pins the first-attempt tolerance so
|
||||
// callers don't waste a round-trip on a benign formatting wrapper.
|
||||
func TestCall_JSONWithCodeFenceParses(t *testing.T) {
|
||||
store := &fakeStorage{}
|
||||
h := New(store, nil)
|
||||
restore := SetCompleteForTest(func(_ context.Context, _ llm.Model, _, _ string, _ []llm.Option) (string, Tokens, error) {
|
||||
return "```json\n{\"x\":1}\n```", Tokens{InputTokens: 5, OutputTokens: 4}, nil
|
||||
})
|
||||
defer restore()
|
||||
|
||||
res, _ := h.Call(context.Background(), CallSpec{
|
||||
UserPrompt: "extract",
|
||||
ToolName: "extract_entities",
|
||||
ResponseFormat: "json",
|
||||
RetryOnMalformedJSON: true,
|
||||
})
|
||||
if res.ErrorKind != "" {
|
||||
t.Errorf("unexpected ErrorKind %q (fenced JSON should parse on first attempt)", res.ErrorKind)
|
||||
}
|
||||
m, _ := res.Parsed.(map[string]any)
|
||||
if m["x"] != float64(1) {
|
||||
t.Errorf("Parsed[x] = %v, want 1", m["x"])
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user