b424261aca
Lifts mort's pkg/logic/llms into executus/model, decoupled from mort: - tiers.go: the tier resolver now reads a host-supplied config.Source under "model.tier.<name>" with host-supplied fallbacks (Configure(cfg, defaults, ttl)), instead of convar.Manager. Tier NAMES + specs are host config; the resolution mechanism (cache, reasoning-suffix dialect, chain validation) is generic. No tier names hard-coded in the harness. - sink.go: usage/trace recording inverted off mort's llmusage/llmtrace into UsageSink / TraceSink seams + a model-owned Span, with nil-safe context attribution helpers (WithModel/WithTraceID/WithUsageTool/WithUsageUser). Both sinks optional (nil = off) so a light host records nothing. - lane decoration repointed to executus/lane; utils.Errorf -> fmt.Errorf. - call.go keeps GenerateWith[T] (instrumented structured output) — this is the structured-output primitive; no separate structured/ package. - llmmeta moved over model/ (the meta-LLM helper: tier allowlist + JSON retry + ledger). Its tests configure a minimal tier table via TestMain. New tests cover the inversion: config overrides fallback, tier registration, reasoning-suffix survival, nested-tier rejection, nil-sink no-ops. Full module: go build/vet/test -race green; core go.sum still free of gorm/redis/discordgo/sqlite. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
283 lines
8.7 KiB
Go
283 lines
8.7 KiB
Go
package llmmeta
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"strings"
|
|
"sync"
|
|
"testing"
|
|
|
|
llm "gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
|
)
|
|
|
|
// fakeStorage records every MetaCall handed to RecordMetaCall and
|
|
// makes them available to tests via the captured slice.
|
|
type fakeStorage struct {
|
|
mu sync.Mutex
|
|
calls []MetaCall
|
|
err error
|
|
}
|
|
|
|
func (f *fakeStorage) RecordMetaCall(_ context.Context, call MetaCall) error {
|
|
f.mu.Lock()
|
|
defer f.mu.Unlock()
|
|
f.calls = append(f.calls, call)
|
|
return f.err
|
|
}
|
|
|
|
func (f *fakeStorage) snapshot() []MetaCall {
|
|
f.mu.Lock()
|
|
defer f.mu.Unlock()
|
|
out := make([]MetaCall, len(f.calls))
|
|
copy(out, f.calls)
|
|
return out
|
|
}
|
|
|
|
// TestCall_TierNotAllowed: a tier not in the allowlist returns the
|
|
// rejection without recording a ledger row — the call did not happen.
|
|
func TestCall_TierNotAllowed(t *testing.T) {
|
|
store := &fakeStorage{}
|
|
convars := ConvarReaderFunc(func(_ context.Context) []string {
|
|
return []string{"fast"}
|
|
})
|
|
h := New(store, convars)
|
|
|
|
res, err := h.Call(context.Background(), CallSpec{
|
|
Tier: "thinking",
|
|
UserPrompt: "hello",
|
|
ToolName: "summarize",
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("unexpected err: %v", err)
|
|
}
|
|
if res.Success {
|
|
t.Errorf("expected Success=false")
|
|
}
|
|
if res.ErrorKind != ErrorKindTierNotAllowed {
|
|
t.Errorf("ErrorKind = %q, want %q", res.ErrorKind, ErrorKindTierNotAllowed)
|
|
}
|
|
if len(store.snapshot()) != 0 {
|
|
t.Errorf("expected NO ledger row for tier_not_allowed, got %d", len(store.snapshot()))
|
|
}
|
|
}
|
|
|
|
// TestCall_TierAllowedHappyText: a permitted tier yields a successful
|
|
// text call AND records a ledger row.
|
|
func TestCall_TierAllowedHappyText(t *testing.T) {
|
|
store := &fakeStorage{}
|
|
convars := ConvarReaderFunc(func(_ context.Context) []string {
|
|
return []string{"fast"}
|
|
})
|
|
h := New(store, convars)
|
|
restore := SetCompleteForTest(func(_ context.Context, _ llm.Model, _, _ string, _ []llm.Option) (string, Tokens, error) {
|
|
return "summary text here", Tokens{InputTokens: 50, OutputTokens: 12}, nil
|
|
})
|
|
defer restore()
|
|
|
|
res, err := h.Call(context.Background(), CallSpec{
|
|
Tier: "fast",
|
|
UserPrompt: "summarise the following ...",
|
|
ToolName: "summarize",
|
|
ResponseFormat: "text",
|
|
RunID: "run-1",
|
|
SkillID: "sk-1",
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("unexpected err: %v", err)
|
|
}
|
|
if !res.Success {
|
|
t.Errorf("expected Success=true; got ErrorKind=%q", res.ErrorKind)
|
|
}
|
|
if res.Text != "summary text here" {
|
|
t.Errorf("Text = %q, want %q", res.Text, "summary text here")
|
|
}
|
|
if res.InputTokens != 50 || res.OutputTokens != 12 {
|
|
t.Errorf("token counts wrong: in=%d out=%d", res.InputTokens, res.OutputTokens)
|
|
}
|
|
if got := len(store.snapshot()); got != 1 {
|
|
t.Fatalf("expected 1 ledger row, got %d", got)
|
|
}
|
|
row := store.snapshot()[0]
|
|
if !row.Success {
|
|
t.Errorf("ledger Success = false, want true")
|
|
}
|
|
if row.ToolName != "summarize" {
|
|
t.Errorf("ledger ToolName = %q", row.ToolName)
|
|
}
|
|
if row.RunID != "run-1" {
|
|
t.Errorf("ledger RunID = %q", row.RunID)
|
|
}
|
|
if row.InputTokens != 50 || row.OutputTokens != 12 {
|
|
t.Errorf("ledger token counts wrong: in=%d out=%d",
|
|
row.InputTokens, row.OutputTokens)
|
|
}
|
|
}
|
|
|
|
// TestCall_JSONFirstAttemptParses: JSON-format request, response is
|
|
// valid JSON on first try; result.Parsed populated.
|
|
func TestCall_JSONFirstAttemptParses(t *testing.T) {
|
|
store := &fakeStorage{}
|
|
h := New(store, nil)
|
|
restore := SetCompleteForTest(func(_ context.Context, _ llm.Model, _, _ string, _ []llm.Option) (string, Tokens, error) {
|
|
return `{"foo":"bar","n":42}`, Tokens{InputTokens: 10, OutputTokens: 5}, nil
|
|
})
|
|
defer restore()
|
|
|
|
res, _ := h.Call(context.Background(), CallSpec{
|
|
UserPrompt: "extract entities",
|
|
ToolName: "extract_entities",
|
|
ResponseFormat: "json",
|
|
RetryOnMalformedJSON: true,
|
|
SkillID: "sk-2",
|
|
})
|
|
if !res.Success || res.ErrorKind != "" {
|
|
t.Fatalf("expected success, got %+v", res)
|
|
}
|
|
m, ok := res.Parsed.(map[string]any)
|
|
if !ok {
|
|
t.Fatalf("Parsed not a map: %T %v", res.Parsed, res.Parsed)
|
|
}
|
|
if m["foo"] != "bar" {
|
|
t.Errorf("Parsed[foo] = %v", m["foo"])
|
|
}
|
|
}
|
|
|
|
// TestCall_JSONRetryPath: first response is malformed JSON; second
|
|
// response (after stricter prompt) parses cleanly.
|
|
func TestCall_JSONRetryPath(t *testing.T) {
|
|
store := &fakeStorage{}
|
|
h := New(store, nil)
|
|
calls := 0
|
|
restore := SetCompleteForTest(func(_ context.Context, _ llm.Model, _, prompt string, _ []llm.Option) (string, Tokens, error) {
|
|
calls++
|
|
if calls == 1 {
|
|
return "Here is your JSON: {oh no I forgot to format it", Tokens{InputTokens: 8, OutputTokens: 12}, nil
|
|
}
|
|
// Verify stricter prompt prefix appeared on retry.
|
|
if !strings.Contains(prompt, "Return ONLY valid JSON") {
|
|
t.Errorf("retry prompt missing stricter prefix: %q", prompt)
|
|
}
|
|
return `{"key":"value"}`, Tokens{InputTokens: 14, OutputTokens: 6}, nil
|
|
})
|
|
defer restore()
|
|
|
|
res, _ := h.Call(context.Background(), CallSpec{
|
|
UserPrompt: "extract",
|
|
ToolName: "extract_entities",
|
|
ResponseFormat: "json",
|
|
RetryOnMalformedJSON: true,
|
|
})
|
|
if !res.Success || res.ErrorKind != "" {
|
|
t.Fatalf("expected success, got %+v", res)
|
|
}
|
|
if calls != 2 {
|
|
t.Errorf("expected 2 LLM calls, got %d", calls)
|
|
}
|
|
m, _ := res.Parsed.(map[string]any)
|
|
if m["key"] != "value" {
|
|
t.Errorf("Parsed = %v", res.Parsed)
|
|
}
|
|
// Token counts should reflect both attempts.
|
|
if res.InputTokens != 22 || res.OutputTokens != 18 {
|
|
t.Errorf("combined tokens wrong: in=%d out=%d", res.InputTokens, res.OutputTokens)
|
|
}
|
|
}
|
|
|
|
// TestCall_JSONRetryFailsTwice: second attempt also fails to parse.
|
|
// Surfaces ErrorKind=malformed_json AND keeps Success=true so the
|
|
// caller can fall back to result.Text.
|
|
func TestCall_JSONRetryFailsTwice(t *testing.T) {
|
|
store := &fakeStorage{}
|
|
h := New(store, nil)
|
|
restore := SetCompleteForTest(func(_ context.Context, _ llm.Model, _, _ string, _ []llm.Option) (string, Tokens, error) {
|
|
return "still not JSON", Tokens{InputTokens: 10, OutputTokens: 4}, nil
|
|
})
|
|
defer restore()
|
|
|
|
res, _ := h.Call(context.Background(), CallSpec{
|
|
UserPrompt: "extract",
|
|
ToolName: "extract_entities",
|
|
ResponseFormat: "json",
|
|
RetryOnMalformedJSON: true,
|
|
})
|
|
if !res.Success {
|
|
t.Errorf("expected Success=true (fall-back-to-text), got Success=false")
|
|
}
|
|
if res.ErrorKind != ErrorKindMalformedJSON {
|
|
t.Errorf("ErrorKind = %q, want %q", res.ErrorKind, ErrorKindMalformedJSON)
|
|
}
|
|
if res.Parsed != nil {
|
|
t.Errorf("Parsed = %v, want nil after failed retry", res.Parsed)
|
|
}
|
|
rows := store.snapshot()
|
|
if len(rows) != 1 {
|
|
t.Fatalf("expected 1 ledger row, got %d", len(rows))
|
|
}
|
|
if !rows[0].Success || rows[0].ErrorKind != ErrorKindMalformedJSON {
|
|
t.Errorf("ledger row mismatch: %+v", rows[0])
|
|
}
|
|
}
|
|
|
|
// TestCall_LLMUnavailable: transport error from the model.Generate
|
|
// call is surfaced as ErrorKind=llm_unavailable AND records a ledger
|
|
// row.
|
|
func TestCall_LLMUnavailable(t *testing.T) {
|
|
store := &fakeStorage{}
|
|
h := New(store, nil)
|
|
restore := SetCompleteForTest(func(_ context.Context, _ llm.Model, _, _ string, _ []llm.Option) (string, Tokens, error) {
|
|
return "", Tokens{}, errors.New("network error")
|
|
})
|
|
defer restore()
|
|
|
|
res, _ := h.Call(context.Background(), CallSpec{
|
|
UserPrompt: "hi",
|
|
ToolName: "summarize",
|
|
})
|
|
if res.Success {
|
|
t.Errorf("expected Success=false")
|
|
}
|
|
if res.ErrorKind != ErrorKindLLMUnavailable {
|
|
t.Errorf("ErrorKind = %q, want %q", res.ErrorKind, ErrorKindLLMUnavailable)
|
|
}
|
|
rows := store.snapshot()
|
|
if len(rows) != 1 {
|
|
t.Fatalf("expected 1 ledger row, got %d", len(rows))
|
|
}
|
|
}
|
|
|
|
// TestCall_EmptyUserPromptErrors: programmer-error guard.
|
|
func TestCall_EmptyUserPromptErrors(t *testing.T) {
|
|
h := New(&fakeStorage{}, nil)
|
|
_, err := h.Call(context.Background(), CallSpec{ToolName: "summarize"})
|
|
if err == nil {
|
|
t.Fatal("expected error for empty user_prompt")
|
|
}
|
|
}
|
|
|
|
// TestCall_JSONWithCodeFenceParses: tolerance for the first-attempt
|
|
// response wrapped in a ```json ... ``` fence. The retry path uses a
|
|
// stricter prompt; this test pins the first-attempt tolerance so
|
|
// callers don't waste a round-trip on a benign formatting wrapper.
|
|
func TestCall_JSONWithCodeFenceParses(t *testing.T) {
|
|
store := &fakeStorage{}
|
|
h := New(store, nil)
|
|
restore := SetCompleteForTest(func(_ context.Context, _ llm.Model, _, _ string, _ []llm.Option) (string, Tokens, error) {
|
|
return "```json\n{\"x\":1}\n```", Tokens{InputTokens: 5, OutputTokens: 4}, nil
|
|
})
|
|
defer restore()
|
|
|
|
res, _ := h.Call(context.Background(), CallSpec{
|
|
UserPrompt: "extract",
|
|
ToolName: "extract_entities",
|
|
ResponseFormat: "json",
|
|
RetryOnMalformedJSON: true,
|
|
})
|
|
if res.ErrorKind != "" {
|
|
t.Errorf("unexpected ErrorKind %q (fenced JSON should parse on first attempt)", res.ErrorKind)
|
|
}
|
|
m, _ := res.Parsed.(map[string]any)
|
|
if m["x"] != float64(1) {
|
|
t.Errorf("Parsed[x] = %v, want 1", m["x"])
|
|
}
|
|
}
|