From dcd004289f09ac5101e8d97ab1b2b9d8a701f791 Mon Sep 17 00:00:00 2001 From: Steve Dudenhoeffer Date: Wed, 10 Jun 2026 12:35:23 +0200 Subject: [PATCH] =?UTF-8?q?feat:=20foundations=20=E2=80=94=20canonical=20t?= =?UTF-8?q?ypes,=20Parse=20grammar,=20env=20DSNs,=20health,=20chains?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 1 of the majordomo build: - llm/ canonical contract (messages, parts, tools, capabilities, streaming, Model/Provider, error classification) - health/ clock-injected tracker (threshold bench, exponential capped cooldown, reset-on-success) - root Registry + Parse (verbatim model ids, inline recursive alias expansion with cycle detection, chain dedup), LLM_* env-DSN providers (go-llm parity: lazy fallback + eager LoadEnv), health-aware chain executor behind the Model interface - provider/fake scriptable test provider; hermetic test suite incl. the trailing-thinking chain and foreman:// env loading - ADRs 0001-0008, CLAUDE.md, README (honest matrix), CI workflow, docs/phase-1-design.md Co-Authored-By: Claude Fable 5 --- .env.example | 16 ++ .gitea/workflows/ci.yaml | 26 +++ .gitignore | 3 + CLAUDE.md | 114 +++++++++++ README.md | 214 +++++++++++++++++++++ builtin.go | 82 ++++++++ chain.go | 134 +++++++++++++ chain_test.go | 207 ++++++++++++++++++++ docs/adr/0001-package-layout.md | 46 +++++ docs/adr/0002-canonical-message-model.md | 53 ++++++ docs/adr/0003-parse-grammar.md | 57 ++++++ docs/adr/0004-env-dsn-providers.md | 60 ++++++ docs/adr/0005-provider-capabilities.md | 41 ++++ docs/adr/0006-health-and-backoff.md | 48 +++++ docs/adr/0007-dependency-policy.md | 31 +++ docs/adr/0008-chain-semantics.md | 60 ++++++ docs/adr/README.md | 14 ++ docs/phase-1-design.md | 84 +++++++++ env.go | 120 ++++++++++++ env_test.go | 195 +++++++++++++++++++ go.mod | 3 + health/health.go | 163 ++++++++++++++++ health/health_test.go | 165 ++++++++++++++++ llm/capabilities.go | 45 +++++ llm/content.go | 39 ++++ llm/errors.go | 119 ++++++++++++ llm/errors_test.go | 84 +++++++++ llm/llm.go | 12 ++ llm/message.go | 71 +++++++ llm/message_test.go | 62 ++++++ llm/model.go | 58 ++++++ llm/request.go | 98 ++++++++++ llm/response.go | 73 +++++++ llm/stream.go | 28 +++ llm/tool.go | 165 ++++++++++++++++ llm/tool_test.go | 98 ++++++++++ majordomo.go | 139 ++++++++++++++ parse.go | 122 ++++++++++++ parse_test.go | 221 ++++++++++++++++++++++ progress.md | 36 ++++ provider/fake/fake.go | 230 +++++++++++++++++++++++ registry.go | 227 ++++++++++++++++++++++ 42 files changed, 3863 insertions(+) create mode 100644 .env.example create mode 100644 .gitea/workflows/ci.yaml create mode 100644 CLAUDE.md create mode 100644 builtin.go create mode 100644 chain.go create mode 100644 chain_test.go create mode 100644 docs/adr/0001-package-layout.md create mode 100644 docs/adr/0002-canonical-message-model.md create mode 100644 docs/adr/0003-parse-grammar.md create mode 100644 docs/adr/0004-env-dsn-providers.md create mode 100644 docs/adr/0005-provider-capabilities.md create mode 100644 docs/adr/0006-health-and-backoff.md create mode 100644 docs/adr/0007-dependency-policy.md create mode 100644 docs/adr/0008-chain-semantics.md create mode 100644 docs/adr/README.md create mode 100644 docs/phase-1-design.md create mode 100644 env.go create mode 100644 env_test.go create mode 100644 go.mod create mode 100644 health/health.go create mode 100644 health/health_test.go create mode 100644 llm/capabilities.go create mode 100644 llm/content.go create mode 100644 llm/errors.go create mode 100644 llm/errors_test.go create mode 100644 llm/llm.go create mode 100644 llm/message.go create mode 100644 llm/message_test.go create mode 100644 llm/model.go create mode 100644 llm/request.go create mode 100644 llm/response.go create mode 100644 llm/stream.go create mode 100644 llm/tool.go create mode 100644 llm/tool_test.go create mode 100644 majordomo.go create mode 100644 parse.go create mode 100644 parse_test.go create mode 100644 progress.md create mode 100644 provider/fake/fake.go create mode 100644 registry.go diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..8a784f5 --- /dev/null +++ b/.env.example @@ -0,0 +1,16 @@ +# majordomo example environment. +# Copy to .env and fill in real values. .env is gitignored — never commit it. + +# Ollama Cloud API key (used by the ollama-cloud provider and live tests). +OLLAMA_API_KEY=your-ollama-cloud-key-here + +# Built-in provider keys (each optional; only needed for the providers you use). +#OPENAI_API_KEY=sk-... +#ANTHROPIC_API_KEY=sk-ant-... +#GOOGLE_API_KEY=... + +# LLM_* env-DSN provider definitions (go-llm parity). +# Format: LLM_=scheme://[token@]host[/path] +# becomes the provider's registry name (LLM_M1 -> "m1"). +#LLM_M1=foreman://token@foreman-m1.example.com +#LLM_M5=foreman://token@foreman-m5.example.com diff --git a/.gitea/workflows/ci.yaml b/.gitea/workflows/ci.yaml new file mode 100644 index 0000000..2eed90a --- /dev/null +++ b/.gitea/workflows/ci.yaml @@ -0,0 +1,26 @@ +name: CI +on: + push: { branches: ["*"] } + pull_request: { branches: ["*"] } +jobs: + build: + name: Build & Test + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: { go-version-file: "go.mod" } + - run: go mod download + - run: go build ./... + - run: go vet ./... + - run: go test -race -count=1 ./... + tidy: + name: Tidy + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: { go-version-file: "go.mod" } + - run: | + go mod tidy + git diff --exit-code go.mod go.sum diff --git a/.gitignore b/.gitignore index 5b90e79..fce39b0 100644 --- a/.gitignore +++ b/.gitignore @@ -25,3 +25,6 @@ go.work.sum # env file .env + +# macOS +.DS_Store diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..6c39425 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,114 @@ +# CLAUDE.md — majordomo operating manual + +majordomo is a clean-slate Go substrate for LLM-backed agents: +target-agnostic model access, a parseable model naming / failover / tiering +system with health tracking, multimodality, tool calls, structured output, +and agents composed from model + system prompt + toolboxes + skills. + +**North star:** majordomo exists to re-architect mort's agentic layer. mort +is the first consumer and the design's acceptance test — when a choice is a +toss-up, pick what makes mort's tiers, failover chains, toolboxes, and +skills cleanest to express. But majordomo itself stays general-purpose and +mort-agnostic: no mort types, no Discord, no mort config. + +## Module & stack + +- Module: `gitea.stevedudenhoeffer.com/steve/majordomo`, Go 1.26. +- Stdlib-first (ADR-0007): hand-rolled `net/http` clients for + OpenAI(+compat), Anthropic(+compat), Ollama (cloud+local), foreman. The + one approved dependency is `google.golang.org/genai` (Google provider). + Anything else needs an ADR. No `go-llm`, no `go-agentkit` — importing + either is an automatic failure. + +## Package map (ADR-0001) + +``` +majordomo Registry, Parse, env-DSN loading, chain executor, re-exports + llm/ canonical contract: Message/Part/Request/Response/Option, + Tool/Toolbox, Capabilities, Stream, Model, Provider, errors + health/ clock-injected health tracker (bench/backoff) + media/ image normalization to target capabilities (Phase 3) + provider/fake/ scriptable in-memory provider for hermetic tests + provider/{openai,anthropic,ollama,google}/ (Phases 3-4) + agent/ Agent run loop (Phase 5) + skill/ Skill interface + composition (Phase 6) + examples/ one runnable example per hard requirement (Phase 7-8) +``` + +Canonical types live in leaf package `llm`; the root re-exports them via +type aliases. Providers import `llm`, never each other, never the root. + +## Parse grammar (ADR-0003) + +``` +spec := element ("," element)* # ordered failover chain +element := target | alias +target := provider "/" model # model id VERBATIM after first "/" +alias := bare token (no slash), expands INLINE, recursively, cycle-checked +``` + +- `Parse("ollama-cloud/minimax-m3:cloud,ollama-cloud/kimi-k2.6:cloud,anthropic/opus-4.8")` + → try head-to-tail. Appending `,thinking` expands the registered alias in + place at the tail. +- Provider resolution: registry (built-ins, RegisterProvider, eager env) → + lazy `LLM_{UPPER(name)}` env DSN → error. +- Single element ≡ chain of one; same Model interface, same semantics. +- No reasoning suffixes (`:high` etc. are NOT stripped — model ids are + verbatim). Reasoning effort becomes a request option (provider phases). + +## LLM_* env-DSN providers (ADR-0004, go-llm parity) + +`LLM_=scheme://[token@]host[/path]` — e.g. +`LLM_M5=foreman://token@foreman-m5.example` defines provider `m5`; then +`m5/qwen3:30b` works in Parse, chains, and aliases. Scheme ∈ {foreman, +ollama, ollama-cloud, openai, anthropic, google, gemini} ∪ RegisterScheme. +Token = credential; base URL = `https://host` always. `New()` scans the +process env eagerly; unknown names also resolve lazily at Parse time +(`my-prov` → `LLM_MY_PROV`). Malformed entries fail on use, not at startup. + +## Health & failover (ADR-0006, ADR-0008) + +- Transient (408/429/5xx, timeouts, conn refused/reset, DNS, deadline) vs + permanent (400/401/403/404/405/422, model-not-found, ctx.Canceled). + Unknown → transient. Classifier overridable. +- One transient error → retry same target (default 1 retry). Every failed + attempt counts; at threshold (default 2 consecutive) the target is + benched for base 5s × 2^n, capped 5m. Success fully resets. Chains skip + benched targets; 404 advances penalty-free; auth/malformed fail fast + (configurable); exhaustion returns a joined error naming every target. +- Tracker is in-memory, process-local, clock-injected. No persistence. + +## House conventions (mirror foreman) + +- gofmt; check errors immediately and wrap with `fmt.Errorf("%w: ...")`; + imports stdlib → third-party → internal; `// Why:` doc comments where + rationale isn't obvious. +- ADRs in `docs/adr/`, one decision each, append-only, indexed in its + README. progress.md gets a dated entry per phase. +- Conventional commits (`feat:`, `test:`, `docs:`, `chore:`, `refactor:`). +- Tests are hermetic: fake provider + fake clock; provider clients test + against `httptest`; **no network or credentials in the default suite**. + Live tests sit behind `//go:build live` / `examples/live/` and skip + without their env vars. +- `.env` holds live keys (gitignored, never committed/printed/quoted); + `.env.example` carries placeholders. + +## Gates (every phase; what CI runs) + +``` +go build ./... +go vet ./... +go test -race -count=1 ./... +go mod tidy && git diff --exit-code go.mod go.sum +``` + +CI: `.gitea/workflows/ci.yaml` (Gitea Actions, mirrors foreman). README.md +must match reality in the same commit that changes behavior — no +aspirational docs; unbuilt features are marked pending in the matrix. + +## Out of scope (anti-creep) + +No persistent store (health is in-memory behind the registry), no +observability/metrics stack, no config-file framework beyond LLM_* env +DSNs, no CLI beyond examples, no provider-specific features leaking into +the canonical API, nothing mort-specific in the library. diff --git a/README.md b/README.md index 13200d9..8356ed8 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,216 @@ # majordomo +A clean-slate Go library for building LLM-backed agents: one canonical API +over many model providers, a parseable model naming / failover / tiering +system with built-in health tracking, capability-aware multimodality, tool +calls, structured output, and composable agents and skills. + +> **Status:** under construction, phase by phase. The +> [support matrix](#featureprovider-support-matrix) below is kept honest: +> *pending* means not built yet, and this README is updated in the same +> commit as the behavior it describes. + +## Install + +```bash +go get gitea.stevedudenhoeffer.com/steve/majordomo +``` + +Requires Go 1.26+. + +## Quickstart + +```go +package main + +import ( + "context" + "fmt" + + "gitea.stevedudenhoeffer.com/steve/majordomo" +) + +func main() { + reg := majordomo.New() // built-ins + LLM_* env providers + + m, err := reg.Parse("ollama-cloud/minimax-m3:cloud") + if err != nil { panic(err) } + + resp, err := m.Generate(context.Background(), majordomo.Request{ + Messages: []majordomo.Message{majordomo.UserText("hello!")}, + }) + if err != nil { panic(err) } + fmt.Println(resp.Text()) +} +``` + +`majordomo.Parse(...)` (package level) uses a lazily-built default registry +if you don't need isolation. + +## Model specs: targets, chains, tiers + +A model spec is a comma-separated **failover chain**; each element is either +a `provider/model` target or a registered **alias** (tier): + +```go +// Try minimax-m3 first; on failure kimi-k2.6; finally fall back to opus-4.8. +m, _ := reg.Parse("ollama-cloud/minimax-m3:cloud,ollama-cloud/kimi-k2.6:cloud,anthropic/opus-4.8") + +// Identical, with the registered alias "thinking" appended and expanded +// in place as the tail of the chain: +m, _ = reg.Parse("ollama-cloud/minimax-m3:cloud,ollama-cloud/kimi-k2.6:cloud,anthropic/opus-4.8,thinking") +``` + +Everything after the **first `/`** (up to the next comma) is the model id, +passed to the provider **verbatim** — tags (`:cloud`, `:30b`) and ids with +extra slashes survive intact. majordomo never validates ids against a +catalog. + +### Custom tiers (aliases) + +```go +reg.RegisterAlias("thinking", "anthropic/opus-4.8,ollama-cloud/minimax-m3:cloud") +reg.RegisterAlias("workhorse", "ollama-cloud/minimax-m2.7:cloud,ollama-cloud/qwen3-coder:480b-cloud") + +m, _ := reg.Parse("thinking") // a chain, same Model interface as a single target +``` + +Aliases may appear anywhere in a chain (head, middle, tail), may reference +other aliases, and expand inline and recursively; cycles are detected and +returned as errors. + +### Failover & health + +Chains are health-tracked per target: + +- A **single transient error** (429/5xx, timeout, connection failure) is + retried once on the same target. +- **Repeated transient errors** (default: 2 consecutive failed attempts) + bench the target — chains skip it until its cooldown expires (exponential: + 5s, 10s, 20s, ... capped at 5m). Any success resets it. +- `model not found` advances down the chain without penalty; auth/malformed + errors fail fast (failing over can't fix a bad key). All knobs are + configurable via `WithChainConfig` / `WithHealthConfig`. +- If every element fails, you get one joined error naming each target and + why it failed. + +## Providers + +### Built-in env vars + +| Provider | Spec name | Key env var | Default endpoint | +|----------|-----------|-------------|------------------| +| OpenAI (+compatible) | `openai` | `OPENAI_API_KEY` | api.openai.com *(pending)* | +| Anthropic (+compatible) | `anthropic` | `ANTHROPIC_API_KEY` | api.anthropic.com *(pending)* | +| Google (Gemini) | `google` | `GOOGLE_API_KEY` / `GEMINI_API_KEY` | Gen AI API *(pending)* | +| Ollama Cloud | `ollama-cloud` | `OLLAMA_API_KEY` | https://ollama.com *(pending)* | +| Ollama (local) | `ollama` | — | `OLLAMA_HOST` or http://localhost:11434 *(pending)* | +| foreman | `foreman` | — (token via DSN) | requires DSN/base URL *(pending)* | + +### `LLM_*` env-DSN provider definitions + +Define named providers entirely from the environment (go-llm parity): + +``` +LLM_M1=foreman://test-token-change-me@foreman-m1.orgrimmar.dudenhoeffer.casa +LLM_M5=foreman://test-token-change-me@foreman-m5.orgrimmar.dudenhoeffer.casa +``` + +defines providers `m1` and `m5` (foreman targets — native Ollama wire +protocol behind a bearer token). They are first-class in `Parse`, chains, +and aliases: + +```go +m, _ := reg.Parse("m5/qwen3:30b,m1/qwen3:30b,thinking") +``` + +DSN format: `scheme://[token@]host[/path]`, scheme ∈ `foreman`, `ollama`, +`ollama-cloud`, `openai`, `anthropic`, `google`/`gemini`, or any scheme you +add with `RegisterScheme`. The token is the credential (bearer token / API +key); the base URL is always `https://host[/path]`. `New()` loads `LLM_*` +vars eagerly; unknown provider names also resolve lazily at Parse time +(`my-prov/x` → `LLM_MY_PROV`). + +### Custom providers + +Implement the two-method `Provider` interface and register it: + +```go +reg.RegisterProvider(myProvider) // now "myprovider/model-x" parses, chains, aliases +``` + +## Multimodality *(pending — Phase 3)* + +Attach images without knowing the target's limits; majordomo normalizes +(downscale, re-encode, count/size limits) against the resolved target's +declared capabilities and rejects clearly what cannot fit. + +```go +resp, err := m.Generate(ctx, majordomo.Request{ + Messages: []majordomo.Message{ + majordomo.UserParts(majordomo.Text("what's in this image?"), + majordomo.Image("image/png", pngBytes)), + }, +}) +``` + +## Tool calls *(canonical API ready; provider wiring pending — Phase 3)* + +```go +weather := majordomo.Tool{ + Name: "get_weather", + Description: "Current weather for a city", + Parameters: json.RawMessage(`{"type":"object","properties":{"city":{"type":"string"}},"required":["city"]}`), + Handler: func(ctx context.Context, args json.RawMessage) (any, error) { + var p struct{ City string `json:"city"` } + _ = json.Unmarshal(args, &p) + return map[string]any{"city": p.City, "temp_c": 21}, nil + }, +} +resp, _ := m.Generate(ctx, req, majordomo.WithTools(weather)) +// resp.ToolCalls → execute → append ToolResultsMessage → continue +``` + +## Structured output *(canonical API ready; provider wiring pending — Phase 3)* + +```go +resp, _ := m.Generate(ctx, req, majordomo.WithSchema(schemaJSON, "answer")) +``` + +A generic `Generate[T]` helper (schema from your struct, unmarshal into it) +lands with the agent phase. + +## Agents & skills *(pending — Phases 5–6)* + +Agents = model + system prompt + toolboxes, running a tool-dispatch loop; +skills = reusable instruction+tool bundles attachable to any agent. + +## Feature/provider support matrix + +| Provider | Resolve/Parse | Chat | Streaming | Tools | Structured | Images | Env DSN | +|----------------------|:---:|:---:|:---:|:---:|:---:|:---:|:---:| +| OpenAI (+compatible) | ✅ | pending | pending | pending | pending | pending | ✅ | +| Anthropic (+compat) | ✅ | pending | pending | pending | pending | pending | ✅ | +| Google (Gemini) | ✅ | pending | pending | pending | pending | pending | ✅ | +| Ollama Cloud | ✅ | pending | pending | pending | pending | pending | ✅ | +| Ollama (local) | ✅ | pending | pending | pending | pending | pending | ✅ | +| foreman | ✅ | pending | pending | pending | pending | pending | ✅ | +| fake (testing) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | — | + +Cross-cutting: Parse grammar ✅ · aliases/tiers ✅ · failover chains ✅ · +health tracking/backoff ✅ · LLM_* env DSNs ✅ · media pipeline pending · +agent loop pending · skills pending · `Generate[T]` pending. + +## Development + +```bash +go build ./... && go vet ./... && go test -race -count=1 ./... +``` + +The default test suite is fully hermetic (no network, no credentials). +Live integration tests (Phase 8) are gated behind the `live` build tag and +read `.env` (see `.env.example`; never commit `.env`). + +Design decisions are recorded in [docs/adr/](docs/adr/README.md); +conventions in [CLAUDE.md](CLAUDE.md); build history in +[progress.md](progress.md). diff --git a/builtin.go b/builtin.go new file mode 100644 index 0000000..0b83908 --- /dev/null +++ b/builtin.go @@ -0,0 +1,82 @@ +package majordomo + +import ( + "context" + "fmt" + + "gitea.stevedudenhoeffer.com/steve/majordomo/llm" +) + +// Built-in provider names. Real client implementations land per-phase +// (see progress.md); until a provider's phase ships, its registration is a +// stub that resolves (so specs parse and env DSNs load) but errors on use. +const ( + ProviderOpenAI = "openai" + ProviderAnthropic = "anthropic" + ProviderGoogle = "google" + ProviderOllama = "ollama" + ProviderOllamaCloud = "ollama-cloud" + ProviderForeman = "foreman" +) + +// registerBuiltins installs the built-in providers and env-DSN scheme +// factories into a fresh registry. +func registerBuiltins(r *Registry) { + stub := func(kind string) SchemeFactory { + return func(name string, dsn DSN) (llm.Provider, error) { + return &stubProvider{name: name, kind: kind, baseURL: dsn.BaseURL(), token: dsn.Token}, nil + } + } + + for _, kind := range []string{ + ProviderOpenAI, ProviderAnthropic, ProviderGoogle, + ProviderOllama, ProviderOllamaCloud, ProviderForeman, + } { + r.providers[kind] = &stubProvider{name: kind, kind: kind} + r.schemes[kind] = stub(kind) + } + // "gemini" is an alternate scheme for the Google provider. + r.schemes["gemini"] = stub(ProviderGoogle) +} + +// stubProvider stands in for a provider implementation that lands in a +// later phase. It resolves and carries its connection details (so Parse, +// chains, and env loading are fully functional) but errors on use. +type stubProvider struct { + name string + kind string + baseURL string + token string +} + +func (s *stubProvider) Name() string { return s.name } + +func (s *stubProvider) Model(id string, opts ...llm.ModelOption) (llm.Model, error) { + cfg := llm.ApplyModelOptions(opts) + return &stubModel{provider: s, id: id, cfg: cfg}, nil +} + +type stubModel struct { + provider *stubProvider + id string + cfg llm.ModelConfig +} + +func (m *stubModel) err() error { + return fmt.Errorf("majordomo: provider %q (%s) is not implemented yet", m.provider.name, m.provider.kind) +} + +func (m *stubModel) Generate(context.Context, llm.Request, ...llm.Option) (*llm.Response, error) { + return nil, m.err() +} + +func (m *stubModel) Stream(context.Context, llm.Request, ...llm.Option) (llm.Stream, error) { + return nil, m.err() +} + +func (m *stubModel) Capabilities() llm.Capabilities { + if m.cfg.Capabilities != nil { + return *m.cfg.Capabilities + } + return llm.Capabilities{} +} diff --git a/chain.go b/chain.go new file mode 100644 index 0000000..a593e12 --- /dev/null +++ b/chain.go @@ -0,0 +1,134 @@ +package majordomo + +import ( + "context" + "errors" + "fmt" + + "gitea.stevedudenhoeffer.com/steve/majordomo/health" + "gitea.stevedudenhoeffer.com/steve/majordomo/llm" +) + +// ErrChainExhausted reports that every element of a failover chain failed +// (or was skipped while backed off). It is always joined with the +// per-target errors. +var ErrChainExhausted = errors.New("all chain targets failed") + +// chainTarget is one resolved element of a failover chain. +type chainTarget struct { + // key identifies the target for health tracking: "provider/model-id". + key string + model llm.Model +} + +// chain implements llm.Model over an ordered list of targets with +// health-tracked failover. A single-element spec is a chain of one — the +// behavior (retry-on-transient, backoff bookkeeping) is identical, so +// callers never branch on what Parse returned. +// +// Semantics (ADR-0006, ADR-0008): +// - Targets are tried head-to-tail; targets currently backed off are +// skipped. +// - A transient error is retried on the same target (ChainConfig +// TransientRetries, default 1). Every failed attempt counts toward the +// target's consecutive-failure threshold; when the tracker benches the +// target (default: 2 consecutive transient failures → exponential +// capped cooldown) the chain stops retrying it and advances. +// - Model-not-found advances without penalizing health. Other permanent +// errors fail fast by default (AdvanceOnPermanent flips this). +// - Any success resets the target's health. +// - When every target fails or is skipped, the returned error joins +// ErrChainExhausted with each target's reason. +type chain struct { + targets []chainTarget + tracker *health.Tracker + cfg ChainConfig +} + +// Targets returns the resolved "provider/model" keys in chain order +// (diagnostics and tests). +func (c *chain) Targets() []string { + keys := make([]string, len(c.targets)) + for i, t := range c.targets { + keys[i] = t.key + } + return keys +} + +// Capabilities reports the head element's capabilities — the chain's +// preferred target (ADR-0008). Per-attempt media normalization uses the +// actual target's capabilities, not this value. +func (c *chain) Capabilities() llm.Capabilities { + return c.targets[0].model.Capabilities() +} + +// Generate tries each target per the chain semantics above. +func (c *chain) Generate(ctx context.Context, req llm.Request, opts ...llm.Option) (*llm.Response, error) { + req = req.Apply(opts...) + return chainDo(ctx, c, func(ctx context.Context, t chainTarget) (*llm.Response, error) { + return t.model.Generate(ctx, req) + }) +} + +// Stream tries each target per the chain semantics. Failover applies to +// establishing the stream; once a stream is open, mid-stream errors +// propagate to the consumer rather than restarting on another target +// (replaying half-delivered output would duplicate content). +func (c *chain) Stream(ctx context.Context, req llm.Request, opts ...llm.Option) (llm.Stream, error) { + req = req.Apply(opts...) + return chainDo(ctx, c, func(ctx context.Context, t chainTarget) (llm.Stream, error) { + return t.model.Stream(ctx, req) + }) +} + +// chainDo runs the head-to-tail failover algorithm around an attempt +// function, generic over the result type (response vs stream). +func chainDo[T any](ctx context.Context, c *chain, attempt func(context.Context, chainTarget) (T, error)) (T, error) { + var zero T + var failures []error + + for _, t := range c.targets { + if !c.tracker.Available(t.key) { + until := c.tracker.BackedOffUntil(t.key) + failures = append(failures, fmt.Errorf("%s: skipped (backed off until %s)", t.key, until.Format("15:04:05.000"))) + continue + } + + retries := c.cfg.retries() + for attemptN := 0; ; attemptN++ { + if err := ctx.Err(); err != nil { + return zero, err + } + result, err := attempt(ctx, t) + if err == nil { + c.tracker.ReportSuccess(t.key) + return result, nil + } + + class := c.cfg.classify(err) + if class == llm.ClassPermanent { + if errors.Is(err, llm.ErrModelNotFound) || c.cfg.AdvanceOnPermanent { + // Not a health problem (or policy says keep going): + // advance without penalizing the target. + failures = append(failures, fmt.Errorf("%s: %w", t.key, err)) + break + } + // Failing over cannot fix a bad request or bad credentials. + return zero, fmt.Errorf("%s: %w", t.key, err) + } + + // Transient: every failed attempt counts toward the target's + // consecutive-failure threshold. Retry the same target while + // attempts remain — but advance as soon as the tracker benches + // it (a freshly backed-off target is not worth more retries). + benched := c.tracker.ReportFailure(t.key) + if !benched && attemptN < retries { + continue + } + failures = append(failures, fmt.Errorf("%s: %w", t.key, err)) + break + } + } + + return zero, errors.Join(append([]error{ErrChainExhausted}, failures...)...) +} diff --git a/chain_test.go b/chain_test.go new file mode 100644 index 0000000..476c2ed --- /dev/null +++ b/chain_test.go @@ -0,0 +1,207 @@ +package majordomo + +import ( + "context" + "errors" + "io" + "net/http" + "strings" + "testing" + + "gitea.stevedudenhoeffer.com/steve/majordomo/llm" + "gitea.stevedudenhoeffer.com/steve/majordomo/provider/fake" +) + +func transientErr(model string) error { + return &llm.APIError{Provider: "fp", Model: model, Status: http.StatusServiceUnavailable, Message: "overloaded"} +} + +func authErr(model string) error { + return &llm.APIError{Provider: "fp", Model: model, Status: http.StatusUnauthorized, Message: "bad key"} +} + +func notFoundErr(model string) error { + return &llm.APIError{Provider: "fp", Model: model, Status: http.StatusNotFound, Message: "no such model"} +} + +// TestChainSingleTransientRecoversViaRetry: one blip, same target succeeds +// on the retry — the request never fails over. +func TestChainSingleTransientRecoversViaRetry(t *testing.T) { + r := newTestRegistry(t) + fp := fake.New("fp") + r.RegisterProvider(fp) + fp.Enqueue("a", fake.Fail(transientErr("a")), fake.Reply("recovered")) + + m, err := r.Parse("fp/a,fp/b") + if err != nil { + t.Fatalf("Parse: %v", err) + } + resp, err := m.Generate(context.Background(), Request{Messages: []Message{UserText("hi")}}) + if err != nil { + t.Fatalf("Generate: %v", err) + } + if resp.Text() != "recovered" { + t.Errorf("text = %q, want recovered (same-target retry)", resp.Text()) + } + if got := fp.CallCount("a"); got != 2 { + t.Errorf("target a saw %d calls, want 2 (initial + retry)", got) + } + if got := fp.CallCount("b"); got != 0 { + t.Errorf("target b saw %d calls, want 0", got) + } +} + +// TestChainRepeatedTransientFailsOver: the head exhausts its retry, gets +// benched, and the chain advances to the next element. +func TestChainRepeatedTransientFailsOver(t *testing.T) { + r := newTestRegistry(t) + fp := fake.New("fp") + r.RegisterProvider(fp) + fp.Enqueue("a", fake.Fail(transientErr("a")), fake.Fail(transientErr("a"))) + fp.Enqueue("b", fake.Reply("from-b"), fake.Reply("from-b")) + + m, err := r.Parse("fp/a,fp/b") + if err != nil { + t.Fatalf("Parse: %v", err) + } + resp, err := m.Generate(context.Background(), Request{Messages: []Message{UserText("hi")}}) + if err != nil { + t.Fatalf("Generate: %v", err) + } + if resp.Text() != "from-b" { + t.Errorf("text = %q, want from-b", resp.Text()) + } + // Two consecutive transient failures hit the default threshold: the + // head is now backed off and skipped on the next request. + if r.Health().Available("fp/a") { + t.Error("fp/a should be backed off after two consecutive transient failures") + } + resp2, err := m.Generate(context.Background(), Request{Messages: []Message{UserText("again")}}) + if err != nil { + t.Fatalf("Generate #2: %v", err) + } + if resp2.Text() != "from-b" { + t.Errorf("second response = %q, want from-b (head skipped)", resp2.Text()) + } + if got := fp.CallCount("a"); got != 2 { + t.Errorf("backed-off target a saw %d calls, want 2", got) + } +} + +// TestChainPermanentAuthFailsFast: failing over cannot fix bad credentials. +func TestChainPermanentAuthFailsFast(t *testing.T) { + r := newTestRegistry(t) + fp := fake.New("fp") + r.RegisterProvider(fp) + fp.Enqueue("a", fake.Fail(authErr("a"))) + + m, _ := r.Parse("fp/a,fp/b") + _, err := m.Generate(context.Background(), Request{Messages: []Message{UserText("hi")}}) + if err == nil { + t.Fatal("want error") + } + var apiErr *llm.APIError + if !errors.As(err, &apiErr) || apiErr.Status != http.StatusUnauthorized { + t.Errorf("error = %v, want the 401 APIError", err) + } + if got := fp.CallCount("b"); got != 0 { + t.Errorf("target b saw %d calls, want 0 (fail-fast)", got) + } + if !r.Health().Available("fp/a") { + t.Error("permanent errors must not penalize health") + } +} + +// TestChainModelNotFoundAdvances: 404 advances without a health penalty. +func TestChainModelNotFoundAdvances(t *testing.T) { + r := newTestRegistry(t) + fp := fake.New("fp") + r.RegisterProvider(fp) + fp.Enqueue("a", fake.Fail(notFoundErr("a"))) + fp.Enqueue("b", fake.Reply("from-b")) + + m, _ := r.Parse("fp/a,fp/b") + resp, err := m.Generate(context.Background(), Request{Messages: []Message{UserText("hi")}}) + if err != nil { + t.Fatalf("Generate: %v", err) + } + if resp.Text() != "from-b" { + t.Errorf("text = %q, want from-b", resp.Text()) + } + if !r.Health().Available("fp/a") { + t.Error("model-not-found must not penalize health") + } +} + +// TestChainExhaustedJoinsErrors: when everything fails the error names what +// was tried and why each failed. +func TestChainExhaustedJoinsErrors(t *testing.T) { + r := newTestRegistry(t) + fp := fake.New("fp") + r.RegisterProvider(fp) + fp.Enqueue("a", fake.Fail(transientErr("a")), fake.Fail(transientErr("a"))) + fp.Enqueue("b", fake.Fail(notFoundErr("b"))) + + m, _ := r.Parse("fp/a,fp/b") + _, err := m.Generate(context.Background(), Request{Messages: []Message{UserText("hi")}}) + if !errors.Is(err, ErrChainExhausted) { + t.Fatalf("error = %v, want ErrChainExhausted", err) + } + for _, frag := range []string{"fp/a", "fp/b", "overloaded", "no such model"} { + if !strings.Contains(err.Error(), frag) { + t.Errorf("joined error %q should mention %q", err.Error(), frag) + } + } +} + +func TestChainStream(t *testing.T) { + r := newTestRegistry(t) + fp := fake.New("fp") + r.RegisterProvider(fp) + fp.Enqueue("a", fake.Fail(transientErr("a")), fake.Fail(transientErr("a"))) + fp.Enqueue("b", fake.Reply("streamed")) + + m, _ := r.Parse("fp/a,fp/b") + s, err := m.Stream(context.Background(), Request{Messages: []Message{UserText("hi")}}) + if err != nil { + t.Fatalf("Stream: %v", err) + } + defer s.Close() + + var text string + var final *Response + for { + ev, err := s.Next() + if errors.Is(err, io.EOF) { + break + } + if err != nil { + t.Fatalf("Next: %v", err) + } + text += ev.TextDelta + if ev.Response != nil { + final = ev.Response + } + } + if text != "streamed" { + t.Errorf("streamed text = %q, want streamed", text) + } + if final == nil { + t.Fatal("missing final response event") + } +} + +// TestChainContextCancellation: a canceled context aborts immediately. +func TestChainContextCancellation(t *testing.T) { + r := newTestRegistry(t) + fp := fake.New("fp") + r.RegisterProvider(fp) + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + m, _ := r.Parse("fp/a,fp/b") + _, err := m.Generate(ctx, Request{Messages: []Message{UserText("hi")}}) + if !errors.Is(err, context.Canceled) { + t.Errorf("error = %v, want context.Canceled", err) + } +} diff --git a/docs/adr/0001-package-layout.md b/docs/adr/0001-package-layout.md new file mode 100644 index 0000000..1e79dad --- /dev/null +++ b/docs/adr/0001-package-layout.md @@ -0,0 +1,46 @@ +# ADR-0001: Package layout — canonical types in a leaf `llm` package, root re-exports + +**Status:** Accepted — 2026-06-10 + +## Context + +Provider implementations (openai, anthropic, google, ollama/foreman) must share +the canonical types (Message, Request, Response, Capabilities, Model, Provider). +If those types lived in the root `majordomo` package, the root could not also +register built-in providers (root → provider/openai → root is an import cycle). +go-llm solved this with a `v2/provider` leaf package; the kickoff sketch puts +the Provider interface in `provider/provider.go` and the message types at root, +which recreates the cycle. + +## Decision + +- All canonical contract types live in the leaf package + `majordomo/llm` (Message, Part, Request, Response, Option, Tool, Toolbox, + Capabilities, Stream, Model, Provider, error classification). It imports + nothing else in the module. +- The root `majordomo` package re-exports every canonical type via type + aliases (plus constructor/option wrappers), so consumers write + `majordomo.Request`, `majordomo.UserText(...)` and rarely import `llm`. +- The root owns assembly: Registry, Parse, env-DSN loading, the chain + executor, and (from Phase 3) registration of real provider clients. +- The planned `resolve/` package is folded into the root: the grammar needs + registry state (aliases, providers, env fallback) at every expansion step, + and a callback interface between two packages bought nothing but + indirection. +- `health/`, `media/`, `provider//`, `provider/fake/`, `agent/`, and + `skill/` are subpackages importing `llm` (and never each other, except + agent → skill). + +## Consequences + +- No import cycles; new providers are additive subpackages. +- Consumers get the flat one-import API the kickoff sketches. +- Type aliases (not wrappers) mean zero conversion cost and full + interchangeability between `majordomo.X` and `llm.X`. + +## Alternatives considered + +- **Everything in root.** No cycles only if providers also live in root — + a single giant package. Rejected. +- **Self-registering providers via package init() side effects.** Hides + wiring, breaks multi-registry isolation, surprises tests. Rejected. diff --git a/docs/adr/0002-canonical-message-model.md b/docs/adr/0002-canonical-message-model.md new file mode 100644 index 0000000..7d46699 --- /dev/null +++ b/docs/adr/0002-canonical-message-model.md @@ -0,0 +1,53 @@ +# ADR-0002: Canonical message/content model + +**Status:** Accepted — 2026-06-10 + +## Context + +Every provider has a different wire shape for conversations, content, +tool calls, and system prompts. majordomo needs one canonical shape that all +providers translate to/from, expressive enough for multimodality and tool +loops, small enough to keep providers honest. + +## Decision + +- `Message{Role, Parts, ToolCalls, ToolResults}` with roles system / user / + assistant / tool. `Part` is a **sealed** interface (`TextPart`, + `ImagePart`) so providers can switch exhaustively; new media kinds are + deliberate API changes, not silent pass-throughs. +- `ImagePart` is **bytes + MIME only** — no URL form. The media pipeline + must inspect/resize/transcode images against target capabilities, which + requires bytes; fetching remote URLs is the caller's job, not a hidden + network dependency inside a model call. +- `Request.System` is a dedicated top-level field (maps to Anthropic + `system`, Google `SystemInstruction`, an OpenAI/Ollama system message). + RoleSystem messages in the history are also accepted and folded by + providers. Request also carries Tools, ToolChoice, Schema/SchemaName, and + sampling knobs; per-call mutation happens via `Option` funcs applied to a + copy, so Request values are reusable. +- Model ids never carry behavior suffixes: unlike go-llm there is **no + `:low/:medium/:high` reasoning-suffix grammar** (it conflicts with + verbatim model ids like `minimax-m3:cloud`, see ADR-0003). Reasoning + effort will be a request option when providers land. +- `Response{Parts, ToolCalls, FinishReason, Usage, Model, Raw}` — `Model` + names the target that actually served the request (vital with chains); + `Raw` is the provider-native escape hatch, never required. +- Streaming (`Stream.Next() → StreamEvent`): text deltas stream as they + arrive; **tool-call arguments are buffered until complete** (consumers + never see partial JSON); the final event carries the accumulated + `*Response`; `io.EOF` terminates. + +## Consequences + +- Providers stay translation layers; nothing provider-specific leaks into + the canonical API. +- Callers needing remote images fetch them first — explicit, testable. +- Partial-tool-call streaming UIs are out of scope (acceptable: arguments + are rarely useful before they parse). + +## Alternatives considered + +- Open `Part` interface — silent content drops on unknown kinds. Rejected. +- URL image parts with lazy fetch — hidden I/O inside Generate, breaks + capability normalization. Rejected. +- go-llm-style reasoning suffixes — see ADR-0003. Rejected. diff --git a/docs/adr/0003-parse-grammar.md b/docs/adr/0003-parse-grammar.md new file mode 100644 index 0000000..24a7452 --- /dev/null +++ b/docs/adr/0003-parse-grammar.md @@ -0,0 +1,57 @@ +# ADR-0003: Parse grammar — verbatim model ids, inline alias expansion, chains + +**Status:** Accepted — 2026-06-10 + +## Context + +Callers (mort first) address models by string: single targets, tier aliases, +and comma-separated failover chains, with custom and env-defined providers as +first-class elements. go-llm's grammar is close but nests alias-chains as +composite Models and strips `:low/:medium/:high` reasoning suffixes, which +collides with Ollama-style tags (`minimax-m3:cloud`) and Google-style ids. + +## Decision + +Grammar (binding, from the kickoff): + +``` +spec := element ("," element)* +element := target | alias +target := provider "/" model # model = everything after the FIRST "/", + # up to the next comma, passed VERBATIM +alias := bare token, no slash +``` + +- Provider resolution order per target: registered providers (built-ins, + RegisterProvider, eagerly env-loaded) → lazy `LLM_{UPPER(name)}` env DSN + (ADR-0004) → error naming both places checked. +- Aliases expand **inline** wherever they appear (head/middle/tail), + recursively, into the flat element list. Cycles are detected via the + expansion stack and return `ErrAliasCycle` — never a hang. Inline (not + nested-Model, as in go-llm) expansion keeps one flat chain so health + skipping and error reporting see every element uniformly. +- Duplicate elements after expansion are dropped (first occurrence wins): + retrying an already-failed target in the same pass is never useful. +- A single element and a multi-element chain return the same `Model` + (a chain of one) — identical retry/health semantics, callers never branch. +- **No reasoning-suffix stripping.** mort's `:high` dialect is handled by + mort's spec layer during migration; majordomo will expose reasoning effort + as an explicit request option instead. +- The package-level `Default()` registry (lazy, loads process env) backs + `majordomo.Parse` for go-llm-style one-call ergonomics; `New()` builds + isolated registries for tests/multi-tenant use. + +## Consequences + +- `m1/richardyoung/qwen3-14b-abliterated:q4_K_M` (a real mort tier value) + parses as provider `m1`, model `richardyoung/qwen3-14b-abliterated:q4_K_M`. +- A bare token that is a provider name yields a targeted error + ("use openai/"). +- Alias updates after Parse don't affect already-built Models (expansion is + at Parse time). mort re-parses per request, so DB-tier edits still apply. + +## Alternatives considered + +- Nested alias expansion (go-llm): opaque chains inside chains; health + skipping can't see the elements. Rejected. +- Reasoning suffixes in the grammar: breaks verbatim ids. Rejected. diff --git a/docs/adr/0004-env-dsn-providers.md b/docs/adr/0004-env-dsn-providers.md new file mode 100644 index 0000000..1f334ab --- /dev/null +++ b/docs/adr/0004-env-dsn-providers.md @@ -0,0 +1,60 @@ +# ADR-0004: LLM_* env-DSN provider definitions (go-llm parity, plus eager load) + +**Status:** Accepted — 2026-06-10 + +## Context + +Steve's deployments define providers via env vars that must keep working +unchanged: + +``` +LLM_M1=foreman://token@foreman-m1.orgrimmar.dudenhoeffer.casa +LLM_M5=foreman://token@foreman-m5.orgrimmar.dudenhoeffer.casa +``` + +go-llm (v2/parse.go) implements this **lazily only**: `Parse("m5/x")` misses +the registry, computes `LLM_` + UPPER(name) with `-`→`_`, reads exactly that +var, parses `scheme://[token@]host[/path]` by plain string splits, requires +the scheme to be a registered provider, and dials `https://` + host. There is +no environment scan. The kickoff additionally requires `New()` to load LLM_* +providers eagerly and a testable `LoadEnv(map)`. + +## Decision + +Implement **both** paths over one DSN parser (byte-for-byte go-llm +semantics — `://` split, first-`@` split, trailing-`/` trim, ErrInvalidDSN on +missing scheme/host, base URL always `https://host[/path]`): + +- **Eager:** `New()` scans the process environment for `LLM_` and + registers each as provider `lower()` (underscores preserved: + `LLM_MY_BOX` → `my_box`). `LoadEnv(map[string]string)` is the explicit, + testable entry. Malformed entries never fail construction: they are + recorded per-name, returned joined from LoadEnv, and surface from Parse + only when that name is actually referenced (matching go-llm's + fail-on-use behavior). +- **Lazy (go-llm parity):** an unknown provider name in Parse falls back to + `LLM_{UPPER(name, - → _)}`, so hyphenated spec names (`my-prov/x` → + `LLM_MY_PROV`) work exactly as in go-llm. Lazily resolved providers are + cached in the registry. +- The DSN **scheme** selects a `SchemeFactory` (foreman, ollama, + ollama-cloud, openai, anthropic, google, gemini; extensible via + `RegisterScheme`). The factory receives the registry name and the parsed + DSN (token = credential, `https://host` = base URL). + +## Consequences + +- Existing muscle memory carries over: every go-llm-resolvable LLM_* var + resolves identically here. +- Eager loading additionally makes env providers visible to discovery + (`Provider(name)`) before first use. +- An env DSN cannot express plain-http endpoints (https is forced) — same + limitation as go-llm, kept deliberately for parity; local Ollama uses the + `ollama` provider's own default (`http://localhost:11434`) rather than a + DSN. + +## Alternatives considered + +- `url.Parse`-based DSN parsing: subtly different (percent-decoding, + userinfo passwords). Parity wins. Rejected. +- Failing New() on malformed LLM_* vars: one stray var would break every + consumer at startup. Rejected. diff --git a/docs/adr/0005-provider-capabilities.md b/docs/adr/0005-provider-capabilities.md new file mode 100644 index 0000000..bfc4636 --- /dev/null +++ b/docs/adr/0005-provider-capabilities.md @@ -0,0 +1,41 @@ +# ADR-0005: Provider interface and the capabilities model + +**Status:** Accepted — 2026-06-10 + +## Context + +Each provider — and some individual models — imposes different limits (image +dimensions/bytes/MIME/count, tools, structured output, streaming, context +size). Callers must not need to know them; the library must normalize or +clearly reject. + +## Decision + +- `Provider` is minimal: `Name()` and `Model(id, opts...) (Model, error)`. + Model ids pass through verbatim; providers never validate ids against a + catalog (models churn weekly; catalogs rot). +- `Capabilities` is a plain struct declared **per provider** with + **per-model overrides** via `WithCapabilities` (a `ModelOption`). Zero + values mean: `MaxImagesPerReq == 0` → images unsupported; + `MaxImageBytes/MaxImageDimension/ContextWindow == 0` → no declared limit; + empty `AllowedImageMIME` → any type. +- Providers construct without error even when credentials are missing; the + failure surfaces as an auth error at request time (and a chain can fail + over past it). Construction-time validation would make `New()` fragile. +- Until a provider's implementation phase lands, built-ins register as + **stubs**: they resolve in Parse (so chains, aliases, and env DSNs are + fully functional) and return a clear "not implemented yet" error on use. + +## Consequences + +- The media pipeline (Phase 3, ADR to follow) can normalize against any + target uniformly. +- Adding a provider is additive: implement two methods + declare + capabilities. + +## Alternatives considered + +- Capability methods on Model with provider-specific logic — pushes limits + knowledge into every caller. Rejected. +- Model catalogs with validation — stale within weeks, breaks pass-through + targets like foreman. Rejected. diff --git a/docs/adr/0006-health-and-backoff.md b/docs/adr/0006-health-and-backoff.md new file mode 100644 index 0000000..2fb81cb --- /dev/null +++ b/docs/adr/0006-health-and-backoff.md @@ -0,0 +1,48 @@ +# ADR-0006: Model health tracking and backoff + +**Status:** Accepted — 2026-06-10 + +## Context + +Ollama Cloud models intermittently return "high demand" errors. mort's +behavior to preserve: one blip should not fail a request (retry); a model +that keeps failing should be benched so chains skip it, then re-admitted +after a cooldown. majordomo owns this (the "model health tracker"). + +## Decision + +In-memory, process-local, thread-safe tracker in `health/`, keyed by +`"provider/model-id"`, with an **injected clock** (`func() time.Time`) so +every backoff path is unit-testable without sleeping. + +- **Classification** (`llm.Classify`, overridable via `ChainConfig.Classify`): + transient = HTTP 408/429/5xx, network timeouts, connection refused/reset, + DNS failures, `context.DeadlineExceeded`; permanent = HTTP + 400/401/403/404/405/422, `ErrModelNotFound`, `context.Canceled` (the + caller gave up — retrying defies intent). **Unknown errors default to + transient**: failing over can only help availability, and a wrongly + benched model self-heals via cooldown, while a wrongly fail-fasted request + is lost. +- **Counting:** every failed transient *attempt* increments the target's + consecutive-failure count; any success resets count **and** backoff + exponent. At threshold (default **2**) the target is benched until + `now + cooldown`, with cooldown = base (default **5s**) × multiplier + (default **2**) per consecutive backoff round, capped (default **5m**). + After the bench triggers, the count resets, so re-benching needs a fresh + run of failures — but at the doubled cooldown. +- All knobs (threshold, base/cap/multiplier, clock, classifier, retry count) + are configuration with the above defaults baked in. +- **No persistence, no interface.** The tracker is a concrete type; health + is process-local by design (out-of-scope guardrail). A consumer wanting + shared state can wrap the registry; we do not build for it now. + +## Consequences + +- Deterministic tests via fake clock; no `time.Sleep` anywhere. +- Two providers addressing the same upstream model (e.g. `m1/x` and `m5/x`) + track independently — correct, since the backends are different machines. + +## Alternatives considered + +- Persistent/pluggable health store — explicitly out of scope. Rejected. +- Unknown→permanent default — drops availability on novel errors. Rejected. diff --git a/docs/adr/0007-dependency-policy.md b/docs/adr/0007-dependency-policy.md new file mode 100644 index 0000000..9b3dad5 --- /dev/null +++ b/docs/adr/0007-dependency-policy.md @@ -0,0 +1,31 @@ +# ADR-0007: Dependency policy — stdlib-first, hand-rolled REST clients + +**Status:** Accepted — 2026-06-10 + +## Context + +go-llm leans on SDKs (openai-go, go-anthropic, genai) and carries their +transitive weight and churn. The kickoff mandates minimal dependencies with +full control over multimodal payloads and capability handling. + +## Decision + +- **Hand-rolled `net/http` JSON clients** for OpenAI(+compatible), + Anthropic(+compatible), Ollama (cloud + local), and foreman. Their REST + surfaces are small and stable; owning the wire shapes gives exact control + over tool calls, structured output, streaming, and image payloads. +- **One approved third-party dependency:** the official Google Gen AI Go SDK + (`google.golang.org/genai`) for the Gemini provider — Google's surface + moves too much to hand-roll profitably. +- Image normalization uses stdlib `image`, `image/jpeg`, `image/png`. + `golang.org/x/image` may be added **only** if a needed format demands it, + via a new ADR. +- Any other third-party dependency requires its own ADR justifying it. +- No persistent store, no metrics stack, no config framework, no CLI beyond + `examples/` (out-of-scope guardrails). + +## Consequences + +- `go.mod` stays near-empty; consumers inherit almost nothing transitively. +- We own wire-format drift: provider docs are verified against current + documentation at implementation time and recorded in the provider ADRs. diff --git a/docs/adr/0008-chain-semantics.md b/docs/adr/0008-chain-semantics.md new file mode 100644 index 0000000..1a297a3 --- /dev/null +++ b/docs/adr/0008-chain-semantics.md @@ -0,0 +1,60 @@ +# ADR-0008: Failover-chain execution semantics + +**Status:** Accepted — 2026-06-10 + +## Context + +A parsed spec is an ordered chain of targets sharing the registry's health +tracker. The executor must realize the kickoff's failover story (retry one +blip; bench repeat offenders; skip benched targets; clear exhaustion errors) +identically for chains of one and many. + +## Decision + +For each request, iterate elements head-to-tail: + +1. **Skip** targets currently benched (recorded in the exhaustion error). +2. Attempt the target. On success → report success (resets health), return. +3. On error, classify: + - **Permanent + model-not-found** → advance, no health penalty. + - **Permanent otherwise** (auth, malformed) → **fail fast** by default — + failing over cannot fix a bad request; `ChainConfig.AdvanceOnPermanent` + flips this for callers who prefer availability. + - **Transient** → report the failed attempt to the tracker; retry the + same target while attempts remain (`TransientRetries`, default 1) + **unless the tracker just benched it**, in which case advance + immediately. +4. All elements failed/skipped → return `errors.Join(ErrChainExhausted, + per-target reasons...)` naming every target and why. + +Other decisions: + +- **Capabilities() = head element's capabilities.** The head is the + preferred target and the honest answer to "what should I prepare for?". + Per-attempt media normalization (Phase 3) uses the *actual* target's + capabilities, so fallbacks still get correctly-fitted inputs. + Intersection semantics were rejected: a rarely-used tail fallback would + artificially constrain every request. +- **Streaming failover applies to stream establishment only.** Once a + stream is open, mid-stream errors propagate; silently restarting on + another target would re-deliver partial output. +- `context.Canceled` aborts the chain immediately between and during + attempts. +- Duplicate post-expansion elements were already dropped at Parse + (ADR-0003). + +## Consequences + +- "One transient error is fine" holds: blip → same-target retry succeeds, + no failover, one health mark that the success immediately clears... and + with default knobs (retries=1, threshold=2) a target whose retry also + fails is benched in the same request and the chain advances — exactly the + kickoff narrative. +- Single-target specs get the same retry/backoff behavior for free. + +## Alternatives considered + +- Per-request (not per-attempt) failure counting — needs two failed + *requests* to bench, letting a dead model eat the retry budget twice. + Rejected as weaker than the kickoff's story. +- Intersection capabilities — see above. Rejected. diff --git a/docs/adr/README.md b/docs/adr/README.md new file mode 100644 index 0000000..6346769 --- /dev/null +++ b/docs/adr/README.md @@ -0,0 +1,14 @@ +# Architecture Decision Records + +One decision per file, append-only; supersede rather than rewrite. + +| ADR | Title | Status | +|-----|-------|--------| +| [0001](0001-package-layout.md) | Package layout — canonical types in leaf `llm`, root re-exports | Accepted | +| [0002](0002-canonical-message-model.md) | Canonical message/content model | Accepted | +| [0003](0003-parse-grammar.md) | Parse grammar — verbatim ids, inline alias expansion, chains | Accepted | +| [0004](0004-env-dsn-providers.md) | LLM_* env-DSN provider definitions (go-llm parity + eager load) | Accepted | +| [0005](0005-provider-capabilities.md) | Provider interface and capabilities model | Accepted | +| [0006](0006-health-and-backoff.md) | Model health tracking and backoff | Accepted | +| [0007](0007-dependency-policy.md) | Dependency policy — stdlib-first, hand-rolled REST clients | Accepted | +| [0008](0008-chain-semantics.md) | Failover-chain execution semantics | Accepted | diff --git a/docs/phase-1-design.md b/docs/phase-1-design.md new file mode 100644 index 0000000..f31cc3b --- /dev/null +++ b/docs/phase-1-design.md @@ -0,0 +1,84 @@ +# Phase 1 design summary (for after-the-fact review) + +Written at the Phase 1 → 2 boundary of the unattended build run +(2026-06-10). Captures the public surface and the decisions behind it. +Authoritative details live in the ADRs; this is the review digest. + +## What the library looks like to a consumer + +```go +reg := majordomo.New() // built-ins + LLM_* env providers +reg.RegisterAlias("thinking", "anthropic/opus-4.8,ollama-cloud/minimax-m3:cloud") + +m, err := reg.Parse("m5/qwen3:30b,ollama-cloud/kimi-k2.6:cloud,thinking") +resp, err := m.Generate(ctx, majordomo.Request{ + System: "You are terse.", + Messages: []majordomo.Message{majordomo.UserText("hi")}, +}, majordomo.WithMaxTokens(200)) +``` + +- `Model` = `Generate` / `Stream` / `Capabilities`; a chain and a single + target are the same interface. +- `Provider` = `Name` / `Model(id, opts...)`; ids verbatim, no catalogs. +- Canonical types live in `majordomo/llm`, re-exported at root via aliases + (ADR-0001) — providers import `llm` only. + +## Parse grammar (ADR-0003) + +`spec := element ("," element)*`; element = `provider/model` (model id = +everything after the first slash, verbatim) or a bare alias token expanded +inline + recursively with cycle detection. Both kickoff README examples are +covered by tests, including the trailing-`thinking` variant and dedup of +overlapping alias expansions. + +**Deviation from go-llm worth reviewing:** no `:low/:medium/:high` +reasoning-suffix stripping — it conflicts with verbatim ids +(`minimax-m3:cloud`, `richardyoung/qwen3-14b-abliterated:q4_K_M` in mort's +tiers). Plan: reasoning effort becomes an explicit request option when +providers land; mort's wrapper translates its legacy suffix dialect during +Phase 9. If you want suffix parity instead, it's an additive change behind +a RegistryOption. + +## LLM_* env DSNs (ADR-0004) + +Parser is byte-for-byte go-llm (`scheme://[token@]host[/path]`, https +forced, fail-on-use for malformed values). Two resolution paths: +eager scan in `New()`/`LoadEnv(map)` (kickoff requirement; +`LLM_M1` → provider `m1`) **plus** go-llm's lazy `LLM_{UPPER(name)}` +fallback at Parse time (so hyphenated names keep working). Schemes are +factories (`RegisterScheme`) — consumers can bind custom provider kinds to +DSNs. + +## Health & chains (ADR-0006, ADR-0008) + +Clock-injected in-memory tracker keyed `provider/model`. Transient vs +permanent via `llm.Classify` (unknown → transient; `context.Canceled` → +permanent). Defaults: 1 same-target retry; bench after 2 consecutive failed +attempts; cooldown 5s ×2 capped 5m; success resets everything. Chains skip +benched targets, advance penalty-free on 404, fail fast on auth/malformed +(flippable via `AdvanceOnPermanent`), and join per-target reasons on +exhaustion. Chain `Capabilities()` = head element (per-attempt media +normalization will use the actual target, Phase 3). Streaming failover +covers stream establishment only. + +## Flagged for reconsideration + +1. **Reasoning suffixes** (above) — deliberate deviation, easy to add back. +2. **Duplicate-element dedup in chains** (first occurrence wins): right for + health semantics, but means `a,b,a` won't retry `a` at the tail even + after `b` fails. Believed correct (same request, same bench state); + flag if "retry head last" matters to you. +3. **`AdvanceOnPermanent` default = fail-fast** on auth/malformed errors: + matches the kickoff; mort's old behavior was closer to + advance-on-everything. Phase 9 can set the flag per-registry if mort's + UX prefers availability. +4. **Stub built-ins**: until Phases 3–4, `openai/...` etc. parse fine and + error on use with "not implemented yet". Chains mixing stubs and real + providers will fail over past stubs naturally (the error classifies + transient) — temporary, gone by Phase 4. + +## ADR set + +0001 package layout · 0002 message model · 0003 parse grammar · +0004 env DSNs · 0005 provider/capabilities · 0006 health/backoff · +0007 dependency policy · 0008 chain semantics diff --git a/env.go b/env.go new file mode 100644 index 0000000..5ccd553 --- /dev/null +++ b/env.go @@ -0,0 +1,120 @@ +package majordomo + +import ( + "errors" + "fmt" + "sort" + "strings" + + "gitea.stevedudenhoeffer.com/steve/majordomo/llm" +) + +// ErrInvalidDSN reports a malformed env-DSN value. +var ErrInvalidDSN = errors.New("invalid DSN") + +// ErrUnknownProvider reports a spec element whose provider could not be +// resolved through the registry or the LLM_* environment. +var ErrUnknownProvider = errors.New("unknown provider") + +// DSN is a parsed provider Data Source Name, as used in LLM_* env vars. +// +// Format (go-llm parity): scheme://[token@]host[/path] +// +// LLM_M1=foreman://test-token@foreman-m1.example.com +// +// defines provider "m1": a foreman target at https://foreman-m1.example.com +// authenticated with the bearer token "test-token". +type DSN struct { + // Scheme selects the provider implementation: "foreman", "ollama", + // "ollama-cloud", "openai", "anthropic", "google"/"gemini", or any + // custom scheme registered with RegisterScheme. + Scheme string + // Token is the provider secret (bearer token or API key); empty = none. + Token string + // Host is hostname[:port][/path] with no scheme prefix and no trailing + // slash. + Host string +} + +// BaseURL returns the https base URL for the DSN host (go-llm parity: +// env-defined providers always speak TLS). +func (d DSN) BaseURL() string { return "https://" + d.Host } + +// ParseDSN parses a raw DSN string. The algorithm matches go-llm exactly: +// split on "://", then an optional "@" separates the token from the host; +// trailing slashes on the host are trimmed. +func ParseDSN(raw string) (DSN, error) { + scheme, rest, found := strings.Cut(raw, "://") + if !found { + return DSN{}, fmt.Errorf("%w: missing scheme://: %q", ErrInvalidDSN, raw) + } + + var token, host string + if before, after, hasAt := strings.Cut(rest, "@"); hasAt { + token = before + host = after + } else { + host = rest + } + host = strings.TrimRight(host, "/") + if host == "" { + return DSN{}, fmt.Errorf("%w: missing host: %q", ErrInvalidDSN, raw) + } + return DSN{Scheme: scheme, Token: token, Host: host}, nil +} + +// LoadEnv registers a provider for every LLM_ entry in env. is +// lowercased to form the registry name (LLM_M1 → "m1"); the value is a DSN +// whose scheme selects the factory. Entries that fail to parse are recorded +// and their error is returned (joined) — and also surfaces later if the +// name is referenced in Parse — but valid entries always register. +// +// New() calls this with the process environment; tests call it explicitly. +func (r *Registry) LoadEnv(env map[string]string) error { + // Deterministic order makes error output stable. + keys := make([]string, 0, len(env)) + for k := range env { + if strings.HasPrefix(k, "LLM_") && len(k) > len("LLM_") { + keys = append(keys, k) + } + } + sort.Strings(keys) + + var errs []error + for _, key := range keys { + name := strings.ToLower(strings.TrimPrefix(key, "LLM_")) + p, err := r.providerFromDSN(name, env[key]) + if err != nil { + err = fmt.Errorf("%s: %w", key, err) + errs = append(errs, err) + r.mu.Lock() + r.envErrs[name] = err + r.mu.Unlock() + continue + } + r.mu.Lock() + r.providers[name] = p + delete(r.envErrs, name) + r.mu.Unlock() + } + return errors.Join(errs...) +} + +// providerFromDSN parses a DSN and builds a provider via its scheme factory. +func (r *Registry) providerFromDSN(name, raw string) (llm.Provider, error) { + dsn, err := ParseDSN(raw) + if err != nil { + return nil, err + } + r.mu.RLock() + factory, ok := r.schemes[dsn.Scheme] + r.mu.RUnlock() + if !ok { + return nil, fmt.Errorf("%w: DSN scheme %q is not a registered scheme", ErrUnknownProvider, dsn.Scheme) + } + p, err := factory(name, dsn) + if err != nil { + return nil, fmt.Errorf("scheme %q: %w", dsn.Scheme, err) + } + return p, nil +} diff --git a/env_test.go b/env_test.go new file mode 100644 index 0000000..dda67bd --- /dev/null +++ b/env_test.go @@ -0,0 +1,195 @@ +package majordomo + +import ( + "errors" + "slices" + "strings" + "testing" +) + +func TestParseDSN(t *testing.T) { + tests := []struct { + raw string + want DSN + wantErr error + }{ + { + raw: "foreman://test-token-change-me@foreman-m1.orgrimmar.dudenhoeffer.casa", + want: DSN{Scheme: "foreman", Token: "test-token-change-me", Host: "foreman-m1.orgrimmar.dudenhoeffer.casa"}, + }, + { + raw: "ollama://my-host.example:11434", + want: DSN{Scheme: "ollama", Token: "", Host: "my-host.example:11434"}, + }, + { + raw: "openai://sk-key@api.example.com/v1/", + want: DSN{Scheme: "openai", Token: "sk-key", Host: "api.example.com/v1"}, + }, + {raw: "no-scheme-here", wantErr: ErrInvalidDSN}, + {raw: "foreman://token@", wantErr: ErrInvalidDSN}, + {raw: "foreman:///", wantErr: ErrInvalidDSN}, + } + for _, tt := range tests { + got, err := ParseDSN(tt.raw) + if tt.wantErr != nil { + if !errors.Is(err, tt.wantErr) { + t.Errorf("ParseDSN(%q) error = %v, want %v", tt.raw, err, tt.wantErr) + } + continue + } + if err != nil { + t.Errorf("ParseDSN(%q): %v", tt.raw, err) + continue + } + if got != tt.want { + t.Errorf("ParseDSN(%q) = %+v, want %+v", tt.raw, got, tt.want) + } + } +} + +func TestDSNBaseURL(t *testing.T) { + d := DSN{Scheme: "foreman", Host: "h.example:8443/base"} + if got, want := d.BaseURL(), "https://h.example:8443/base"; got != want { + t.Errorf("BaseURL = %q, want %q", got, want) + } +} + +// TestLoadEnvForeman covers the required behavior: an LLM_* foreman DSN +// defines a named provider that is first-class in Parse and in chains. +func TestLoadEnvForeman(t *testing.T) { + r := newTestRegistry(t) + err := r.LoadEnv(map[string]string{ + "LLM_M1": "foreman://test-token-change-me@foreman-m1.orgrimmar.dudenhoeffer.casa", + "LLM_M5": "foreman://test-token-change-me@foreman-m5.orgrimmar.dudenhoeffer.casa", + }) + if err != nil { + t.Fatalf("LoadEnv: %v", err) + } + + for _, name := range []string{"m1", "m5"} { + p, ok := r.Provider(name) + if !ok { + t.Fatalf("provider %q not registered", name) + } + sp, ok := p.(*stubProvider) + if !ok { + t.Fatalf("provider %q is %T, want *stubProvider (phase 1)", name, p) + } + if sp.kind != ProviderForeman { + t.Errorf("provider %q kind = %q, want foreman", name, sp.kind) + } + wantURL := "https://foreman-" + name + ".orgrimmar.dudenhoeffer.casa" + if sp.baseURL != wantURL { + t.Errorf("provider %q baseURL = %q, want %q", name, sp.baseURL, wantURL) + } + if sp.token != "test-token-change-me" { + t.Errorf("provider %q token = %q, want the DSN userinfo", name, sp.token) + } + } + + // Env-defined providers are first-class chain elements alongside + // built-ins and aliases. + r.RegisterAlias("thinking", "anthropic/opus-4.8") + m, err := r.Parse("m5/qwen3:30b,m1/qwen3:30b,thinking") + if err != nil { + t.Fatalf("Parse: %v", err) + } + want := []string{"m5/qwen3:30b", "m1/qwen3:30b", "anthropic/opus-4.8"} + if got := targetsOf(t, m); !slices.Equal(got, want) { + t.Errorf("targets = %v, want %v", got, want) + } +} + +func TestLoadEnvNameNormalization(t *testing.T) { + r := newTestRegistry(t) + if err := r.LoadEnv(map[string]string{"LLM_MY_BOX": "ollama://my-box.example"}); err != nil { + t.Fatalf("LoadEnv: %v", err) + } + if _, ok := r.Provider("my_box"); !ok { + t.Error("LLM_MY_BOX should register provider \"my_box\"") + } +} + +func TestLoadEnvIgnoresNonLLMVars(t *testing.T) { + r := newTestRegistry(t) + if err := r.LoadEnv(map[string]string{ + "PATH": "/usr/bin", + "LLM_": "foreman://x@h", + "NOT_LLM_": "foreman://x@h", + }); err != nil { + t.Fatalf("LoadEnv: %v", err) + } + if _, ok := r.Provider(""); ok { + t.Error("empty-suffix LLM_ var must not register a provider") + } +} + +func TestLoadEnvInvalidDSN(t *testing.T) { + r := newTestRegistry(t) + err := r.LoadEnv(map[string]string{ + "LLM_BAD": "not-a-dsn", + "LLM_GOOD": "foreman://tok@good.example", + }) + if !errors.Is(err, ErrInvalidDSN) { + t.Errorf("LoadEnv error = %v, want ErrInvalidDSN", err) + } + // The valid entry still registered. + if _, ok := r.Provider("good"); !ok { + t.Error("valid LLM_GOOD entry should register despite LLM_BAD failing") + } + // The invalid entry's error surfaces when the name is used. + _, perr := r.Parse("bad/some-model") + if perr == nil || !strings.Contains(perr.Error(), "LLM_BAD") { + t.Errorf("Parse(bad/...) error = %v, want recorded LLM_BAD load error", perr) + } +} + +func TestLoadEnvUnknownScheme(t *testing.T) { + r := newTestRegistry(t) + err := r.LoadEnv(map[string]string{"LLM_X": "zorp://tok@host.example"}) + if !errors.Is(err, ErrUnknownProvider) { + t.Errorf("LoadEnv error = %v, want ErrUnknownProvider", err) + } + if err == nil || !strings.Contains(err.Error(), `"zorp"`) { + t.Errorf("error %v should name the unknown scheme", err) + } +} + +// TestLazyEnvFallback covers go-llm parity: a provider name that is not +// registered resolves through LLM_{UPPER(name)} at Parse time. +func TestLazyEnvFallback(t *testing.T) { + env := map[string]string{ + "LLM_M9": "foreman://lazy-token@foreman-m9.example", + "LLM_MY_PROV": "ollama://my-prov.example", + } + r := New( + WithoutEnvProviders(), + WithEnvLookup(func(k string) string { return env[k] }), + ) + + m, err := r.Parse("m9/qwen3:30b") + if err != nil { + t.Fatalf("Parse(m9/...): %v", err) + } + if got := targetsOf(t, m); !slices.Equal(got, []string{"m9/qwen3:30b"}) { + t.Errorf("targets = %v", got) + } + // The lazily-resolved provider is cached. + if _, ok := r.Provider("m9"); !ok { + t.Error("lazy env provider should be cached in the registry") + } + + // Hyphenated names map to underscored env vars (go-llm parity). + if _, err := r.Parse("my-prov/llama3"); err != nil { + t.Errorf("Parse(my-prov/...): %v", err) + } +} + +// TestNewLoadsProcessEnv covers the eager scan in New(). +func TestNewLoadsProcessEnv(t *testing.T) { + t.Setenv("LLM_ENVTEST", "foreman://tok@envtest.example") + r := New(WithEnvLookup(func(string) string { return "" })) + if _, ok := r.Provider("envtest"); !ok { + t.Error("New() should eagerly load LLM_ENVTEST from the process environment") + } +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..9db200e --- /dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module gitea.stevedudenhoeffer.com/steve/majordomo + +go 1.26 diff --git a/health/health.go b/health/health.go new file mode 100644 index 0000000..e31146c --- /dev/null +++ b/health/health.go @@ -0,0 +1,163 @@ +// Package health tracks per-target model health for failover decisions. +// +// Why: a failover chain must skip targets that are repeatedly failing +// ("backed off") and re-admit them after a cooldown, without any persistent +// state or background goroutines. The tracker is in-memory, process-local, +// thread-safe, and clock-injected so backoff is unit-testable. +// +// Semantics (see ADR-0006): +// - One transient failure increments a consecutive-failure count. +// - Reaching the failure threshold (default 2) backs the target off until +// now + cooldown. Cooldown grows exponentially per consecutive backoff +// (default base 5s, x2 each time, capped at 5m). +// - Any success fully resets the target: failure count and backoff +// history both clear. +package health + +import ( + "sync" + "time" +) + +// Default configuration values. +const ( + DefaultFailureThreshold = 2 + DefaultBaseCooldown = 5 * time.Second + DefaultMaxCooldown = 5 * time.Minute + DefaultMultiplier = 2.0 +) + +// Clock supplies the current time; injected for tests. +type Clock func() time.Time + +// Config tunes the tracker. Zero values select the defaults above. +type Config struct { + // FailureThreshold is the number of consecutive transient failures that + // triggers a backoff. + FailureThreshold int + // BaseCooldown is the first backoff duration. + BaseCooldown time.Duration + // MaxCooldown caps the exponential growth. + MaxCooldown time.Duration + // Multiplier scales the cooldown per consecutive backoff. + Multiplier float64 + // Clock supplies the current time (defaults to time.Now). + Clock Clock +} + +func (c Config) withDefaults() Config { + if c.FailureThreshold <= 0 { + c.FailureThreshold = DefaultFailureThreshold + } + if c.BaseCooldown <= 0 { + c.BaseCooldown = DefaultBaseCooldown + } + if c.MaxCooldown <= 0 { + c.MaxCooldown = DefaultMaxCooldown + } + if c.Multiplier <= 1 { + c.Multiplier = DefaultMultiplier + } + if c.Clock == nil { + c.Clock = time.Now + } + return c +} + +// Tracker records per-key health. Keys are opaque; majordomo uses +// "provider/model-id". +// +// Tracker is an interface-free concrete type on purpose: consumers that want +// persistence can wrap it behind their own interface; majordomo itself stays +// in-memory (ADR-0006). +type Tracker struct { + mu sync.Mutex + cfg Config + entries map[string]*entry +} + +type entry struct { + // consecutiveFailures counts transient failures since the last success + // or backoff trigger. + consecutiveFailures int + // backoffs counts consecutive backoff rounds since the last success; + // it drives the exponential cooldown. + backoffs int + // until is the moment the current backoff expires (zero = not backed off). + until time.Time +} + +// NewTracker creates a tracker with the given configuration. +func NewTracker(cfg Config) *Tracker { + return &Tracker{cfg: cfg.withDefaults(), entries: make(map[string]*entry)} +} + +// Available reports whether the key is currently usable (not backed off). +func (t *Tracker) Available(key string) bool { + t.mu.Lock() + defer t.mu.Unlock() + e, ok := t.entries[key] + if !ok { + return true + } + return !t.cfg.Clock().Before(e.until) +} + +// ReportSuccess resets the key's failure count and backoff history. +func (t *Tracker) ReportSuccess(key string) { + t.mu.Lock() + defer t.mu.Unlock() + delete(t.entries, key) +} + +// ReportFailure records a transient failure. When the consecutive-failure +// count reaches the threshold the key is backed off and the method reports +// true; the count then resets so re-admission requires a fresh run of +// failures to trigger the next (longer) backoff. +func (t *Tracker) ReportFailure(key string) (backedOff bool) { + t.mu.Lock() + defer t.mu.Unlock() + e, ok := t.entries[key] + if !ok { + e = &entry{} + t.entries[key] = e + } + e.consecutiveFailures++ + if e.consecutiveFailures < t.cfg.FailureThreshold { + return false + } + cooldown := t.cooldownFor(e.backoffs) + e.until = t.cfg.Clock().Add(cooldown) + e.backoffs++ + e.consecutiveFailures = 0 + return true +} + +// BackedOffUntil returns the end of the key's current backoff window, or the +// zero time when the key is not backed off. Useful for diagnostics and error +// messages. +func (t *Tracker) BackedOffUntil(key string) time.Time { + t.mu.Lock() + defer t.mu.Unlock() + e, ok := t.entries[key] + if !ok || !t.cfg.Clock().Before(e.until) { + return time.Time{} + } + return e.until +} + +// cooldownFor computes the cooldown for the n-th consecutive backoff +// (0-based): base * multiplier^n, capped at MaxCooldown. +func (t *Tracker) cooldownFor(n int) time.Duration { + d := float64(t.cfg.BaseCooldown) + for range n { + d *= t.cfg.Multiplier + if time.Duration(d) >= t.cfg.MaxCooldown { + return t.cfg.MaxCooldown + } + } + if time.Duration(d) > t.cfg.MaxCooldown { + return t.cfg.MaxCooldown + } + return time.Duration(d) +} diff --git a/health/health_test.go b/health/health_test.go new file mode 100644 index 0000000..1e1e915 --- /dev/null +++ b/health/health_test.go @@ -0,0 +1,165 @@ +package health + +import ( + "sync" + "testing" + "time" +) + +// fakeClock is a manually-advanced clock for deterministic backoff tests. +type fakeClock struct { + mu sync.Mutex + now time.Time +} + +func newFakeClock() *fakeClock { + return &fakeClock{now: time.Date(2026, 6, 10, 12, 0, 0, 0, time.UTC)} +} + +func (c *fakeClock) Now() time.Time { + c.mu.Lock() + defer c.mu.Unlock() + return c.now +} + +func (c *fakeClock) Advance(d time.Duration) { + c.mu.Lock() + defer c.mu.Unlock() + c.now = c.now.Add(d) +} + +func newTestTracker(clock *fakeClock) *Tracker { + return NewTracker(Config{ + FailureThreshold: 2, + BaseCooldown: 5 * time.Second, + MaxCooldown: 5 * time.Minute, + Multiplier: 2, + Clock: clock.Now, + }) +} + +func TestSingleFailureStaysAvailable(t *testing.T) { + clock := newFakeClock() + tr := newTestTracker(clock) + if backedOff := tr.ReportFailure("k"); backedOff { + t.Error("first failure must not back off") + } + if !tr.Available("k") { + t.Error("key should remain available after one failure") + } +} + +func TestThresholdTriggersBackoff(t *testing.T) { + clock := newFakeClock() + tr := newTestTracker(clock) + tr.ReportFailure("k") + if backedOff := tr.ReportFailure("k"); !backedOff { + t.Error("second consecutive failure should back off") + } + if tr.Available("k") { + t.Error("key should be unavailable during backoff") + } + if until := tr.BackedOffUntil("k"); !until.Equal(clock.Now().Add(5 * time.Second)) { + t.Errorf("BackedOffUntil = %v, want now+5s", until) + } +} + +func TestCooldownExpiryReadmits(t *testing.T) { + clock := newFakeClock() + tr := newTestTracker(clock) + tr.ReportFailure("k") + tr.ReportFailure("k") + clock.Advance(5*time.Second - time.Millisecond) + if tr.Available("k") { + t.Error("still inside cooldown") + } + clock.Advance(time.Millisecond) + if !tr.Available("k") { + t.Error("cooldown expiry should re-admit the key") + } +} + +func TestExponentialCooldownWithCap(t *testing.T) { + clock := newFakeClock() + tr := newTestTracker(clock) + + // Consecutive backoffs: 5s, 10s, 20s, ... capped at 5m. + wantCooldowns := []time.Duration{ + 5 * time.Second, 10 * time.Second, 20 * time.Second, 40 * time.Second, + 80 * time.Second, 160 * time.Second, 5 * time.Minute, 5 * time.Minute, + } + for i, want := range wantCooldowns { + tr.ReportFailure("k") + tr.ReportFailure("k") + until := tr.BackedOffUntil("k") + if got := until.Sub(clock.Now()); got != want { + t.Fatalf("backoff #%d cooldown = %v, want %v", i+1, got, want) + } + clock.Advance(want) + } +} + +func TestSuccessResetsEverything(t *testing.T) { + clock := newFakeClock() + tr := newTestTracker(clock) + + // Build up to a long cooldown... + for range 3 { + tr.ReportFailure("k") + tr.ReportFailure("k") + clock.Advance(tr.BackedOffUntil("k").Sub(clock.Now())) + } + // ...then a success resets both the count and the exponent. + tr.ReportSuccess("k") + tr.ReportFailure("k") + if !tr.Available("k") { + t.Error("one failure after success must not back off") + } + tr.ReportFailure("k") + if got := tr.BackedOffUntil("k").Sub(clock.Now()); got != 5*time.Second { + t.Errorf("post-reset cooldown = %v, want base 5s", got) + } +} + +func TestKeysAreIndependent(t *testing.T) { + clock := newFakeClock() + tr := newTestTracker(clock) + tr.ReportFailure("a") + tr.ReportFailure("a") + if tr.Available("a") { + t.Error("a should be backed off") + } + if !tr.Available("b") { + t.Error("b must be unaffected") + } +} + +func TestDefaultsApplied(t *testing.T) { + tr := NewTracker(Config{}) + if tr.cfg.FailureThreshold != DefaultFailureThreshold || + tr.cfg.BaseCooldown != DefaultBaseCooldown || + tr.cfg.MaxCooldown != DefaultMaxCooldown || + tr.cfg.Multiplier != DefaultMultiplier || + tr.cfg.Clock == nil { + t.Errorf("defaults not applied: %+v", tr.cfg) + } +} + +func TestTrackerConcurrency(t *testing.T) { + clock := newFakeClock() + tr := newTestTracker(clock) + var wg sync.WaitGroup + for i := range 8 { + wg.Add(1) + go func(n int) { + defer wg.Done() + key := []string{"a", "b"}[n%2] + for range 200 { + tr.ReportFailure(key) + tr.Available(key) + tr.ReportSuccess(key) + } + }(i) + } + wg.Wait() +} diff --git a/llm/capabilities.go b/llm/capabilities.go new file mode 100644 index 0000000..c0fbf0c --- /dev/null +++ b/llm/capabilities.go @@ -0,0 +1,45 @@ +package llm + +import "slices" + +// Capabilities declares what a model (or provider) supports and the limits +// it imposes. Providers declare defaults; individual models may override. +// The media pipeline normalizes image inputs against these values before a +// request is serialized. +// +// Zero-value semantics: +// - MaxImagesPerReq == 0 means image input is NOT supported. +// - MaxImageBytes / MaxImageDimension / ContextWindow == 0 mean +// "no declared limit", not zero. +// - AllowedImageMIME empty means any MIME type is acceptable +// (only meaningful when images are supported at all). +type Capabilities struct { + // MaxImageBytes is the largest single image payload, in bytes. + MaxImageBytes int + // MaxImageDimension is the largest allowed width or height, in pixels. + MaxImageDimension int + // AllowedImageMIME lists acceptable image content types + // (e.g. "image/jpeg", "image/png"). + AllowedImageMIME []string + // MaxImagesPerReq is the most images one request may carry; 0 = images + // unsupported. + MaxImagesPerReq int + + SupportsTools bool + SupportsStructured bool + SupportsStreaming bool + + // ContextWindow is the model's context size in tokens, when known. + ContextWindow int +} + +// SupportsImages reports whether the target accepts image input. +func (c Capabilities) SupportsImages() bool { return c.MaxImagesPerReq > 0 } + +// MIMEAllowed reports whether the given image MIME type is acceptable. +func (c Capabilities) MIMEAllowed(mime string) bool { + if len(c.AllowedImageMIME) == 0 { + return true + } + return slices.Contains(c.AllowedImageMIME, mime) +} diff --git a/llm/content.go b/llm/content.go new file mode 100644 index 0000000..c0219c8 --- /dev/null +++ b/llm/content.go @@ -0,0 +1,39 @@ +package llm + +// Part is one piece of message content: text, an image, or future media +// kinds. The set of implementations is closed (sealed by the unexported +// method) so providers can switch exhaustively over content kinds. +// +// Why: providers need a finite, known content vocabulary to serialize into +// their wire formats; an open interface would silently drop unknown content. +type Part interface { + isPart() +} + +// TextPart is plain text content. +type TextPart struct { + Text string +} + +func (TextPart) isPart() {} + +// ImagePart is image content carried as raw bytes plus a MIME type. +// +// Why bytes-only (no URL form): the media pipeline must be able to inspect, +// downscale, and re-encode every image to fit the target's capabilities, and +// that requires the bytes. Callers with a URL fetch it themselves; majordomo +// does not download remote content on a caller's behalf. +type ImagePart struct { + // MIME is the image content type, e.g. "image/png" or "image/jpeg". + MIME string + // Data is the raw, unencoded image bytes (providers base64 as needed). + Data []byte +} + +func (ImagePart) isPart() {} + +// Text constructs a text content part. +func Text(s string) Part { return TextPart{Text: s} } + +// Image constructs an image content part from raw bytes. +func Image(mime string, data []byte) Part { return ImagePart{MIME: mime, Data: data} } diff --git a/llm/errors.go b/llm/errors.go new file mode 100644 index 0000000..985569c --- /dev/null +++ b/llm/errors.go @@ -0,0 +1,119 @@ +package llm + +import ( + "context" + "errors" + "fmt" + "net" + "net/http" + "strings" + "syscall" +) + +// ErrorClass buckets errors for retry/failover decisions. +type ErrorClass int + +const ( + // ClassTransient errors may succeed on retry or on another target: + // rate limits, server errors, timeouts, connection failures. + ClassTransient ErrorClass = iota + // ClassPermanent errors will not improve on retry of the same request: + // malformed requests, auth failures, model-not-found. + ClassPermanent +) + +// ErrModelNotFound marks a permanent "this target does not know this model" +// condition. Chains advance past it without penalizing the target's health. +var ErrModelNotFound = errors.New("model not found") + +// APIError is a structured provider error carrying enough context to +// classify it and to debug it. +type APIError struct { + // Provider and Model identify the target that failed. + Provider string + Model string + + // Status is the HTTP status code, or 0 when the failure was not an HTTP + // response (connection error, decode error, ...). + Status int + + // Code is the provider-specific error code, when one was supplied. + Code string + + // Message is the provider's human-readable error message. + Message string + + // Err is the wrapped underlying cause, if any. + Err error +} + +func (e *APIError) Error() string { + var b strings.Builder + fmt.Fprintf(&b, "%s/%s", e.Provider, e.Model) + if e.Status != 0 { + fmt.Fprintf(&b, ": HTTP %d", e.Status) + } + if e.Code != "" { + fmt.Fprintf(&b, " [%s]", e.Code) + } + if e.Message != "" { + fmt.Fprintf(&b, ": %s", e.Message) + } + if e.Err != nil { + fmt.Fprintf(&b, ": %v", e.Err) + } + return b.String() +} + +func (e *APIError) Unwrap() error { + if e.Err != nil { + return e.Err + } + if e.Status == http.StatusNotFound { + return ErrModelNotFound + } + return nil +} + +// Classify buckets an error as transient or permanent. +// +// The default policy (overridable via health configuration): +// - context.Canceled is permanent — the caller gave up; retrying defies +// their intent. context.DeadlineExceeded is transient. +// - Network timeouts, refused/reset connections, and DNS failures are +// transient ("high demand" conditions). +// - HTTP 400/401/403/404/405/422 (and ErrModelNotFound) are permanent; +// 408/429 and all 5xx are transient. +// - Anything unrecognized is transient: when in doubt, failing over to the +// next target in a chain can only help availability. +func Classify(err error) ErrorClass { + if err == nil { + return ClassTransient + } + if errors.Is(err, context.Canceled) { + return ClassPermanent + } + if errors.Is(err, context.DeadlineExceeded) { + return ClassTransient + } + if errors.Is(err, ErrModelNotFound) { + return ClassPermanent + } + if errors.Is(err, syscall.ECONNREFUSED) || errors.Is(err, syscall.ECONNRESET) { + return ClassTransient + } + if _, ok := errors.AsType[net.Error](err); ok { + return ClassTransient + } + if apiErr, ok := errors.AsType[*APIError](err); ok && apiErr.Status != 0 { + switch { + case apiErr.Status == http.StatusRequestTimeout, // 408 + apiErr.Status == http.StatusTooManyRequests, // 429 + apiErr.Status >= 500: + return ClassTransient + case apiErr.Status >= 400: + return ClassPermanent + } + } + return ClassTransient +} diff --git a/llm/errors_test.go b/llm/errors_test.go new file mode 100644 index 0000000..231532a --- /dev/null +++ b/llm/errors_test.go @@ -0,0 +1,84 @@ +package llm + +import ( + "context" + "errors" + "fmt" + "net" + "strings" + "syscall" + "testing" +) + +type fakeNetErr struct{ timeout bool } + +func (e fakeNetErr) Error() string { return "fake net error" } +func (e fakeNetErr) Timeout() bool { return e.timeout } +func (e fakeNetErr) Temporary() bool { return true } + +var _ net.Error = fakeNetErr{} + +func TestClassify(t *testing.T) { + tests := []struct { + name string + err error + want ErrorClass + }{ + {"canceled is permanent", context.Canceled, ClassPermanent}, + {"deadline is transient", context.DeadlineExceeded, ClassTransient}, + {"wrapped canceled", fmt.Errorf("call: %w", context.Canceled), ClassPermanent}, + {"model not found", fmt.Errorf("x: %w", ErrModelNotFound), ClassPermanent}, + {"conn refused", syscall.ECONNREFUSED, ClassTransient}, + {"conn reset", fmt.Errorf("write: %w", syscall.ECONNRESET), ClassTransient}, + {"net timeout", fakeNetErr{timeout: true}, ClassTransient}, + {"http 429", &APIError{Status: 429}, ClassTransient}, + {"http 408", &APIError{Status: 408}, ClassTransient}, + {"http 500", &APIError{Status: 500}, ClassTransient}, + {"http 503", &APIError{Status: 503}, ClassTransient}, + {"http 529", &APIError{Status: 529}, ClassTransient}, + {"http 400", &APIError{Status: 400}, ClassPermanent}, + {"http 401", &APIError{Status: 401}, ClassPermanent}, + {"http 403", &APIError{Status: 403}, ClassPermanent}, + {"http 404", &APIError{Status: 404}, ClassPermanent}, + {"http 422", &APIError{Status: 422}, ClassPermanent}, + {"wrapped api error", fmt.Errorf("call: %w", &APIError{Status: 503}), ClassTransient}, + {"unknown defaults transient", errors.New("mystery"), ClassTransient}, + {"non-http api error defaults transient", &APIError{Message: "decode failed"}, ClassTransient}, + } + for _, tt := range tests { + if got := Classify(tt.err); got != tt.want { + t.Errorf("%s: Classify = %v, want %v", tt.name, got, tt.want) + } + } +} + +func TestAPIError404UnwrapsToModelNotFound(t *testing.T) { + err := &APIError{Provider: "openai", Model: "nope", Status: 404} + if !errors.Is(err, ErrModelNotFound) { + t.Error("404 APIError should match ErrModelNotFound") + } + if errors.Is(&APIError{Status: 500}, ErrModelNotFound) { + t.Error("500 APIError must not match ErrModelNotFound") + } +} + +func TestAPIErrorMessage(t *testing.T) { + err := &APIError{ + Provider: "anthropic", Model: "opus-4.8", + Status: 429, Code: "rate_limit_error", Message: "slow down", + } + got := err.Error() + for _, frag := range []string{"anthropic/opus-4.8", "429", "rate_limit_error", "slow down"} { + if !strings.Contains(got, frag) { + t.Errorf("error string %q missing %q", got, frag) + } + } +} + +func TestAPIErrorUnwrapsCause(t *testing.T) { + cause := errors.New("boom") + err := &APIError{Provider: "p", Model: "m", Err: cause} + if !errors.Is(err, cause) { + t.Error("APIError should unwrap to its cause") + } +} diff --git a/llm/llm.go b/llm/llm.go new file mode 100644 index 0000000..7e4488b --- /dev/null +++ b/llm/llm.go @@ -0,0 +1,12 @@ +// Package llm defines majordomo's canonical, provider-agnostic contract: +// messages and content parts, requests and responses, tools, capabilities, +// streaming, and the Model/Provider interfaces every backend implements. +// +// Why: provider implementations (openai, anthropic, google, ollama, foreman, +// and any client-defined backend) must share one vocabulary without importing +// each other or the root package. This package is the dependency leaf — it +// imports nothing else in the module, and everything else imports it. +// +// Most consumers never import this package directly: the root majordomo +// package re-exports every type here via type aliases. +package llm diff --git a/llm/message.go b/llm/message.go new file mode 100644 index 0000000..78efd10 --- /dev/null +++ b/llm/message.go @@ -0,0 +1,71 @@ +package llm + +import "strings" + +// Role identifies the author of a message. +type Role string + +const ( + RoleSystem Role = "system" + RoleUser Role = "user" + RoleAssistant Role = "assistant" + RoleTool Role = "tool" +) + +// Message is one turn in a conversation. +// +// Exactly which fields are populated depends on the role: user and system +// messages carry Parts; assistant messages carry Parts and/or ToolCalls; +// tool messages carry ToolResults. Providers translate this canonical shape +// to and from their wire formats. +type Message struct { + Role Role + + // Parts is the message content (text, images, ...). + Parts []Part + + // ToolCalls are tool invocations requested by the assistant + // (meaningful only when Role == RoleAssistant). + ToolCalls []ToolCall + + // ToolResults carry the outcomes of earlier ToolCalls + // (meaningful only when Role == RoleTool). + ToolResults []ToolResult +} + +// Text returns the concatenation of all text parts in the message. +func (m Message) Text() string { + var b strings.Builder + for _, p := range m.Parts { + if t, ok := p.(TextPart); ok { + b.WriteString(t.Text) + } + } + return b.String() +} + +// SystemText constructs a system message with one text part. +func SystemText(s string) Message { + return Message{Role: RoleSystem, Parts: []Part{Text(s)}} +} + +// UserText constructs a user message with one text part. +func UserText(s string) Message { + return Message{Role: RoleUser, Parts: []Part{Text(s)}} +} + +// UserParts constructs a user message from arbitrary content parts +// (e.g. text plus images). +func UserParts(parts ...Part) Message { + return Message{Role: RoleUser, Parts: parts} +} + +// AssistantText constructs an assistant message with one text part. +func AssistantText(s string) Message { + return Message{Role: RoleAssistant, Parts: []Part{Text(s)}} +} + +// ToolResultsMessage constructs a tool message carrying one or more results. +func ToolResultsMessage(results ...ToolResult) Message { + return Message{Role: RoleTool, ToolResults: results} +} diff --git a/llm/message_test.go b/llm/message_test.go new file mode 100644 index 0000000..c319a17 --- /dev/null +++ b/llm/message_test.go @@ -0,0 +1,62 @@ +package llm + +import "testing" + +func TestMessageText(t *testing.T) { + m := UserParts(Text("a "), Image("image/png", []byte{1}), Text("b")) + if got := m.Text(); got != "a b" { + t.Errorf("Text = %q, want %q", got, "a b") + } +} + +func TestConstructors(t *testing.T) { + if m := SystemText("s"); m.Role != RoleSystem || m.Text() != "s" { + t.Errorf("SystemText = %+v", m) + } + if m := UserText("u"); m.Role != RoleUser || m.Text() != "u" { + t.Errorf("UserText = %+v", m) + } + if m := AssistantText("a"); m.Role != RoleAssistant || m.Text() != "a" { + t.Errorf("AssistantText = %+v", m) + } + m := ToolResultsMessage(ToolResult{ID: "1", Content: "ok"}) + if m.Role != RoleTool || len(m.ToolResults) != 1 { + t.Errorf("ToolResultsMessage = %+v", m) + } +} + +func TestResponseTextAndMessage(t *testing.T) { + r := &Response{ + Parts: []Part{Text("hello "), Text("world")}, + ToolCalls: []ToolCall{{ID: "1", Name: "t"}}, + } + if got := r.Text(); got != "hello world" { + t.Errorf("Text = %q", got) + } + m := r.Message() + if m.Role != RoleAssistant || m.Text() != "hello world" || len(m.ToolCalls) != 1 { + t.Errorf("Message = %+v", m) + } +} + +func TestUsageAccumulation(t *testing.T) { + u := Usage{InputTokens: 10, OutputTokens: 5} + u.Add(Usage{InputTokens: 1, OutputTokens: 2}) + if u.InputTokens != 11 || u.OutputTokens != 7 || u.Total() != 18 { + t.Errorf("usage = %+v", u) + } +} + +func TestCapabilitiesHelpers(t *testing.T) { + c := Capabilities{} + if c.SupportsImages() { + t.Error("zero MaxImagesPerReq must mean images unsupported") + } + if !c.MIMEAllowed("image/png") { + t.Error("empty AllowedImageMIME must allow any type") + } + c = Capabilities{MaxImagesPerReq: 2, AllowedImageMIME: []string{"image/jpeg"}} + if !c.SupportsImages() || c.MIMEAllowed("image/png") || !c.MIMEAllowed("image/jpeg") { + t.Errorf("capabilities helpers misbehave: %+v", c) + } +} diff --git a/llm/model.go b/llm/model.go new file mode 100644 index 0000000..2479ec0 --- /dev/null +++ b/llm/model.go @@ -0,0 +1,58 @@ +package llm + +import "context" + +// Model is the canonical generation interface. A Model may be a single +// provider-bound target or a failover chain — the two are interchangeable +// and callers never branch on which they got. +type Model interface { + // Generate performs one request/response round trip. + Generate(ctx context.Context, req Request, opts ...Option) (*Response, error) + + // Stream performs one request with incremental delivery. + Stream(ctx context.Context, req Request, opts ...Option) (Stream, error) + + // Capabilities reports what this model supports. For chains this is the + // head element's capabilities (the preferred target); per-attempt media + // normalization always uses the actual target's capabilities. + Capabilities() Capabilities +} + +// ModelOption configures a Model at construction time (Provider.Model). +type ModelOption func(*ModelConfig) + +// ModelConfig carries per-model construction settings shared by all +// providers. +type ModelConfig struct { + // Capabilities, when non-nil, overrides the provider's default + // capabilities for this model. + Capabilities *Capabilities +} + +// ApplyModelOptions folds options into a config. +func ApplyModelOptions(opts []ModelOption) ModelConfig { + var cfg ModelConfig + for _, opt := range opts { + opt(&cfg) + } + return cfg +} + +// WithCapabilities overrides the provider's default capabilities for one +// model (e.g. a vision-capable tag on an otherwise text-only provider). +func WithCapabilities(caps Capabilities) ModelOption { + return func(cfg *ModelConfig) { cfg.Capabilities = &caps } +} + +// Provider mints Models bound to one backend. Implementations translate the +// canonical Request/Response to and from their wire format and enforce their +// declared Capabilities. +type Provider interface { + // Name is the registry identifier used in "provider/model" specs. + Name() string + + // Model returns a Model bound to the given id. The id is whatever the + // backend accepts — majordomo passes it through verbatim and never + // validates it against a catalog. + Model(id string, opts ...ModelOption) (Model, error) +} diff --git a/llm/request.go b/llm/request.go new file mode 100644 index 0000000..fbbf6f6 --- /dev/null +++ b/llm/request.go @@ -0,0 +1,98 @@ +package llm + +import "encoding/json" + +// Request is the canonical generation request. Providers translate it to +// their wire format and enforce their declared Capabilities against it. +type Request struct { + // System is the system prompt. Providers map it to their native system + // mechanism (top-level system field, system message, SystemInstruction). + // Any RoleSystem messages in Messages are folded in after this field. + System string + + // Messages is the conversation so far, oldest first. + Messages []Message + + // Tools the model may call. + Tools []Tool + + // ToolChoice constrains tool use: "" or "auto" lets the model decide, + // "none" forbids tool calls, "required" forces some tool call, and any + // other value names the one tool the model must call. + ToolChoice string + + // Schema, when non-nil, is a JSON Schema object the response must + // conform to (structured output). Providers map it to their native + // mechanism. SchemaName names the schema for providers that require one. + Schema json.RawMessage + SchemaName string + + // Sampling and limit knobs. Pointer fields distinguish "unset" (provider + // default) from an explicit zero. + Temperature *float64 + TopP *float64 + + // MaxTokens caps the response length; 0 means provider default. + MaxTokens int + + // StopSequences halt generation when emitted. + StopSequences []string +} + +// Option mutates a Request before it is sent. Options passed to Generate or +// Stream are applied to a copy of the request, so a Request value can be +// safely reused across calls. +type Option func(*Request) + +// WithSystem sets the system prompt. +func WithSystem(s string) Option { return func(r *Request) { r.System = s } } + +// WithTools appends tools to the request. +func WithTools(tools ...Tool) Option { + return func(r *Request) { r.Tools = append(r.Tools, tools...) } +} + +// WithToolbox appends every tool in the toolbox to the request. +func WithToolbox(b *Toolbox) Option { + return func(r *Request) { r.Tools = append(r.Tools, b.Tools()...) } +} + +// WithToolChoice sets the tool-choice policy ("auto", "none", "required", +// or a specific tool name). +func WithToolChoice(choice string) Option { + return func(r *Request) { r.ToolChoice = choice } +} + +// WithSchema requests structured output conforming to the given JSON Schema. +// name is optional; providers that require a schema name fall back to +// "response" when it is empty. +func WithSchema(schema json.RawMessage, name string) Option { + return func(r *Request) { r.Schema = schema; r.SchemaName = name } +} + +// WithTemperature sets the sampling temperature. +func WithTemperature(t float64) Option { + return func(r *Request) { r.Temperature = &t } +} + +// WithTopP sets nucleus-sampling top-p. +func WithTopP(p float64) Option { + return func(r *Request) { r.TopP = &p } +} + +// WithMaxTokens caps the response length. +func WithMaxTokens(n int) Option { return func(r *Request) { r.MaxTokens = n } } + +// WithStopSequences sets stop sequences. +func WithStopSequences(stops ...string) Option { + return func(r *Request) { r.StopSequences = stops } +} + +// Apply returns a copy of the request with all options applied. Providers +// and wrappers call this once at the top of Generate/Stream. +func (r Request) Apply(opts ...Option) Request { + for _, opt := range opts { + opt(&r) + } + return r +} diff --git a/llm/response.go b/llm/response.go new file mode 100644 index 0000000..79a871d --- /dev/null +++ b/llm/response.go @@ -0,0 +1,73 @@ +package llm + +import "strings" + +// FinishReason explains why generation stopped. +type FinishReason string + +const ( + // FinishStop: the model completed its answer (or hit a stop sequence). + FinishStop FinishReason = "stop" + // FinishLength: the MaxTokens (or context) limit was hit. + FinishLength FinishReason = "length" + // FinishToolCalls: the model stopped to request tool invocations. + FinishToolCalls FinishReason = "tool_calls" + // FinishContentFilter: the provider suppressed content. + FinishContentFilter FinishReason = "content_filter" + // FinishOther: any provider-specific reason not mapped above. + FinishOther FinishReason = "other" +) + +// Usage reports token accounting for one request. +type Usage struct { + InputTokens int + OutputTokens int +} + +// Total returns input plus output tokens. +func (u Usage) Total() int { return u.InputTokens + u.OutputTokens } + +// Add accumulates another usage record (used by agents summing steps). +func (u *Usage) Add(o Usage) { + u.InputTokens += o.InputTokens + u.OutputTokens += o.OutputTokens +} + +// Response is the canonical generation result. +type Response struct { + // Parts is the response content (text, and for multimodal-output models, + // other media). + Parts []Part + + // ToolCalls are the tool invocations the model requested, if any. + ToolCalls []ToolCall + + FinishReason FinishReason + Usage Usage + + // Model identifies the resolved target that produced this response as + // "provider/model-id". With failover chains this names the element that + // actually served the request. + Model string + + // Raw is the provider-native response object, an escape hatch for + // provider-specific fields. May be nil; never required for normal use. + Raw any +} + +// Text returns the concatenation of all text parts in the response. +func (r *Response) Text() string { + var b strings.Builder + for _, p := range r.Parts { + if t, ok := p.(TextPart); ok { + b.WriteString(t.Text) + } + } + return b.String() +} + +// Message converts the response into an assistant message suitable for +// appending to a conversation history. +func (r *Response) Message() Message { + return Message{Role: RoleAssistant, Parts: r.Parts, ToolCalls: r.ToolCalls} +} diff --git a/llm/stream.go b/llm/stream.go new file mode 100644 index 0000000..4c0c979 --- /dev/null +++ b/llm/stream.go @@ -0,0 +1,28 @@ +package llm + +// StreamEvent is one increment of a streaming response. +// +// Exactly one field group is meaningful per event: a text delta, a completed +// tool call, or the final response. Tool-call arguments are buffered by the +// provider until complete — consumers never see partial JSON. +type StreamEvent struct { + // TextDelta is a fragment of assistant text. + TextDelta string + + // ToolCall, when non-nil, is a fully-assembled tool call. + ToolCall *ToolCall + + // Response, when non-nil, is the final accumulated response (content, + // tool calls, finish reason, usage). It is always the last event. + Response *Response +} + +// Stream delivers a response incrementally. +// +// Next returns io.EOF after the final event (the one carrying Response). +// Close releases the underlying connection and is safe to call at any time, +// including after io.EOF or concurrently with Next returning. +type Stream interface { + Next() (StreamEvent, error) + Close() error +} diff --git a/llm/tool.go b/llm/tool.go new file mode 100644 index 0000000..ecb1e4c --- /dev/null +++ b/llm/tool.go @@ -0,0 +1,165 @@ +package llm + +import ( + "context" + "encoding/json" + "fmt" +) + +// Tool is a callable capability exposed to a model: a name, a description, +// JSON-Schema parameters, and a Go handler. Providers map this one canonical +// shape onto their native function-calling formats. +type Tool struct { + Name string + Description string + + // Parameters is a JSON Schema object describing the tool's arguments. + // nil means the tool takes no arguments. + Parameters json.RawMessage + + // Handler executes the tool. args is the raw JSON arguments object the + // model supplied. The returned value is JSON-encoded into the ToolResult. + Handler func(ctx context.Context, args json.RawMessage) (any, error) +} + +// ToolCall is a model's request to invoke a tool. +type ToolCall struct { + // ID is the provider-assigned call id; majordomo synthesizes one for + // providers that do not supply ids. ToolResult.ID must echo it. + ID string + Name string + + // Arguments is the raw JSON arguments object. + Arguments json.RawMessage +} + +// ToolResult is the outcome of executing a ToolCall, sent back to the model. +type ToolResult struct { + // ID matches the originating ToolCall.ID. + ID string + Name string + + // Content is the result serialized as text (JSON for structured values). + Content string + + // IsError marks the result as a failure; the content then describes the + // error so the model can react (retry, apologize, try another tool). + IsError bool +} + +// Toolbox is a named, ordered set of tools. +// +// Why: agents compose their available tools from several sources (multiple +// toolboxes plus skills); a small named container with duplicate detection +// keeps that merge explicit and debuggable. +type Toolbox struct { + name string + order []string + tools map[string]Tool +} + +// NewToolbox creates a toolbox with the given name and initial tools. +// Duplicate tool names panic: toolboxes are assembled at startup, and a +// silently shadowed tool is a programming error worth failing loudly on. +func NewToolbox(name string, tools ...Tool) *Toolbox { + b := &Toolbox{name: name, tools: make(map[string]Tool, len(tools))} + for _, t := range tools { + if err := b.Add(t); err != nil { + panic(err) + } + } + return b +} + +// Name returns the toolbox name. +func (b *Toolbox) Name() string { return b.name } + +// Add registers a tool, rejecting empty or duplicate names. +func (b *Toolbox) Add(t Tool) error { + if t.Name == "" { + return fmt.Errorf("toolbox %q: tool with empty name", b.name) + } + if _, exists := b.tools[t.Name]; exists { + return fmt.Errorf("toolbox %q: duplicate tool %q", b.name, t.Name) + } + b.tools[t.Name] = t + b.order = append(b.order, t.Name) + return nil +} + +// Tools returns the tools in insertion order. +func (b *Toolbox) Tools() []Tool { + out := make([]Tool, 0, len(b.order)) + for _, name := range b.order { + out = append(out, b.tools[name]) + } + return out +} + +// Get returns the named tool. +func (b *Toolbox) Get(name string) (Tool, bool) { + t, ok := b.tools[name] + return t, ok +} + +// Execute runs the named tool for the given call and packages the outcome as +// a ToolResult. It never panics and never returns an error: handler errors +// and panics become IsError results so an agent loop can always continue. +func (b *Toolbox) Execute(ctx context.Context, call ToolCall) ToolResult { + t, ok := b.tools[call.Name] + if !ok { + return ToolResult{ + ID: call.ID, Name: call.Name, + Content: fmt.Sprintf("unknown tool %q", call.Name), + IsError: true, + } + } + return ExecuteTool(ctx, t, call) +} + +// ExecuteTool runs a single tool for the given call, recovering panics and +// converting errors into IsError results. +func ExecuteTool(ctx context.Context, t Tool, call ToolCall) (res ToolResult) { + res = ToolResult{ID: call.ID, Name: call.Name} + defer func() { + if r := recover(); r != nil { + res.Content = fmt.Sprintf("tool %q panicked: %v", call.Name, r) + res.IsError = true + } + }() + + if t.Handler == nil { + res.Content = fmt.Sprintf("tool %q has no handler", call.Name) + res.IsError = true + return res + } + + args := call.Arguments + if len(args) == 0 { + args = json.RawMessage("{}") + } + out, err := t.Handler(ctx, args) + if err != nil { + res.Content = err.Error() + res.IsError = true + return res + } + + switch v := out.(type) { + case nil: + res.Content = "null" + case string: + res.Content = v + case json.RawMessage: + res.Content = string(v) + default: + enc, err := json.Marshal(v) + if err != nil { + res.Content = fmt.Sprintf("tool %q returned unencodable value: %v", call.Name, err) + res.IsError = true + return res + } + res.Content = string(enc) + } + return res +} diff --git a/llm/tool_test.go b/llm/tool_test.go new file mode 100644 index 0000000..10e4a70 --- /dev/null +++ b/llm/tool_test.go @@ -0,0 +1,98 @@ +package llm + +import ( + "context" + "encoding/json" + "errors" + "strings" + "testing" +) + +func TestToolboxAddRejectsDuplicatesAndEmptyNames(t *testing.T) { + b := NewToolbox("box") + if err := b.Add(Tool{Name: "a"}); err != nil { + t.Fatalf("Add: %v", err) + } + if err := b.Add(Tool{Name: "a"}); err == nil { + t.Error("duplicate name should error") + } + if err := b.Add(Tool{}); err == nil { + t.Error("empty name should error") + } +} + +func TestToolboxOrderPreserved(t *testing.T) { + b := NewToolbox("box", Tool{Name: "z"}, Tool{Name: "a"}, Tool{Name: "m"}) + var names []string + for _, tool := range b.Tools() { + names = append(names, tool.Name) + } + if got, want := strings.Join(names, ","), "z,a,m"; got != want { + t.Errorf("order = %s, want %s", got, want) + } +} + +func TestExecuteUnknownTool(t *testing.T) { + b := NewToolbox("box") + res := b.Execute(context.Background(), ToolCall{ID: "1", Name: "missing"}) + if !res.IsError || !strings.Contains(res.Content, "missing") { + t.Errorf("result = %+v, want unknown-tool error", res) + } +} + +func TestExecuteHandlerOutcomes(t *testing.T) { + echo := func(v any, err error) Tool { + return Tool{Name: "t", Handler: func(context.Context, json.RawMessage) (any, error) { return v, err }} + } + + tests := []struct { + name string + tool Tool + wantContent string + wantErr bool + }{ + {"string passthrough", echo("plain", nil), "plain", false}, + {"struct json-encoded", echo(struct { + N int `json:"n"` + }{4}, nil), `{"n":4}`, false}, + {"raw message passthrough", echo(json.RawMessage(`{"k":1}`), nil), `{"k":1}`, false}, + {"nil becomes null", echo(nil, nil), "null", false}, + {"handler error", echo(nil, errors.New("boom")), "boom", true}, + {"unencodable value", echo(func() {}, nil), "unencodable", true}, + {"no handler", Tool{Name: "t"}, "no handler", true}, + } + for _, tt := range tests { + res := ExecuteTool(context.Background(), tt.tool, ToolCall{ID: "c1", Name: "t"}) + if res.IsError != tt.wantErr { + t.Errorf("%s: IsError = %v, want %v (%+v)", tt.name, res.IsError, tt.wantErr, res) + } + if !strings.Contains(res.Content, tt.wantContent) { + t.Errorf("%s: content = %q, want it to contain %q", tt.name, res.Content, tt.wantContent) + } + if res.ID != "c1" { + t.Errorf("%s: result ID = %q, want c1", tt.name, res.ID) + } + } +} + +func TestExecuteRecoversPanic(t *testing.T) { + tool := Tool{Name: "t", Handler: func(context.Context, json.RawMessage) (any, error) { + panic("kaboom") + }} + res := ExecuteTool(context.Background(), tool, ToolCall{ID: "1", Name: "t"}) + if !res.IsError || !strings.Contains(res.Content, "kaboom") { + t.Errorf("result = %+v, want recovered panic error", res) + } +} + +func TestExecuteEmptyArgsBecomeEmptyObject(t *testing.T) { + var got json.RawMessage + tool := Tool{Name: "t", Handler: func(_ context.Context, args json.RawMessage) (any, error) { + got = args + return "ok", nil + }} + ExecuteTool(context.Background(), tool, ToolCall{ID: "1", Name: "t"}) + if string(got) != "{}" { + t.Errorf("args = %q, want {}", got) + } +} diff --git a/majordomo.go b/majordomo.go new file mode 100644 index 0000000..a63a006 --- /dev/null +++ b/majordomo.go @@ -0,0 +1,139 @@ +// Package majordomo is a clean-slate substrate for building LLM-backed +// agents: target-agnostic model access, a parseable model naming / +// failover / tiering system with health tracking, multimodality, tool calls +// and structured output, and agents composed from a model + system prompt + +// toolboxes + skills. +// +// The one-call entry point is Parse: +// +// reg := majordomo.New() +// m, err := reg.Parse("ollama-cloud/minimax-m3:cloud,anthropic/opus-4.8,thinking") +// resp, err := m.Generate(ctx, majordomo.Request{ +// Messages: []majordomo.Message{majordomo.UserText("hello")}, +// }) +// +// A spec is a comma-separated failover chain. Each element is either a +// "provider/model" target (built-in, client-registered, or defined via an +// LLM_* env DSN) or a registered alias/tier, which expands inline. See +// Registry.Parse for the full grammar. +// +// The canonical types (Message, Request, Response, Tool, Capabilities, ...) +// are defined in the llm subpackage and re-exported here, so most consumers +// only ever import this package (plus agent and skill). +package majordomo + +import ( + "encoding/json" + "sync" + + "gitea.stevedudenhoeffer.com/steve/majordomo/llm" +) + +// Re-exported canonical types. See the llm package for documentation. +type ( + Model = llm.Model + Provider = llm.Provider + Message = llm.Message + Role = llm.Role + Part = llm.Part + TextPart = llm.TextPart + ImagePart = llm.ImagePart + Request = llm.Request + Response = llm.Response + Option = llm.Option + ModelOption = llm.ModelOption + ModelConfig = llm.ModelConfig + Tool = llm.Tool + ToolCall = llm.ToolCall + ToolResult = llm.ToolResult + Toolbox = llm.Toolbox + Capabilities = llm.Capabilities + Stream = llm.Stream + StreamEvent = llm.StreamEvent + Usage = llm.Usage + FinishReason = llm.FinishReason + APIError = llm.APIError + ErrorClass = llm.ErrorClass +) + +// Re-exported role and finish-reason constants. +const ( + RoleSystem = llm.RoleSystem + RoleUser = llm.RoleUser + RoleAssistant = llm.RoleAssistant + RoleTool = llm.RoleTool + + FinishStop = llm.FinishStop + FinishLength = llm.FinishLength + FinishToolCalls = llm.FinishToolCalls + FinishContentFilter = llm.FinishContentFilter + FinishOther = llm.FinishOther + + ClassTransient = llm.ClassTransient + ClassPermanent = llm.ClassPermanent +) + +// ErrModelNotFound re-exports llm.ErrModelNotFound. +var ErrModelNotFound = llm.ErrModelNotFound + +// Re-exported content and message constructors. +func Text(s string) Part { return llm.Text(s) } +func Image(mime string, data []byte) Part { return llm.Image(mime, data) } +func SystemText(s string) Message { return llm.SystemText(s) } +func UserText(s string) Message { return llm.UserText(s) } +func UserParts(parts ...Part) Message { return llm.UserParts(parts...) } +func AssistantText(s string) Message { return llm.AssistantText(s) } +func ToolResultsMessage(results ...ToolResult) Message { return llm.ToolResultsMessage(results...) } +func NewToolbox(name string, tools ...Tool) *Toolbox { return llm.NewToolbox(name, tools...) } + +// Re-exported request options. +func WithSystem(s string) Option { return llm.WithSystem(s) } +func WithTools(tools ...Tool) Option { return llm.WithTools(tools...) } +func WithToolbox(b *Toolbox) Option { return llm.WithToolbox(b) } +func WithToolChoice(choice string) Option { return llm.WithToolChoice(choice) } +func WithSchema(schema json.RawMessage, name string) Option { return llm.WithSchema(schema, name) } +func WithTemperature(t float64) Option { return llm.WithTemperature(t) } +func WithTopP(p float64) Option { return llm.WithTopP(p) } +func WithMaxTokens(n int) Option { return llm.WithMaxTokens(n) } +func WithStopSequences(stops ...string) Option { return llm.WithStopSequences(stops...) } + +// WithModelCapabilities re-exports llm.WithCapabilities for Provider.Model +// calls made through this package. +func WithModelCapabilities(caps Capabilities) ModelOption { return llm.WithCapabilities(caps) } + +// Classify re-exports llm.Classify. +func Classify(err error) ErrorClass { return llm.Classify(err) } + +// defaultRegistry backs the package-level convenience functions. +var defaultRegistry = func() func() *Registry { + var ( + once sync.Once + reg *Registry + ) + return func() *Registry { + once.Do(func() { reg = New() }) + return reg + } +}() + +// Default returns the lazily-initialized package-level Registry (built-in +// providers plus LLM_* env providers from the process environment). +func Default() *Registry { return defaultRegistry() } + +// Parse resolves a spec using the Default registry. +func Parse(spec string) (Model, error) { return Default().Parse(spec) } + +// MustParse is Parse that panics on error; for wiring code and examples. +func MustParse(spec string) Model { + m, err := Parse(spec) + if err != nil { + panic(err) + } + return m +} + +// RegisterProvider registers a provider in the Default registry. +func RegisterProvider(p Provider) { Default().RegisterProvider(p) } + +// RegisterAlias registers an alias/tier in the Default registry. +func RegisterAlias(name, spec string) { Default().RegisterAlias(name, spec) } diff --git a/parse.go b/parse.go new file mode 100644 index 0000000..50467dc --- /dev/null +++ b/parse.go @@ -0,0 +1,122 @@ +package majordomo + +import ( + "errors" + "fmt" + "slices" + "strings" + + "gitea.stevedudenhoeffer.com/steve/majordomo/llm" +) + +// ErrAliasCycle reports a self-referential or looping alias expansion. +var ErrAliasCycle = errors.New("alias cycle") + +// ErrEmptySpec reports a spec with no usable elements. +var ErrEmptySpec = errors.New("empty model spec") + +// element is one resolved chain element: a provider name plus a verbatim +// model id. +type element struct { + provider string + model string +} + +func (e element) key() string { return e.provider + "/" + e.model } + +// Parse resolves a model spec to a Model. +// +// Grammar: +// +// spec := chain +// chain := element ("," element)* +// element := target | alias +// target := provider "/" model +// alias := bare token with no slash +// +// The provider of a target is the first path segment; everything after the +// first "/" (up to the next comma) is the model id and is passed to the +// provider verbatim — "ollama-cloud/minimax-m3:cloud" keeps its tag, and +// Google-style ids with extra slashes survive intact. Providers resolve +// through the registry: built-ins, RegisterProvider entries, LLM_* env +// definitions (eager or lazy), in that order. +// +// An alias expands to its registered spec inline, wherever it appears in a +// chain (head, middle, or tail), recursively, with cycle detection. +// +// A single element and a multi-element chain return the same Model +// interface; callers never branch on which they got. Multi-element chains +// try elements head-to-tail with health-tracked failover (see ChainConfig +// and the health package). +func (r *Registry) Parse(spec string) (llm.Model, error) { + elements, err := r.expand(spec, nil) + if err != nil { + return nil, err + } + if len(elements) == 0 { + return nil, fmt.Errorf("%w: %q", ErrEmptySpec, spec) + } + + targets := make([]chainTarget, 0, len(elements)) + seen := make(map[string]bool, len(elements)) + for _, el := range elements { + // A duplicate element (e.g. via overlapping alias expansions) would + // just retry the same backed-off target; keep the first occurrence. + if seen[el.key()] { + continue + } + seen[el.key()] = true + + p, err := r.providerFor(el.provider) + if err != nil { + return nil, fmt.Errorf("spec %q: %w", spec, err) + } + m, err := p.Model(el.model) + if err != nil { + return nil, fmt.Errorf("spec %q: provider %q: model %q: %w", spec, el.provider, el.model, err) + } + targets = append(targets, chainTarget{key: el.key(), model: m}) + } + + return &chain{targets: targets, tracker: r.tracker, cfg: r.chainCfg}, nil +} + +// expand splits a spec into elements, expanding aliases inline and +// recursively. visiting holds the alias names currently being expanded, for +// cycle detection. +func (r *Registry) expand(spec string, visiting []string) ([]element, error) { + var out []element + for raw := range strings.SplitSeq(spec, ",") { + raw = strings.TrimSpace(raw) + if raw == "" { + continue + } + + if provider, model, hasSlash := strings.Cut(raw, "/"); hasSlash { + out = append(out, element{provider: provider, model: model}) + continue + } + + // Bare token: must be a registered alias. + r.mu.RLock() + target, isAlias := r.aliases[raw] + _, isProvider := r.providers[raw] + r.mu.RUnlock() + + if !isAlias { + if isProvider { + return nil, fmt.Errorf("%q is a provider, not an alias — use %q", raw, raw+"/") + } + return nil, fmt.Errorf("%w: %q is not a registered alias and has no provider/ prefix", ErrUnknownProvider, raw) + } + if slices.Contains(visiting, raw) { + return nil, fmt.Errorf("%w: %s", ErrAliasCycle, strings.Join(append(visiting, raw), " -> ")) + } + sub, err := r.expand(target, append(visiting, raw)) + if err != nil { + return nil, fmt.Errorf("alias %q: %w", raw, err) + } + out = append(out, sub...) + } + return out, nil +} diff --git a/parse_test.go b/parse_test.go new file mode 100644 index 0000000..6b52847 --- /dev/null +++ b/parse_test.go @@ -0,0 +1,221 @@ +package majordomo + +import ( + "context" + "errors" + "slices" + "strings" + "testing" + + "gitea.stevedudenhoeffer.com/steve/majordomo/provider/fake" +) + +// newTestRegistry returns a registry isolated from the process environment. +func newTestRegistry(t *testing.T, opts ...RegistryOption) *Registry { + t.Helper() + opts = append([]RegistryOption{ + WithoutEnvProviders(), + WithEnvLookup(func(string) string { return "" }), + }, opts...) + return New(opts...) +} + +// targetsOf extracts the resolved chain keys from a parsed model. +func targetsOf(t *testing.T, m Model) []string { + t.Helper() + c, ok := m.(*chain) + if !ok { + t.Fatalf("Parse returned %T, want *chain", m) + } + return c.Targets() +} + +func TestParseSingleTarget(t *testing.T) { + r := newTestRegistry(t) + r.RegisterProvider(fake.New("fp")) + + m, err := r.Parse("fp/some-model:7b") + if err != nil { + t.Fatalf("Parse: %v", err) + } + want := []string{"fp/some-model:7b"} + if got := targetsOf(t, m); !slices.Equal(got, want) { + t.Errorf("targets = %v, want %v", got, want) + } + + resp, err := m.Generate(context.Background(), Request{Messages: []Message{UserText("hi")}}) + if err != nil { + t.Fatalf("Generate: %v", err) + } + if resp.Text() == "" { + t.Error("empty response text") + } + if resp.Model != "fp/some-model:7b" { + t.Errorf("resp.Model = %q, want fp/some-model:7b", resp.Model) + } +} + +func TestParseModelIDIsVerbatim(t *testing.T) { + r := newTestRegistry(t) + r.RegisterProvider(fake.New("google")) + r.RegisterProvider(fake.New("ollama-cloud")) + + // Everything after the first slash, up to the next comma, is the model + // id: colons and additional slashes pass through untouched. + for spec, want := range map[string]string{ + "ollama-cloud/minimax-m3:cloud": "ollama-cloud/minimax-m3:cloud", + "google/models/gemini-3.0-pro": "google/models/gemini-3.0-pro", + "ollama-cloud/qwen3-coder:480b-cloud": "ollama-cloud/qwen3-coder:480b-cloud", + } { + m, err := r.Parse(spec) + if err != nil { + t.Fatalf("Parse(%q): %v", spec, err) + } + if got := targetsOf(t, m); !slices.Equal(got, []string{want}) { + t.Errorf("Parse(%q) targets = %v, want [%s]", spec, got, want) + } + } +} + +// TestParseTrailingAliasChain covers the README's flagship example: a chain +// whose tail is a registered alias, expanded inline. +func TestParseTrailingAliasChain(t *testing.T) { + r := newTestRegistry(t) + r.RegisterProvider(fake.New("ollama-cloud")) + r.RegisterProvider(fake.New("anthropic")) + r.RegisterProvider(fake.New("openai")) + r.RegisterAlias("thinking", "openai/gpt-5.5,anthropic/opus-4.8") + + m, err := r.Parse("ollama-cloud/minimax-m3:cloud,ollama-cloud/kimi-k2.6:cloud,anthropic/opus-4.8,thinking") + if err != nil { + t.Fatalf("Parse: %v", err) + } + // "thinking" expands inline at the tail; its anthropic/opus-4.8 element + // is a duplicate of the explicit one and is kept once (first wins). + want := []string{ + "ollama-cloud/minimax-m3:cloud", + "ollama-cloud/kimi-k2.6:cloud", + "anthropic/opus-4.8", + "openai/gpt-5.5", + } + if got := targetsOf(t, m); !slices.Equal(got, want) { + t.Errorf("targets = %v, want %v", got, want) + } +} + +func TestParseAliasPositions(t *testing.T) { + r := newTestRegistry(t) + r.RegisterProvider(fake.New("fp")) + r.RegisterAlias("mid", "fp/m1,fp/m2") + + m, err := r.Parse("fp/head,mid,fp/tail") + if err != nil { + t.Fatalf("Parse: %v", err) + } + want := []string{"fp/head", "fp/m1", "fp/m2", "fp/tail"} + if got := targetsOf(t, m); !slices.Equal(got, want) { + t.Errorf("targets = %v, want %v", got, want) + } +} + +func TestParseNestedAlias(t *testing.T) { + r := newTestRegistry(t) + r.RegisterProvider(fake.New("fp")) + r.RegisterAlias("inner", "fp/deep") + r.RegisterAlias("outer", "inner,fp/shallow") + + m, err := r.Parse("outer") + if err != nil { + t.Fatalf("Parse: %v", err) + } + want := []string{"fp/deep", "fp/shallow"} + if got := targetsOf(t, m); !slices.Equal(got, want) { + t.Errorf("targets = %v, want %v", got, want) + } +} + +func TestParseAliasCycle(t *testing.T) { + r := newTestRegistry(t) + r.RegisterAlias("a", "b") + r.RegisterAlias("b", "a") + if _, err := r.Parse("a"); !errors.Is(err, ErrAliasCycle) { + t.Errorf("Parse(a) error = %v, want ErrAliasCycle", err) + } + + r.RegisterAlias("self", "self") + if _, err := r.Parse("self"); !errors.Is(err, ErrAliasCycle) { + t.Errorf("Parse(self) error = %v, want ErrAliasCycle", err) + } +} + +func TestParseUnknownAlias(t *testing.T) { + r := newTestRegistry(t) + if _, err := r.Parse("nonesuch"); !errors.Is(err, ErrUnknownProvider) { + t.Errorf("error = %v, want ErrUnknownProvider", err) + } +} + +func TestParseBareProviderName(t *testing.T) { + r := newTestRegistry(t) + _, err := r.Parse("openai") + if err == nil || !strings.Contains(err.Error(), "openai/") { + t.Errorf("error = %v, want hint about openai/", err) + } +} + +func TestParseUnknownProviderMentionsEnvVar(t *testing.T) { + r := newTestRegistry(t) + _, err := r.Parse("nope/some-model") + if !errors.Is(err, ErrUnknownProvider) { + t.Fatalf("error = %v, want ErrUnknownProvider", err) + } + if !strings.Contains(err.Error(), "LLM_NOPE") { + t.Errorf("error %q should mention the LLM_NOPE env var", err) + } +} + +func TestParseEmptySpecs(t *testing.T) { + r := newTestRegistry(t) + for _, spec := range []string{"", " ", ",", " , ,"} { + if _, err := r.Parse(spec); !errors.Is(err, ErrEmptySpec) { + t.Errorf("Parse(%q) error = %v, want ErrEmptySpec", spec, err) + } + } +} + +func TestParseTrimsWhitespace(t *testing.T) { + r := newTestRegistry(t) + r.RegisterProvider(fake.New("fp")) + m, err := r.Parse(" fp/a , fp/b ") + if err != nil { + t.Fatalf("Parse: %v", err) + } + want := []string{"fp/a", "fp/b"} + if got := targetsOf(t, m); !slices.Equal(got, want) { + t.Errorf("targets = %v, want %v", got, want) + } +} + +func TestParseDeduplicatesElements(t *testing.T) { + r := newTestRegistry(t) + r.RegisterProvider(fake.New("fp")) + m, err := r.Parse("fp/a,fp/b,fp/a") + if err != nil { + t.Fatalf("Parse: %v", err) + } + want := []string{"fp/a", "fp/b"} + if got := targetsOf(t, m); !slices.Equal(got, want) { + t.Errorf("targets = %v, want %v", got, want) + } +} + +func TestBuiltinsResolve(t *testing.T) { + r := newTestRegistry(t) + // All built-in provider names resolve even before their client + // implementations land (stub providers error only on use). + for _, name := range []string{"openai", "anthropic", "google", "ollama", "ollama-cloud", "foreman"} { + if _, err := r.Parse(name + "/anything"); err != nil { + t.Errorf("Parse(%s/anything): %v", name, err) + } + } +} diff --git a/progress.md b/progress.md new file mode 100644 index 0000000..293f146 --- /dev/null +++ b/progress.md @@ -0,0 +1,36 @@ +# progress + +## 2026-06-10 — Phase 1: foundations, ADRs, skeleton, docs + +**Landed:** +- Module scaffold (Go 1.26), `.gitea/workflows/ci.yaml` (foreman-style + gates: build, vet, race tests, tidy-diff), `.env.example`. +- `llm/` canonical contract: Message/Part (sealed; text+image), + Request/Options, Response/Usage/FinishReason, Stream/StreamEvent, + Tool/Toolbox (panic-safe Execute), Capabilities (zero-value semantics), + Model/Provider interfaces, APIError + transient/permanent Classify. +- `health/`: clock-injected tracker — consecutive-failure threshold, + exponential capped cooldown, reset-on-success, thread-safe; full + deterministic test suite (fake clock). +- Root: Registry (providers/aliases/schemes/health), Parse with the binding + grammar (verbatim model ids, inline recursive alias expansion, cycle + detection, dedup), LLM_* env-DSN loading (go-llm-parity lazy fallback + + eager LoadEnv/New scan), chain executor implementing Model + (retry-on-transient, bench-on-repeat, skip-benched, 404-advance, + fail-fast-on-auth, joined exhaustion errors). Built-ins register as + resolvable stubs until their phases land. +- `provider/fake/`: scriptable provider (per-model outcome queues, request + recording, capabilities overrides, streaming) — the hermetic test rig. +- ADRs 0001–0008 + index; CLAUDE.md; honest README with pending-marked + matrix. +- Tests cover the two required cases: the trailing-`thinking` chain parse + and `LLM_M1=foreman://token@host` loading (plus DSN table, lazy fallback, + cycle detection, chain failover/backoff/exhaustion, toolbox execution, + error classification). + +**Notes:** chain executor landed in Phase 1 (design was settled); +Phase 2 deepens its test matrix (cooldown re-admission via fake clock, +alias-in-chain failover, permanent-policy override) and wires anything the +tests flush out. + +**Next:** Phase 2 — exhaustive health/chain test matrix. diff --git a/provider/fake/fake.go b/provider/fake/fake.go new file mode 100644 index 0000000..ea1c57c --- /dev/null +++ b/provider/fake/fake.go @@ -0,0 +1,230 @@ +// Package fake provides an in-memory llm.Provider for hermetic tests. +// +// Why: the resolver, env-DSN loader, chain executor, health tracker, agent +// loop, and skill composition must all be testable with no live API calls. +// The fake provider scripts responses and errors per model id, records every +// request it receives, and supports tools, structured output, and streaming +// well enough to drive those layers deterministically. +package fake + +import ( + "context" + "fmt" + "io" + "sync" + + "gitea.stevedudenhoeffer.com/steve/majordomo/llm" +) + +// Step is one scripted outcome: either a response or an error. +type Step struct { + Response *llm.Response + Err error +} + +// Reply scripts a successful text response. +func Reply(text string) Step { + return Step{Response: &llm.Response{ + Parts: []llm.Part{llm.Text(text)}, + FinishReason: llm.FinishStop, + Usage: llm.Usage{InputTokens: 1, OutputTokens: 1}, + }} +} + +// ReplyWith scripts an arbitrary successful response. +func ReplyWith(resp llm.Response) Step { return Step{Response: &resp} } + +// Fail scripts an error outcome. +func Fail(err error) Step { return Step{Err: err} } + +// Call records one request received by the fake, with the model id it was +// addressed to. +type Call struct { + ModelID string + Request llm.Request +} + +// Provider is a scriptable in-memory llm.Provider. +// +// Outcomes are enqueued per model id with Enqueue. A model whose queue is +// empty falls back to the provider default response (a fixed text reply). +// All methods are safe for concurrent use. +type Provider struct { + name string + + mu sync.Mutex + caps llm.Capabilities + modelCaps map[string]llm.Capabilities + queues map[string][]Step + calls []Call + defaultFn func(modelID string, req llm.Request) Step +} + +// Option configures the fake provider. +type Option func(*Provider) + +// WithCapabilities sets the provider-default capabilities. +func WithCapabilities(caps llm.Capabilities) Option { + return func(p *Provider) { p.caps = caps } +} + +// WithModelCapabilities overrides capabilities for one model id. +func WithModelCapabilities(modelID string, caps llm.Capabilities) Option { + return func(p *Provider) { p.modelCaps[modelID] = caps } +} + +// WithDefault sets the outcome used when a model's queue is empty. +func WithDefault(fn func(modelID string, req llm.Request) Step) Option { + return func(p *Provider) { p.defaultFn = fn } +} + +// New creates a fake provider with the given registry name. +func New(name string, opts ...Option) *Provider { + p := &Provider{ + name: name, + modelCaps: make(map[string]llm.Capabilities), + queues: make(map[string][]Step), + caps: llm.Capabilities{ + SupportsTools: true, + SupportsStructured: true, + SupportsStreaming: true, + MaxImagesPerReq: 4, + }, + defaultFn: func(modelID string, _ llm.Request) Step { + return Reply(fmt.Sprintf("fake response from %s", modelID)) + }, + } + for _, opt := range opts { + opt(p) + } + return p +} + +// Name implements llm.Provider. +func (p *Provider) Name() string { return p.name } + +// Model implements llm.Provider. Any id is accepted. +func (p *Provider) Model(id string, opts ...llm.ModelOption) (llm.Model, error) { + cfg := llm.ApplyModelOptions(opts) + return &model{provider: p, id: id, cfg: cfg}, nil +} + +// Enqueue appends scripted outcomes for a model id. +func (p *Provider) Enqueue(modelID string, steps ...Step) { + p.mu.Lock() + defer p.mu.Unlock() + p.queues[modelID] = append(p.queues[modelID], steps...) +} + +// Calls returns a copy of every request received so far. +func (p *Provider) Calls() []Call { + p.mu.Lock() + defer p.mu.Unlock() + out := make([]Call, len(p.calls)) + copy(out, p.calls) + return out +} + +// CallCount returns the number of requests received for one model id. +func (p *Provider) CallCount(modelID string) int { + p.mu.Lock() + defer p.mu.Unlock() + n := 0 + for _, c := range p.calls { + if c.ModelID == modelID { + n++ + } + } + return n +} + +// next records the call and pops the next scripted outcome. +func (p *Provider) next(modelID string, req llm.Request) Step { + p.mu.Lock() + defer p.mu.Unlock() + p.calls = append(p.calls, Call{ModelID: modelID, Request: req}) + q := p.queues[modelID] + if len(q) == 0 { + return p.defaultFn(modelID, req) + } + step := q[0] + p.queues[modelID] = q[1:] + return step +} + +func (p *Provider) capsFor(modelID string, cfg llm.ModelConfig) llm.Capabilities { + if cfg.Capabilities != nil { + return *cfg.Capabilities + } + p.mu.Lock() + defer p.mu.Unlock() + if caps, ok := p.modelCaps[modelID]; ok { + return caps + } + return p.caps +} + +type model struct { + provider *Provider + id string + cfg llm.ModelConfig +} + +func (m *model) Generate(ctx context.Context, req llm.Request, opts ...llm.Option) (*llm.Response, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + req = req.Apply(opts...) + step := m.provider.next(m.id, req) + if step.Err != nil { + return nil, step.Err + } + resp := *step.Response + if resp.Model == "" { + resp.Model = m.provider.name + "/" + m.id + } + return &resp, nil +} + +func (m *model) Stream(ctx context.Context, req llm.Request, opts ...llm.Option) (llm.Stream, error) { + resp, err := m.Generate(ctx, req, opts...) + if err != nil { + return nil, err + } + // Deliver the response as a small sequence of events: one text delta per + // part, one event per tool call, then the final response. + var events []llm.StreamEvent + for _, part := range resp.Parts { + if t, ok := part.(llm.TextPart); ok { + events = append(events, llm.StreamEvent{TextDelta: t.Text}) + } + } + for i := range resp.ToolCalls { + events = append(events, llm.StreamEvent{ToolCall: &resp.ToolCalls[i]}) + } + events = append(events, llm.StreamEvent{Response: resp}) + return &stream{events: events}, nil +} + +func (m *model) Capabilities() llm.Capabilities { + return m.provider.capsFor(m.id, m.cfg) +} + +type stream struct { + mu sync.Mutex + events []llm.StreamEvent + pos int +} + +func (s *stream) Next() (llm.StreamEvent, error) { + s.mu.Lock() + defer s.mu.Unlock() + if s.pos >= len(s.events) { + return llm.StreamEvent{}, io.EOF + } + ev := s.events[s.pos] + s.pos++ + return ev, nil +} + +func (s *stream) Close() error { return nil } diff --git a/registry.go b/registry.go new file mode 100644 index 0000000..430982f --- /dev/null +++ b/registry.go @@ -0,0 +1,227 @@ +package majordomo + +import ( + "fmt" + "os" + "strings" + "sync" + "time" + + "gitea.stevedudenhoeffer.com/steve/majordomo/health" + "gitea.stevedudenhoeffer.com/steve/majordomo/llm" +) + +// Registry owns providers, aliases/tiers, env-DSN scheme factories, the +// model health tracker, and Parse. It is safe for concurrent use. +type Registry struct { + mu sync.RWMutex + providers map[string]llm.Provider + aliases map[string]string + schemes map[string]SchemeFactory + // envErrs records LLM_* entries that failed to load so the failure + // surfaces when (and only when) that provider name is actually used. + envErrs map[string]error + + tracker *health.Tracker + chainCfg ChainConfig + envLookup func(string) string +} + +// SchemeFactory builds a provider instance from an env DSN. name is the +// registry name the provider will be registered under (e.g. "m1" for +// LLM_M1); dsn carries the scheme, credential, and host. +type SchemeFactory func(name string, dsn DSN) (llm.Provider, error) + +// ChainConfig tunes failover-chain execution. +type ChainConfig struct { + // TransientRetries is the number of immediate same-target retries after + // a transient error. 0 selects the default (1); negative disables + // retries. + TransientRetries int + + // AdvanceOnPermanent, when true, makes the chain advance to the next + // element on permanent errors other than model-not-found instead of + // returning immediately. Model-not-found always advances (without + // penalizing health); auth/malformed errors default to fail-fast + // because failing over cannot help a bad request. + AdvanceOnPermanent bool + + // Classify overrides the default error classifier (llm.Classify). + Classify func(error) llm.ErrorClass +} + +// DefaultTransientRetries is the default number of same-target retries +// after a single transient error. +const DefaultTransientRetries = 1 + +func (c ChainConfig) retries() int { + switch { + case c.TransientRetries < 0: + return 0 + case c.TransientRetries == 0: + return DefaultTransientRetries + default: + return c.TransientRetries + } +} + +func (c ChainConfig) classify(err error) llm.ErrorClass { + if c.Classify != nil { + return c.Classify(err) + } + return llm.Classify(err) +} + +type registryConfig struct { + health health.Config + chain ChainConfig + envLookup func(string) string + environ func() []string + skipEnv bool +} + +// RegistryOption configures New. +type RegistryOption func(*registryConfig) + +// WithHealthConfig overrides the health tracker configuration +// (thresholds, cooldowns, clock). +func WithHealthConfig(cfg health.Config) RegistryOption { + return func(rc *registryConfig) { rc.health = cfg } +} + +// WithChainConfig overrides failover-chain behavior (retry count, +// permanent-error policy, classifier). +func WithChainConfig(cfg ChainConfig) RegistryOption { + return func(rc *registryConfig) { rc.chain = cfg } +} + +// WithClock injects a clock for the health tracker; tests use a fake clock +// to step through backoff windows deterministically. +func WithClock(clock func() time.Time) RegistryOption { + return func(rc *registryConfig) { rc.health.Clock = clock } +} + +// WithEnvLookup injects the env-var lookup used for lazy LLM_* resolution +// during Parse (defaults to os.Getenv). Tests use this to avoid touching +// the process environment. +func WithEnvLookup(lookup func(string) string) RegistryOption { + return func(rc *registryConfig) { rc.envLookup = lookup } +} + +// WithoutEnvProviders disables the eager LLM_* scan at construction time. +// Lazy per-name resolution during Parse still works (use WithEnvLookup to +// control it in tests). +func WithoutEnvProviders() RegistryOption { + return func(rc *registryConfig) { rc.skipEnv = true } +} + +// New creates a Registry with all built-in providers and scheme factories +// registered, then loads LLM_* env-DSN providers from the process +// environment (unless WithoutEnvProviders is given). Malformed LLM_* entries +// do not fail construction; the error surfaces if that provider name is +// referenced in Parse. +func New(opts ...RegistryOption) *Registry { + cfg := registryConfig{ + envLookup: os.Getenv, + environ: os.Environ, + } + for _, opt := range opts { + opt(&cfg) + } + + r := &Registry{ + providers: make(map[string]llm.Provider), + aliases: make(map[string]string), + schemes: make(map[string]SchemeFactory), + envErrs: make(map[string]error), + tracker: health.NewTracker(cfg.health), + chainCfg: cfg.chain, + envLookup: cfg.envLookup, + } + + registerBuiltins(r) + + if !cfg.skipEnv { + env := make(map[string]string) + for _, kv := range cfg.environ() { + if k, v, ok := strings.Cut(kv, "="); ok { + env[k] = v + } + } + // Errors are recorded per-name and surfaced on use; see envErrs. + _ = r.LoadEnv(env) + } + return r +} + +// RegisterProvider adds or replaces a provider under its Name(). +func (r *Registry) RegisterProvider(p llm.Provider) { + r.mu.Lock() + defer r.mu.Unlock() + r.providers[p.Name()] = p +} + +// RegisterAlias maps a bare name (no slash) to a spec. The spec may be a +// single target, another alias, or a comma-separated chain; Parse expands +// aliases inline and recursively, with cycle detection. +func (r *Registry) RegisterAlias(name, spec string) { + r.mu.Lock() + defer r.mu.Unlock() + r.aliases[name] = spec +} + +// RegisterScheme adds or replaces an env-DSN scheme factory, letting +// consumers wire custom provider kinds into LLM_* definitions. +func (r *Registry) RegisterScheme(scheme string, factory SchemeFactory) { + r.mu.Lock() + defer r.mu.Unlock() + r.schemes[scheme] = factory +} + +// Provider returns the registered provider with the given name, if any. +func (r *Registry) Provider(name string) (llm.Provider, bool) { + r.mu.RLock() + defer r.mu.RUnlock() + p, ok := r.providers[name] + return p, ok +} + +// Health exposes the registry's health tracker (read-mostly; useful for +// diagnostics and tests). +func (r *Registry) Health() *health.Tracker { return r.tracker } + +// providerFor resolves a provider name: registered providers first, then a +// recorded env-load error for that name, then lazy LLM_* env resolution +// (go-llm parity: "m5" → env LLM_M5, "my-prov" → LLM_MY_PROV). Providers +// resolved lazily are cached in the registry. +func (r *Registry) providerFor(name string) (llm.Provider, error) { + r.mu.RLock() + p, ok := r.providers[name] + envErr := r.envErrs[name] + r.mu.RUnlock() + if ok { + return p, nil + } + if envErr != nil { + return nil, envErr + } + + envKey := "LLM_" + strings.ToUpper(strings.ReplaceAll(name, "-", "_")) + envVal := r.envLookup(envKey) + if envVal == "" { + return nil, fmt.Errorf("%w: %q (checked registry and %s env var)", ErrUnknownProvider, name, envKey) + } + p, err := r.providerFromDSN(name, envVal) + if err != nil { + return nil, fmt.Errorf("parse %s: %w", envKey, err) + } + + r.mu.Lock() + defer r.mu.Unlock() + // Another goroutine may have raced us here; keep the first registration. + if existing, ok := r.providers[name]; ok { + return existing, nil + } + r.providers[name] = p + return p, nil +}