feat: OpenAI, Anthropic, and native-Ollama providers + media pipeline

Phase 3:
- provider/openai: Chat Completions for OpenAI + compat endpoints (SSE
  streaming with by-index tool-call assembly, response_format json_schema,
  legacy max_tokens option, reasoning_effort)
- provider/anthropic: Messages API (tool_use/tool_result, GA structured
  output via output_config.format, full SSE event parser, HTTP 529
  treated as transient)
- provider/ollama: one native /api/chat client behind the ollama,
  ollama-cloud, and foreman built-ins (presets; NDJSON streaming tolerant
  of foreman's buffered single-object responses; object tool arguments;
  format-schema structured output; think mapping)
- media/: capability normalization (sniff, downscale, transcode, byte
  ladder, ErrUnsupported), wired into the chain executor per target with
  penalty-free advance past incapable elements
- registry: real provider + scheme wiring, WithHTTPClient option, required
  env-foreman TLS chat round-trip test
- ADR-0009 multimodal strategy, ADR-0010 tools/structured mapping; README
  matrix + CLAUDE.md synced

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
2026-06-10 12:58:08 +02:00
parent 323558ed72
commit 043249e0e1
31 changed files with 6194 additions and 74 deletions
+19 -7
View File
@@ -7,6 +7,7 @@ import (
"gitea.stevedudenhoeffer.com/steve/majordomo/health"
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
"gitea.stevedudenhoeffer.com/steve/majordomo/media"
)
// ErrChainExhausted reports that every element of a failover chain failed
@@ -65,8 +66,8 @@ func (c *chain) Capabilities() llm.Capabilities {
// Generate tries each target per the chain semantics above.
//
// Call options are applied to req once, up front (req.Apply), and the
// resulting request is handed to chainDo. chainDo invokes the callback once
// per attempt with the target and the request to send to that target.
func (c *chain) Generate(ctx context.Context, req llm.Request, opts ...llm.Option) (*llm.Response, error) {
// Apply per-call options before any target is tried.
req = req.Apply(opts...)
// NOTE(review): this is a rendered diff with the +/- markers stripped. The
// next two lines look like the removed (pre-change) call and the two after
// them like the added replacement — confirm against the raw diff.
return chainDo(ctx, c, func(ctx context.Context, t chainTarget) (*llm.Response, error) {
return t.model.Generate(ctx, req)
// chainDo supplies nreq (the request normalized for this target); the
// callback forwards nreq rather than the captured req.
return chainDo(ctx, c, req, func(ctx context.Context, t chainTarget, nreq llm.Request) (*llm.Response, error) {
return t.model.Generate(ctx, nreq)
})
}
@@ -76,14 +77,17 @@ func (c *chain) Generate(ctx context.Context, req llm.Request, opts ...llm.Optio
// (replaying half-delivered output would duplicate content).
func (c *chain) Stream(ctx context.Context, req llm.Request, opts ...llm.Option) (llm.Stream, error) {
// Apply per-call options before any target is tried.
req = req.Apply(opts...)
// NOTE(review): this is a rendered diff with the +/- markers stripped. The
// next two lines look like the removed (pre-change) call and the two after
// them like the added replacement — confirm against the raw diff.
return chainDo(ctx, c, func(ctx context.Context, t chainTarget) (llm.Stream, error) {
return t.model.Stream(ctx, req)
// chainDo supplies nreq (the request normalized for this target); the
// callback opens the stream with nreq rather than the captured req.
return chainDo(ctx, c, req, func(ctx context.Context, t chainTarget, nreq llm.Request) (llm.Stream, error) {
return t.model.Stream(ctx, nreq)
})
}
// chainDo runs the head-to-tail failover algorithm around an attempt
// function, generic over the result type (response vs stream).
func chainDo[T any](ctx context.Context, c *chain, attempt func(context.Context, chainTarget) (T, error)) (T, error) {
// function, generic over the result type (response vs stream). Before each
// target is tried, the request's media is normalized against THAT target's
// capabilities (ADR-0008/0009) — a request that cannot be made to fit one
// target advances to the next without a health penalty.
func chainDo[T any](ctx context.Context, c *chain, req llm.Request, attempt func(context.Context, chainTarget, llm.Request) (T, error)) (T, error) {
var zero T
var failures []error
@@ -94,12 +98,20 @@ func chainDo[T any](ctx context.Context, c *chain, attempt func(context.Context,
continue
}
nreq, err := media.Normalize(req, t.model.Capabilities())
if err != nil {
// err here is always ErrUnsupported-wrapped: by its declared
// capabilities this target cannot take the request. Advance to the
// next target without a health penalty.
failures = append(failures, fmt.Errorf("%s: %w", t.key, err))
continue
}
retries := c.cfg.retries()
for attemptN := 0; ; attemptN++ {
if err := ctx.Err(); err != nil {
return zero, err
}
result, err := attempt(ctx, t)
result, err := attempt(ctx, t, nreq)
if err == nil {
c.tracker.ReportSuccess(t.key)
return result, nil