feat: OpenAI, Anthropic, and native-Ollama providers + media pipeline

Phase 3:
- provider/openai: Chat Completions for OpenAI + compat endpoints (SSE streaming with by-index tool-call assembly, response_format json_schema, legacy max_tokens option, reasoning_effort)
- provider/anthropic: Messages API (tool_use/tool_result, GA structured output via output_config.format, full SSE event parser, 529 transient)
- provider/ollama: one native /api/chat client behind the ollama, ollama-cloud, and foreman built-ins (presets; NDJSON streaming tolerant of foreman's buffered single-object responses; object tool arguments; format-schema structured output; think mapping)
- media/: capability normalization (sniff, downscale, transcode, byte ladder, ErrUnsupported), wired into the chain executor per target with penalty-free advance past incapable elements
- registry: real provider + scheme wiring, WithHTTPClient option, required env-foreman TLS chat round-trip test
- ADR-0009 multimodal strategy, ADR-0010 tools/structured mapping; README matrix + CLAUDE.md synced

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-10 12:58:08 +02:00
parent 323558ed72
commit 043249e0e1
31 changed files with 6194 additions and 74 deletions
@@ -7,6 +7,7 @@ import (

 	"gitea.stevedudenhoeffer.com/steve/majordomo/health"
 	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
+	"gitea.stevedudenhoeffer.com/steve/majordomo/media"
 )

 // ErrChainExhausted reports that every element of a failover chain failed
@@ -65,8 +66,8 @@ func (c *chain) Capabilities() llm.Capabilities {
 // Generate tries each target per the chain semantics above.
 //
 // opts are applied to req once, before any target is tried. chainDo then
 // passes the attempt closure a per-target request (nreq) obtained from
 // media.Normalize against that target's Capabilities, and the closure
 // forwards nreq — not the outer req — to the target's Generate.
 func (c *chain) Generate(ctx context.Context, req llm.Request, opts ...llm.Option) (*llm.Response, error) {
 	req = req.Apply(opts...)
-	return chainDo(ctx, c, func(ctx context.Context, t chainTarget) (*llm.Response, error) {
-		return t.model.Generate(ctx, req)
+	return chainDo(ctx, c, req, func(ctx context.Context, t chainTarget, nreq llm.Request) (*llm.Response, error) {
+		return t.model.Generate(ctx, nreq)
 	})
 }

@@ -76,14 +77,17 @@ func (c *chain) Generate(ctx context.Context, req llm.Request, opts ...llm.Optio
 // (replaying half-delivered output would duplicate content).
 func (c *chain) Stream(ctx context.Context, req llm.Request, opts ...llm.Option) (llm.Stream, error) {
 	req = req.Apply(opts...)
 	// chainDo supplies nreq to the closure: req after media.Normalize against
 	// the current target's Capabilities. The closure streams nreq rather than
 	// the outer req so each target receives the request normalized for it.
-	return chainDo(ctx, c, func(ctx context.Context, t chainTarget) (llm.Stream, error) {
-		return t.model.Stream(ctx, req)
+	return chainDo(ctx, c, req, func(ctx context.Context, t chainTarget, nreq llm.Request) (llm.Stream, error) {
+		return t.model.Stream(ctx, nreq)
 	})
 }

 // chainDo runs the head-to-tail failover algorithm around an attempt
-// function, generic over the result type (response vs stream).
-func chainDo[T any](ctx context.Context, c *chain, attempt func(context.Context, chainTarget) (T, error)) (T, error) {
+// function, generic over the result type (response vs stream). Before each
+// target is tried, the request's media is normalized against THAT target's
+// capabilities (ADR-0008/0009) — a request that cannot be made to fit one
+// target advances to the next without a health penalty.
+func chainDo[T any](ctx context.Context, c *chain, req llm.Request, attempt func(context.Context, chainTarget, llm.Request) (T, error)) (T, error) {
 	var zero T
 	var failures []error

@@ -94,12 +98,20 @@ func chainDo[T any](ctx context.Context, c *chain, attempt func(context.Context,
 			continue
 		}

+		nreq, err := media.Normalize(req, t.model.Capabilities())
+		if err != nil {
+			// Always ErrUnsupported-wrapped: this target cannot take the
+			// request by declaration. Advance, no health penalty.
+			failures = append(failures, fmt.Errorf("%s: %w", t.key, err))
+			continue
+		}
+
 		retries := c.cfg.retries()
 		for attemptN := 0; ; attemptN++ {
 			if err := ctx.Err(); err != nil {
 				return zero, err
 			}
-			result, err := attempt(ctx, t)
+			result, err := attempt(ctx, t, nreq)
 			if err == nil {
 				c.tracker.ReportSuccess(t.key)
 				return result, nil