feat: OpenAI, Anthropic, and native-Ollama providers + media pipeline

Phase 3:
- provider/openai: Chat Completions for OpenAI + compat endpoints (SSE streaming with by-index tool-call assembly, response_format json_schema, legacy max_tokens option, reasoning_effort)
- provider/anthropic: Messages API (tool_use/tool_result, GA structured output via output_config.format, full SSE event parser, 529 transient)
- provider/ollama: one native /api/chat client behind the ollama, ollama-cloud, and foreman built-ins (presets; NDJSON streaming tolerant of foreman's buffered single-object responses; object tool arguments; format-schema structured output; think mapping)
- media/: capability normalization (sniff, downscale, transcode, byte ladder, ErrUnsupported), wired into the chain executor per target with penalty-free advance past incapable elements
- registry: real provider + scheme wiring, WithHTTPClient option, required env-foreman TLS chat round-trip test
- ADR-0009 multimodal strategy, ADR-0010 tools/structured mapping; README matrix + CLAUDE.md synced

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-10 12:58:08 +02:00
parent 323558ed72
commit 043249e0e1
31 changed files with 6194 additions and 74 deletions
@@ -0,0 +1,319 @@
+// Package anthropic implements llm.Provider for the Anthropic Messages API
+// and Anthropic-compatible endpoints.
+//
+// API surface targeted: POST {base}/v1/messages with headers x-api-key,
+// anthropic-version: 2023-06-01, and content-type: application/json, per the
+// platform.claude.com Messages API reference as of June 2026. Streaming uses
+// the documented SSE event sequence (message_start, content_block_start,
+// content_block_delta, content_block_stop, message_delta, message_stop).
+// Structured output uses the GA output_config.format mechanism with
+// {"type":"json_schema"}; the result arrives as JSON text in the first text
+// content block.
+//
+// Why a hand-rolled client (no SDK): ADR-0007 — majordomo is stdlib-first,
+// and the canonical llm contract needs only a narrow slice of the API.
+package anthropic
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"strings"
+
+	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
+)
+
+// Identity defaults that New assigns before any Option runs.
+const (
+	defaultName    = "anthropic"
+	defaultBaseURL = "https://api.anthropic.com"
+)
+
+// apiVersion is the value sent in the anthropic-version header. 2023-06-01
+// remains the current (and only) stable version string as of June 2026.
+const apiVersion = "2023-06-01"
+
+// defaultMaxTokens is the fallback for requests whose Request.MaxTokens is
+// 0. Why: the Messages API requires max_tokens on every request.
+const defaultMaxTokens = 4096
+
+// defaultCapabilities reflects the documented first-party API image limits:
+// 100 images per request (200K-context models), 10 MB per image, 8000 px per
+// side, and the four supported media types.
+func defaultCapabilities() llm.Capabilities {
+	return llm.Capabilities{
+		SupportsTools:      true,
+		SupportsStructured: true,
+		SupportsStreaming:  true,
+		MaxImagesPerReq:    100,
+		MaxImageBytes:      10 << 20,
+		MaxImageDimension:  8000,
+		AllowedImageMIME: []string{
+			"image/jpeg", "image/png", "image/gif", "image/webp",
+		},
+	}
+}
+
+// Provider is an llm.Provider backed by the Anthropic Messages API.
+//
+// Fields (all set by New and the Option functions):
+//   - name: registry name, returned by Name and used as the prefix of
+//     Response.Model and in error messages.
+//   - apiKey: sent as the x-api-key header; when empty, requests fail with
+//     a 401-shaped *llm.APIError before any network call.
+//   - baseURL: endpoint root; "/v1/messages" is appended per request.
+//   - client: HTTP client that executes every request.
+//   - caps: capabilities handed to each model unless a model option
+//     supplies its own.
+//   - maxTokens: fallback max_tokens passed to the wire-request builder.
+type Provider struct {
+	name      string
+	apiKey    string
+	baseURL   string
+	client    *http.Client
+	caps      llm.Capabilities
+	maxTokens int
+}
+
+// Option configures the provider at construction. New applies options in
+// the order given, so a later option overrides an earlier one that sets the
+// same field.
+type Option func(*Provider)
+
+// WithAPIKey supplies the API key directly instead of relying on the
+// ANTHROPIC_API_KEY environment default. An empty key leaves the
+// environment fallback in New in effect.
+func WithAPIKey(key string) Option {
+	apply := func(p *Provider) {
+		p.apiKey = key
+	}
+	return apply
+}
+
+// WithBaseURL targets an Anthropic-compatible endpoint rooted at u. Trailing
+// slashes are stripped here because "/v1/messages" is appended on every
+// request.
+func WithBaseURL(u string) Option {
+	root := strings.TrimRight(u, "/")
+	return func(p *Provider) {
+		p.baseURL = root
+	}
+}
+
+// WithHTTPClient replaces the HTTP client (timeouts, proxies, test doubles).
+//
+// A nil client is ignored and the previously configured client is kept.
+// Why: every request calls Do on this client, and calling Do on a nil
+// *http.Client panics, so a caller that forwards an unset client must not
+// be able to wipe out the default.
+func WithHTTPClient(c *http.Client) Option {
+	return func(p *Provider) {
+		if c == nil {
+			return
+		}
+		p.client = c
+	}
+}
+
+// WithName replaces the registry name. Why: an Anthropic-compatible endpoint
+// registered under its own name must report that name in Response.Model and
+// in errors, not "anthropic".
+func WithName(name string) Option {
+	apply := func(p *Provider) {
+		p.name = name
+	}
+	return apply
+}
+
+// WithDefaultCapabilities swaps the provider-wide default capabilities for
+// caps. Models created afterwards start from this set.
+func WithDefaultCapabilities(caps llm.Capabilities) Option {
+	apply := func(p *Provider) {
+		p.caps = caps
+	}
+	return apply
+}
+
+// WithDefaultMaxTokens overrides the max_tokens value used when
+// Request.MaxTokens is 0. Why: the Messages API rejects requests without
+// max_tokens, so the provider must always send something.
+//
+// A non-positive n is ignored and the existing fallback is kept. Otherwise
+// the fallback could itself become 0 and the guarantee above would be lost.
+func WithDefaultMaxTokens(n int) Option {
+	return func(p *Provider) {
+		if n <= 0 {
+			return
+		}
+		p.maxTokens = n
+	}
+}
+
+// New builds an Anthropic provider from package defaults, then applies opts
+// in order. It never fails: when neither WithAPIKey nor the
+// ANTHROPIC_API_KEY environment variable supplies a key, the problem
+// surfaces as a 401-style *llm.APIError at request time, not at
+// construction.
+func New(opts ...Option) *Provider {
+	p := new(Provider)
+	p.name = defaultName
+	p.baseURL = defaultBaseURL
+	p.client = http.DefaultClient
+	p.caps = defaultCapabilities()
+	p.maxTokens = defaultMaxTokens
+
+	for i := range opts {
+		opts[i](p)
+	}
+
+	// An explicitly configured key wins; the environment is only a fallback.
+	if p.apiKey != "" {
+		return p
+	}
+	p.apiKey = os.Getenv("ANTHROPIC_API_KEY")
+	return p
+}
+
+// Name implements llm.Provider by reporting the configured registry name.
+func (p *Provider) Name() string {
+	return p.name
+}
+
+// Model implements llm.Provider. The id is forwarded verbatim and is never
+// checked against a catalog. The model starts from the provider's default
+// capabilities; a capabilities override carried by opts replaces them
+// wholesale. The returned error is always nil.
+func (p *Provider) Model(id string, opts ...llm.ModelOption) (llm.Model, error) {
+	cfg := llm.ApplyModelOptions(opts)
+	m := &model{provider: p, id: id, caps: p.caps}
+	if override := cfg.Capabilities; override != nil {
+		m.caps = *override
+	}
+	return m, nil
+}
+
+// model is the llm.Model returned by Provider.Model. It pairs the owning
+// provider with one verbatim model id and the capabilities resolved for
+// that model when it was created.
+type model struct {
+	provider *Provider
+	id       string
+	caps     llm.Capabilities
+}
+
+// Capabilities implements llm.Model by returning the capabilities resolved
+// when the model was created.
+func (m *model) Capabilities() llm.Capabilities {
+	return m.caps
+}
+
+// fullName builds the "provider/model" identifier reported in
+// Response.Model and in capability errors.
+func (m *model) fullName() string {
+	return strings.Join([]string{m.provider.name, m.id}, "/")
+}
+
+// Generate implements llm.Model.
+func (m *model) Generate(ctx context.Context, req llm.Request, opts ...llm.Option) (*llm.Response, error) {
+	req = req.Apply(opts...)
+	if err := m.enforceCapabilities(req); err != nil {
+		return nil, err
+	}
+	httpResp, err := m.do(ctx, req, false)
+	if err != nil {
+		return nil, err
+	}
+	defer httpResp.Body.Close()
+	if httpResp.StatusCode/100 != 2 {
+		return nil, m.apiError(httpResp)
+	}
+	var wr wireResponse
+	if err := json.NewDecoder(httpResp.Body).Decode(&wr); err != nil {
+		return nil, fmt.Errorf("%s: decode response: %w", m.provider.name, err)
+	}
+	return m.toResponse(&wr), nil
+}
+
+// Stream implements llm.Model. On a 2xx status the response body is handed
+// to the stream, which then owns it. A non-2xx status is reported as an
+// error from Stream itself, before any events are delivered, and the body
+// is closed here.
+func (m *model) Stream(ctx context.Context, req llm.Request, opts ...llm.Option) (llm.Stream, error) {
+	req = req.Apply(opts...)
+	if capErr := m.enforceCapabilities(req); capErr != nil {
+		return nil, capErr
+	}
+
+	resp, err := m.do(ctx, req, true)
+	if err != nil {
+		return nil, err
+	}
+
+	if code := resp.StatusCode; code >= 200 && code <= 299 {
+		return newStream(m, resp.Body), nil
+	}
+
+	// Failure path: nothing downstream will read the body, so release it.
+	defer resp.Body.Close()
+	return nil, m.apiError(resp)
+}
+
+// enforceCapabilities is the honest backstop behind the media layer: it
+// rejects (rather than silently mutates) requests the target cannot serve.
+// Why: a separate media layer resizes/transcodes images BEFORE requests
+// reach the provider, so anything still out of bounds here is a real error.
+func (m *model) enforceCapabilities(req llm.Request) error {
+	images := 0
+	for _, msg := range req.Messages {
+		for _, part := range msg.Parts {
+			img, ok := part.(llm.ImagePart)
+			if !ok {
+				continue
+			}
+			images++
+			if !m.caps.SupportsImages() {
+				return fmt.Errorf("%w: %s does not accept image input", llm.ErrUnsupported, m.fullName())
+			}
+			if !m.caps.MIMEAllowed(img.MIME) {
+				return fmt.Errorf("%w: %s does not accept image MIME %q", llm.ErrUnsupported, m.fullName(), img.MIME)
+			}
+			if m.caps.MaxImageBytes > 0 && len(img.Data) > m.caps.MaxImageBytes {
+				return fmt.Errorf("%w: image of %d bytes exceeds %s limit of %d bytes",
+					llm.ErrUnsupported, len(img.Data), m.fullName(), m.caps.MaxImageBytes)
+			}
+		}
+	}
+	if m.caps.MaxImagesPerReq > 0 && images > m.caps.MaxImagesPerReq {
+		return fmt.Errorf("%w: request carries %d images, %s allows at most %d",
+			llm.ErrUnsupported, images, m.fullName(), m.caps.MaxImagesPerReq)
+	}
+	return nil
+}
+
+// do assembles and sends one Messages API call and returns the raw HTTP
+// response with its body still open, for the caller to close. Transport
+// errors are wrapped with context but NOT converted to *llm.APIError, so
+// llm.Classify still sees the underlying net.Error / syscall errno.
+func (m *model) do(ctx context.Context, req llm.Request, streaming bool) (*http.Response, error) {
+	prov := m.provider
+
+	// Why request-time, not construction-time: New never fails by
+	// convention, and a 401-shaped APIError classifies permanent so chains
+	// fail fast past a misconfigured target.
+	if prov.apiKey == "" {
+		missingKey := &llm.APIError{
+			Provider: prov.name,
+			Model:    m.id,
+			Status:   http.StatusUnauthorized,
+			Code:     "authentication_error",
+			Message:  "no API key configured: set ANTHROPIC_API_KEY or use WithAPIKey",
+		}
+		return nil, missingKey
+	}
+
+	payload, err := json.Marshal(buildWireRequest(m.id, req, prov.maxTokens, streaming))
+	if err != nil {
+		return nil, fmt.Errorf("%s: encode request: %w", prov.name, err)
+	}
+
+	endpoint := prov.baseURL + "/v1/messages"
+	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(payload))
+	if err != nil {
+		return nil, fmt.Errorf("%s: build request: %w", prov.name, err)
+	}
+
+	headers := [][2]string{
+		{"x-api-key", prov.apiKey},
+		{"anthropic-version", apiVersion},
+		{"content-type", "application/json"},
+	}
+	if streaming {
+		headers = append(headers, [2]string{"accept", "text/event-stream"})
+	}
+	for _, kv := range headers {
+		httpReq.Header.Set(kv[0], kv[1])
+	}
+
+	httpResp, err := prov.client.Do(httpReq)
+	if err != nil {
+		return nil, fmt.Errorf("%s: do request: %w", prov.name, err)
+	}
+	return httpResp, nil
+}
+
+// apiError converts a non-2xx response into *llm.APIError. Provider, Model
+// and Status are always set. Code and Message come from the documented
+// {"type":"error","error":{...}} body when it parses and carries a
+// non-empty error type; otherwise Message falls back to the trimmed raw
+// body text. At most 1 MiB of the body is read. The caller remains
+// responsible for closing resp.Body.
+func (m *model) apiError(resp *http.Response) error {
+	apiErr := &llm.APIError{
+		Provider: m.provider.name,
+		Model:    m.id,
+		Status:   resp.StatusCode,
+	}
+
+	// Why the read error is deliberately ignored: io.ReadAll returns the
+	// bytes it read before failing, and a truncated error body is still
+	// more useful to the caller than an empty Message. The HTTP status
+	// already identifies the failure.
+	body, _ := io.ReadAll(io.LimitReader(resp.Body, 1<<20))
+
+	var we wireErrorEnvelope
+	if json.Unmarshal(body, &we) == nil && we.Error.Type != "" {
+		apiErr.Code = we.Error.Type
+		apiErr.Message = we.Error.Message
+		return apiErr
+	}
+	apiErr.Message = strings.TrimSpace(string(body))
+	return apiErr
+}
+
+// toResponse translates a decoded wire response into the canonical
+// llm.Response. Text blocks become TextParts and tool_use blocks become
+// ToolCalls. Thinking, redacted_thinking, server-tool blocks, and any
+// future block types are tolerated and skipped — they are not part of the
+// canonical content vocabulary. The wire response itself is kept in Raw.
+func (m *model) toResponse(wr *wireResponse) *llm.Response {
+	out := new(llm.Response)
+	out.FinishReason = mapStopReason(wr.StopReason)
+	out.Usage = wr.Usage.toUsage()
+	out.Model = m.fullName()
+	out.Raw = wr
+
+	for _, blk := range wr.Content {
+		if blk.Type == "text" {
+			out.Parts = append(out.Parts, llm.TextPart{Text: blk.Text})
+			continue
+		}
+		if blk.Type != "tool_use" {
+			// Not part of the canonical vocabulary: skip, do not surface.
+			continue
+		}
+
+		// A tool call without input still gets a valid empty JSON object.
+		input := blk.Input
+		if len(input) == 0 {
+			input = json.RawMessage("{}")
+		}
+		out.ToolCalls = append(out.ToolCalls, llm.ToolCall{
+			ID:        blk.ID,
+			Name:      blk.Name,
+			Arguments: input,
+		})
+	}
+	return out
+}