feat: OpenAI, Anthropic, and native-Ollama providers + media pipeline

Phase 3: - provider/openai: Chat Completions for OpenAI + compat endpoints (SSE streaming with by-index tool-call assembly, response_format json_schema, legacy max_tokens option, reasoning_effort) - provider/anthropic: Messages API (tool_use/tool_result, GA structured output via output_config.format, full SSE event parser, 529 transient) - provider/ollama: one native /api/chat client behind the ollama, ollama-cloud, and foreman built-ins (presets; NDJSON streaming tolerant of foreman's buffered single-object responses; object tool arguments; format-schema structured output; think mapping) - media/: capability normalization (sniff, downscale, transcode, byte ladder, ErrUnsupported), wired into the chain executor per target with penalty-free advance past incapable elements - registry: real provider + scheme wiring, WithHTTPClient option, required env-foreman TLS chat round-trip test - ADR-0009 multimodal strategy, ADR-0010 tools/structured mapping; README matrix + CLAUDE.md synced Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-10 12:58:08 +02:00
parent 323558ed72
commit 043249e0e1
31 changed files with 6194 additions and 74 deletions
@@ -0,0 +1,299 @@
+package anthropic
+
+import (
+	"encoding/base64"
+	"encoding/json"
+	"strings"
+
+	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
+)
+
+// Wire types mirror the Messages API JSON shapes (June 2026 docs). Only the
+// fields majordomo uses are modeled; unknown response fields are ignored by
+// encoding/json.
+
+// wireRequest is the top-level JSON body of a Messages API request, as
+// assembled by buildWireRequest.
+//
+// Model, MaxTokens, and Messages are always serialized; every other field is
+// tagged omitempty and disappears from the payload when unset. Temperature
+// and TopP are pointers so that nil (omit) stays distinguishable from an
+// explicit zero. System holds the folded system prompt; system-role messages
+// are never placed in Messages.
+type wireRequest struct {
+	Model         string            `json:"model"`
+	MaxTokens     int               `json:"max_tokens"`
+	System        string            `json:"system,omitempty"`
+	Messages      []wireMessage     `json:"messages"`
+	Stream        bool              `json:"stream,omitempty"`
+	Tools         []wireTool        `json:"tools,omitempty"`
+	ToolChoice    *wireToolChoice   `json:"tool_choice,omitempty"`
+	Temperature   *float64          `json:"temperature,omitempty"`
+	TopP          *float64          `json:"top_p,omitempty"`
+	StopSequences []string          `json:"stop_sequences,omitempty"`
+	OutputConfig  *wireOutputConfig `json:"output_config,omitempty"`
+}
+
+// wireMessage is one conversation turn in a request: a role plus an ordered
+// list of content blocks. toWireMessages only ever emits the roles "user" and
+// "assistant".
+type wireMessage struct {
+	Role    string      `json:"role"`
+	Content []wireBlock `json:"content"`
+}
+
+// wireBlock is a request-side content block. Type selects which shape is in
+// use — text, image, tool_use, or tool_result — and only that shape's fields
+// are populated. The remaining fields stay at their zero value and, being
+// tagged omitempty, are dropped from the serialized block.
+//
+// Note that omitempty cannot tell "unused" from "populated but empty": a text
+// block whose Text is "" serializes without a text key at all, and a
+// tool_result with empty Content or IsError == false omits those keys too.
+type wireBlock struct {
+	Type string `json:"type"`
+
+	// text
+	Text string `json:"text,omitempty"`
+
+	// image
+	Source *wireImageSource `json:"source,omitempty"`
+
+	// tool_use
+	ID    string          `json:"id,omitempty"`
+	Name  string          `json:"name,omitempty"`
+	Input json.RawMessage `json:"input,omitempty"`
+
+	// tool_result
+	ToolUseID string `json:"tool_use_id,omitempty"`
+	Content   string `json:"content,omitempty"`
+	IsError   bool   `json:"is_error,omitempty"`
+}
+
+// wireImageSource is the source object of an image block. toWireBlocks fills
+// it with Type "base64", the image part's MIME type, and the standard-base64
+// encoding of the raw image bytes.
+type wireImageSource struct {
+	Type      string `json:"type"`
+	MediaType string `json:"media_type"`
+	Data      string `json:"data"`
+}
+
+// wireTool is one tool definition in the request's tools array. InputSchema
+// carries no omitempty and is therefore always serialized; toWireTools
+// substitutes an empty object schema when the caller supplied none.
+type wireTool struct {
+	Name        string          `json:"name"`
+	Description string          `json:"description,omitempty"`
+	InputSchema json.RawMessage `json:"input_schema"`
+}
+
+// wireToolChoice is the request's tool_choice object. toWireToolChoice sets
+// Type to "auto", "any", "none", or "tool"; Name is filled in only for "tool",
+// where it names the single tool being forced.
+type wireToolChoice struct {
+	Type string `json:"type"`
+	Name string `json:"name,omitempty"`
+}
+
+// wireOutputConfig is the request's output_config object. buildWireRequest
+// attaches it only when the canonical request carries a schema, in which case
+// Format describes the structured-output schema.
+type wireOutputConfig struct {
+	Format *wireOutputFormat `json:"format,omitempty"`
+}
+
+// wireOutputFormat is the format entry of output_config: a type tag
+// (buildWireRequest always sends "json_schema") plus the schema itself,
+// passed through as raw JSON.
+type wireOutputFormat struct {
+	Type   string          `json:"type"`
+	Schema json.RawMessage `json:"schema"`
+}
+
+// wireResponse is the decoded body of a Messages API response: the message's
+// identifying fields (ID, Type, Role, Model), the content blocks the model
+// produced, the stop reason (see mapStopReason), and token usage.
+type wireResponse struct {
+	ID         string          `json:"id"`
+	Type       string          `json:"type"`
+	Role       string          `json:"role"`
+	Model      string          `json:"model"`
+	Content    []wireRespBlock `json:"content"`
+	StopReason string          `json:"stop_reason"`
+	Usage      wireUsage       `json:"usage"`
+}
+
+// wireRespBlock is a response-side content block. The fields form a union
+// keyed by Type; judging by the request-side wireBlock, Text belongs to text
+// blocks and ID/Name/Input to tool_use blocks — NOTE(review): confirm against
+// the code that consumes decoded responses. Input is kept as raw JSON rather
+// than being decoded here.
+type wireRespBlock struct {
+	Type  string          `json:"type"`
+	Text  string          `json:"text"`
+	ID    string          `json:"id"`
+	Name  string          `json:"name"`
+	Input json.RawMessage `json:"input"`
+}
+
+// wireUsage is the usage object reported by the API: input and output token
+// counts plus the two cache-related input counters. toUsage folds these into
+// the canonical llm.Usage.
+type wireUsage struct {
+	InputTokens              int `json:"input_tokens"`
+	OutputTokens             int `json:"output_tokens"`
+	CacheCreationInputTokens int `json:"cache_creation_input_tokens"`
+	CacheReadInputTokens     int `json:"cache_read_input_tokens"`
+}
+
+// toUsage maps API token accounting onto the canonical Usage. Why the sum:
+// the API's input_tokens counts only tokens after the last cache breakpoint;
+// real total input is input + cache_creation + cache_read.
+func (u wireUsage) toUsage() llm.Usage {
+	return llm.Usage{
+		InputTokens:  u.InputTokens + u.CacheCreationInputTokens + u.CacheReadInputTokens,
+		OutputTokens: u.OutputTokens,
+	}
+}
+
+// wireErrorEnvelope is the JSON shape of an API error body: an outer type tag
+// wrapping an error object that carries the error's own type and its message.
+type wireErrorEnvelope struct {
+	Type  string `json:"type"`
+	Error struct {
+		Type    string `json:"type"`
+		Message string `json:"message"`
+	} `json:"error"`
+}
+
+// buildWireRequest translates the canonical request into the Messages API
+// shape.
+//
+// modelID is sent verbatim as the model field. defaultMax is the max_tokens
+// value used when req.MaxTokens is 0. stream sets the request's stream flag.
+// The returned value is ready to be JSON-encoded as the request body.
+//
+// Request.ReasoningEffort is intentionally ignored: the current Messages API
+// has no low/medium/high reasoning knob — thinking is adaptive on current
+// models, and the legacy budget/disable parameters 400 on them. The llm
+// contract says providers ignore ReasoningEffort where no mapping exists.
+//
+// Request.SchemaName is likewise ignored: output_config.format takes a bare
+// schema with no name field.
+func buildWireRequest(modelID string, req llm.Request, defaultMax int, stream bool) wireRequest {
+	maxTokens := req.MaxTokens
+	if maxTokens == 0 {
+		// max_tokens is required by the API; 0 means "provider default".
+		maxTokens = defaultMax
+	}
+
+	wr := wireRequest{
+		Model:         modelID,
+		MaxTokens:     maxTokens,
+		System:        foldSystem(req),
+		Messages:      toWireMessages(req.Messages),
+		Stream:        stream,
+		Tools:         toWireTools(req.Tools),
+		ToolChoice:    toWireToolChoice(req.ToolChoice),
+		Temperature:   req.Temperature,
+		TopP:          req.TopP,
+		StopSequences: req.StopSequences,
+	}
+	// Why len and not a nil check: an empty but non-nil schema would be
+	// attached as a zero-byte json.RawMessage, which is not valid JSON and
+	// makes encoding the request fail. Treat it as "no schema", the same way
+	// tool parameters and tool-call arguments are tested with len elsewhere in
+	// this file.
+	if len(req.Schema) > 0 {
+		wr.OutputConfig = &wireOutputConfig{Format: &wireOutputFormat{
+			Type:   "json_schema",
+			Schema: req.Schema,
+		}}
+	}
+	return wr
+}
+
+// foldSystem joins Request.System with the text of every RoleSystem message
+// (System field first, original order, "\n\n" separators). Why: the API
+// takes the system prompt as a top-level field and rejects system roles
+// inside messages, so canonical RoleSystem messages must fold in here.
+func foldSystem(req llm.Request) string {
+	parts := make([]string, 0, 2)
+	if req.System != "" {
+		parts = append(parts, req.System)
+	}
+	for _, msg := range req.Messages {
+		if msg.Role != llm.RoleSystem {
+			continue
+		}
+		if text := msg.Text(); text != "" {
+			parts = append(parts, text)
+		}
+	}
+	return strings.Join(parts, "\n\n")
+}
+
+// toWireMessages converts the canonical conversation into Messages API turns,
+// preserving order.
+//
+//   - RoleSystem messages are dropped here; foldSystem lifts their text into
+//     the top-level system field.
+//   - RoleTool messages become a single "user" turn holding one tool_result
+//     block per result.
+//   - RoleAssistant messages become an "assistant" turn: content parts first,
+//     then one tool_use block per tool call.
+//   - Everything else (RoleUser and unrecognized roles) is sent as a "user"
+//     turn built from the message's parts.
+func toWireMessages(msgs []llm.Message) []wireMessage {
+	turns := make([]wireMessage, 0, len(msgs))
+	for _, m := range msgs {
+		if m.Role == llm.RoleSystem {
+			continue
+		}
+
+		// "user" is the default; only assistant turns override it below.
+		turn := wireMessage{Role: "user"}
+		switch m.Role {
+		case llm.RoleTool:
+			turn.Content = make([]wireBlock, 0, len(m.ToolResults))
+			for _, r := range m.ToolResults {
+				turn.Content = append(turn.Content, wireBlock{
+					Type:      "tool_result",
+					ToolUseID: r.ID,
+					Content:   r.Content,
+					IsError:   r.IsError,
+				})
+			}
+
+		case llm.RoleAssistant:
+			turn.Role = "assistant"
+			turn.Content = toWireBlocks(m.Parts)
+			for _, tc := range m.ToolCalls {
+				input := tc.Arguments
+				if len(input) == 0 {
+					// The API requires input to be a JSON object.
+					input = json.RawMessage("{}")
+				}
+				turn.Content = append(turn.Content, wireBlock{
+					Type:  "tool_use",
+					ID:    tc.ID,
+					Name:  tc.Name,
+					Input: input,
+				})
+			}
+
+		default:
+			turn.Content = toWireBlocks(m.Parts)
+		}
+		turns = append(turns, turn)
+	}
+	return turns
+}
+
+// toWireBlocks converts canonical message parts into request content blocks,
+// preserving order. Text parts become text blocks and image parts become
+// base64 image blocks; part types other than those two are skipped.
+//
+// Text parts whose Text is empty are skipped as well. Why: wireBlock.Text is
+// tagged omitempty, so an empty text part would serialize as {"type":"text"}
+// with no text key at all, and the API rejects such a block.
+func toWireBlocks(parts []llm.Part) []wireBlock {
+	blocks := make([]wireBlock, 0, len(parts))
+	for _, part := range parts {
+		switch p := part.(type) {
+		case llm.TextPart:
+			if p.Text == "" {
+				continue
+			}
+			blocks = append(blocks, wireBlock{Type: "text", Text: p.Text})
+		case llm.ImagePart:
+			blocks = append(blocks, wireBlock{Type: "image", Source: &wireImageSource{
+				Type:      "base64",
+				MediaType: p.MIME,
+				Data:      base64.StdEncoding.EncodeToString(p.Data),
+			}})
+		}
+	}
+	return blocks
+}
+
+// toWireTools converts canonical tool definitions into the request's tools
+// array, one entry per tool in the same order. It returns nil for an empty
+// input so the omitempty tools field is left out of the request.
+func toWireTools(tools []llm.Tool) []wireTool {
+	n := len(tools)
+	if n == 0 {
+		return nil
+	}
+	wire := make([]wireTool, n)
+	for i, tool := range tools {
+		wire[i] = wireTool{
+			Name:        tool.Name,
+			Description: tool.Description,
+			InputSchema: tool.Parameters,
+		}
+		if len(wire[i].InputSchema) == 0 {
+			// Why: input_schema is required by the API; a tool with no
+			// arguments still needs an (empty) object schema.
+			wire[i].InputSchema = json.RawMessage(`{"type":"object","properties":{}}`)
+		}
+	}
+	return wire
+}
+
+// toWireToolChoice translates the canonical tool-choice policy into the
+// request's tool_choice object.
+//
+// "" returns nil so the field is omitted (API default is auto). "auto" and
+// "none" pass through under the same name, "required" becomes the API's
+// "any", and every other value is taken as the name of the one tool the model
+// must call.
+func toWireToolChoice(choice string) *wireToolChoice {
+	if choice == "" {
+		return nil
+	}
+
+	tc := &wireToolChoice{}
+	switch choice {
+	case "auto", "none":
+		tc.Type = choice
+	case "required":
+		tc.Type = "any"
+	default:
+		tc.Type = "tool"
+		tc.Name = choice
+	}
+	return tc
+}
+
+// mapStopReason maps the API stop_reason onto the canonical FinishReason.
+func mapStopReason(stop string) llm.FinishReason {
+	switch stop {
+	case "end_turn", "stop_sequence":
+		return llm.FinishStop
+	case "max_tokens", "model_context_window_exceeded":
+		return llm.FinishLength
+	case "tool_use":
+		return llm.FinishToolCalls
+	case "refusal":
+		return llm.FinishContentFilter
+	default:
+		// pause_turn and any future provider-specific reasons.
+		return llm.FinishOther
+	}
+}