feat: OpenAI, Anthropic, and native-Ollama providers + media pipeline

Phase 3: - provider/openai: Chat Completions for OpenAI + compat endpoints (SSE streaming with by-index tool-call assembly, response_format json_schema, legacy max_tokens option, reasoning_effort) - provider/anthropic: Messages API (tool_use/tool_result, GA structured output via output_config.format, full SSE event parser, 529 transient) - provider/ollama: one native /api/chat client behind the ollama, ollama-cloud, and foreman built-ins (presets; NDJSON streaming tolerant of foreman's buffered single-object responses; object tool arguments; format-schema structured output; think mapping) - media/: capability normalization (sniff, downscale, transcode, byte ladder, ErrUnsupported), wired into the chain executor per target with penalty-free advance past incapable elements - registry: real provider + scheme wiring, WithHTTPClient option, required env-foreman TLS chat round-trip test - ADR-0009 multimodal strategy, ADR-0010 tools/structured mapping; README matrix + CLAUDE.md synced Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-10 12:58:08 +02:00
parent 323558ed72
commit 043249e0e1
31 changed files with 6194 additions and 74 deletions
@@ -0,0 +1,343 @@
+package ollama
+
+import (
+	"bytes"
+	"context"
+	"encoding/base64"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"strconv"
+	"strings"
+
+	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
+)
+
+// ---- wire types (field names per ollama api/types.go) ----
+
+// chatRequest is the JSON body that do marshals and POSTs to /api/chat.
+// buildRequest fills it from the canonical llm.Request:
+//   - Model is the bare model id (without the provider prefix that
+//     qualified adds).
+//   - Messages is the conversation, led by a single folded system message
+//     when any system text is present.
+//   - Tools is left empty when the request's tool choice is "none".
+//   - Format carries the request's JSON schema when one is set.
+//   - Options holds the sampling settings buildRequest maps (temperature,
+//     top_p, num_predict, stop) and is omitted when none are set.
+type chatRequest struct {
+	Model    string          `json:"model"`
+	Messages []chatMessage   `json:"messages"`
+	Tools    []toolDef       `json:"tools,omitempty"`
+	Format   json.RawMessage `json:"format,omitempty"`
+	Options  map[string]any  `json:"options,omitempty"`
+	// Stream has no omitempty on purpose: the server default is true, so
+	// Generate must send an explicit false.
+	Stream bool `json:"stream"`
+	// Think is bool-or-string on the wire ("low"/"medium"/"high" or a bool).
+	// buildRequest only ever emits the quoted string form.
+	Think json.RawMessage `json:"think,omitempty"`
+}
+
+// chatMessage is one entry of chatRequest.Messages. buildRequest emits it
+// with role "system" for the folded system prompt, role "tool" for each tool
+// result (Content is the result text, prefixed with "ERROR: " for failed
+// calls, and ToolName names the tool), and the canonical role string for
+// every other message. Images holds standard-base64 encodings of the
+// message's image parts; ToolCalls echoes the calls attached to the message.
+type chatMessage struct {
+	Role      string     `json:"role"`
+	Content   string     `json:"content"`
+	Images    []string   `json:"images,omitempty"` // raw base64, no data: prefix
+	ToolCalls []toolCall `json:"tool_calls,omitempty"`
+	ToolName  string     `json:"tool_name,omitempty"` // on role:"tool" results
+}
+
+// toolDef is one entry of chatRequest.Tools: a tool the model may call.
+// buildRequest always sets Type to "function".
+type toolDef struct {
+	Type     string      `json:"type"`
+	Function toolDefFunc `json:"function"`
+}
+
+// toolDefFunc describes the function behind a toolDef. Parameters is the
+// tool's raw JSON parameter schema; buildRequest substitutes an empty object
+// schema when the canonical tool declares none.
+type toolDefFunc struct {
+	Name        string          `json:"name"`
+	Description string          `json:"description,omitempty"`
+	Parameters  json.RawMessage `json:"parameters,omitempty"`
+}
+
+// toolCall is a tool invocation, used both in requests (assistant history
+// built by buildRequest) and in responses (respMessage.ToolCalls). ID may be
+// absent on the wire; convertToolCalls synthesizes one when it is empty.
+type toolCall struct {
+	ID       string       `json:"id,omitempty"`
+	Function toolCallFunc `json:"function"`
+}
+
+// toolCallFunc names the function a toolCall invokes and carries its
+// arguments. Index is decoded from the wire but is not read by the code in
+// this file and is never set by buildRequest.
+type toolCallFunc struct {
+	Index int    `json:"index,omitempty"`
+	Name  string `json:"name"`
+	// Arguments is a JSON OBJECT on the wire (unlike OpenAI's string).
+	Arguments json.RawMessage `json:"arguments"`
+}
+
+// chatResponse is the JSON object Generate decodes from a successful
+// /api/chat reply. toResponse maps PromptEvalCount and EvalCount to the
+// canonical input and output token counts, and DoneReason (together with the
+// presence of tool calls) to the canonical finish reason.
+type chatResponse struct {
+	Model           string      `json:"model"`
+	Message         respMessage `json:"message"`
+	Done            bool        `json:"done"`
+	DoneReason      string      `json:"done_reason"`
+	PromptEvalCount int         `json:"prompt_eval_count"`
+	EvalCount       int         `json:"eval_count"`
+}
+
+// respMessage is the message payload of a chatResponse. toResponse turns a
+// non-empty Content into a text part and converts ToolCalls via
+// convertToolCalls; Thinking is decoded but not surfaced by toResponse.
+type respMessage struct {
+	Role      string     `json:"role"`
+	Content   string     `json:"content"`
+	Thinking  string     `json:"thinking"`
+	ToolCalls []toolCall `json:"tool_calls"`
+}
+
+// errorBody is the JSON shape do tries (best-effort) to decode from a
+// non-2xx response body in order to extract a human-readable message.
+type errorBody struct {
+	Error string `json:"error"`
+}
+
+// ---- model ----
+
+// model is one chat model served through a Provider. provider supplies the
+// HTTP client, base URL, token and name used by do; id is the model name
+// sent on the wire; caps is the capability set that Capabilities returns and
+// enforceCapabilities checks requests against.
+type model struct {
+	provider *Provider
+	id       string
+	caps     llm.Capabilities
+}
+
+// Capabilities returns the capability set recorded for this model.
+func (m *model) Capabilities() llm.Capabilities {
+	return m.caps
+}
+
+// qualified returns the model id prefixed with its provider's name, in the
+// form "<provider>/<id>". It is used for error messages and for the Model
+// field of canonical responses.
+func (m *model) qualified() string {
+	prov := m.provider
+	return prov.name + "/" + m.id
+}
+
+// enforceCapabilities is the last-line check that a request's images fit
+// this model (the media layer is expected to have normalized the request
+// already; see ADR-0009).
+//
+// Images are examined in message order. The first image that violates a
+// rule produces an error wrapping llm.ErrUnsupported: the model takes no
+// images at all, the image's MIME type is not allowed, or the image exceeds
+// MaxImageBytes (when that limit is positive). After all images pass
+// individually, the total count is checked against MaxImagesPerReq (when
+// positive). A request with no images always passes.
+func (m *model) enforceCapabilities(req llm.Request) error {
+	seen := 0
+	for i := range req.Messages {
+		for _, part := range req.Messages[i].Parts {
+			img, isImage := part.(llm.ImagePart)
+			if !isImage {
+				continue
+			}
+			seen++
+			// Cases are evaluated top to bottom, so the precedence of the
+			// three per-image rules is: support, MIME type, byte size.
+			switch {
+			case !m.caps.SupportsImages():
+				return fmt.Errorf("%w: %s does not accept image input", llm.ErrUnsupported, m.qualified())
+			case !m.caps.MIMEAllowed(img.MIME):
+				return fmt.Errorf("%w: %s does not accept %s images", llm.ErrUnsupported, m.qualified(), img.MIME)
+			case m.caps.MaxImageBytes > 0 && len(img.Data) > m.caps.MaxImageBytes:
+				return fmt.Errorf("%w: image of %d bytes exceeds %s limit of %d",
+					llm.ErrUnsupported, len(img.Data), m.qualified(), m.caps.MaxImageBytes)
+			}
+		}
+	}
+
+	// A non-positive MaxImagesPerReq means "no per-request limit".
+	if limit := m.caps.MaxImagesPerReq; limit > 0 && seen > limit {
+		return fmt.Errorf("%w: %d images exceed %s limit of %d",
+			llm.ErrUnsupported, seen, m.qualified(), limit)
+	}
+	return nil
+}
+
+// buildRequest translates a canonical llm.Request into the /api/chat wire
+// request for this model. stream is copied verbatim into the Stream field.
+//
+// The only error it reports is a ReasoningEffort value other than "",
+// "low", "medium" or "high".
+func (m *model) buildRequest(req llm.Request, stream bool) (*chatRequest, error) {
+	// Single pass over the conversation: system text is collected on the
+	// side so it can be emitted as one leading message, while every other
+	// message is appended to turns in its original order.
+	var (
+		sysTexts []string
+		turns    []chatMessage
+	)
+	if req.System != "" {
+		// The dedicated System field always comes before RoleSystem messages.
+		sysTexts = append(sysTexts, req.System)
+	}
+	for _, msg := range req.Messages {
+		if msg.Role == llm.RoleSystem {
+			if text := msg.Text(); text != "" {
+				sysTexts = append(sysTexts, text)
+			}
+			continue
+		}
+		if msg.Role == llm.RoleTool {
+			// Each tool result becomes its own role:"tool" message.
+			for _, res := range msg.ToolResults {
+				body := res.Content
+				if res.IsError {
+					body = "ERROR: " + body
+				}
+				turns = append(turns, chatMessage{Role: "tool", Content: body, ToolName: res.Name})
+			}
+			continue
+		}
+
+		turn := chatMessage{Role: string(msg.Role), Content: msg.Text()}
+		for _, part := range msg.Parts {
+			img, isImage := part.(llm.ImagePart)
+			if !isImage {
+				continue
+			}
+			turn.Images = append(turn.Images, base64.StdEncoding.EncodeToString(img.Data))
+		}
+		for _, call := range msg.ToolCalls {
+			// Arguments must be an object on the wire; default to an empty one.
+			callArgs := call.Arguments
+			if len(callArgs) == 0 {
+				callArgs = json.RawMessage("{}")
+			}
+			turn.ToolCalls = append(turn.ToolCalls, toolCall{
+				ID:       call.ID,
+				Function: toolCallFunc{Name: call.Name, Arguments: callArgs},
+			})
+		}
+		turns = append(turns, turn)
+	}
+
+	wire := &chatRequest{Model: m.id, Stream: stream}
+	if len(sysTexts) > 0 {
+		wire.Messages = append(wire.Messages, chatMessage{
+			Role:    "system",
+			Content: strings.Join(sysTexts, "\n\n"),
+		})
+	}
+	wire.Messages = append(wire.Messages, turns...)
+
+	// Ollama has no tool_choice field. "none" is expressed by sending no
+	// tools at all; "required" and named choices have no wire equivalent and
+	// are ignored on a best-effort basis (documented in the README support
+	// matrix).
+	sendTools := req.ToolChoice != "none"
+	if sendTools {
+		for _, tool := range req.Tools {
+			schema := tool.Parameters
+			if len(schema) == 0 {
+				schema = json.RawMessage(`{"type":"object","properties":{}}`)
+			}
+			wire.Tools = append(wire.Tools, toolDef{
+				Type: "function",
+				Function: toolDefFunc{
+					Name:        tool.Name,
+					Description: tool.Description,
+					Parameters:  schema,
+				},
+			})
+		}
+	}
+
+	// Structured output: the schema is passed through as the format field.
+	if len(req.Schema) > 0 {
+		wire.Format = req.Schema
+	}
+
+	// Sampling options; the map is attached only when something was set so
+	// that "options" is omitted from the JSON otherwise.
+	sampling := map[string]any{}
+	if req.Temperature != nil {
+		sampling["temperature"] = *req.Temperature
+	}
+	if req.TopP != nil {
+		sampling["top_p"] = *req.TopP
+	}
+	if req.MaxTokens > 0 {
+		sampling["num_predict"] = req.MaxTokens
+	}
+	if len(req.StopSequences) > 0 {
+		sampling["stop"] = req.StopSequences
+	}
+	if len(sampling) > 0 {
+		wire.Options = sampling
+	}
+
+	// Reasoning effort maps onto "think" as a JSON string.
+	switch effort := req.ReasoningEffort; effort {
+	case "low", "medium", "high":
+		wire.Think = json.RawMessage(strconv.Quote(effort))
+	case "":
+		// Not requested: leave think unset.
+	default:
+		return nil, fmt.Errorf("ollama: invalid reasoning effort %q (want low/medium/high)", effort)
+	}
+
+	return wire, nil
+}
+
+// do sends wireReq as a JSON POST to the provider's /api/chat endpoint.
+//
+// On a 2xx status it returns the still-open *http.Response; the caller owns
+// the body and must close it. On any other status the body is closed here
+// and an *llm.APIError carrying the status code and the server's message is
+// returned. Readiness, encoding, request-construction and transport
+// failures are returned as wrapped errors.
+func (m *model) do(ctx context.Context, wireReq *chatRequest) (*http.Response, error) {
+	prov := m.provider
+	if err := prov.checkReady(); err != nil {
+		return nil, err
+	}
+
+	payload, err := json.Marshal(wireReq)
+	if err != nil {
+		return nil, fmt.Errorf("ollama: encode request: %w", err)
+	}
+
+	endpoint := prov.baseURL + "/api/chat"
+	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(payload))
+	if err != nil {
+		return nil, fmt.Errorf("ollama: build request: %w", err)
+	}
+	httpReq.Header.Set("Content-Type", "application/json")
+	// The bearer token is optional and only sent when one is configured.
+	if tok := prov.token; tok != "" {
+		httpReq.Header.Set("Authorization", "Bearer "+tok)
+	}
+
+	resp, err := prov.client.Do(httpReq)
+	if err != nil {
+		return nil, fmt.Errorf("ollama %s: do request: %w", m.qualified(), err)
+	}
+	if code := resp.StatusCode; code >= 200 && code < 300 {
+		return resp, nil
+	}
+
+	// Failure path: the response is consumed and closed here.
+	defer resp.Body.Close()
+
+	// Read at most 8 KiB of the error body. Read and decode failures are
+	// deliberately ignored: the message is best-effort, and the status code
+	// alone already classifies the error.
+	raw, _ := io.ReadAll(io.LimitReader(resp.Body, 8<<10))
+	var decoded errorBody
+	_ = json.Unmarshal(raw, &decoded)
+
+	// Prefer the structured "error" field; otherwise use the raw body text.
+	detail := decoded.Error
+	if detail == "" {
+		detail = strings.TrimSpace(string(raw))
+	}
+	return nil, &llm.APIError{
+		Provider: prov.name,
+		Model:    m.id,
+		Status:   resp.StatusCode,
+		Message:  detail,
+	}
+}
+
+// Generate implements llm.Model.
+func (m *model) Generate(ctx context.Context, req llm.Request, opts ...llm.Option) (*llm.Response, error) {
+	req = req.Apply(opts...)
+	if err := m.enforceCapabilities(req); err != nil {
+		return nil, err
+	}
+	wireReq, err := m.buildRequest(req, false)
+	if err != nil {
+		return nil, err
+	}
+	resp, err := m.do(ctx, wireReq)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+
+	var cr chatResponse
+	if err := json.NewDecoder(resp.Body).Decode(&cr); err != nil {
+		return nil, fmt.Errorf("ollama %s: decode response: %w", m.qualified(), err)
+	}
+	return m.toResponse(&cr), nil
+}
+
+// toResponse builds the canonical response from a final wire chunk. The
+// model name is reported in qualified form, token counts come from the
+// chunk's eval counters, and cr itself is kept as Raw. A text part is added
+// only when the message has non-empty content.
+func (m *model) toResponse(cr *chatResponse) *llm.Response {
+	calls := convertToolCalls(cr.Message.ToolCalls)
+
+	resp := &llm.Response{
+		Model: m.qualified(),
+		Usage: llm.Usage{
+			InputTokens:  cr.PromptEvalCount,
+			OutputTokens: cr.EvalCount,
+		},
+		Raw:       cr,
+		ToolCalls: calls,
+		// The presence of tool calls takes precedence over done_reason.
+		FinishReason: finishReason(cr.DoneReason, len(calls) > 0),
+	}
+	if text := cr.Message.Content; text != "" {
+		resp.Parts = append(resp.Parts, llm.Text(text))
+	}
+	return resp
+}
+
+// convertToolCalls maps wire tool calls onto canonical llm.ToolCall values.
+// It returns nil when calls is empty.
+//
+// Two normalizations are applied to each call:
+//   - A missing id is replaced by "call_<position>". Ids are optional in
+//     Ollama's shape but required by our agent loop to match results to
+//     calls.
+//   - Arguments that are absent, whitespace-only or the JSON literal null
+//     are replaced by the empty object "{}", so callers always receive a
+//     JSON object. The null case matters because encoding/json stores the
+//     literal bytes "null" in a json.RawMessage field rather than leaving it
+//     empty, so a plain length check does not catch it.
+func convertToolCalls(calls []toolCall) []llm.ToolCall {
+	if len(calls) == 0 {
+		return nil
+	}
+	out := make([]llm.ToolCall, 0, len(calls))
+	for i, tc := range calls {
+		id := tc.ID
+		if id == "" {
+			id = "call_" + strconv.Itoa(i)
+		}
+		args := tc.Function.Arguments
+		if trimmed := bytes.TrimSpace(args); len(trimmed) == 0 || string(trimmed) == "null" {
+			args = json.RawMessage("{}")
+		}
+		out = append(out, llm.ToolCall{ID: id, Name: tc.Function.Name, Arguments: args})
+	}
+	return out
+}
+
+// finishReason maps Ollama's done_reason onto the canonical finish reason.
+// Tool calls win over whatever done_reason says; otherwise "stop" and the
+// empty string mean a normal stop, "length" means the output limit was hit,
+// and anything else is reported as llm.FinishOther.
+func finishReason(doneReason string, hasToolCalls bool) llm.FinishReason {
+	switch {
+	case hasToolCalls:
+		return llm.FinishToolCalls
+	case doneReason == "" || doneReason == "stop":
+		return llm.FinishStop
+	case doneReason == "length":
+		return llm.FinishLength
+	}
+	return llm.FinishOther
+}