feat: OpenAI, Anthropic, and native-Ollama providers + media pipeline

Phase 3: - provider/openai: Chat Completions for OpenAI + compat endpoints (SSE streaming with by-index tool-call assembly, response_format json_schema, legacy max_tokens option, reasoning_effort) - provider/anthropic: Messages API (tool_use/tool_result, GA structured output via output_config.format, full SSE event parser, 529 transient) - provider/ollama: one native /api/chat client behind the ollama, ollama-cloud, and foreman built-ins (presets; NDJSON streaming tolerant of foreman's buffered single-object responses; object tool arguments; format-schema structured output; think mapping) - media/: capability normalization (sniff, downscale, transcode, byte ladder, ErrUnsupported), wired into the chain executor per target with penalty-free advance past incapable elements - registry: real provider + scheme wiring, WithHTTPClient option, required env-foreman TLS chat round-trip test - ADR-0009 multimodal strategy, ADR-0010 tools/structured mapping; README matrix + CLAUDE.md synced Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-10 12:58:08 +02:00
parent 323558ed72
commit 043249e0e1
31 changed files with 6194 additions and 74 deletions
@@ -0,0 +1,222 @@
+package openai
+
+import (
+	"bufio"
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"strings"
+
+	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
+)
+
+// model is one provider-bound target.
+type model struct {
+	p    *Provider
+	id   string
+	caps llm.Capabilities
+}
+
+// Capabilities implements llm.Model.
+func (m *model) Capabilities() llm.Capabilities { return m.caps }
+
+// Generate implements llm.Model.
+func (m *model) Generate(ctx context.Context, req llm.Request, opts ...llm.Option) (*llm.Response, error) {
+	req = req.Apply(opts...)
+	if err := checkRequest(m.caps, req); err != nil {
+		return nil, err
+	}
+	httpResp, err := m.do(ctx, req, false)
+	if err != nil {
+		return nil, err
+	}
+	defer httpResp.Body.Close()
+	if httpResp.StatusCode/100 != 2 {
+		return nil, m.apiError(httpResp)
+	}
+	var wire chatResponse
+	if err := json.NewDecoder(httpResp.Body).Decode(&wire); err != nil {
+		return nil, fmt.Errorf("openai: decode response: %w", err)
+	}
+	return m.toResponse(&wire), nil
+}
+
+// Stream implements llm.Model.
+func (m *model) Stream(ctx context.Context, req llm.Request, opts ...llm.Option) (llm.Stream, error) {
+	req = req.Apply(opts...)
+	if !m.caps.SupportsStreaming {
+		return nil, fmt.Errorf("%w: streaming not supported by %s/%s", llm.ErrUnsupported, m.p.name, m.id)
+	}
+	if err := checkRequest(m.caps, req); err != nil {
+		return nil, err
+	}
+	httpResp, err := m.do(ctx, req, true)
+	if err != nil {
+		return nil, err
+	}
+	if httpResp.StatusCode/100 != 2 {
+		defer httpResp.Body.Close()
+		return nil, m.apiError(httpResp)
+	}
+	sc := bufio.NewScanner(httpResp.Body)
+	// Why: a single SSE data line carries a whole JSON chunk; tool-call
+	// argument fragments can make lines far larger than Scanner's 64 KiB
+	// default cap.
+	sc.Buffer(make([]byte, 0, 64*1024), 16<<20)
+	return &stream{m: m, body: httpResp.Body, sc: sc}, nil
+}
+
+// do builds and performs the HTTP request. Transport failures are wrapped
+// raw (never as *llm.APIError) so llm.Classify still sees net.Error,
+// syscall errnos, and context errors underneath.
+func (m *model) do(ctx context.Context, req llm.Request, stream bool) (*http.Response, error) {
+	if m.p.apiKey == "" {
+		// Why a synthetic 401: the constructor never fails, so a missing
+		// key must surface at request time as the auth failure it is —
+		// permanent under llm.Classify, like a real 401.
+		return nil, &llm.APIError{
+			Provider: m.p.name,
+			Model:    m.id,
+			Status:   http.StatusUnauthorized,
+			Code:     "missing_api_key",
+			Message:  "no API key configured: set OPENAI_API_KEY or use WithAPIKey",
+		}
+	}
+	body, err := json.Marshal(m.buildRequest(req, stream))
+	if err != nil {
+		return nil, fmt.Errorf("openai: encode request: %w", err)
+	}
+	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, m.p.baseURL+"/chat/completions", bytes.NewReader(body))
+	if err != nil {
+		return nil, fmt.Errorf("openai: build request: %w", err)
+	}
+	httpReq.Header.Set("Content-Type", "application/json")
+	httpReq.Header.Set("Authorization", "Bearer "+m.p.apiKey)
+	if stream {
+		httpReq.Header.Set("Accept", "text/event-stream")
+	}
+	httpResp, err := m.p.client.Do(httpReq)
+	if err != nil {
+		return nil, fmt.Errorf("openai: do request: %w", err)
+	}
+	return httpResp, nil
+}
+
+// apiError converts a non-2xx response into *llm.APIError, pulling code and
+// message from the {"error":{...}} body when it parses.
+func (m *model) apiError(httpResp *http.Response) error {
+	apiErr := &llm.APIError{Provider: m.p.name, Model: m.id, Status: httpResp.StatusCode}
+	body, _ := io.ReadAll(io.LimitReader(httpResp.Body, 1<<20))
+	var env errorEnvelope
+	if err := json.Unmarshal(body, &env); err == nil &&
+		(env.Error.Message != "" || env.Error.Type != "" || env.Error.Code != "") {
+		apiErr.Message = env.Error.Message
+		apiErr.Code = env.Error.Code
+		if apiErr.Code == "" {
+			apiErr.Code = env.Error.Type
+		}
+	} else {
+		// Why: compat servers emit all sorts of error bodies; a raw snippet
+		// beats silence when the canonical envelope is absent.
+		apiErr.Message = strings.TrimSpace(string(body))
+	}
+	return apiErr
+}
+
+// toResponse maps the wire response onto the canonical llm.Response.
+func (m *model) toResponse(wire *chatResponse) *llm.Response {
+	resp := &llm.Response{Model: m.p.name + "/" + m.id, Raw: wire}
+	if wire.Usage != nil {
+		resp.Usage = llm.Usage{
+			InputTokens:  wire.Usage.PromptTokens,
+			OutputTokens: wire.Usage.CompletionTokens,
+		}
+	}
+	if len(wire.Choices) == 0 {
+		resp.FinishReason = llm.FinishOther
+		return resp
+	}
+	choice := wire.Choices[0]
+	if choice.Message.Content != "" {
+		resp.Parts = append(resp.Parts, llm.TextPart{Text: choice.Message.Content})
+	}
+	for i, tc := range choice.Message.ToolCalls {
+		id := tc.ID
+		if id == "" {
+			// Why: ToolResult.ID must echo ToolCall.ID, so calls from compat
+			// servers that omit ids get synthesized ones.
+			id = fmt.Sprintf("call_%d", i)
+		}
+		resp.ToolCalls = append(resp.ToolCalls, llm.ToolCall{
+			ID:        id,
+			Name:      tc.Function.Name,
+			Arguments: json.RawMessage(tc.Function.Arguments),
+		})
+	}
+	resp.FinishReason = mapFinish(choice.FinishReason, len(resp.ToolCalls) > 0)
+	return resp
+}
+
+// mapFinish maps a wire finish_reason to the canonical enum. Tool-call
+// presence wins over the reported reason: a forced (named tool_choice) call
+// can finish with "stop" while still carrying tool_calls.
+func mapFinish(reason string, hasToolCalls bool) llm.FinishReason {
+	if hasToolCalls {
+		return llm.FinishToolCalls
+	}
+	switch reason {
+	case "stop":
+		return llm.FinishStop
+	case "length":
+		return llm.FinishLength
+	case "tool_calls":
+		return llm.FinishToolCalls
+	case "content_filter":
+		return llm.FinishContentFilter
+	default:
+		return llm.FinishOther
+	}
+}
+
+// checkRequest enforces the model's effective capabilities. Why enforcement
+// rather than normalization: a separate media layer resizes/transcodes
+// images BEFORE requests reach the provider; this check is the honest
+// backstop that refuses, with llm.ErrUnsupported, what the target
+// declaredly cannot serve (chains advance past it penalty-free).
+func checkRequest(caps llm.Capabilities, req llm.Request) error {
+	if len(req.Tools) > 0 && !caps.SupportsTools {
+		return fmt.Errorf("%w: tools not supported", llm.ErrUnsupported)
+	}
+	if len(req.Schema) > 0 && !caps.SupportsStructured {
+		return fmt.Errorf("%w: structured output not supported", llm.ErrUnsupported)
+	}
+	images := 0
+	for _, msg := range req.Messages {
+		for _, part := range msg.Parts {
+			img, ok := part.(llm.ImagePart)
+			if !ok {
+				continue
+			}
+			images++
+			if !caps.SupportsImages() {
+				return fmt.Errorf("%w: image input not supported", llm.ErrUnsupported)
+			}
+			if !caps.MIMEAllowed(img.MIME) {
+				return fmt.Errorf("%w: image MIME type %q not allowed (allowed: %s)",
+					llm.ErrUnsupported, img.MIME, strings.Join(caps.AllowedImageMIME, ", "))
+			}
+			if caps.MaxImageBytes > 0 && len(img.Data) > caps.MaxImageBytes {
+				return fmt.Errorf("%w: image is %d bytes, limit is %d",
+					llm.ErrUnsupported, len(img.Data), caps.MaxImageBytes)
+			}
+		}
+	}
+	if images > caps.MaxImagesPerReq {
+		return fmt.Errorf("%w: request carries %d images, limit is %d",
+			llm.ErrUnsupported, images, caps.MaxImagesPerReq)
+	}
+	return nil
+}