feat: OpenAI, Anthropic, and native-Ollama providers + media pipeline

Phase 3: - provider/openai: Chat Completions for OpenAI + compat endpoints (SSE streaming with by-index tool-call assembly, response_format json_schema, legacy max_tokens option, reasoning_effort) - provider/anthropic: Messages API (tool_use/tool_result, GA structured output via output_config.format, full SSE event parser, 529 transient) - provider/ollama: one native /api/chat client behind the ollama, ollama-cloud, and foreman built-ins (presets; NDJSON streaming tolerant of foreman's buffered single-object responses; object tool arguments; format-schema structured output; think mapping) - media/: capability normalization (sniff, downscale, transcode, byte ladder, ErrUnsupported), wired into the chain executor per target with penalty-free advance past incapable elements - registry: real provider + scheme wiring, WithHTTPClient option, required env-foreman TLS chat round-trip test - ADR-0009 multimodal strategy, ADR-0010 tools/structured mapping; README matrix + CLAUDE.md synced Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-10 12:58:08 +02:00
parent 323558ed72
commit 043249e0e1
31 changed files with 6194 additions and 74 deletions
@@ -0,0 +1,247 @@
+package anthropic
+
+import (
+	"bufio"
+	"encoding/json"
+	"fmt"
+	"io"
+	"strings"
+	"sync"
+
+	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
+)
+
+// wireStreamEvent is the union of all SSE data payloads the Messages API
+// emits. Dispatch is on Type (the data always carries one), so the SSE
+// "event:" line is informational only.
+//
+// One struct decodes every event kind: a payload populates only the fields
+// it carries and the rest keep their zero values. The pointer fields
+// (Message, ContentBlock, Usage, Error) stay nil when the payload omits
+// them, which is why Next nil-checks each before use. Delta is a plain
+// value and is simply empty for events without a "delta" object.
+type wireStreamEvent struct {
+	// Type is the discriminator Next switches on. Index is decoded from
+	// "index" but is not read by any code in this file.
+	Type  string `json:"type"`
+	Index int    `json:"index"`
+
+	// message_start: only the usage counters of the opening message are
+	// decoded.
+	Message *struct {
+		Usage wireUsage `json:"usage"`
+	} `json:"message"`
+
+	// content_block_start: Type selects how the block is accumulated; ID
+	// and Name are only consumed when Type is "tool_use".
+	ContentBlock *struct {
+		Type string `json:"type"`
+		ID   string `json:"id"`
+		Name string `json:"name"`
+	} `json:"content_block"`
+
+	// content_block_delta / message_delta: Type picks the fragment kind
+	// ("text_delta" reads Text, "input_json_delta" reads PartialJSON);
+	// StopReason is read on message_delta.
+	Delta struct {
+		Type        string `json:"type"`
+		Text        string `json:"text"`
+		PartialJSON string `json:"partial_json"`
+		StopReason  string `json:"stop_reason"`
+	} `json:"delta"`
+
+	// message_delta: top-level usage; Next copies only OutputTokens from it.
+	Usage *wireUsage `json:"usage"`
+
+	// error: copied into llm.APIError (Type -> Code, Message -> Message).
+	Error *struct {
+		Type    string `json:"type"`
+		Message string `json:"message"`
+	} `json:"error"`
+}
+
+// stream adapts the Messages API SSE stream to llm.Stream.
+//
+// Why single-threaded pull (no reader goroutine): Next is already the
+// consumer's pull point, so parsing lazily inside Next keeps cancellation,
+// buffering, and error propagation trivial — Close just closes the body and
+// the next read fails.
+//
+// The struct is a small state machine: the "accumulated response" fields
+// grow over the whole stream and are handed out in the final Response,
+// while the "current content block" fields are reset at every
+// content_block_start and again when the block is finished.
+type stream struct {
+	// Identity, copied from the model in newStream. provider prefixes
+	// error messages; provider and model populate llm.APIError; full is
+	// reported as Response.Model.
+	provider string
+	model    string
+	full     string // provider/model
+	// body is the response body; scanner reads it line by line.
+	body     io.ReadCloser
+	scanner  *bufio.Scanner
+
+	// accumulated response
+	parts     []llm.Part     // one TextPart per non-empty finished text block
+	toolCalls []llm.ToolCall // one entry per finished tool_use block
+	usage     llm.Usage      // set at message_start; OutputTokens updated by message_delta
+	finish    llm.FinishReason // starts as llm.FinishOther; replaced when a stop_reason arrives
+
+	// current content block state
+	blockType string          // content_block.type of the open block, "" when none
+	textBuf   strings.Builder // text_delta fragments of the open block
+	toolID    string          // set only when a tool_use block starts
+	toolName  string          // set only when a tool_use block starts
+	argsBuf   strings.Builder // input_json_delta fragments of the open block
+
+	done      bool // final Response event emitted
+	// closeOnce guards body.Close so it runs once; closeErr caches its
+	// result for every Close call.
+	closeOnce sync.Once
+	closeErr  error
+}
+
+func newStream(m *model, body io.ReadCloser) *stream {
+	sc := bufio.NewScanner(body)
+	// Why a large limit: one SSE line carries one whole delta; default 64K
+	// can be exceeded by large structured-output or tool-argument deltas.
+	sc.Buffer(make([]byte, 0, 64*1024), 10*1024*1024)
+	return &stream{
+		provider: m.provider.name,
+		model:    m.id,
+		full:     m.fullName(),
+		body:     body,
+		scanner:  sc,
+		finish:   llm.FinishOther,
+	}
+}
+
+// Close implements llm.Stream by closing the response body. It may be called
+// at any point and repeatedly: the body is closed on the first call only, and
+// every call reports the error from that first close.
+func (s *stream) Close() error {
+	closeBody := func() {
+		s.closeErr = s.body.Close()
+	}
+	s.closeOnce.Do(closeBody)
+	return s.closeErr
+}
+
+// Next implements llm.Stream. Each call reads SSE events until one produces
+// something for the consumer: a TextDelta for every text fragment, a
+// fully-assembled ToolCall when a tool_use block stops, and exactly one final
+// Response at message_stop. After that Response every call returns io.EOF.
+// Read, decode, and mid-stream API errors are returned as they occur.
+func (s *stream) Next() (llm.StreamEvent, error) {
+	var none llm.StreamEvent
+	if s.done {
+		return none, io.EOF
+	}
+	for {
+		payload, err := s.nextData()
+		if err != nil {
+			return none, err
+		}
+		var ev wireStreamEvent
+		if err := json.Unmarshal([]byte(payload), &ev); err != nil {
+			return none, fmt.Errorf("%s: decode stream event: %w", s.provider, err)
+		}
+
+		// Event types without a case (ping, anything unknown) are ignored.
+		switch ev.Type {
+		case "message_start":
+			if ev.Message != nil {
+				s.usage = ev.Message.Usage.toUsage()
+			}
+
+		case "content_block_start":
+			s.startBlock(&ev)
+
+		case "content_block_delta":
+			// Other delta types (thinking_delta / signature_delta) are
+			// tolerated and skipped.
+			switch ev.Delta.Type {
+			case "text_delta":
+				s.textBuf.WriteString(ev.Delta.Text)
+				return llm.StreamEvent{TextDelta: ev.Delta.Text}, nil
+			case "input_json_delta":
+				// Partial JSON is only buffered; consumers never see it.
+				s.argsBuf.WriteString(ev.Delta.PartialJSON)
+			}
+
+		case "content_block_stop":
+			if out, emitted := s.finishBlock(); emitted {
+				return out, nil
+			}
+
+		case "message_delta":
+			if reason := ev.Delta.StopReason; reason != "" {
+				s.finish = mapStopReason(reason)
+			}
+			if u := ev.Usage; u != nil {
+				// Output tokens arrive cumulatively in the final delta;
+				// input tokens were reported in message_start.
+				s.usage.OutputTokens = u.OutputTokens
+			}
+
+		case "message_stop":
+			s.done = true
+			return s.finalEvent(), nil
+
+		case "error":
+			return none, s.streamError(&ev)
+		}
+	}
+}
+
+// startBlock discards any leftover per-block state and records the type of
+// the block that is opening, plus the id and name for tool_use blocks.
+func (s *stream) startBlock(ev *wireStreamEvent) {
+	s.blockType = ""
+	s.textBuf.Reset()
+	s.argsBuf.Reset()
+
+	block := ev.ContentBlock
+	if block == nil {
+		return
+	}
+	s.blockType = block.Type
+	if block.Type == "tool_use" {
+		s.toolID, s.toolName = block.ID, block.Name
+	}
+}
+
+// finalEvent packages everything accumulated so far into the Response event.
+func (s *stream) finalEvent() llm.StreamEvent {
+	resp := &llm.Response{
+		Parts:        s.parts,
+		ToolCalls:    s.toolCalls,
+		FinishReason: s.finish,
+		Usage:        s.usage,
+		Model:        s.full,
+	}
+	return llm.StreamEvent{Response: resp}
+}
+
+// streamError converts an "error" event into an llm.APIError.
+//
+// This is a mid-stream failure after the 200 (e.g. overloaded_error).
+// Status stays 0: there is no HTTP status for it, and the default Classify
+// treats it as transient, which fits overload.
+func (s *stream) streamError(ev *wireStreamEvent) error {
+	apiErr := &llm.APIError{Provider: s.provider, Model: s.model}
+	if detail := ev.Error; detail != nil {
+		apiErr.Code = detail.Type
+		apiErr.Message = detail.Message
+	}
+	return apiErr
+}
+
+// finishBlock ends the content block that is currently open and clears the
+// per-block state. A non-empty text block is appended to the response parts
+// and yields no event. A tool_use block is appended to the tool calls and is
+// also returned as a ToolCall event (second result true). Any other block
+// type is dropped.
+func (s *stream) finishBlock() (llm.StreamEvent, bool) {
+	// Snapshot the block, then clear the state up front so every return
+	// path below leaves the stream ready for the next block.
+	kind := s.blockType
+	text := s.textBuf.String()
+	rawArgs := s.argsBuf.String()
+	s.blockType = ""
+	s.textBuf.Reset()
+	s.argsBuf.Reset()
+
+	if kind == "tool_use" {
+		if rawArgs == "" {
+			// A tool called with no arguments streams zero (or empty)
+			// input_json_delta fragments; the canonical form is "{}".
+			rawArgs = "{}"
+		}
+		call := llm.ToolCall{
+			ID:        s.toolID,
+			Name:      s.toolName,
+			Arguments: json.RawMessage(rawArgs),
+		}
+		s.toolCalls = append(s.toolCalls, call)
+		return llm.StreamEvent{ToolCall: &call}, true
+	}
+
+	if kind == "text" && text != "" {
+		s.parts = append(s.parts, llm.TextPart{Text: text})
+	}
+	return llm.StreamEvent{}, false
+}
+
+// nextData returns the data payload of the next SSE event. It consumes lines
+// until a blank line terminates an event that has data; several "data:"
+// lines within one event are joined with "\n" per the SSE spec, and one
+// optional space after the colon is dropped. Every other line ("event:",
+// comments) is skipped, because dispatch keys off the JSON "type" field.
+//
+// If the input ends while data is pending, that data is still returned. If
+// it ends with nothing pending, the error wraps io.ErrUnexpectedEOF. Scanner
+// failures are returned wrapped with the provider name.
+func (s *stream) nextData() (string, error) {
+	const field = "data:"
+
+	var payload strings.Builder
+	for s.scanner.Scan() {
+		line := s.scanner.Text()
+		switch {
+		case line == "":
+			// A blank line ends the event; blank lines before any data
+			// are just skipped.
+			if payload.Len() > 0 {
+				return payload.String(), nil
+			}
+		case strings.HasPrefix(line, field):
+			if payload.Len() > 0 {
+				payload.WriteByte('\n')
+			}
+			payload.WriteString(strings.TrimPrefix(line[len(field):], " "))
+		}
+	}
+
+	if err := s.scanner.Err(); err != nil {
+		return "", fmt.Errorf("%s: read stream: %w", s.provider, err)
+	}
+	if payload.Len() == 0 {
+		// EOF before message_stop: the connection dropped mid-response.
+		return "", fmt.Errorf("%s: stream ended before message_stop: %w", s.provider, io.ErrUnexpectedEOF)
+	}
+	return payload.String(), nil
+}