feat: OpenAI, Anthropic, and native-Ollama providers + media pipeline

Phase 3: - provider/openai: Chat Completions for OpenAI + compat endpoints (SSE streaming with by-index tool-call assembly, response_format json_schema, legacy max_tokens option, reasoning_effort) - provider/anthropic: Messages API (tool_use/tool_result, GA structured output via output_config.format, full SSE event parser, 529 transient) - provider/ollama: one native /api/chat client behind the ollama, ollama-cloud, and foreman built-ins (presets; NDJSON streaming tolerant of foreman's buffered single-object responses; object tool arguments; format-schema structured output; think mapping) - media/: capability normalization (sniff, downscale, transcode, byte ladder, ErrUnsupported), wired into the chain executor per target with penalty-free advance past incapable elements - registry: real provider + scheme wiring, WithHTTPClient option, required env-foreman TLS chat round-trip test - ADR-0009 multimodal strategy, ADR-0010 tools/structured mapping; README matrix + CLAUDE.md synced Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-10 12:58:08 +02:00
parent 323558ed72
commit 043249e0e1
31 changed files with 6194 additions and 74 deletions
@@ -0,0 +1,183 @@
+package openai
+
+import (
+	"bufio"
+	"encoding/json"
+	"fmt"
+	"io"
+	"strings"
+	"sync"
+
+	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
+)
+
+// stream consumes the data-only SSE stream of chat.completion.chunk events.
+//
+// Delivery contract: TextDelta events as content fragments arrive; ToolCall
+// events only once fully assembled (fragments are buffered internally and
+// flushed at stream end — simplest correct handling of interleaved parallel
+// calls); exactly one final Response event; then io.EOF.
+//
+// Only Close is guarded (by closeOnce); the queue and accumulators are
+// mutated by Next without any locking visible in this file.
+type stream struct {
+	// m supplies the provider name (m.p.name) and model id (m.id) used in
+	// API errors and in the final Response's Model field.
+	m    *model
+	// body is the response body; Close closes it.
+	body io.ReadCloser
+	// sc is the line source Next reads from.
+	// NOTE(review): presumably wraps body — it is constructed outside this
+	// view, confirm at the construction site.
+	sc   *bufio.Scanner
+
+	// closeOnce makes Close run body.Close at most once; closeErr keeps the
+	// result so every Close call returns the same error.
+	closeOnce sync.Once
+	closeErr  error
+
+	// queue holds events produced but not yet returned; Next pops from the
+	// front before reading more input.
+	queue []llm.StreamEvent
+	done  bool // finalize ran; drain queue then io.EOF
+
+	// text accumulates every content delta; it becomes the TextPart of the
+	// final Response when non-empty.
+	text    strings.Builder
+	calls   []*toolCallAcc // first-appearance order
+	// byIndex maps a tool-call fragment's Index to its accumulator; it is
+	// allocated lazily on the first tool-call fragment.
+	byIndex map[int]*toolCallAcc
+	// finish is the most recent non-empty finish reason seen on a chunk.
+	finish  string
+	// usage is overwritten by any chunk that carries a usage object.
+	usage   llm.Usage
+}
+
+// toolCallAcc accumulates one tool call's fragments. The id and name arrive
+// on the first fragment for an index; arguments arrive as string pieces to
+// concatenate.
+//
+// A later fragment carrying a non-empty id or name overwrites the stored
+// value; argument pieces are only ever appended.
+type toolCallAcc struct {
+	// id is the call id from the stream; it may stay empty, in which case
+	// finalize synthesizes one.
+	id   string
+	// name is the function name to invoke.
+	name string
+	// args is the concatenation of the argument string pieces, in arrival
+	// order.
+	args strings.Builder
+}
+
+// Next implements llm.Stream.
+//
+// It returns the oldest queued event when one is available. Otherwise it
+// reads SSE lines until a chunk produces events, the stream is finalized
+// (by a [DONE] sentinel or a clean end of input), or an error occurs. Once
+// finalize has run and the queue is drained it returns io.EOF.
+func (s *stream) Next() (llm.StreamEvent, error) {
+	for len(s.queue) == 0 {
+		if s.done {
+			return llm.StreamEvent{}, io.EOF
+		}
+		if !s.sc.Scan() {
+			if scanErr := s.sc.Err(); scanErr != nil {
+				return llm.StreamEvent{}, fmt.Errorf("openai: read stream: %w", scanErr)
+			}
+			// Why: a clean EOF without the [DONE] sentinel happens with
+			// some compat servers; finalize with whatever arrived.
+			s.finalize()
+			continue
+		}
+		field := strings.TrimSpace(s.sc.Text())
+		if !strings.HasPrefix(field, "data:") {
+			// Not a data field: SSE comment, event:/id: field, or the
+			// blank line separating events.
+			continue
+		}
+		switch data := strings.TrimSpace(field[len("data:"):]); data {
+		case "":
+			// Empty data field: nothing to decode.
+		case "[DONE]":
+			s.finalize()
+		default:
+			if err := s.handleChunk([]byte(data)); err != nil {
+				return llm.StreamEvent{}, err
+			}
+		}
+	}
+	head := s.queue[0]
+	s.queue = s.queue[1:]
+	return head, nil
+}
+
+// handleChunk decodes one chat.completion.chunk payload and merges it into
+// the stream state. Content deltas are queued as TextDelta events right
+// away; tool-call fragments are only accumulated here and are emitted later
+// by finalize.
+//
+// It returns a wrapped decode error for malformed JSON, or an *llm.APIError
+// when the chunk carries an error object.
+func (s *stream) handleChunk(data []byte) error {
+	var chunk streamChunk
+	if err := json.Unmarshal(data, &chunk); err != nil {
+		return fmt.Errorf("openai: decode stream chunk: %w", err)
+	}
+
+	if e := chunk.Error; e != nil {
+		// An error event inside a 200 stream: there is no failing HTTP
+		// status, so Status is left at its zero value.
+		apiErr := &llm.APIError{
+			Provider: s.m.p.name,
+			Model:    s.m.id,
+			Message:  e.Message,
+		}
+		// Prefer the error code; fall back to the error type when absent.
+		if apiErr.Code = e.Code; apiErr.Code == "" {
+			apiErr.Code = e.Type
+		}
+		return apiErr
+	}
+
+	if u := chunk.Usage; u != nil {
+		s.usage = llm.Usage{
+			InputTokens:  u.PromptTokens,
+			OutputTokens: u.CompletionTokens,
+		}
+	}
+
+	// Why the guard: the include_usage chunk has an EMPTY choices array, so
+	// reading element 0 without checking would panic.
+	if len(chunk.Choices) == 0 {
+		return nil
+	}
+	first := chunk.Choices[0]
+
+	if reason := first.FinishReason; reason != "" {
+		s.finish = reason
+	}
+
+	if piece := first.Delta.Content; piece != "" {
+		s.text.WriteString(piece)
+		s.queue = append(s.queue, llm.StreamEvent{TextDelta: piece})
+	}
+
+	for _, frag := range first.Delta.ToolCalls {
+		// Allocate the index lookup on the first tool-call fragment.
+		if s.byIndex == nil {
+			s.byIndex = make(map[int]*toolCallAcc)
+		}
+		acc := s.byIndex[frag.Index]
+		if acc == nil {
+			// First fragment for this index: register a new accumulator
+			// and record its order of appearance.
+			acc = &toolCallAcc{}
+			s.byIndex[frag.Index] = acc
+			s.calls = append(s.calls, acc)
+		}
+		if frag.ID != "" {
+			acc.id = frag.ID
+		}
+		if fn := frag.Function.Name; fn != "" {
+			acc.name = fn
+		}
+		acc.args.WriteString(frag.Function.Arguments)
+	}
+	return nil
+}
+
+// finalize assembles the buffered tool calls and the final Response, queues
+// them (ToolCall events first, Response last), and marks the stream done.
+//
+// It is idempotent: a second call is a no-op, so a [DONE] sentinel followed
+// by end of input cannot produce two Response events.
+func (s *stream) finalize() {
+	if s.done {
+		return
+	}
+	s.done = true
+	resp := &llm.Response{Model: s.m.p.name + "/" + s.m.id, Usage: s.usage}
+	if s.text.Len() > 0 {
+		resp.Parts = []llm.Part{llm.TextPart{Text: s.text.String()}}
+	}
+	for i, acc := range s.calls {
+		id := acc.id
+		if id == "" {
+			// Why: ToolResult.ID must echo ToolCall.ID; synthesize for
+			// compat servers that stream calls without ids.
+			id = fmt.Sprintf("call_%d", i)
+		}
+		args := acc.args.String()
+		if strings.TrimSpace(args) == "" {
+			// Why: a call whose argument fragments add up to nothing
+			// would otherwise become a non-nil, empty json.RawMessage,
+			// which is not valid JSON — decoding it or re-marshalling the
+			// ToolCall fails with "unexpected end of JSON input". Use an
+			// empty object instead. Non-empty arguments pass through
+			// byte-for-byte.
+			args = "{}"
+		}
+		resp.ToolCalls = append(resp.ToolCalls, llm.ToolCall{
+			ID:        id,
+			Name:      acc.name,
+			Arguments: json.RawMessage(args),
+		})
+	}
+	resp.FinishReason = mapFinish(s.finish, len(resp.ToolCalls) > 0)
+	for i := range resp.ToolCalls {
+		tc := resp.ToolCalls[i] // copy so the event doesn't alias the slice
+		s.queue = append(s.queue, llm.StreamEvent{ToolCall: &tc})
+	}
+	s.queue = append(s.queue, llm.StreamEvent{Response: resp})
+}
+
+// Close implements llm.Stream. It closes the response body, which unblocks
+// any in-flight read and aborts the HTTP stream. The body is closed at most
+// once; every call, including repeated ones, returns the error from that
+// single close.
+func (s *stream) Close() error {
+	closeBody := func() {
+		s.closeErr = s.body.Close()
+	}
+	s.closeOnce.Do(closeBody)
+	return s.closeErr
+}