feat: OpenAI, Anthropic, and native-Ollama providers + media pipeline

Phase 3:
- provider/openai: Chat Completions for OpenAI + compat endpoints (SSE
  streaming with by-index tool-call assembly, response_format json_schema,
  legacy max_tokens option, reasoning_effort)
- provider/anthropic: Messages API (tool_use/tool_result, GA structured
  output via output_config.format, full SSE event parser, 529 transient)
- provider/ollama: one native /api/chat client behind the ollama,
  ollama-cloud, and foreman built-ins (presets; NDJSON streaming tolerant
  of foreman's buffered single-object responses; object tool arguments;
  format-schema structured output; think mapping)
- media/: capability normalization (sniff, downscale, transcode, byte
  ladder, ErrUnsupported), wired into the chain executor per target with
  penalty-free advance past incapable elements
- registry: real provider + scheme wiring, WithHTTPClient option, required
  env-foreman TLS chat round-trip test
- ADR-0009 multimodal strategy, ADR-0010 tools/structured mapping; README
  matrix + CLAUDE.md synced

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
2026-06-10 12:58:08 +02:00
parent 323558ed72
commit 043249e0e1
31 changed files with 6194 additions and 74 deletions
+247
View File
@@ -0,0 +1,247 @@
package anthropic
import (
"bufio"
"encoding/json"
"fmt"
"io"
"strings"
"sync"
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
)
// wireStreamEvent is the union of all SSE data payloads the Messages API
// emits. Dispatch is on Type (the data always carries one), so the SSE
// "event:" line is informational only.
//
// Every event kind is decoded into this one struct; fields belonging to
// other event kinds simply stay at their zero value. The per-field comments
// name the event type(s) for which the stream parser reads that field.
type wireStreamEvent struct {
// Type is the discriminator the stream parser switches on.
Type string `json:"type"`
// Index is decoded from the payload but not consulted by the parser in
// this file.
Index int `json:"index"`
// message_start
// Message carries the initial usage counters; nil when the payload has no
// "message" object.
Message *struct {
Usage wireUsage `json:"usage"`
} `json:"message"`
// content_block_start
// ContentBlock announces the block being opened. ID and Name are only read
// when Type is "tool_use".
ContentBlock *struct {
Type string `json:"type"`
ID string `json:"id"`
Name string `json:"name"`
} `json:"content_block"`
// content_block_delta / message_delta
// Delta is a value rather than a pointer, so a payload without "delta"
// decodes to empty strings and can be read without a nil check.
// Type, Text and PartialJSON are read for content_block_delta; StopReason
// is read for message_delta.
Delta struct {
Type string `json:"type"`
Text string `json:"text"`
PartialJSON string `json:"partial_json"`
StopReason string `json:"stop_reason"`
} `json:"delta"`
// message_delta
// Usage is nil when the payload has no top-level "usage" object; the
// parser only takes OutputTokens from it.
Usage *wireUsage `json:"usage"`
// error
// Error describes a failure reported inside the stream; its Type and
// Message are copied into the returned llm.APIError.
Error *struct {
Type string `json:"type"`
Message string `json:"message"`
} `json:"error"`
}
// stream adapts the Messages API SSE stream to llm.Stream.
//
// Why single-threaded pull (no reader goroutine): Next is already the
// consumer's pull point, so parsing lazily inside Next keeps cancellation,
// buffering, and error propagation trivial — Close just closes the body and
// the next read fails.
//
// The struct holds three kinds of state: identity strings used for error
// messages and the final response, the response accumulated so far, and the
// scratch state of the content block currently being streamed.
type stream struct {
// provider prefixes error messages and fills APIError.Provider.
provider string
// model fills APIError.Model.
model string
// full is reported as Response.Model in the final event.
full string // provider/model
// body is the underlying response body; Close closes it.
body io.ReadCloser
// scanner reads body line by line.
scanner *bufio.Scanner
// accumulated response
// parts receives one text part per finished non-empty text block.
parts []llm.Part
// toolCalls receives one entry per finished tool_use block.
toolCalls []llm.ToolCall
// usage is seeded at message_start; OutputTokens is overwritten at
// message_delta.
usage llm.Usage
// finish is overwritten when a message_delta carries a stop reason.
finish llm.FinishReason
// current content block state
// blockType is the type of the open block, or "" when none is open.
blockType string
// textBuf collects the text_delta fragments of the open block.
textBuf strings.Builder
// toolID and toolName are set when a tool_use block starts.
toolID string
toolName string
// argsBuf collects the input_json_delta fragments of the open block.
argsBuf strings.Builder
done bool // final Response event emitted
// closeOnce ensures body is closed a single time; closeErr remembers the
// result of that close so every Close call returns it.
closeOnce sync.Once
closeErr error
}
// newStream wraps an open Messages API response body in a stream that is
// ready for Next. The finish reason starts out as llm.FinishOther and is
// replaced once the API reports a stop reason.
func newStream(m *model, body io.ReadCloser) *stream {
	const (
		// Starting capacity of the line buffer.
		initialLineBytes = 64 * 1024
		// Why a large limit: one SSE line carries one whole delta; default
		// 64K can be exceeded by large structured-output or tool-argument
		// deltas.
		maxLineBytes = 10 * 1024 * 1024
	)

	lines := bufio.NewScanner(body)
	lines.Buffer(make([]byte, 0, initialLineBytes), maxLineBytes)

	st := new(stream)
	st.provider = m.provider.name
	st.model = m.id
	st.full = m.fullName()
	st.body = body
	st.scanner = lines
	st.finish = llm.FinishOther
	return st
}
// Close implements llm.Stream. The body is closed on the first call only;
// that call's result is remembered and handed back by every later call, so
// Close may be invoked at any time and any number of times.
func (s *stream) Close() error {
	closeBody := func() {
		s.closeErr = s.body.Close()
	}
	s.closeOnce.Do(closeBody)
	return s.closeErr
}
// Next implements llm.Stream. Each call pulls SSE payloads until one of them
// yields something for the consumer:
//
//   - a TextDelta for every text_delta fragment,
//   - a fully-assembled ToolCall when a tool_use block stops,
//   - exactly one final Response at message_stop,
//
// after which every call returns io.EOF. Read, decode, and in-stream "error"
// events are returned as errors with a zero StreamEvent.
func (s *stream) Next() (llm.StreamEvent, error) {
	var none llm.StreamEvent
	if s.done {
		return none, io.EOF
	}

	for {
		payload, err := s.nextData()
		if err != nil {
			return none, err
		}

		var msg wireStreamEvent
		if decodeErr := json.Unmarshal([]byte(payload), &msg); decodeErr != nil {
			return none, fmt.Errorf("%s: decode stream event: %w", s.provider, decodeErr)
		}

		switch msg.Type {
		case "message_start":
			if start := msg.Message; start != nil {
				s.usage = start.Usage.toUsage()
			}

		case "content_block_start":
			// Whatever the previous block left behind is discarded.
			s.textBuf.Reset()
			s.argsBuf.Reset()
			block := msg.ContentBlock
			if block == nil {
				s.blockType = ""
				continue
			}
			s.blockType = block.Type
			if block.Type == "tool_use" {
				s.toolID, s.toolName = block.ID, block.Name
			}

		case "content_block_delta":
			delta := msg.Delta
			if delta.Type == "text_delta" {
				s.textBuf.WriteString(delta.Text)
				return llm.StreamEvent{TextDelta: delta.Text}, nil
			}
			if delta.Type == "input_json_delta" {
				// Partial JSON stays internal; the consumer only ever sees
				// the assembled tool call.
				s.argsBuf.WriteString(delta.PartialJSON)
			}
			// Anything else (thinking_delta / signature_delta) is tolerated
			// and skipped.

		case "content_block_stop":
			out, emit := s.finishBlock()
			if emit {
				return out, nil
			}

		case "message_delta":
			if reason := msg.Delta.StopReason; reason != "" {
				s.finish = mapStopReason(reason)
			}
			if u := msg.Usage; u != nil {
				// Output tokens arrive cumulatively in the final delta;
				// input tokens were reported in message_start.
				s.usage.OutputTokens = u.OutputTokens
			}

		case "message_stop":
			s.done = true
			final := &llm.Response{
				Parts:        s.parts,
				ToolCalls:    s.toolCalls,
				FinishReason: s.finish,
				Usage:        s.usage,
				Model:        s.full,
			}
			return llm.StreamEvent{Response: final}, nil

		case "error":
			// Mid-stream failure after the 200 (e.g. overloaded_error).
			// Status stays 0: there is no HTTP status for it, and the
			// default Classify treats it as transient, which fits overload.
			failure := &llm.APIError{Provider: s.provider, Model: s.model}
			if detail := msg.Error; detail != nil {
				failure.Code = detail.Type
				failure.Message = detail.Message
			}
			return none, failure

		default:
			// ping and unknown event types: ignored.
		}
	}
}
// finishBlock ends the content block that is currently open and clears the
// per-block scratch state. A non-empty text block is appended to the
// accumulated parts and reports false. A tool_use block is appended to the
// accumulated tool calls and returned as a stream event with true. Any other
// block type produces nothing and reports false.
func (s *stream) finishBlock() (llm.StreamEvent, bool) {
	// Snapshot the block before wiping the scratch state.
	kind := s.blockType
	text := s.textBuf.String()
	rawArgs := s.argsBuf.String()

	s.blockType = ""
	s.textBuf.Reset()
	s.argsBuf.Reset()

	if kind == "tool_use" {
		if rawArgs == "" {
			// A tool called with no arguments streams zero (or empty)
			// input_json_delta fragments; the canonical form is "{}".
			rawArgs = "{}"
		}
		call := llm.ToolCall{
			ID:        s.toolID,
			Name:      s.toolName,
			Arguments: json.RawMessage(rawArgs),
		}
		s.toolCalls = append(s.toolCalls, call)
		return llm.StreamEvent{ToolCall: &call}, true
	}

	if kind == "text" && text != "" {
		s.parts = append(s.parts, llm.TextPart{Text: text})
	}
	return llm.StreamEvent{}, false
}
// nextData returns the data payload of the next SSE event. It consumes lines
// until a blank line ends an event that has buffered data; several "data:"
// lines within one event are joined with "\n", and a single leading space
// after the colon is dropped. Lines that are not "data:" fields ("event:"
// lines, comments) are skipped, as are blank lines with nothing buffered.
//
// If the input ends while data is buffered, that data is still returned. A
// read failure is wrapped as "read stream"; running out of input with
// nothing buffered is reported as io.ErrUnexpectedEOF, since it means the
// stream ended before message_stop.
func (s *stream) nextData() (string, error) {
	const field = "data:"

	var payload strings.Builder
	for s.scanner.Scan() {
		line := s.scanner.Text()
		switch {
		case line == "" && payload.Len() > 0:
			// Blank line after buffered data: the event is complete.
			return payload.String(), nil
		case !strings.HasPrefix(line, field):
			// Idle blank line, "event:" line, comment, or other field.
			continue
		}

		if payload.Len() > 0 {
			payload.WriteByte('\n')
		}
		value := line[len(field):]
		payload.WriteString(strings.TrimPrefix(value, " "))
	}

	switch err := s.scanner.Err(); {
	case err != nil:
		return "", fmt.Errorf("%s: read stream: %w", s.provider, err)
	case payload.Len() > 0:
		return payload.String(), nil
	}
	// EOF before message_stop: the connection dropped mid-response.
	return "", fmt.Errorf("%s: stream ended before message_stop: %w", s.provider, io.ErrUnexpectedEOF)
}