feat: OpenAI, Anthropic, and native-Ollama providers + media pipeline
Phase 3:
- provider/openai: Chat Completions for OpenAI + compat endpoints (SSE streaming with by-index tool-call assembly, response_format json_schema, legacy max_tokens option, reasoning_effort)
- provider/anthropic: Messages API (tool_use/tool_result, GA structured output via output_config.format, full SSE event parser, 529 transient)
- provider/ollama: one native /api/chat client behind the ollama, ollama-cloud, and foreman built-ins (presets; NDJSON streaming tolerant of foreman's buffered single-object responses; object tool arguments; format-schema structured output; think mapping)
- media/: capability normalization (sniff, downscale, transcode, byte ladder, ErrUnsupported), wired into the chain executor per target with penalty-free advance past incapable elements
- registry: real provider + scheme wiring, WithHTTPClient option, required env-foreman TLS chat round-trip test
- ADR-0009 multimodal strategy, ADR-0010 tools/structured mapping; README matrix + CLAUDE.md synced

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,183 @@
|
||||
package openai
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
||||
)
|
||||
|
||||
// stream consumes the data-only SSE stream of chat.completion.chunk events.
//
// Delivery contract: TextDelta events as content fragments arrive; ToolCall
// events only once fully assembled (fragments are buffered internally and
// flushed at stream end — simplest correct handling of interleaved parallel
// calls); exactly one final Response event; then io.EOF.
//
// Next reads one scanner line at a time and folds each "data:" payload into
// the accumulator fields below; finalize turns the accumulated state into the
// closing ToolCall/Response events.
type stream struct {
	// m supplies the provider name and model id used to label errors and
	// the final Response.
	m *model
	// body is the response body released by Close.
	body io.ReadCloser
	// sc yields the stream line by line.
	// NOTE(review): presumably wraps body — constructed outside this view.
	sc *bufio.Scanner

	// closeOnce guards body.Close so repeated Close calls close only once;
	// closeErr caches that single Close result for every caller.
	closeOnce sync.Once
	closeErr  error

	// queue holds events produced but not yet returned by Next, oldest first.
	queue []llm.StreamEvent
	done  bool // finalize ran; drain queue then io.EOF

	// text accumulates every content fragment for the final Response.
	text strings.Builder
	// calls and byIndex reference the same accumulators: calls preserves
	// order, byIndex finds the accumulator for a fragment's index.
	calls   []*toolCallAcc // first-appearance order
	byIndex map[int]*toolCallAcc
	// finish is the most recent non-empty finish reason seen on a choice.
	finish string
	// usage is overwritten by each chunk that carries a usage object.
	usage llm.Usage
}
|
||||
|
||||
// toolCallAcc is the per-index scratch space for one streamed tool call.
//
// The id and name are expected on the first fragment for an index; the
// arguments arrive as string pieces that are appended to args in arrival
// order and read back as a whole once the stream ends.
type toolCallAcc struct {
	// id and name keep the last non-empty value seen for this call.
	id, name string
	// args is the concatenation of every argument fragment received so far.
	args strings.Builder
}
|
||||
|
||||
// Next implements llm.Stream.
|
||||
func (s *stream) Next() (llm.StreamEvent, error) {
|
||||
for {
|
||||
if len(s.queue) > 0 {
|
||||
ev := s.queue[0]
|
||||
s.queue = s.queue[1:]
|
||||
return ev, nil
|
||||
}
|
||||
if s.done {
|
||||
return llm.StreamEvent{}, io.EOF
|
||||
}
|
||||
if !s.sc.Scan() {
|
||||
if err := s.sc.Err(); err != nil {
|
||||
return llm.StreamEvent{}, fmt.Errorf("openai: read stream: %w", err)
|
||||
}
|
||||
// Why: some compat servers close the body without a [DONE]
|
||||
// sentinel; a clean EOF still finalizes with what arrived.
|
||||
s.finalize()
|
||||
continue
|
||||
}
|
||||
line := strings.TrimSpace(s.sc.Text())
|
||||
if !strings.HasPrefix(line, "data:") {
|
||||
continue // SSE comments, event:/id: fields, blank separators
|
||||
}
|
||||
payload := strings.TrimSpace(strings.TrimPrefix(line, "data:"))
|
||||
if payload == "" {
|
||||
continue
|
||||
}
|
||||
if payload == "[DONE]" {
|
||||
s.finalize()
|
||||
continue
|
||||
}
|
||||
if err := s.handleChunk([]byte(payload)); err != nil {
|
||||
return llm.StreamEvent{}, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// handleChunk folds one chat.completion.chunk into the stream state,
|
||||
// queueing any events it produces.
|
||||
func (s *stream) handleChunk(data []byte) error {
|
||||
var chunk streamChunk
|
||||
if err := json.Unmarshal(data, &chunk); err != nil {
|
||||
return fmt.Errorf("openai: decode stream chunk: %w", err)
|
||||
}
|
||||
if chunk.Error != nil {
|
||||
// Mid-stream error event on an otherwise-200 stream. Status stays 0:
|
||||
// there is no failing HTTP status to report.
|
||||
apiErr := &llm.APIError{
|
||||
Provider: s.m.p.name,
|
||||
Model: s.m.id,
|
||||
Code: chunk.Error.Code,
|
||||
Message: chunk.Error.Message,
|
||||
}
|
||||
if apiErr.Code == "" {
|
||||
apiErr.Code = chunk.Error.Type
|
||||
}
|
||||
return apiErr
|
||||
}
|
||||
if chunk.Usage != nil {
|
||||
s.usage = llm.Usage{
|
||||
InputTokens: chunk.Usage.PromptTokens,
|
||||
OutputTokens: chunk.Usage.CompletionTokens,
|
||||
}
|
||||
}
|
||||
// Why the guard: the include_usage chunk arrives with an EMPTY choices
|
||||
// array; indexing choices[0] unconditionally would panic on it.
|
||||
if len(chunk.Choices) == 0 {
|
||||
return nil
|
||||
}
|
||||
choice := chunk.Choices[0]
|
||||
if choice.FinishReason != "" {
|
||||
s.finish = choice.FinishReason
|
||||
}
|
||||
if choice.Delta.Content != "" {
|
||||
s.text.WriteString(choice.Delta.Content)
|
||||
s.queue = append(s.queue, llm.StreamEvent{TextDelta: choice.Delta.Content})
|
||||
}
|
||||
for _, tc := range choice.Delta.ToolCalls {
|
||||
acc := s.byIndex[tc.Index]
|
||||
if acc == nil {
|
||||
if s.byIndex == nil {
|
||||
s.byIndex = make(map[int]*toolCallAcc)
|
||||
}
|
||||
acc = &toolCallAcc{}
|
||||
s.byIndex[tc.Index] = acc
|
||||
s.calls = append(s.calls, acc)
|
||||
}
|
||||
if tc.ID != "" {
|
||||
acc.id = tc.ID
|
||||
}
|
||||
if tc.Function.Name != "" {
|
||||
acc.name = tc.Function.Name
|
||||
}
|
||||
acc.args.WriteString(tc.Function.Arguments)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// finalize assembles the buffered tool calls and the final Response, queues
|
||||
// them (ToolCall events first, Response last), and marks the stream done.
|
||||
func (s *stream) finalize() {
|
||||
if s.done {
|
||||
return
|
||||
}
|
||||
s.done = true
|
||||
resp := &llm.Response{Model: s.m.p.name + "/" + s.m.id, Usage: s.usage}
|
||||
if s.text.Len() > 0 {
|
||||
resp.Parts = []llm.Part{llm.TextPart{Text: s.text.String()}}
|
||||
}
|
||||
for i, acc := range s.calls {
|
||||
id := acc.id
|
||||
if id == "" {
|
||||
// Why: ToolResult.ID must echo ToolCall.ID; synthesize for
|
||||
// compat servers that stream calls without ids.
|
||||
id = fmt.Sprintf("call_%d", i)
|
||||
}
|
||||
resp.ToolCalls = append(resp.ToolCalls, llm.ToolCall{
|
||||
ID: id,
|
||||
Name: acc.name,
|
||||
Arguments: json.RawMessage(acc.args.String()),
|
||||
})
|
||||
}
|
||||
resp.FinishReason = mapFinish(s.finish, len(resp.ToolCalls) > 0)
|
||||
for i := range resp.ToolCalls {
|
||||
tc := resp.ToolCalls[i] // copy so the event doesn't alias the slice
|
||||
s.queue = append(s.queue, llm.StreamEvent{ToolCall: &tc})
|
||||
}
|
||||
s.queue = append(s.queue, llm.StreamEvent{Response: resp})
|
||||
}
|
||||
|
||||
// Close implements llm.Stream. Closing the body unblocks any in-flight read
|
||||
// and aborts the HTTP stream; safe to call at any time, including twice.
|
||||
func (s *stream) Close() error {
|
||||
s.closeOnce.Do(func() { s.closeErr = s.body.Close() })
|
||||
return s.closeErr
|
||||
}
|
||||
Reference in New Issue
Block a user