043249e0e1
Phase 3: - provider/openai: Chat Completions for OpenAI + compat endpoints (SSE streaming with by-index tool-call assembly, response_format json_schema, legacy max_tokens option, reasoning_effort) - provider/anthropic: Messages API (tool_use/tool_result, GA structured output via output_config.format, full SSE event parser, 529 transient) - provider/ollama: one native /api/chat client behind the ollama, ollama-cloud, and foreman built-ins (presets; NDJSON streaming tolerant of foreman's buffered single-object responses; object tool arguments; format-schema structured output; think mapping) - media/: capability normalization (sniff, downscale, transcode, byte ladder, ErrUnsupported), wired into the chain executor per target with penalty-free advance past incapable elements - registry: real provider + scheme wiring, WithHTTPClient option, required env-foreman TLS chat round-trip test - ADR-0009 multimodal strategy, ADR-0010 tools/structured mapping; README matrix + CLAUDE.md synced Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
184 lines
4.9 KiB
Go
184 lines
4.9 KiB
Go
package openai
|
|
|
|
import (
|
|
"bufio"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"strings"
|
|
"sync"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
|
)
|
|
|
|
// stream consumes the data-only SSE stream of chat.completion.chunk events.
|
|
//
|
|
// Delivery contract: TextDelta events as content fragments arrive; ToolCall
|
|
// events only once fully assembled (fragments are buffered internally and
|
|
// flushed at stream end — simplest correct handling of interleaved parallel
|
|
// calls); exactly one final Response event; then io.EOF.
|
|
type stream struct {
|
|
m *model
|
|
body io.ReadCloser
|
|
sc *bufio.Scanner
|
|
|
|
closeOnce sync.Once
|
|
closeErr error
|
|
|
|
queue []llm.StreamEvent
|
|
done bool // finalize ran; drain queue then io.EOF
|
|
|
|
text strings.Builder
|
|
calls []*toolCallAcc // first-appearance order
|
|
byIndex map[int]*toolCallAcc
|
|
finish string
|
|
usage llm.Usage
|
|
}
|
|
|
|
// toolCallAcc collects the streamed fragments of a single tool call. The id
// and name arrive on the first fragment for an index; the arguments arrive
// as string pieces that are concatenated in arrival order.
type toolCallAcc struct {
	id   string          // call id; stays empty when no fragment carried one
	name string          // function name
	args strings.Builder // argument text, appended fragment by fragment
}
|
|
|
|
// Next implements llm.Stream.
|
|
func (s *stream) Next() (llm.StreamEvent, error) {
|
|
for {
|
|
if len(s.queue) > 0 {
|
|
ev := s.queue[0]
|
|
s.queue = s.queue[1:]
|
|
return ev, nil
|
|
}
|
|
if s.done {
|
|
return llm.StreamEvent{}, io.EOF
|
|
}
|
|
if !s.sc.Scan() {
|
|
if err := s.sc.Err(); err != nil {
|
|
return llm.StreamEvent{}, fmt.Errorf("openai: read stream: %w", err)
|
|
}
|
|
// Why: some compat servers close the body without a [DONE]
|
|
// sentinel; a clean EOF still finalizes with what arrived.
|
|
s.finalize()
|
|
continue
|
|
}
|
|
line := strings.TrimSpace(s.sc.Text())
|
|
if !strings.HasPrefix(line, "data:") {
|
|
continue // SSE comments, event:/id: fields, blank separators
|
|
}
|
|
payload := strings.TrimSpace(strings.TrimPrefix(line, "data:"))
|
|
if payload == "" {
|
|
continue
|
|
}
|
|
if payload == "[DONE]" {
|
|
s.finalize()
|
|
continue
|
|
}
|
|
if err := s.handleChunk([]byte(payload)); err != nil {
|
|
return llm.StreamEvent{}, err
|
|
}
|
|
}
|
|
}
|
|
|
|
// handleChunk folds one chat.completion.chunk into the stream state,
|
|
// queueing any events it produces.
|
|
func (s *stream) handleChunk(data []byte) error {
|
|
var chunk streamChunk
|
|
if err := json.Unmarshal(data, &chunk); err != nil {
|
|
return fmt.Errorf("openai: decode stream chunk: %w", err)
|
|
}
|
|
if chunk.Error != nil {
|
|
// Mid-stream error event on an otherwise-200 stream. Status stays 0:
|
|
// there is no failing HTTP status to report.
|
|
apiErr := &llm.APIError{
|
|
Provider: s.m.p.name,
|
|
Model: s.m.id,
|
|
Code: chunk.Error.Code,
|
|
Message: chunk.Error.Message,
|
|
}
|
|
if apiErr.Code == "" {
|
|
apiErr.Code = chunk.Error.Type
|
|
}
|
|
return apiErr
|
|
}
|
|
if chunk.Usage != nil {
|
|
s.usage = llm.Usage{
|
|
InputTokens: chunk.Usage.PromptTokens,
|
|
OutputTokens: chunk.Usage.CompletionTokens,
|
|
}
|
|
}
|
|
// Why the guard: the include_usage chunk arrives with an EMPTY choices
|
|
// array; indexing choices[0] unconditionally would panic on it.
|
|
if len(chunk.Choices) == 0 {
|
|
return nil
|
|
}
|
|
choice := chunk.Choices[0]
|
|
if choice.FinishReason != "" {
|
|
s.finish = choice.FinishReason
|
|
}
|
|
if choice.Delta.Content != "" {
|
|
s.text.WriteString(choice.Delta.Content)
|
|
s.queue = append(s.queue, llm.StreamEvent{TextDelta: choice.Delta.Content})
|
|
}
|
|
for _, tc := range choice.Delta.ToolCalls {
|
|
acc := s.byIndex[tc.Index]
|
|
if acc == nil {
|
|
if s.byIndex == nil {
|
|
s.byIndex = make(map[int]*toolCallAcc)
|
|
}
|
|
acc = &toolCallAcc{}
|
|
s.byIndex[tc.Index] = acc
|
|
s.calls = append(s.calls, acc)
|
|
}
|
|
if tc.ID != "" {
|
|
acc.id = tc.ID
|
|
}
|
|
if tc.Function.Name != "" {
|
|
acc.name = tc.Function.Name
|
|
}
|
|
acc.args.WriteString(tc.Function.Arguments)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// finalize assembles the buffered tool calls and the final Response, queues
|
|
// them (ToolCall events first, Response last), and marks the stream done.
|
|
func (s *stream) finalize() {
|
|
if s.done {
|
|
return
|
|
}
|
|
s.done = true
|
|
resp := &llm.Response{Model: s.m.p.name + "/" + s.m.id, Usage: s.usage}
|
|
if s.text.Len() > 0 {
|
|
resp.Parts = []llm.Part{llm.TextPart{Text: s.text.String()}}
|
|
}
|
|
for i, acc := range s.calls {
|
|
id := acc.id
|
|
if id == "" {
|
|
// Why: ToolResult.ID must echo ToolCall.ID; synthesize for
|
|
// compat servers that stream calls without ids.
|
|
id = fmt.Sprintf("call_%d", i)
|
|
}
|
|
resp.ToolCalls = append(resp.ToolCalls, llm.ToolCall{
|
|
ID: id,
|
|
Name: acc.name,
|
|
Arguments: json.RawMessage(acc.args.String()),
|
|
})
|
|
}
|
|
resp.FinishReason = mapFinish(s.finish, len(resp.ToolCalls) > 0)
|
|
for i := range resp.ToolCalls {
|
|
tc := resp.ToolCalls[i] // copy so the event doesn't alias the slice
|
|
s.queue = append(s.queue, llm.StreamEvent{ToolCall: &tc})
|
|
}
|
|
s.queue = append(s.queue, llm.StreamEvent{Response: resp})
|
|
}
|
|
|
|
// Close implements llm.Stream. Closing the body unblocks any in-flight read
|
|
// and aborts the HTTP stream; safe to call at any time, including twice.
|
|
func (s *stream) Close() error {
|
|
s.closeOnce.Do(func() { s.closeErr = s.body.Close() })
|
|
return s.closeErr
|
|
}
|