Files
steve 043249e0e1 feat: OpenAI, Anthropic, and native-Ollama providers + media pipeline
Phase 3:
- provider/openai: Chat Completions for OpenAI + compat endpoints (SSE
  streaming with by-index tool-call assembly, response_format json_schema,
  legacy max_tokens option, reasoning_effort)
- provider/anthropic: Messages API (tool_use/tool_result, GA structured
  output via output_config.format, full SSE event parser, 529 transient)
- provider/ollama: one native /api/chat client behind the ollama,
  ollama-cloud, and foreman built-ins (presets; NDJSON streaming tolerant
  of foreman's buffered single-object responses; object tool arguments;
  format-schema structured output; think mapping)
- media/: capability normalization (sniff, downscale, transcode, byte
  ladder, ErrUnsupported), wired into the chain executor per target with
  penalty-free advance past incapable elements
- registry: real provider + scheme wiring, WithHTTPClient option, required
  env-foreman TLS chat round-trip test
- ADR-0009 multimodal strategy, ADR-0010 tools/structured mapping; README
  matrix + CLAUDE.md synced

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-10 12:58:08 +02:00

134 lines
4.7 KiB
Go

// Package openai implements llm.Provider for the OpenAI Chat Completions
// API and, via WithBaseURL/WithName, any OpenAI-compatible endpoint
// (vLLM, Groq, Together, LM Studio, Ollama's /v1 shim, ...).
//
// Targeted API surface (verified against developers.openai.com, June 2026):
// POST {base}/chat/completions with
//   - messages: plain-string content for text-only turns, part arrays with
//     base64 data-URL image_url entries for multimodal turns, assistant
//     tool_calls history, and {"role":"tool","tool_call_id",...} results;
//   - tools as {"type":"function","function":{...}} with tool_choice
//     "auto"/"none"/"required" or a named-function object;
//   - response_format {"type":"json_schema",...} structured output;
//   - max_completion_tokens (or legacy max_tokens via WithLegacyMaxTokens
//     for compat servers), temperature, top_p, stop, reasoning_effort;
//   - data-only SSE streaming with stream_options.include_usage, the
//     "data: [DONE]" sentinel, and tool-call deltas accumulated by index.
//
// Newer response fields (refusal, annotations, usage *_details, delta
// obfuscation) are tolerated and ignored so both api.openai.com and older
// compat servers decode cleanly.
package openai
import (
"net/http"
"os"
"strings"
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
)
// defaultBaseURL is the endpoint root New starts from; WithBaseURL
// replaces it.
const defaultBaseURL = "https://api.openai.com/v1"
// Provider is an llm.Provider backed by an OpenAI Chat Completions endpoint.
// Build one with New; its fields are populated there and by the Option
// functions.
type Provider struct {
	// name is what Name returns: "openai" unless WithName overrides it.
	name string

	// apiKey is read from OPENAI_API_KEY by New unless WithAPIKey replaces it.
	apiKey string

	// baseURL is the endpoint root; New strips trailing slashes from it.
	baseURL string

	// client is the HTTP client; New starts from http.DefaultClient.
	client *http.Client

	// caps is the provider-default capability set given to models that do
	// not carry their own capabilities in their model options.
	caps llm.Capabilities

	// legacyMaxTokens is switched on by WithLegacyMaxTokens.
	legacyMaxTokens bool
}
// Option configures the provider at construction. New applies options in
// the order they are passed, so a later option wins over an earlier one
// that sets the same field.
type Option func(*Provider)
// WithAPIKey supplies the API key explicitly. Without this option, New
// uses the value of OPENAI_API_KEY read from the environment at
// construction time.
func WithAPIKey(key string) Option {
	return func(target *Provider) {
		target.apiKey = key
	}
}
// WithBaseURL redirects the client to a different endpoint root (compat
// servers). The path "/chat/completions" is appended; New strips trailing
// slashes from the value.
func WithBaseURL(u string) Option {
	return func(target *Provider) {
		target.baseURL = u
	}
}
// WithHTTPClient swaps in a caller-provided HTTP client (timeouts, proxies,
// tests). A nil client is ignored, leaving the current client in place.
func WithHTTPClient(c *http.Client) Option {
	return func(target *Provider) {
		if c == nil {
			return
		}
		target.client = c
	}
}
// WithName replaces the registry name ("openai" by default). Why: one
// client implementation fronts many OpenAI-compatible endpoints, and each
// of them needs its own name in "provider/model" specs and error reporting.
func WithName(name string) Option {
	return func(target *Provider) {
		target.name = name
	}
}
// WithDefaultCapabilities swaps out the provider-default capabilities.
// Per-model overrides via llm.WithCapabilities still take precedence.
func WithDefaultCapabilities(caps llm.Capabilities) Option {
	return func(target *Provider) {
		target.caps = caps
	}
}
// WithLegacyMaxTokens makes the provider send Request.MaxTokens as
// "max_tokens" rather than "max_completion_tokens". Why: OpenAI deprecated
// max_tokens, but many third-party compat servers still only honor the
// legacy field.
func WithLegacyMaxTokens() Option {
	return func(target *Provider) {
		target.legacyMaxTokens = true
	}
}
// defaultCapabilities describes OpenAI's current vision-capable chat models.
// Why these limits: the published per-request caps (1500 images, 512 MB)
// far exceed what compat servers accept, so 100 images at 20 MB each is
// used as a conservative envelope. The MIME list is the documented set
// (PNG, JPEG, WEBP, non-animated GIF).
func defaultCapabilities() llm.Capabilities {
	var caps llm.Capabilities

	// Feature switches.
	caps.SupportsTools = true
	caps.SupportsStructured = true
	caps.SupportsStreaming = true

	// Image envelope: count, per-image size (20 MiB), accepted formats.
	caps.MaxImagesPerReq = 100
	caps.MaxImageBytes = 20 << 20
	caps.AllowedImageMIME = []string{"image/jpeg", "image/png", "image/webp", "image/gif"}

	return caps
}
// New creates a Provider. It never fails: a missing API key surfaces as a
// 401-style *llm.APIError at request time, not at construction.
//
// Defaults are the name "openai", the OPENAI_API_KEY environment value,
// the public OpenAI base URL, http.DefaultClient, and defaultCapabilities.
// Options are applied in the order given on top of those defaults. Nil
// options are skipped, so callers can pass conditionally-built option
// lists without a panic. Trailing slashes are stripped from the final
// base URL.
func New(opts ...Option) *Provider {
	p := &Provider{
		name:    "openai",
		apiKey:  os.Getenv("OPENAI_API_KEY"),
		baseURL: defaultBaseURL,
		client:  http.DefaultClient,
		caps:    defaultCapabilities(),
	}
	for _, opt := range opts {
		// Calling a nil func value panics; ignoring it keeps the
		// "never fails" contract intact.
		if opt == nil {
			continue
		}
		opt(p)
	}
	// Normalize after all options ran so a WithBaseURL value is covered too.
	p.baseURL = strings.TrimRight(p.baseURL, "/")
	return p
}
// Name implements llm.Provider by returning the configured provider name.
func (p *Provider) Name() string {
	return p.name
}
// Model implements llm.Provider. The id is forwarded as-is with no catalog
// validation; an unknown model fails at request time with the backend's
// own error. The returned model starts from the provider-default
// capabilities, which are replaced wholesale when the model options carry
// a capability set. The error result is always nil.
func (p *Provider) Model(id string, opts ...llm.ModelOption) (llm.Model, error) {
	cfg := llm.ApplyModelOptions(opts)

	m := &model{p: p, id: id, caps: p.caps}
	if override := cfg.Capabilities; override != nil {
		m.caps = *override
	}
	return m, nil
}