// majordomo/provider/openai/openai.go

// Package openai implements llm.Provider for the OpenAI Chat Completions
// API and, via WithBaseURL/WithName, any OpenAI-compatible endpoint
// (vLLM, Groq, Together, LM Studio, Ollama's /v1 shim, ...).
//
// Targeted API surface (verified against developers.openai.com, June 2026):
// POST {base}/chat/completions with
//   - messages: plain-string content for text-only turns, part arrays with
//     base64 data-URL image_url entries for multimodal turns, assistant
//     tool_calls history, and {"role":"tool","tool_call_id",...} results;
//   - tools as {"type":"function","function":{...}} with tool_choice
//     "auto"/"none"/"required" or a named-function object;
//   - response_format {"type":"json_schema",...} structured output;
//   - max_completion_tokens (or legacy max_tokens via WithLegacyMaxTokens
//     for compat servers), temperature, top_p, stop, reasoning_effort;
//   - data-only SSE streaming with stream_options.include_usage, the
//     "data: [DONE]" sentinel, and tool-call deltas accumulated by index.
//
// Newer response fields (refusal, annotations, usage *_details, delta
// obfuscation) are tolerated and ignored so both api.openai.com and older
// compat servers decode cleanly.
package openai

import (
	"net/http"
	"os"
	"strings"

	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
)

// defaultBaseURL is the endpoint root New starts from; WithBaseURL
// replaces it for OpenAI-compatible servers.
const defaultBaseURL = "https://api.openai.com/v1"

// Provider is an llm.Provider backed by an OpenAI Chat Completions endpoint.
// Build it with New: the zero value has a nil HTTP client and an empty base
// URL.
//
// Fields, all populated by New and its Options:
//   - name: returned by Name; "openai" unless WithName overrides it.
//   - apiKey: the WithAPIKey value, else whatever OPENAI_API_KEY held when
//     New ran.
//   - baseURL: endpoint root, with trailing slashes trimmed by New.
//   - client: http.DefaultClient unless WithHTTPClient supplies a non-nil
//     replacement.
//   - caps: provider-default capabilities handed to each Model unless a
//     per-model override is given.
//   - legacyMaxTokens: set by WithLegacyMaxTokens to request the legacy
//     "max_tokens" field (the request encoder is outside this view).
type Provider struct {
	name            string
	apiKey          string
	baseURL         string
	client          *http.Client
	caps            llm.Capabilities
	legacyMaxTokens bool
}

// Option configures the provider at construction. New applies options in
// the order they are passed, so when two options set the same field the
// later one wins.
type Option func(*Provider)

// WithAPIKey supplies the API key explicitly. Without this option the
// provider keeps the value New read from OPENAI_API_KEY at construction
// time. The key is stored as given, so an empty string replaces the
// environment value.
func WithAPIKey(key string) Option {
	setKey := func(target *Provider) {
		target.apiKey = key
	}
	return setKey
}

// WithBaseURL redirects the client to another endpoint root, typically an
// OpenAI-compatible server. The value is stored as given; New strips any
// trailing slashes afterwards, and "/chat/completions" is appended to the
// result when requests are built.
func WithBaseURL(u string) Option {
	return func(target *Provider) {
		target.baseURL = u
	}
}

// WithHTTPClient swaps in a caller-supplied HTTP client (custom timeouts,
// proxies, test doubles). A nil client is ignored, leaving whatever client
// the provider already has in place.
func WithHTTPClient(c *http.Client) Option {
	if c == nil {
		// Nothing to install: hand back an option that changes nothing.
		return func(*Provider) {}
	}
	return func(target *Provider) {
		target.client = c
	}
}

// WithName replaces the registry name, which is "openai" by default. Why:
// one client implementation fronts many OpenAI-compatible endpoints, and
// each of them needs its own name in "provider/model" specs and in error
// reporting.
func WithName(name string) Option {
	rename := func(target *Provider) {
		target.name = name
	}
	return rename
}

// WithDefaultCapabilities swaps out the provider-wide default capabilities
// wholesale. A per-model override passed via llm.WithCapabilities still
// wins over whatever is set here.
func WithDefaultCapabilities(caps llm.Capabilities) Option {
	return func(target *Provider) {
		target.caps = caps
	}
}

// WithLegacyMaxTokens makes the provider send Request.MaxTokens under the
// legacy "max_tokens" key rather than "max_completion_tokens". Why: OpenAI
// deprecated max_tokens, yet many third-party compat servers still honor
// only the old field.
func WithLegacyMaxTokens() Option {
	enableLegacy := func(target *Provider) {
		target.legacyMaxTokens = true
	}
	return enableLegacy
}

// defaultCapabilities describes OpenAI's current vision-capable chat
// models and is what New installs before any options run.
//
// Why these limits: the published per-request ceilings (1500 images,
// 512 MB) are far more than compat servers accept, so 100 images of at
// most 20 MB each is used as a conservative envelope. The MIME list is the
// documented set: PNG, JPEG, WEBP and non-animated GIF.
func defaultCapabilities() llm.Capabilities {
	var caps llm.Capabilities

	// Feature flags.
	caps.SupportsTools = true
	caps.SupportsStructured = true
	caps.SupportsStreaming = true

	// Image limits: count per request, then size per image (20 MiB).
	caps.MaxImagesPerReq = 100
	caps.MaxImageBytes = 20 << 20
	caps.AllowedImageMIME = []string{
		"image/jpeg",
		"image/png",
		"image/webp",
		"image/gif",
	}

	return caps
}

// New creates a Provider. It never fails: a missing API key surfaces as a
// 401-style *llm.APIError at request time, not at construction.
//
// Starting defaults are the name "openai", the API key read from
// OPENAI_API_KEY at this moment, defaultBaseURL, http.DefaultClient and
// defaultCapabilities(). The opts are then applied in order, so a later
// option overrides an earlier one that sets the same field. Nil options
// are skipped, which lets callers pass conditionally-built option values
// without risking a panic. Trailing slashes on the base URL are trimmed
// once all options have run.
func New(opts ...Option) *Provider {
	p := &Provider{
		name:    "openai",
		apiKey:  os.Getenv("OPENAI_API_KEY"),
		baseURL: defaultBaseURL,
		// http.DefaultClient carries no overall timeout; callers that want
		// one can install their own client with WithHTTPClient.
		client: http.DefaultClient,
		caps:   defaultCapabilities(),
	}
	for _, opt := range opts {
		if opt == nil {
			// Calling a nil func value panics, which would break the
			// "never fails" contract above.
			continue
		}
		opt(p)
	}
	// Trim after the options so a WithBaseURL value is normalized too.
	p.baseURL = strings.TrimRight(p.baseURL, "/")
	return p
}

// Name implements llm.Provider. It reports the registry name: "openai" by
// default, or whatever WithName configured.
func (p *Provider) Name() string {
	return p.name
}

// Model implements llm.Provider. The id is forwarded verbatim with no
// catalog validation, so an unknown model fails at request time with the
// backend's own error.
//
// The returned model uses the provider's default capabilities unless the
// model options carry a capabilities override, which then replaces the
// defaults wholesale. The error result is always nil.
func (p *Provider) Model(id string, opts ...llm.ModelOption) (llm.Model, error) {
	effective := p.caps
	if override := llm.ApplyModelOptions(opts).Capabilities; override != nil {
		effective = *override
	}
	return &model{p: p, id: id, caps: effective}, nil
}