feat: OpenAI, Anthropic, and native-Ollama providers + media pipeline
Phase 3: - provider/openai: Chat Completions for OpenAI + compat endpoints (SSE streaming with by-index tool-call assembly, response_format json_schema, legacy max_tokens option, reasoning_effort) - provider/anthropic: Messages API (tool_use/tool_result, GA structured output via output_config.format, full SSE event parser, 529 transient) - provider/ollama: one native /api/chat client behind the ollama, ollama-cloud, and foreman built-ins (presets; NDJSON streaming tolerant of foreman's buffered single-object responses; object tool arguments; format-schema structured output; think mapping) - media/: capability normalization (sniff, downscale, transcode, byte ladder, ErrUnsupported), wired into the chain executor per target with penalty-free advance past incapable elements - registry: real provider + scheme wiring, WithHTTPClient option, required env-foreman TLS chat round-trip test - ADR-0009 multimodal strategy, ADR-0010 tools/structured mapping; README matrix + CLAUDE.md synced Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,319 @@
|
||||
// Package anthropic implements llm.Provider for the Anthropic Messages API
|
||||
// and Anthropic-compatible endpoints.
|
||||
//
|
||||
// API surface targeted: POST {base}/v1/messages with headers x-api-key,
|
||||
// anthropic-version: 2023-06-01, and content-type: application/json, per the
|
||||
// platform.claude.com Messages API reference as of June 2026. Streaming uses
|
||||
// the documented SSE event sequence (message_start, content_block_start,
|
||||
// content_block_delta, content_block_stop, message_delta, message_stop).
|
||||
// Structured output uses the GA output_config.format mechanism with
|
||||
// {"type":"json_schema"}; the result arrives as JSON text in the first text
|
||||
// content block.
|
||||
//
|
||||
// Why a hand-rolled client (no SDK): ADR-0007 — majordomo is stdlib-first,
|
||||
// and the canonical llm contract needs only a narrow slice of the API.
|
||||
package anthropic
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
||||
)
|
||||
|
||||
// defaultName is the registry name a provider carries unless WithName
// overrides it.
const defaultName = "anthropic"

// defaultBaseURL is the first-party API origin; "/v1/messages" is appended
// to it per request.
const defaultBaseURL = "https://api.anthropic.com"

// apiVersion is the anthropic-version header value. 2023-06-01 remains the
// current (and only) stable version string as of June 2026.
const apiVersion = "2023-06-01"

// defaultMaxTokens is used when Request.MaxTokens is 0, because the Messages
// API requires max_tokens on every request.
const defaultMaxTokens = 4096
|
||||
|
||||
// defaultCapabilities reflects the documented first-party API image limits:
|
||||
// 100 images per request (200K-context models), 10 MB per image, 8000 px per
|
||||
// side, and the four supported media types.
|
||||
func defaultCapabilities() llm.Capabilities {
|
||||
return llm.Capabilities{
|
||||
SupportsTools: true,
|
||||
SupportsStructured: true,
|
||||
SupportsStreaming: true,
|
||||
MaxImagesPerReq: 100,
|
||||
MaxImageBytes: 10 << 20,
|
||||
MaxImageDimension: 8000,
|
||||
AllowedImageMIME: []string{
|
||||
"image/jpeg", "image/png", "image/gif", "image/webp",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// Provider is an llm.Provider backed by the Anthropic Messages API.
|
||||
type Provider struct {
|
||||
name string
|
||||
apiKey string
|
||||
baseURL string
|
||||
client *http.Client
|
||||
caps llm.Capabilities
|
||||
maxTokens int
|
||||
}
|
||||
|
||||
// Option configures the provider at construction.
|
||||
type Option func(*Provider)
|
||||
|
||||
// WithAPIKey sets the API key explicitly, bypassing the ANTHROPIC_API_KEY
|
||||
// environment default.
|
||||
func WithAPIKey(key string) Option {
|
||||
return func(p *Provider) { p.apiKey = key }
|
||||
}
|
||||
|
||||
// WithBaseURL points the provider at an Anthropic-compatible endpoint. A
|
||||
// trailing slash is trimmed; "/v1/messages" is appended per request.
|
||||
func WithBaseURL(u string) Option {
|
||||
return func(p *Provider) { p.baseURL = strings.TrimRight(u, "/") }
|
||||
}
|
||||
|
||||
// WithHTTPClient replaces the HTTP client (timeouts, proxies, test doubles).
|
||||
func WithHTTPClient(c *http.Client) Option {
|
||||
return func(p *Provider) { p.client = c }
|
||||
}
|
||||
|
||||
// WithName overrides the registry name. Why: an Anthropic-compatible
|
||||
// endpoint registered under its own name must surface that name in
|
||||
// Response.Model and errors, not "anthropic".
|
||||
func WithName(name string) Option {
|
||||
return func(p *Provider) { p.name = name }
|
||||
}
|
||||
|
||||
// WithDefaultCapabilities replaces the provider-default capabilities.
|
||||
func WithDefaultCapabilities(caps llm.Capabilities) Option {
|
||||
return func(p *Provider) { p.caps = caps }
|
||||
}
|
||||
|
||||
// WithDefaultMaxTokens overrides the max_tokens value used when
|
||||
// Request.MaxTokens is 0. Why: the Messages API rejects requests without
|
||||
// max_tokens, so the provider must always send something.
|
||||
func WithDefaultMaxTokens(n int) Option {
|
||||
return func(p *Provider) { p.maxTokens = n }
|
||||
}
|
||||
|
||||
// New creates an Anthropic provider. It never fails: a missing API key
|
||||
// (no WithAPIKey and no ANTHROPIC_API_KEY in the environment) surfaces as a
|
||||
// 401-style *llm.APIError at request time, not at construction.
|
||||
func New(opts ...Option) *Provider {
|
||||
p := &Provider{
|
||||
name: defaultName,
|
||||
baseURL: defaultBaseURL,
|
||||
client: http.DefaultClient,
|
||||
caps: defaultCapabilities(),
|
||||
maxTokens: defaultMaxTokens,
|
||||
}
|
||||
for _, opt := range opts {
|
||||
opt(p)
|
||||
}
|
||||
if p.apiKey == "" {
|
||||
p.apiKey = os.Getenv("ANTHROPIC_API_KEY")
|
||||
}
|
||||
return p
|
||||
}
|
||||
|
||||
// Name implements llm.Provider.
|
||||
func (p *Provider) Name() string { return p.name }
|
||||
|
||||
// Model implements llm.Provider. The id is passed through verbatim — it is
|
||||
// never validated against a catalog.
|
||||
func (p *Provider) Model(id string, opts ...llm.ModelOption) (llm.Model, error) {
|
||||
cfg := llm.ApplyModelOptions(opts)
|
||||
caps := p.caps
|
||||
if cfg.Capabilities != nil {
|
||||
caps = *cfg.Capabilities
|
||||
}
|
||||
return &model{provider: p, id: id, caps: caps}, nil
|
||||
}
|
||||
|
||||
type model struct {
|
||||
provider *Provider
|
||||
id string
|
||||
caps llm.Capabilities
|
||||
}
|
||||
|
||||
// Capabilities implements llm.Model.
|
||||
func (m *model) Capabilities() llm.Capabilities { return m.caps }
|
||||
|
||||
// fullName is the "provider/model" identifier used in Response.Model.
|
||||
func (m *model) fullName() string { return m.provider.name + "/" + m.id }
|
||||
|
||||
// Generate implements llm.Model.
|
||||
func (m *model) Generate(ctx context.Context, req llm.Request, opts ...llm.Option) (*llm.Response, error) {
|
||||
req = req.Apply(opts...)
|
||||
if err := m.enforceCapabilities(req); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
httpResp, err := m.do(ctx, req, false)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer httpResp.Body.Close()
|
||||
if httpResp.StatusCode/100 != 2 {
|
||||
return nil, m.apiError(httpResp)
|
||||
}
|
||||
var wr wireResponse
|
||||
if err := json.NewDecoder(httpResp.Body).Decode(&wr); err != nil {
|
||||
return nil, fmt.Errorf("%s: decode response: %w", m.provider.name, err)
|
||||
}
|
||||
return m.toResponse(&wr), nil
|
||||
}
|
||||
|
||||
// Stream implements llm.Model. A non-2xx status is returned as an error from
|
||||
// Stream itself, before any events are delivered.
|
||||
func (m *model) Stream(ctx context.Context, req llm.Request, opts ...llm.Option) (llm.Stream, error) {
|
||||
req = req.Apply(opts...)
|
||||
if err := m.enforceCapabilities(req); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
httpResp, err := m.do(ctx, req, true)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if httpResp.StatusCode/100 != 2 {
|
||||
defer httpResp.Body.Close()
|
||||
return nil, m.apiError(httpResp)
|
||||
}
|
||||
return newStream(m, httpResp.Body), nil
|
||||
}
|
||||
|
||||
// enforceCapabilities is the honest backstop behind the media layer: it
|
||||
// rejects (rather than silently mutates) requests the target cannot serve.
|
||||
// Why: a separate media layer resizes/transcodes images BEFORE requests
|
||||
// reach the provider, so anything still out of bounds here is a real error.
|
||||
func (m *model) enforceCapabilities(req llm.Request) error {
|
||||
images := 0
|
||||
for _, msg := range req.Messages {
|
||||
for _, part := range msg.Parts {
|
||||
img, ok := part.(llm.ImagePart)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
images++
|
||||
if !m.caps.SupportsImages() {
|
||||
return fmt.Errorf("%w: %s does not accept image input", llm.ErrUnsupported, m.fullName())
|
||||
}
|
||||
if !m.caps.MIMEAllowed(img.MIME) {
|
||||
return fmt.Errorf("%w: %s does not accept image MIME %q", llm.ErrUnsupported, m.fullName(), img.MIME)
|
||||
}
|
||||
if m.caps.MaxImageBytes > 0 && len(img.Data) > m.caps.MaxImageBytes {
|
||||
return fmt.Errorf("%w: image of %d bytes exceeds %s limit of %d bytes",
|
||||
llm.ErrUnsupported, len(img.Data), m.fullName(), m.caps.MaxImageBytes)
|
||||
}
|
||||
}
|
||||
}
|
||||
if m.caps.MaxImagesPerReq > 0 && images > m.caps.MaxImagesPerReq {
|
||||
return fmt.Errorf("%w: request carries %d images, %s allows at most %d",
|
||||
llm.ErrUnsupported, images, m.fullName(), m.caps.MaxImagesPerReq)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// do builds and executes one Messages API call. Transport errors are wrapped
|
||||
// with context but NOT converted to *llm.APIError, so llm.Classify still
|
||||
// sees the underlying net.Error / syscall errno.
|
||||
func (m *model) do(ctx context.Context, req llm.Request, streaming bool) (*http.Response, error) {
|
||||
p := m.provider
|
||||
if p.apiKey == "" {
|
||||
// Why request-time, not construction-time: New never fails by
|
||||
// convention, and a 401-shaped APIError classifies permanent so
|
||||
// chains fail fast past a misconfigured target.
|
||||
return nil, &llm.APIError{
|
||||
Provider: p.name,
|
||||
Model: m.id,
|
||||
Status: http.StatusUnauthorized,
|
||||
Code: "authentication_error",
|
||||
Message: "no API key configured: set ANTHROPIC_API_KEY or use WithAPIKey",
|
||||
}
|
||||
}
|
||||
|
||||
body, err := json.Marshal(buildWireRequest(m.id, req, p.maxTokens, streaming))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%s: encode request: %w", p.name, err)
|
||||
}
|
||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, p.baseURL+"/v1/messages", bytes.NewReader(body))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%s: build request: %w", p.name, err)
|
||||
}
|
||||
httpReq.Header.Set("x-api-key", p.apiKey)
|
||||
httpReq.Header.Set("anthropic-version", apiVersion)
|
||||
httpReq.Header.Set("content-type", "application/json")
|
||||
if streaming {
|
||||
httpReq.Header.Set("accept", "text/event-stream")
|
||||
}
|
||||
|
||||
resp, err := p.client.Do(httpReq)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%s: do request: %w", p.name, err)
|
||||
}
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
// apiError converts a non-2xx response into *llm.APIError, filling Code and
|
||||
// Message from the documented {"type":"error","error":{...}} body when it
|
||||
// parses, and falling back to the raw body text when it does not.
|
||||
func (m *model) apiError(resp *http.Response) error {
|
||||
apiErr := &llm.APIError{
|
||||
Provider: m.provider.name,
|
||||
Model: m.id,
|
||||
Status: resp.StatusCode,
|
||||
}
|
||||
body, err := io.ReadAll(io.LimitReader(resp.Body, 1<<20))
|
||||
if err != nil {
|
||||
return apiErr
|
||||
}
|
||||
var we wireErrorEnvelope
|
||||
if json.Unmarshal(body, &we) == nil && we.Error.Type != "" {
|
||||
apiErr.Code = we.Error.Type
|
||||
apiErr.Message = we.Error.Message
|
||||
} else {
|
||||
apiErr.Message = strings.TrimSpace(string(body))
|
||||
}
|
||||
return apiErr
|
||||
}
|
||||
|
||||
// toResponse maps a wire response onto the canonical llm.Response. Thinking
|
||||
// and other unrecognized block types are tolerated and skipped — they are
|
||||
// not part of the canonical content vocabulary.
|
||||
func (m *model) toResponse(wr *wireResponse) *llm.Response {
|
||||
resp := &llm.Response{
|
||||
FinishReason: mapStopReason(wr.StopReason),
|
||||
Usage: wr.Usage.toUsage(),
|
||||
Model: m.fullName(),
|
||||
Raw: wr,
|
||||
}
|
||||
for _, block := range wr.Content {
|
||||
switch block.Type {
|
||||
case "text":
|
||||
resp.Parts = append(resp.Parts, llm.TextPart{Text: block.Text})
|
||||
case "tool_use":
|
||||
args := block.Input
|
||||
if len(args) == 0 {
|
||||
args = json.RawMessage("{}")
|
||||
}
|
||||
resp.ToolCalls = append(resp.ToolCalls, llm.ToolCall{
|
||||
ID: block.ID,
|
||||
Name: block.Name,
|
||||
Arguments: args,
|
||||
})
|
||||
default:
|
||||
// thinking, redacted_thinking, server-tool blocks, and any
|
||||
// future types are skipped, not surfaced as parts.
|
||||
}
|
||||
}
|
||||
return resp
|
||||
}
|
||||
Reference in New Issue
Block a user