Files
majordomo/provider/anthropic/anthropic.go
T
steve 043249e0e1 feat: OpenAI, Anthropic, and native-Ollama providers + media pipeline
Phase 3:
- provider/openai: Chat Completions for OpenAI + compat endpoints (SSE
  streaming with by-index tool-call assembly, response_format json_schema,
  legacy max_tokens option, reasoning_effort)
- provider/anthropic: Messages API (tool_use/tool_result, GA structured
  output via output_config.format, full SSE event parser, 529 transient)
- provider/ollama: one native /api/chat client behind the ollama,
  ollama-cloud, and foreman built-ins (presets; NDJSON streaming tolerant
  of foreman's buffered single-object responses; object tool arguments;
  format-schema structured output; think mapping)
- media/: capability normalization (sniff, downscale, transcode, byte
  ladder, ErrUnsupported), wired into the chain executor per target with
  penalty-free advance past incapable elements
- registry: real provider + scheme wiring, WithHTTPClient option, required
  env-foreman TLS chat round-trip test
- ADR-0009 multimodal strategy, ADR-0010 tools/structured mapping; README
  matrix + CLAUDE.md synced

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-10 12:58:08 +02:00

320 lines
10 KiB
Go

// Package anthropic implements llm.Provider for the Anthropic Messages API
// and Anthropic-compatible endpoints.
//
// API surface targeted: POST {base}/v1/messages with headers x-api-key,
// anthropic-version: 2023-06-01, and content-type: application/json, per the
// platform.claude.com Messages API reference as of June 2026. Streaming uses
// the documented SSE event sequence (message_start, content_block_start,
// content_block_delta, content_block_stop, message_delta, message_stop).
// Structured output uses the GA output_config.format mechanism with
// {"type":"json_schema"}; the result arrives as JSON text in the first text
// content block.
//
// Why a hand-rolled client (no SDK): ADR-0007 — majordomo is stdlib-first,
// and the canonical llm contract needs only a narrow slice of the API.
package anthropic
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"os"
"strings"
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
)
// Identity and endpoint defaults that New applies before any Option runs.
const (
	defaultName    = "anthropic"
	defaultBaseURL = "https://api.anthropic.com"
)

// apiVersion is the value sent in the anthropic-version request header.
// 2023-06-01 remains the current (and only) stable version string as of
// June 2026.
const apiVersion = "2023-06-01"

// defaultMaxTokens is the fallback used when Request.MaxTokens is 0. The
// Messages API requires max_tokens on every request, so some value must
// always be available.
const defaultMaxTokens = 4096
// defaultCapabilities builds the capability set a provider starts with. The
// image limits mirror the documented first-party API limits: at most 100
// images per request (200K-context models), 10 MB per image, 8000 px per
// side, and four accepted media types. A fresh value (including a fresh MIME
// slice) is returned on every call.
func defaultCapabilities() llm.Capabilities {
	var caps llm.Capabilities

	// Feature switches.
	caps.SupportsTools = true
	caps.SupportsStructured = true
	caps.SupportsStreaming = true

	// Image input limits.
	caps.MaxImagesPerReq = 100
	caps.MaxImageBytes = 10 << 20 // 10 MiB
	caps.MaxImageDimension = 8000
	caps.AllowedImageMIME = []string{
		"image/jpeg",
		"image/png",
		"image/gif",
		"image/webp",
	}

	return caps
}
// Provider is an llm.Provider backed by the Anthropic Messages API. Build
// one with New; the fields are set from defaults and Options at construction.
type Provider struct {
	// name is the registry name: returned by Name, used as the prefix of
	// the "provider/model" identifier, and stamped on errors.
	name string
	// apiKey is sent as the x-api-key header. When empty, requests fail
	// with a 401-style error before any network traffic.
	apiKey string
	// baseURL is the endpoint root; "/v1/messages" is appended per request.
	baseURL string
	// client executes the HTTP requests.
	client *http.Client
	// caps are the capabilities given to models that do not supply their
	// own via a model option.
	caps llm.Capabilities
	// maxTokens is the provider-level max_tokens default handed to the
	// request builder.
	maxTokens int
}
// Option configures the provider at construction. It is a functional
// option: a function that mutates the Provider being built. New applies the
// options it is given in order, so a later option overrides an earlier one
// that sets the same field.
type Option func(*Provider)
// WithAPIKey supplies the API key directly. When the key is non-empty, New
// does not fall back to the ANTHROPIC_API_KEY environment variable.
func WithAPIKey(key string) Option {
	return func(target *Provider) {
		target.apiKey = key
	}
}
// WithBaseURL directs the provider to an Anthropic-compatible endpoint. All
// trailing slashes are stripped from u, because "/v1/messages" is appended
// to the base URL on every request.
func WithBaseURL(u string) Option {
	trimmed := strings.TrimRight(u, "/")
	return func(target *Provider) {
		target.baseURL = trimmed
	}
}
// WithHTTPClient swaps in a caller-supplied HTTP client, e.g. to configure
// timeouts or proxies, or to inject a test double.
func WithHTTPClient(c *http.Client) Option {
	return func(target *Provider) {
		target.client = c
	}
}
// WithName replaces the registry name. Why: when an Anthropic-compatible
// endpoint is registered under its own name, that name (not "anthropic")
// must appear in Response.Model and in errors.
func WithName(name string) Option {
	return func(target *Provider) {
		target.name = name
	}
}
// WithDefaultCapabilities swaps the provider-wide default capabilities for
// caps. Models created without their own capabilities override inherit
// these.
func WithDefaultCapabilities(caps llm.Capabilities) Option {
	return func(target *Provider) {
		target.caps = caps
	}
}
// WithDefaultMaxTokens sets the max_tokens value used for requests whose
// Request.MaxTokens is 0. Why: the Messages API rejects requests that omit
// max_tokens, so the provider always needs a value to send.
func WithDefaultMaxTokens(n int) Option {
	return func(target *Provider) {
		target.maxTokens = n
	}
}
// New creates an Anthropic provider. It never fails: a missing API key
// (no WithAPIKey and no ANTHROPIC_API_KEY in the environment) surfaces as a
// 401-style *llm.APIError at request time, not at construction.
//
// Options are applied in order on top of the built-in defaults; nil options
// are skipped. Once the options have run, settings that could never yield a
// working request are normalized back to their defaults:
//   - a nil HTTP client becomes http.DefaultClient (calling Do on a nil
//     *http.Client panics),
//   - an empty base URL becomes the first-party API URL,
//   - a non-positive default max_tokens becomes defaultMaxTokens, because
//     the Messages API requires max_tokens on every request.
func New(opts ...Option) *Provider {
	p := &Provider{
		name:      defaultName,
		baseURL:   defaultBaseURL,
		client:    http.DefaultClient,
		caps:      defaultCapabilities(),
		maxTokens: defaultMaxTokens,
	}
	for _, opt := range opts {
		if opt == nil {
			// Tolerate nil entries so conditionally-built option lists
			// cannot crash construction.
			continue
		}
		opt(p)
	}

	// Normalize values an option may have zeroed out.
	if p.client == nil {
		p.client = http.DefaultClient
	}
	if p.baseURL == "" {
		p.baseURL = defaultBaseURL
	}
	if p.maxTokens <= 0 {
		p.maxTokens = defaultMaxTokens
	}

	// An explicit key wins; the environment is only a fallback.
	if p.apiKey == "" {
		p.apiKey = os.Getenv("ANTHROPIC_API_KEY")
	}
	return p
}
// Name implements llm.Provider by reporting the provider's registry name.
func (p *Provider) Name() string {
	return p.name
}
// Model implements llm.Provider. The id is forwarded to the API verbatim and
// is never checked against a catalog, so this method always returns a nil
// error. The model starts with the provider's default capabilities; a
// capabilities value supplied through opts replaces them wholesale.
func (p *Provider) Model(id string, opts ...llm.ModelOption) (llm.Model, error) {
	cfg := llm.ApplyModelOptions(opts)

	m := &model{provider: p, id: id, caps: p.caps}
	if override := cfg.Capabilities; override != nil {
		m.caps = *override
	}
	return m, nil
}
// model is one model id bound to the Provider that created it, together
// with the capabilities in effect for that model.
type model struct {
	// provider supplies credentials, endpoint, HTTP client, and name.
	provider *Provider
	// id is the model identifier sent to the API as given by the caller.
	id string
	// caps are the limits enforced before a request is sent.
	caps llm.Capabilities
}
// Capabilities implements llm.Model by returning the capabilities bound to
// this model when it was created.
func (m *model) Capabilities() llm.Capabilities {
	return m.caps
}
// fullName returns the "provider/model" identifier reported in
// Response.Model and quoted in capability errors.
func (m *model) fullName() string {
	providerName, modelID := m.provider.name, m.id
	return providerName + "/" + modelID
}
// Generate implements llm.Model. It applies opts to req, checks the result
// against the model's capabilities, performs one non-streaming Messages API
// call, and converts the decoded body into an llm.Response. A non-2xx status
// is returned as the error produced by apiError; a body that fails to decode
// is returned as a wrapped decode error.
func (m *model) Generate(ctx context.Context, req llm.Request, opts ...llm.Option) (*llm.Response, error) {
	effective := req.Apply(opts...)
	if capErr := m.enforceCapabilities(effective); capErr != nil {
		return nil, capErr
	}

	httpResp, err := m.do(ctx, effective, false)
	if err != nil {
		return nil, err
	}
	defer httpResp.Body.Close()

	if status := httpResp.StatusCode; status < 200 || status > 299 {
		return nil, m.apiError(httpResp)
	}

	decoded := new(wireResponse)
	if decodeErr := json.NewDecoder(httpResp.Body).Decode(decoded); decodeErr != nil {
		return nil, fmt.Errorf("%s: decode response: %w", m.provider.name, decodeErr)
	}
	return m.toResponse(decoded), nil
}
// Stream implements llm.Model. It applies opts, enforces capabilities, and
// issues a streaming Messages API call. A non-2xx status is reported as an
// error from Stream itself, before any events are delivered; in that case
// the response body is closed here. On success the still-open body is handed
// to newStream.
func (m *model) Stream(ctx context.Context, req llm.Request, opts ...llm.Option) (llm.Stream, error) {
	effective := req.Apply(opts...)
	if capErr := m.enforceCapabilities(effective); capErr != nil {
		return nil, capErr
	}

	httpResp, err := m.do(ctx, effective, true)
	if err != nil {
		return nil, err
	}

	if httpResp.StatusCode/100 == 2 {
		return newStream(m, httpResp.Body), nil
	}

	// Failure path: read the error body, then release the connection.
	defer httpResp.Body.Close()
	return nil, m.apiError(httpResp)
}
// enforceCapabilities is the honest backstop behind the media layer: it
// rejects requests the target cannot serve instead of silently altering
// them. Why: a separate media layer resizes/transcodes images BEFORE a
// request reaches the provider, so anything still out of bounds here is a
// genuine error.
//
// Every image part is checked, in message order, for image support, allowed
// MIME type, and per-image byte size; the total image count is checked last.
// The first violation is returned wrapped around llm.ErrUnsupported. Parts
// that are not images are ignored.
func (m *model) enforceCapabilities(req llm.Request) error {
	count := 0
	for _, msg := range req.Messages {
		for _, part := range msg.Parts {
			switch img := part.(type) {
			case llm.ImagePart:
				count++
				switch {
				case !m.caps.SupportsImages():
					return fmt.Errorf("%w: %s does not accept image input", llm.ErrUnsupported, m.fullName())
				case !m.caps.MIMEAllowed(img.MIME):
					return fmt.Errorf("%w: %s does not accept image MIME %q", llm.ErrUnsupported, m.fullName(), img.MIME)
				case m.caps.MaxImageBytes > 0 && len(img.Data) > m.caps.MaxImageBytes:
					return fmt.Errorf("%w: image of %d bytes exceeds %s limit of %d bytes",
						llm.ErrUnsupported, len(img.Data), m.fullName(), m.caps.MaxImageBytes)
				}
			}
		}
	}

	// A limit of zero (or less) means "no per-request image cap".
	if limit := m.caps.MaxImagesPerReq; limit > 0 && count > limit {
		return fmt.Errorf("%w: request carries %d images, %s allows at most %d",
			llm.ErrUnsupported, count, m.fullName(), limit)
	}
	return nil
}
// do assembles and sends a single Messages API call and returns the raw HTTP
// response; the caller owns (and must close) the response body. Transport
// failures are wrapped with context but deliberately NOT turned into
// *llm.APIError, so llm.Classify can still see the underlying net.Error /
// syscall errno.
//
// When streaming is true, the request additionally asks for an SSE response
// through the accept header.
func (m *model) do(ctx context.Context, req llm.Request, streaming bool) (*http.Response, error) {
	prov := m.provider

	// Why request-time, not construction-time: New never fails by
	// convention, and a 401-shaped APIError classifies permanent so chains
	// fail fast past a misconfigured target.
	if prov.apiKey == "" {
		missingKey := &llm.APIError{
			Provider: prov.name,
			Model:    m.id,
			Status:   http.StatusUnauthorized,
			Code:     "authentication_error",
			Message:  "no API key configured: set ANTHROPIC_API_KEY or use WithAPIKey",
		}
		return nil, missingKey
	}

	wire := buildWireRequest(m.id, req, prov.maxTokens, streaming)
	payload, err := json.Marshal(wire)
	if err != nil {
		return nil, fmt.Errorf("%s: encode request: %w", prov.name, err)
	}

	endpoint := prov.baseURL + "/v1/messages"
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(payload))
	if err != nil {
		return nil, fmt.Errorf("%s: build request: %w", prov.name, err)
	}

	headers := [][2]string{
		{"x-api-key", prov.apiKey},
		{"anthropic-version", apiVersion},
		{"content-type", "application/json"},
	}
	if streaming {
		headers = append(headers, [2]string{"accept", "text/event-stream"})
	}
	for _, kv := range headers {
		httpReq.Header.Set(kv[0], kv[1])
	}

	httpResp, err := prov.client.Do(httpReq)
	if err != nil {
		return nil, fmt.Errorf("%s: do request: %w", prov.name, err)
	}
	return httpResp, nil
}
// apiError turns a non-2xx response into an *llm.APIError carrying the
// provider name, model id, and HTTP status. At most 1 MiB of the body is
// read. If the body parses as the documented {"type":"error","error":{...}}
// envelope with a non-empty error type, Code and Message come from it;
// otherwise Message is the whitespace-trimmed raw body. If the body cannot
// be read at all, Code and Message stay empty. The caller remains
// responsible for closing resp.Body.
func (m *model) apiError(resp *http.Response) error {
	out := &llm.APIError{
		Provider: m.provider.name,
		Model:    m.id,
		Status:   resp.StatusCode,
	}

	raw, readErr := io.ReadAll(io.LimitReader(resp.Body, 1<<20))
	if readErr != nil {
		return out
	}

	var envelope wireErrorEnvelope
	if parseErr := json.Unmarshal(raw, &envelope); parseErr != nil || envelope.Error.Type == "" {
		// Not the documented error shape: surface the body text as-is.
		out.Message = strings.TrimSpace(string(raw))
		return out
	}

	out.Code = envelope.Error.Type
	out.Message = envelope.Error.Message
	return out
}
// toResponse converts a decoded wire response into the canonical
// llm.Response. Text blocks become TextParts and tool_use blocks become
// ToolCalls, both in wire order. Every other block type (thinking,
// redacted_thinking, server-tool blocks, future additions) is tolerated and
// dropped, because it is not part of the canonical content vocabulary. The
// wire response itself is retained in Raw.
func (m *model) toResponse(wr *wireResponse) *llm.Response {
	out := &llm.Response{
		FinishReason: mapStopReason(wr.StopReason),
		Usage:        wr.Usage.toUsage(),
		Model:        m.fullName(),
		Raw:          wr,
	}

	for _, block := range wr.Content {
		if block.Type == "text" {
			out.Parts = append(out.Parts, llm.TextPart{Text: block.Text})
			continue
		}
		if block.Type != "tool_use" {
			// Unrecognized or non-canonical block: skipped, not surfaced.
			continue
		}

		// A tool call with no input still gets a valid JSON object as
		// its arguments.
		input := block.Input
		if len(input) == 0 {
			input = json.RawMessage("{}")
		}
		call := llm.ToolCall{
			ID:        block.ID,
			Name:      block.Name,
			Arguments: input,
		}
		out.ToolCalls = append(out.ToolCalls, call)
	}
	return out
}