043249e0e1
Phase 3: - provider/openai: Chat Completions for OpenAI + compat endpoints (SSE streaming with by-index tool-call assembly, response_format json_schema, legacy max_tokens option, reasoning_effort) - provider/anthropic: Messages API (tool_use/tool_result, GA structured output via output_config.format, full SSE event parser, 529 transient) - provider/ollama: one native /api/chat client behind the ollama, ollama-cloud, and foreman built-ins (presets; NDJSON streaming tolerant of foreman's buffered single-object responses; object tool arguments; format-schema structured output; think mapping) - media/: capability normalization (sniff, downscale, transcode, byte ladder, ErrUnsupported), wired into the chain executor per target with penalty-free advance past incapable elements - registry: real provider + scheme wiring, WithHTTPClient option, required env-foreman TLS chat round-trip test - ADR-0009 multimodal strategy, ADR-0010 tools/structured mapping; README matrix + CLAUDE.md synced Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
320 lines
10 KiB
Go
320 lines
10 KiB
Go
// Package anthropic implements llm.Provider for the Anthropic Messages API
// and Anthropic-compatible endpoints.
//
// API surface targeted: POST {base}/v1/messages with headers x-api-key,
// anthropic-version: 2023-06-01, and content-type: application/json, per the
// platform.claude.com Messages API reference as of June 2026. Streaming uses
// the documented SSE event sequence (message_start, content_block_start,
// content_block_delta, content_block_stop, message_delta, message_stop).
// Structured output uses the GA output_config.format mechanism with
// {"type":"json_schema"}; the result arrives as JSON text in the first text
// content block.
//
// Why a hand-rolled client (no SDK): ADR-0007 — majordomo is stdlib-first,
// and the canonical llm contract needs only a narrow slice of the API.
|
|
package anthropic
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"os"
|
|
"strings"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
|
|
)
|
|
|
|
// defaultName is the registry name a provider reports unless WithName
// replaces it.
const defaultName = "anthropic"

// defaultBaseURL is the first-party API origin used unless WithBaseURL
// replaces it.
const defaultBaseURL = "https://api.anthropic.com"

// apiVersion is the value sent in the anthropic-version header. 2023-06-01
// was the current (and only) stable version string as of June 2026.
const apiVersion = "2023-06-01"

// defaultMaxTokens is the fallback used when Request.MaxTokens is 0. Why a
// fallback exists at all: the Messages API requires max_tokens on every
// request.
const defaultMaxTokens = 4096
|
|
|
|
// defaultCapabilities reflects the documented first-party API image limits:
|
|
// 100 images per request (200K-context models), 10 MB per image, 8000 px per
|
|
// side, and the four supported media types.
|
|
func defaultCapabilities() llm.Capabilities {
|
|
return llm.Capabilities{
|
|
SupportsTools: true,
|
|
SupportsStructured: true,
|
|
SupportsStreaming: true,
|
|
MaxImagesPerReq: 100,
|
|
MaxImageBytes: 10 << 20,
|
|
MaxImageDimension: 8000,
|
|
AllowedImageMIME: []string{
|
|
"image/jpeg", "image/png", "image/gif", "image/webp",
|
|
},
|
|
}
|
|
}
|
|
|
|
// Provider is an llm.Provider backed by the Anthropic Messages API.
|
|
type Provider struct {
|
|
name string
|
|
apiKey string
|
|
baseURL string
|
|
client *http.Client
|
|
caps llm.Capabilities
|
|
maxTokens int
|
|
}
|
|
|
|
// Option configures the provider at construction.
|
|
type Option func(*Provider)
|
|
|
|
// WithAPIKey sets the API key explicitly, bypassing the ANTHROPIC_API_KEY
|
|
// environment default.
|
|
func WithAPIKey(key string) Option {
|
|
return func(p *Provider) { p.apiKey = key }
|
|
}
|
|
|
|
// WithBaseURL points the provider at an Anthropic-compatible endpoint. A
|
|
// trailing slash is trimmed; "/v1/messages" is appended per request.
|
|
func WithBaseURL(u string) Option {
|
|
return func(p *Provider) { p.baseURL = strings.TrimRight(u, "/") }
|
|
}
|
|
|
|
// WithHTTPClient replaces the HTTP client (timeouts, proxies, test doubles).
|
|
func WithHTTPClient(c *http.Client) Option {
|
|
return func(p *Provider) { p.client = c }
|
|
}
|
|
|
|
// WithName overrides the registry name. Why: an Anthropic-compatible
|
|
// endpoint registered under its own name must surface that name in
|
|
// Response.Model and errors, not "anthropic".
|
|
func WithName(name string) Option {
|
|
return func(p *Provider) { p.name = name }
|
|
}
|
|
|
|
// WithDefaultCapabilities replaces the provider-default capabilities.
|
|
func WithDefaultCapabilities(caps llm.Capabilities) Option {
|
|
return func(p *Provider) { p.caps = caps }
|
|
}
|
|
|
|
// WithDefaultMaxTokens overrides the max_tokens value used when
|
|
// Request.MaxTokens is 0. Why: the Messages API rejects requests without
|
|
// max_tokens, so the provider must always send something.
|
|
func WithDefaultMaxTokens(n int) Option {
|
|
return func(p *Provider) { p.maxTokens = n }
|
|
}
|
|
|
|
// New creates an Anthropic provider. It never fails: a missing API key
|
|
// (no WithAPIKey and no ANTHROPIC_API_KEY in the environment) surfaces as a
|
|
// 401-style *llm.APIError at request time, not at construction.
|
|
func New(opts ...Option) *Provider {
|
|
p := &Provider{
|
|
name: defaultName,
|
|
baseURL: defaultBaseURL,
|
|
client: http.DefaultClient,
|
|
caps: defaultCapabilities(),
|
|
maxTokens: defaultMaxTokens,
|
|
}
|
|
for _, opt := range opts {
|
|
opt(p)
|
|
}
|
|
if p.apiKey == "" {
|
|
p.apiKey = os.Getenv("ANTHROPIC_API_KEY")
|
|
}
|
|
return p
|
|
}
|
|
|
|
// Name implements llm.Provider.
|
|
func (p *Provider) Name() string { return p.name }
|
|
|
|
// Model implements llm.Provider. The id is passed through verbatim — it is
|
|
// never validated against a catalog.
|
|
func (p *Provider) Model(id string, opts ...llm.ModelOption) (llm.Model, error) {
|
|
cfg := llm.ApplyModelOptions(opts)
|
|
caps := p.caps
|
|
if cfg.Capabilities != nil {
|
|
caps = *cfg.Capabilities
|
|
}
|
|
return &model{provider: p, id: id, caps: caps}, nil
|
|
}
|
|
|
|
type model struct {
|
|
provider *Provider
|
|
id string
|
|
caps llm.Capabilities
|
|
}
|
|
|
|
// Capabilities implements llm.Model.
|
|
func (m *model) Capabilities() llm.Capabilities { return m.caps }
|
|
|
|
// fullName is the "provider/model" identifier used in Response.Model.
|
|
func (m *model) fullName() string { return m.provider.name + "/" + m.id }
|
|
|
|
// Generate implements llm.Model.
|
|
func (m *model) Generate(ctx context.Context, req llm.Request, opts ...llm.Option) (*llm.Response, error) {
|
|
req = req.Apply(opts...)
|
|
if err := m.enforceCapabilities(req); err != nil {
|
|
return nil, err
|
|
}
|
|
httpResp, err := m.do(ctx, req, false)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer httpResp.Body.Close()
|
|
if httpResp.StatusCode/100 != 2 {
|
|
return nil, m.apiError(httpResp)
|
|
}
|
|
var wr wireResponse
|
|
if err := json.NewDecoder(httpResp.Body).Decode(&wr); err != nil {
|
|
return nil, fmt.Errorf("%s: decode response: %w", m.provider.name, err)
|
|
}
|
|
return m.toResponse(&wr), nil
|
|
}
|
|
|
|
// Stream implements llm.Model. A non-2xx status is returned as an error from
|
|
// Stream itself, before any events are delivered.
|
|
func (m *model) Stream(ctx context.Context, req llm.Request, opts ...llm.Option) (llm.Stream, error) {
|
|
req = req.Apply(opts...)
|
|
if err := m.enforceCapabilities(req); err != nil {
|
|
return nil, err
|
|
}
|
|
httpResp, err := m.do(ctx, req, true)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if httpResp.StatusCode/100 != 2 {
|
|
defer httpResp.Body.Close()
|
|
return nil, m.apiError(httpResp)
|
|
}
|
|
return newStream(m, httpResp.Body), nil
|
|
}
|
|
|
|
// enforceCapabilities is the honest backstop behind the media layer: it
|
|
// rejects (rather than silently mutates) requests the target cannot serve.
|
|
// Why: a separate media layer resizes/transcodes images BEFORE requests
|
|
// reach the provider, so anything still out of bounds here is a real error.
|
|
func (m *model) enforceCapabilities(req llm.Request) error {
|
|
images := 0
|
|
for _, msg := range req.Messages {
|
|
for _, part := range msg.Parts {
|
|
img, ok := part.(llm.ImagePart)
|
|
if !ok {
|
|
continue
|
|
}
|
|
images++
|
|
if !m.caps.SupportsImages() {
|
|
return fmt.Errorf("%w: %s does not accept image input", llm.ErrUnsupported, m.fullName())
|
|
}
|
|
if !m.caps.MIMEAllowed(img.MIME) {
|
|
return fmt.Errorf("%w: %s does not accept image MIME %q", llm.ErrUnsupported, m.fullName(), img.MIME)
|
|
}
|
|
if m.caps.MaxImageBytes > 0 && len(img.Data) > m.caps.MaxImageBytes {
|
|
return fmt.Errorf("%w: image of %d bytes exceeds %s limit of %d bytes",
|
|
llm.ErrUnsupported, len(img.Data), m.fullName(), m.caps.MaxImageBytes)
|
|
}
|
|
}
|
|
}
|
|
if m.caps.MaxImagesPerReq > 0 && images > m.caps.MaxImagesPerReq {
|
|
return fmt.Errorf("%w: request carries %d images, %s allows at most %d",
|
|
llm.ErrUnsupported, images, m.fullName(), m.caps.MaxImagesPerReq)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// do builds and executes one Messages API call. Transport errors are wrapped
|
|
// with context but NOT converted to *llm.APIError, so llm.Classify still
|
|
// sees the underlying net.Error / syscall errno.
|
|
func (m *model) do(ctx context.Context, req llm.Request, streaming bool) (*http.Response, error) {
|
|
p := m.provider
|
|
if p.apiKey == "" {
|
|
// Why request-time, not construction-time: New never fails by
|
|
// convention, and a 401-shaped APIError classifies permanent so
|
|
// chains fail fast past a misconfigured target.
|
|
return nil, &llm.APIError{
|
|
Provider: p.name,
|
|
Model: m.id,
|
|
Status: http.StatusUnauthorized,
|
|
Code: "authentication_error",
|
|
Message: "no API key configured: set ANTHROPIC_API_KEY or use WithAPIKey",
|
|
}
|
|
}
|
|
|
|
body, err := json.Marshal(buildWireRequest(m.id, req, p.maxTokens, streaming))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("%s: encode request: %w", p.name, err)
|
|
}
|
|
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, p.baseURL+"/v1/messages", bytes.NewReader(body))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("%s: build request: %w", p.name, err)
|
|
}
|
|
httpReq.Header.Set("x-api-key", p.apiKey)
|
|
httpReq.Header.Set("anthropic-version", apiVersion)
|
|
httpReq.Header.Set("content-type", "application/json")
|
|
if streaming {
|
|
httpReq.Header.Set("accept", "text/event-stream")
|
|
}
|
|
|
|
resp, err := p.client.Do(httpReq)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("%s: do request: %w", p.name, err)
|
|
}
|
|
return resp, nil
|
|
}
|
|
|
|
// apiError converts a non-2xx response into *llm.APIError, filling Code and
|
|
// Message from the documented {"type":"error","error":{...}} body when it
|
|
// parses, and falling back to the raw body text when it does not.
|
|
func (m *model) apiError(resp *http.Response) error {
|
|
apiErr := &llm.APIError{
|
|
Provider: m.provider.name,
|
|
Model: m.id,
|
|
Status: resp.StatusCode,
|
|
}
|
|
body, err := io.ReadAll(io.LimitReader(resp.Body, 1<<20))
|
|
if err != nil {
|
|
return apiErr
|
|
}
|
|
var we wireErrorEnvelope
|
|
if json.Unmarshal(body, &we) == nil && we.Error.Type != "" {
|
|
apiErr.Code = we.Error.Type
|
|
apiErr.Message = we.Error.Message
|
|
} else {
|
|
apiErr.Message = strings.TrimSpace(string(body))
|
|
}
|
|
return apiErr
|
|
}
|
|
|
|
// toResponse maps a wire response onto the canonical llm.Response. Thinking
|
|
// and other unrecognized block types are tolerated and skipped — they are
|
|
// not part of the canonical content vocabulary.
|
|
func (m *model) toResponse(wr *wireResponse) *llm.Response {
|
|
resp := &llm.Response{
|
|
FinishReason: mapStopReason(wr.StopReason),
|
|
Usage: wr.Usage.toUsage(),
|
|
Model: m.fullName(),
|
|
Raw: wr,
|
|
}
|
|
for _, block := range wr.Content {
|
|
switch block.Type {
|
|
case "text":
|
|
resp.Parts = append(resp.Parts, llm.TextPart{Text: block.Text})
|
|
case "tool_use":
|
|
args := block.Input
|
|
if len(args) == 0 {
|
|
args = json.RawMessage("{}")
|
|
}
|
|
resp.ToolCalls = append(resp.ToolCalls, llm.ToolCall{
|
|
ID: block.ID,
|
|
Name: block.Name,
|
|
Arguments: args,
|
|
})
|
|
default:
|
|
// thinking, redacted_thinking, server-tool blocks, and any
|
|
// future types are skipped, not surfaced as parts.
|
|
}
|
|
}
|
|
return resp
|
|
}
|