// Package anthropic implements llm.Provider for the Anthropic Messages API // and Anthropic-compatible endpoints. // // API surface targeted: POST {base}/v1/messages with headers x-api-key, // anthropic-version: 2023-06-01, and content-type: application/json, per the // platform.claude.com Messages API reference as of June 2026. Streaming uses // the documented SSE event sequence (message_start, content_block_start, // content_block_delta, content_block_stop, message_delta, message_stop). // Structured output uses the GA output_config.format mechanism with // {"type":"json_schema"}; the result arrives as JSON text in the first text // content block. // // Why a hand-rolled client (no SDK): ADR-0007 — majordomo is stdlib-first, // and the canonical llm contract needs only a narrow slice of the API. package anthropic import ( "bytes" "context" "encoding/json" "fmt" "io" "net/http" "os" "strings" "gitea.stevedudenhoeffer.com/steve/majordomo/llm" ) const ( defaultName = "anthropic" defaultBaseURL = "https://api.anthropic.com" // apiVersion is the anthropic-version header value. 2023-06-01 remains // the current (and only) stable version string as of June 2026. apiVersion = "2023-06-01" // defaultMaxTokens is used when Request.MaxTokens is 0, because the // Messages API requires max_tokens on every request. defaultMaxTokens = 4096 ) // defaultCapabilities reflects the documented first-party API image limits: // 100 images per request (200K-context models), 10 MB per image, 8000 px per // side, and the four supported media types. func defaultCapabilities() llm.Capabilities { return llm.Capabilities{ SupportsTools: true, SupportsStructured: true, SupportsStreaming: true, MaxImagesPerReq: 100, MaxImageBytes: 10 << 20, MaxImageDimension: 8000, AllowedImageMIME: []string{ "image/jpeg", "image/png", "image/gif", "image/webp", }, } } // Provider is an llm.Provider backed by the Anthropic Messages API. type Provider struct { name string apiKey string baseURL string client *http.Client caps llm.Capabilities maxTokens int } // Option configures the provider at construction. type Option func(*Provider) // WithAPIKey sets the API key explicitly, bypassing the ANTHROPIC_API_KEY // environment default. func WithAPIKey(key string) Option { return func(p *Provider) { p.apiKey = key } } // WithBaseURL points the provider at an Anthropic-compatible endpoint. A // trailing slash is trimmed; "/v1/messages" is appended per request. func WithBaseURL(u string) Option { return func(p *Provider) { p.baseURL = strings.TrimRight(u, "/") } } // WithHTTPClient replaces the HTTP client (timeouts, proxies, test doubles). func WithHTTPClient(c *http.Client) Option { return func(p *Provider) { p.client = c } } // WithName overrides the registry name. Why: an Anthropic-compatible // endpoint registered under its own name must surface that name in // Response.Model and errors, not "anthropic". func WithName(name string) Option { return func(p *Provider) { p.name = name } } // WithDefaultCapabilities replaces the provider-default capabilities. func WithDefaultCapabilities(caps llm.Capabilities) Option { return func(p *Provider) { p.caps = caps } } // WithDefaultMaxTokens overrides the max_tokens value used when // Request.MaxTokens is 0. Why: the Messages API rejects requests without // max_tokens, so the provider must always send something. func WithDefaultMaxTokens(n int) Option { return func(p *Provider) { p.maxTokens = n } } // New creates an Anthropic provider. It never fails: a missing API key // (no WithAPIKey and no ANTHROPIC_API_KEY in the environment) surfaces as a // 401-style *llm.APIError at request time, not at construction. func New(opts ...Option) *Provider { p := &Provider{ name: defaultName, baseURL: defaultBaseURL, client: http.DefaultClient, caps: defaultCapabilities(), maxTokens: defaultMaxTokens, } for _, opt := range opts { opt(p) } if p.apiKey == "" { p.apiKey = os.Getenv("ANTHROPIC_API_KEY") } return p } // Name implements llm.Provider. func (p *Provider) Name() string { return p.name } // Model implements llm.Provider. The id is passed through verbatim — it is // never validated against a catalog. func (p *Provider) Model(id string, opts ...llm.ModelOption) (llm.Model, error) { cfg := llm.ApplyModelOptions(opts) caps := p.caps if cfg.Capabilities != nil { caps = *cfg.Capabilities } return &model{provider: p, id: id, caps: caps}, nil } type model struct { provider *Provider id string caps llm.Capabilities } // Capabilities implements llm.Model. func (m *model) Capabilities() llm.Capabilities { return m.caps } // fullName is the "provider/model" identifier used in Response.Model. func (m *model) fullName() string { return m.provider.name + "/" + m.id } // Generate implements llm.Model. func (m *model) Generate(ctx context.Context, req llm.Request, opts ...llm.Option) (*llm.Response, error) { req = req.Apply(opts...) if err := m.enforceCapabilities(req); err != nil { return nil, err } httpResp, err := m.do(ctx, req, false) if err != nil { return nil, err } defer httpResp.Body.Close() if httpResp.StatusCode/100 != 2 { return nil, m.apiError(httpResp) } var wr wireResponse if err := json.NewDecoder(httpResp.Body).Decode(&wr); err != nil { return nil, fmt.Errorf("%s: decode response: %w", m.provider.name, err) } return m.toResponse(&wr), nil } // Stream implements llm.Model. A non-2xx status is returned as an error from // Stream itself, before any events are delivered. func (m *model) Stream(ctx context.Context, req llm.Request, opts ...llm.Option) (llm.Stream, error) { req = req.Apply(opts...) if err := m.enforceCapabilities(req); err != nil { return nil, err } httpResp, err := m.do(ctx, req, true) if err != nil { return nil, err } if httpResp.StatusCode/100 != 2 { defer httpResp.Body.Close() return nil, m.apiError(httpResp) } return newStream(m, httpResp.Body), nil } // enforceCapabilities is the honest backstop behind the media layer: it // rejects (rather than silently mutates) requests the target cannot serve. // Why: a separate media layer resizes/transcodes images BEFORE requests // reach the provider, so anything still out of bounds here is a real error. func (m *model) enforceCapabilities(req llm.Request) error { images := 0 for _, msg := range req.Messages { for _, part := range msg.Parts { img, ok := part.(llm.ImagePart) if !ok { continue } images++ if !m.caps.SupportsImages() { return fmt.Errorf("%w: %s does not accept image input", llm.ErrUnsupported, m.fullName()) } if !m.caps.MIMEAllowed(img.MIME) { return fmt.Errorf("%w: %s does not accept image MIME %q", llm.ErrUnsupported, m.fullName(), img.MIME) } if m.caps.MaxImageBytes > 0 && len(img.Data) > m.caps.MaxImageBytes { return fmt.Errorf("%w: image of %d bytes exceeds %s limit of %d bytes", llm.ErrUnsupported, len(img.Data), m.fullName(), m.caps.MaxImageBytes) } } } if m.caps.MaxImagesPerReq > 0 && images > m.caps.MaxImagesPerReq { return fmt.Errorf("%w: request carries %d images, %s allows at most %d", llm.ErrUnsupported, images, m.fullName(), m.caps.MaxImagesPerReq) } return nil } // do builds and executes one Messages API call. Transport errors are wrapped // with context but NOT converted to *llm.APIError, so llm.Classify still // sees the underlying net.Error / syscall errno. func (m *model) do(ctx context.Context, req llm.Request, streaming bool) (*http.Response, error) { p := m.provider if p.apiKey == "" { // Why request-time, not construction-time: New never fails by // convention, and a 401-shaped APIError classifies permanent so // chains fail fast past a misconfigured target. return nil, &llm.APIError{ Provider: p.name, Model: m.id, Status: http.StatusUnauthorized, Code: "authentication_error", Message: "no API key configured: set ANTHROPIC_API_KEY or use WithAPIKey", } } body, err := json.Marshal(buildWireRequest(m.id, req, p.maxTokens, streaming)) if err != nil { return nil, fmt.Errorf("%s: encode request: %w", p.name, err) } httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, p.baseURL+"/v1/messages", bytes.NewReader(body)) if err != nil { return nil, fmt.Errorf("%s: build request: %w", p.name, err) } httpReq.Header.Set("x-api-key", p.apiKey) httpReq.Header.Set("anthropic-version", apiVersion) httpReq.Header.Set("content-type", "application/json") if streaming { httpReq.Header.Set("accept", "text/event-stream") } resp, err := p.client.Do(httpReq) if err != nil { return nil, fmt.Errorf("%s: do request: %w", p.name, err) } return resp, nil } // apiError converts a non-2xx response into *llm.APIError, filling Code and // Message from the documented {"type":"error","error":{...}} body when it // parses, and falling back to the raw body text when it does not. func (m *model) apiError(resp *http.Response) error { apiErr := &llm.APIError{ Provider: m.provider.name, Model: m.id, Status: resp.StatusCode, } body, err := io.ReadAll(io.LimitReader(resp.Body, 1<<20)) if err != nil { return apiErr } var we wireErrorEnvelope if json.Unmarshal(body, &we) == nil && we.Error.Type != "" { apiErr.Code = we.Error.Type apiErr.Message = we.Error.Message } else { apiErr.Message = strings.TrimSpace(string(body)) } return apiErr } // toResponse maps a wire response onto the canonical llm.Response. Thinking // and other unrecognized block types are tolerated and skipped — they are // not part of the canonical content vocabulary. func (m *model) toResponse(wr *wireResponse) *llm.Response { resp := &llm.Response{ FinishReason: mapStopReason(wr.StopReason), Usage: wr.Usage.toUsage(), Model: m.fullName(), Raw: wr, } for _, block := range wr.Content { switch block.Type { case "text": resp.Parts = append(resp.Parts, llm.TextPart{Text: block.Text}) case "tool_use": args := block.Input if len(args) == 0 { args = json.RawMessage("{}") } resp.ToolCalls = append(resp.ToolCalls, llm.ToolCall{ ID: block.ID, Name: block.Name, Arguments: args, }) default: // thinking, redacted_thinking, server-tool blocks, and any // future types are skipped, not surfaced as parts. } } return resp }