// majordomo/provider/anthropic/anthropic.go

// Package anthropic implements llm.Provider for the Anthropic Messages API
// and Anthropic-compatible endpoints.
//
// API surface targeted: POST {base}/v1/messages with headers x-api-key,
// anthropic-version: 2023-06-01, and content-type: application/json, per the
// platform.claude.com Messages API reference as of June 2026. Streaming uses
// the documented SSE event sequence (message_start, content_block_start,
// content_block_delta, content_block_stop, message_delta, message_stop).
// Structured output uses the GA output_config.format mechanism with
// {"type":"json_schema"}; the result arrives as JSON text in the first text
// content block.
//
// Why a hand-rolled client (no SDK): ADR-0007 — majordomo is stdlib-first,
// and the canonical llm contract needs only a narrow slice of the API.
package anthropic

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"os"
	"strings"

	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
)

const (
	// defaultName is the provider name New assigns before options run.
	defaultName = "anthropic"

	// defaultBaseURL is the API origin New assigns before options run;
	// "/v1/messages" is appended to the base URL on every request.
	defaultBaseURL = "https://api.anthropic.com"

	// defaultMaxTokens is the fallback for a Request whose MaxTokens is 0.
	// Why it exists: the Messages API requires max_tokens on every request.
	defaultMaxTokens = 4096

	// apiVersion is sent as the anthropic-version header. 2023-06-01
	// remains the current (and only) stable version string as of June 2026.
	apiVersion = "2023-06-01"
)

// defaultCapabilities returns a freshly built capability set mirroring the
// documented first-party API image limits: 100 images per request
// (200K-context models), 10 MB per image, 8000 px per side, and the four
// supported media types. Tools, structured output, and streaming are all
// reported as supported.
func defaultCapabilities() llm.Capabilities {
	var caps llm.Capabilities

	caps.SupportsTools = true
	caps.SupportsStructured = true
	caps.SupportsStreaming = true

	caps.MaxImagesPerReq = 100
	caps.MaxImageBytes = 10 << 20
	caps.MaxImageDimension = 8000
	caps.AllowedImageMIME = []string{
		"image/jpeg",
		"image/png",
		"image/gif",
		"image/webp",
	}
	return caps
}

// Provider is an llm.Provider backed by the Anthropic Messages API.
//
// Fields, all set by New and its options:
//   - name: the value returned by Name; also used as the provider label in
//     errors and as the prefix of the "provider/model" identifier.
//   - apiKey: sent as the x-api-key header; when empty, requests fail with a
//     401-shaped *llm.APIError before any network call is made.
//   - baseURL: the origin to which "/v1/messages" is appended per request.
//   - client: the HTTP client that executes every request.
//   - caps: the capabilities handed to models that do not override them.
//   - maxTokens: the provider-level max_tokens default passed to the wire
//     request builder.
type Provider struct {
	name      string
	apiKey    string
	baseURL   string
	client    *http.Client
	caps      llm.Capabilities
	maxTokens int
}

// Option configures the provider at construction. New applies options in
// the order they are passed, after the built-in defaults have been set.
type Option func(*Provider)

// WithAPIKey supplies the API key directly instead of relying on the
// ANTHROPIC_API_KEY environment variable. New only consults the environment
// when the key is still empty after all options have run.
func WithAPIKey(key string) Option {
	setKey := func(p *Provider) {
		p.apiKey = key
	}
	return setKey
}

// WithBaseURL points the provider at an Anthropic-compatible endpoint. All
// trailing slashes are trimmed from u; "/v1/messages" is appended per
// request.
func WithBaseURL(u string) Option {
	trimmed := strings.TrimRight(u, "/")
	return func(p *Provider) {
		p.baseURL = trimmed
	}
}

// WithHTTPClient replaces the HTTP client (timeouts, proxies, test doubles).
//
// A nil client is ignored and whatever client the provider already has
// stays in place. Why: the client is dereferenced on every request, so
// storing nil would turn a configuration slip into a nil-pointer panic at
// request time.
func WithHTTPClient(c *http.Client) Option {
	return func(p *Provider) {
		if c == nil {
			return
		}
		p.client = c
	}
}

// WithName replaces the provider's registry name. Why: an
// Anthropic-compatible endpoint registered under its own name must surface
// that name in Response.Model and errors, not "anthropic".
func WithName(name string) Option {
	rename := func(p *Provider) {
		p.name = name
	}
	return rename
}

// WithDefaultCapabilities swaps the provider-default capabilities for caps.
// Models created without a per-model capability override report these.
func WithDefaultCapabilities(caps llm.Capabilities) Option {
	return func(p *Provider) {
		p.caps = caps
	}
}

// WithDefaultMaxTokens overrides the max_tokens value used when
// Request.MaxTokens is 0. Why: the Messages API rejects requests without
// max_tokens, so the provider must always send something.
//
// A non-positive n is ignored and the existing default is kept. Why: a zero
// or negative default would defeat the purpose of having a fallback, since
// the provider would no longer have a usable max_tokens to send.
func WithDefaultMaxTokens(n int) Option {
	return func(p *Provider) {
		if n <= 0 {
			return
		}
		p.maxTokens = n
	}
}

// New builds an Anthropic provider: it starts from the package defaults,
// applies opts in order, and finally falls back to the ANTHROPIC_API_KEY
// environment variable if no key has been set. It never fails: a missing
// API key surfaces as a 401-style *llm.APIError at request time, not at
// construction.
func New(opts ...Option) *Provider {
	p := new(Provider)
	p.name = defaultName
	p.baseURL = defaultBaseURL
	p.client = http.DefaultClient
	p.caps = defaultCapabilities()
	p.maxTokens = defaultMaxTokens

	for i := range opts {
		opts[i](p)
	}

	if p.apiKey != "" {
		return p
	}
	// No explicit key was supplied; use the environment (possibly empty).
	p.apiKey = os.Getenv("ANTHROPIC_API_KEY")
	return p
}

// Name implements llm.Provider. It reports the provider's configured name.
func (p *Provider) Name() string {
	return p.name
}

// Model implements llm.Provider. The id is passed through verbatim — it is
// never validated against a catalog. The returned model carries the
// provider's default capabilities unless the model options supply their
// own, in which case those replace the defaults wholesale. The error result
// is always nil.
func (p *Provider) Model(id string, opts ...llm.ModelOption) (llm.Model, error) {
	mdl := &model{provider: p, id: id, caps: p.caps}
	if override := llm.ApplyModelOptions(opts).Capabilities; override != nil {
		mdl.caps = *override
	}
	return mdl, nil
}

// model is the value Provider.Model returns. It pairs a verbatim model id
// with the Provider that executes its requests and the capabilities that
// are enforced against each request before it is sent.
type model struct {
	provider *Provider
	id       string
	caps     llm.Capabilities
}

// Capabilities implements llm.Model. It returns the capabilities fixed when
// the model was created.
func (m *model) Capabilities() llm.Capabilities {
	return m.caps
}

// fullName builds the "provider/model" identifier used in Response.Model
// and in capability error messages.
func (m *model) fullName() string {
	providerName, modelID := m.provider.name, m.id
	return providerName + "/" + modelID
}

// Generate implements llm.Model. It applies opts to req, enforces the
// model's capabilities, performs one non-streaming Messages API call, and
// maps the decoded body onto the canonical response.
//
// Errors: a capability violation or a failure from do is returned as-is; a
// non-2xx status is returned as the error built by apiError; a body that
// does not decode is wrapped with the provider name.
func (m *model) Generate(ctx context.Context, req llm.Request, opts ...llm.Option) (*llm.Response, error) {
	// maxDrain bounds how much leftover body is read before closing.
	const maxDrain = 64 << 10

	req = req.Apply(opts...)
	if err := m.enforceCapabilities(req); err != nil {
		return nil, err
	}
	httpResp, err := m.do(ctx, req, false)
	if err != nil {
		return nil, err
	}
	defer func() {
		// Why drain before closing: json.Decoder stops at the end of the
		// first JSON value, and net/http may not reuse an HTTP/1.x
		// keep-alive connection whose body was not read to completion. The
		// drain is bounded and best-effort, so its error is deliberately
		// ignored.
		_, _ = io.Copy(io.Discard, io.LimitReader(httpResp.Body, maxDrain))
		_ = httpResp.Body.Close()
	}()
	if httpResp.StatusCode/100 != 2 {
		return nil, m.apiError(httpResp)
	}
	var wr wireResponse
	if err := json.NewDecoder(httpResp.Body).Decode(&wr); err != nil {
		return nil, fmt.Errorf("%s: decode response: %w", m.provider.name, err)
	}
	return m.toResponse(&wr), nil
}

// Stream implements llm.Model. It applies opts, enforces capabilities, and
// issues a streaming Messages API call. A non-2xx status is returned as an
// error from Stream itself, before any events are delivered; in that case
// the response body is closed here. On success the body is handed to the
// returned stream.
func (m *model) Stream(ctx context.Context, req llm.Request, opts ...llm.Option) (llm.Stream, error) {
	req = req.Apply(opts...)
	if capErr := m.enforceCapabilities(req); capErr != nil {
		return nil, capErr
	}

	httpResp, err := m.do(ctx, req, true)
	if err != nil {
		return nil, err
	}

	if code := httpResp.StatusCode; code >= 200 && code <= 299 {
		return newStream(m, httpResp.Body), nil
	}

	defer httpResp.Body.Close()
	return nil, m.apiError(httpResp)
}

// enforceCapabilities is the honest backstop behind the media layer: it
// rejects (rather than silently mutates) requests the target cannot serve.
// Why: a separate media layer resizes/transcodes images BEFORE requests
// reach the provider, so anything still out of bounds here is a real error.
//
// Every image part is checked, in order, for image support, allowed MIME
// type, and per-image byte size; after all parts are scanned the total
// image count is checked. Each failure wraps llm.ErrUnsupported. Parts that
// are not llm.ImagePart values are ignored.
func (m *model) enforceCapabilities(req llm.Request) error {
	var count int
	for _, msg := range req.Messages {
		for _, part := range msg.Parts {
			img, isImage := part.(llm.ImagePart)
			if !isImage {
				continue
			}
			count++

			switch {
			case !m.caps.SupportsImages():
				return fmt.Errorf("%w: %s does not accept image input", llm.ErrUnsupported, m.fullName())
			case !m.caps.MIMEAllowed(img.MIME):
				return fmt.Errorf("%w: %s does not accept image MIME %q", llm.ErrUnsupported, m.fullName(), img.MIME)
			case m.caps.MaxImageBytes > 0 && len(img.Data) > m.caps.MaxImageBytes:
				return fmt.Errorf("%w: image of %d bytes exceeds %s limit of %d bytes",
					llm.ErrUnsupported, len(img.Data), m.fullName(), m.caps.MaxImageBytes)
			}
		}
	}

	// A limit of zero (or less) means the image count is not restricted.
	if limit := m.caps.MaxImagesPerReq; limit > 0 && count > limit {
		return fmt.Errorf("%w: request carries %d images, %s allows at most %d",
			llm.ErrUnsupported, count, m.fullName(), limit)
	}
	return nil
}

// do builds and executes one Messages API call and returns the raw HTTP
// response, whatever its status; the caller owns the response body.
// Transport errors are wrapped with context but NOT converted to
// *llm.APIError, so llm.Classify still sees the underlying net.Error /
// syscall errno.
func (m *model) do(ctx context.Context, req llm.Request, streaming bool) (*http.Response, error) {
	prov := m.provider

	// Why request-time, not construction-time: New never fails by
	// convention, and a 401-shaped APIError classifies permanent so chains
	// fail fast past a misconfigured target.
	if prov.apiKey == "" {
		return nil, &llm.APIError{
			Provider: prov.name,
			Model:    m.id,
			Status:   http.StatusUnauthorized,
			Code:     "authentication_error",
			Message:  "no API key configured: set ANTHROPIC_API_KEY or use WithAPIKey",
		}
	}

	wire := buildWireRequest(m.id, req, prov.maxTokens, streaming)
	payload, err := json.Marshal(wire)
	if err != nil {
		return nil, fmt.Errorf("%s: encode request: %w", prov.name, err)
	}

	endpoint := prov.baseURL + "/v1/messages"
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(payload))
	if err != nil {
		return nil, fmt.Errorf("%s: build request: %w", prov.name, err)
	}

	headers := [][2]string{
		{"x-api-key", prov.apiKey},
		{"anthropic-version", apiVersion},
		{"content-type", "application/json"},
	}
	if streaming {
		headers = append(headers, [2]string{"accept", "text/event-stream"})
	}
	for _, kv := range headers {
		httpReq.Header.Set(kv[0], kv[1])
	}

	httpResp, err := prov.client.Do(httpReq)
	if err != nil {
		return nil, fmt.Errorf("%s: do request: %w", prov.name, err)
	}
	return httpResp, nil
}

// apiError turns a non-2xx response into an *llm.APIError carrying the
// provider name, model id, and HTTP status. At most 1 MiB of the body is
// read. When the body parses as the documented
// {"type":"error","error":{...}} envelope with a non-empty error type, Code
// and Message come from it; otherwise Message is the whitespace-trimmed raw
// body. If the body cannot be read, only the status information is
// returned. The body is not closed here.
func (m *model) apiError(resp *http.Response) error {
	out := &llm.APIError{
		Provider: m.provider.name,
		Model:    m.id,
		Status:   resp.StatusCode,
	}

	raw, readErr := io.ReadAll(io.LimitReader(resp.Body, 1<<20))
	if readErr != nil {
		return out
	}

	var envelope wireErrorEnvelope
	if err := json.Unmarshal(raw, &envelope); err != nil || envelope.Error.Type == "" {
		// Not the documented error shape: surface the body text instead.
		out.Message = strings.TrimSpace(string(raw))
		return out
	}

	out.Code = envelope.Error.Type
	out.Message = envelope.Error.Message
	return out
}

// toResponse converts a decoded wire response into the canonical
// llm.Response. Text blocks become text parts and tool_use blocks become
// tool calls, with an empty input replaced by "{}". Thinking and other
// unrecognized block types are tolerated and skipped — they are not part of
// the canonical content vocabulary. The wire response itself is kept in
// Raw.
func (m *model) toResponse(wr *wireResponse) *llm.Response {
	out := &llm.Response{
		FinishReason: mapStopReason(wr.StopReason),
		Usage:        wr.Usage.toUsage(),
		Model:        m.fullName(),
		Raw:          wr,
	}

	for _, blk := range wr.Content {
		if blk.Type == "text" {
			out.Parts = append(out.Parts, llm.TextPart{Text: blk.Text})
			continue
		}
		if blk.Type != "tool_use" {
			// thinking, redacted_thinking, server-tool blocks, and any
			// future types are skipped, not surfaced as parts.
			continue
		}

		input := blk.Input
		if len(input) == 0 {
			input = json.RawMessage("{}")
		}
		call := llm.ToolCall{
			ID:        blk.ID,
			Name:      blk.Name,
			Arguments: input,
		}
		out.ToolCalls = append(out.ToolCalls, call)
	}
	return out
}