// majordomo/provider/anthropic/wire.go

package anthropic

import (
	"encoding/base64"
	"encoding/json"
	"strings"

	"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
)

// Wire types mirror the Messages API JSON shapes (June 2026 docs). Only the
// fields majordomo uses are modeled; unknown response fields are ignored by
// encoding/json.

// wireRequest is the JSON body of a Messages API request, as assembled by
// buildWireRequest. Model, MaxTokens and Messages are always serialized;
// every other field carries omitempty so an unset value stays off the wire.
// Temperature and TopP are pointers so that nil (omitted) stays distinct
// from an explicit zero value.
type wireRequest struct {
	Model         string            `json:"model"`
	MaxTokens     int               `json:"max_tokens"`
	System        string            `json:"system,omitempty"`
	Messages      []wireMessage     `json:"messages"`
	Stream        bool              `json:"stream,omitempty"`
	Tools         []wireTool        `json:"tools,omitempty"`
	ToolChoice    *wireToolChoice   `json:"tool_choice,omitempty"`
	Temperature   *float64          `json:"temperature,omitempty"`
	TopP          *float64          `json:"top_p,omitempty"`
	StopSequences []string          `json:"stop_sequences,omitempty"`
	OutputConfig  *wireOutputConfig `json:"output_config,omitempty"`
	// CacheControl is the top-level auto-placement form of prompt caching:
	// the API puts the breakpoint on the last cacheable block.
	CacheControl *wireCacheControl `json:"cache_control,omitempty"`
}

// wireCacheControl is the cache_control object of a request.
// buildWireRequest sets Type to "ephemeral" when the canonical request asks
// for prompt caching; no other value is produced in this file.
type wireCacheControl struct {
	Type string `json:"type"`
}

// wireMessage is one conversation turn in a request: a role plus its ordered
// content blocks. toWireMessages only ever emits the roles "user" and
// "assistant". Content has no omitempty, so an empty (non-nil) slice is sent
// as [] rather than dropped.
type wireMessage struct {
	Role    string      `json:"role"`
	Content []wireBlock `json:"content"`
}

// wireBlock is a request-side content block. Exactly one shape is populated
// per block, keyed by Type: text, image, tool_use, or tool_result.
//
// Every shape-specific field carries omitempty, so the fields belonging to
// the other shapes are left out of the serialized block. A consequence is
// that a zero value within the active shape (for example an empty Text) is
// omitted as well.
type wireBlock struct {
	Type string `json:"type"`

	// text
	Text string `json:"text,omitempty"`

	// image
	Source *wireImageSource `json:"source,omitempty"`

	// tool_use: Input is raw JSON so the call arguments pass through
	// without being re-encoded.
	ID    string          `json:"id,omitempty"`
	Name  string          `json:"name,omitempty"`
	Input json.RawMessage `json:"input,omitempty"`

	// tool_result: ToolUseID refers back to the ID of the tool_use block
	// being answered.
	ToolUseID string `json:"tool_use_id,omitempty"`
	Content   string `json:"content,omitempty"`
	IsError   bool   `json:"is_error,omitempty"`
}

// wireImageSource is the source object of an image block. toWireBlocks
// always fills it with Type "base64", the image part's MIME type as
// MediaType, and the standard-base64 encoding of the image bytes as Data.
type wireImageSource struct {
	Type      string `json:"type"`
	MediaType string `json:"media_type"`
	Data      string `json:"data"`
}

// wireTool is one entry of the request's tools array. InputSchema is raw
// JSON and has no omitempty: it is always serialized, and toWireTools
// substitutes an empty object schema when a tool declares no parameters.
type wireTool struct {
	Name        string          `json:"name"`
	Description string          `json:"description,omitempty"`
	InputSchema json.RawMessage `json:"input_schema"`
}

// wireToolChoice is the request's tool_choice object. As produced by
// toWireToolChoice, Type is one of "auto", "any", "none" or "tool"; Name is
// set only for "tool" and is omitted from the JSON otherwise.
type wireToolChoice struct {
	Type string `json:"type"`
	Name string `json:"name,omitempty"`
}

// wireOutputConfig is the request's output_config object. buildWireRequest
// populates it (with a Format) only when the canonical request carries a
// schema.
type wireOutputConfig struct {
	Format *wireOutputFormat `json:"format,omitempty"`
}

// wireOutputFormat is the output_config.format object. buildWireRequest sets
// Type to "json_schema" and passes the caller's schema through as raw JSON.
// Schema has no omitempty, so it is always serialized.
type wireOutputFormat struct {
	Type   string          `json:"type"`
	Schema json.RawMessage `json:"schema"`
}

// wireResponse models the body of a Messages API response: message
// identity (ID, Type, Role, Model), the returned content blocks, the raw
// stop_reason string, and token usage. StopReason is presumably the value
// fed to mapStopReason and Usage the value converted by toUsage; the call
// sites are outside this section.
type wireResponse struct {
	ID         string          `json:"id"`
	Type       string          `json:"type"`
	Role       string          `json:"role"`
	Model      string          `json:"model"`
	Content    []wireRespBlock `json:"content"`
	StopReason string          `json:"stop_reason"`
	Usage      wireUsage       `json:"usage"`
}

// wireRespBlock is a response-side content block, keyed by Type. Its fields
// mirror the text shape (Text) and the tool_use shape (ID, Name, Input) of
// wireBlock. Input is kept as raw JSON, so tool-call arguments are not
// decoded at this layer.
type wireRespBlock struct {
	Type  string          `json:"type"`
	Text  string          `json:"text"`
	ID    string          `json:"id"`
	Name  string          `json:"name"`
	Input json.RawMessage `json:"input"`
}

// wireUsage is the response's usage object: uncached input tokens, output
// tokens, and the two prompt-cache counters (tokens written to the cache and
// tokens read from it). toUsage converts it to the canonical llm.Usage.
type wireUsage struct {
	InputTokens              int `json:"input_tokens"`
	OutputTokens             int `json:"output_tokens"`
	CacheCreationInputTokens int `json:"cache_creation_input_tokens"`
	CacheReadInputTokens     int `json:"cache_read_input_tokens"`
}

// toUsage converts the API's token accounting into the canonical llm.Usage.
//
// Why InputTokens is a sum: the API's input_tokens counts only the tokens
// after the last cache breakpoint, so the real total input is
// input + cache_creation + cache_read. The two cache counters are also
// reported separately as CacheWriteTokens and CacheReadTokens.
func (u wireUsage) toUsage() llm.Usage {
	cacheWrites := u.CacheCreationInputTokens
	cacheReads := u.CacheReadInputTokens

	var usage llm.Usage
	usage.InputTokens = u.InputTokens + cacheWrites + cacheReads
	usage.OutputTokens = u.OutputTokens
	usage.CacheReadTokens = cacheReads
	usage.CacheWriteTokens = cacheWrites
	return usage
}

// wireErrorEnvelope models an error body: a top-level type plus a nested
// error object carrying its own type and a human-readable message.
type wireErrorEnvelope struct {
	Type  string `json:"type"`
	Error struct {
		Type    string `json:"type"`
		Message string `json:"message"`
	} `json:"error"`
}

// buildWireRequest translates the canonical request into the Messages API
// shape.
//
// modelID is sent verbatim as the model field. defaultMax replaces
// req.MaxTokens when that is 0. stream sets the request's stream flag.
// System-role messages are folded into the top-level system field and
// removed from the message list.
//
// Request.ReasoningEffort is intentionally ignored: the current Messages API
// has no low/medium/high reasoning knob — thinking is adaptive on current
// models, and the legacy budget/disable parameters 400 on them. The llm
// contract says providers ignore ReasoningEffort where no mapping exists.
//
// Request.SchemaName is likewise ignored: output_config.format takes a bare
// schema with no name field.
func buildWireRequest(modelID string, req llm.Request, defaultMax int, stream bool) wireRequest {
	maxTokens := req.MaxTokens
	if maxTokens == 0 {
		// max_tokens is required by the API; 0 means "provider default".
		maxTokens = defaultMax
	}

	wr := wireRequest{
		Model:         modelID,
		MaxTokens:     maxTokens,
		System:        foldSystem(req),
		Messages:      toWireMessages(req.Messages),
		Stream:        stream,
		Tools:         toWireTools(req.Tools),
		ToolChoice:    toWireToolChoice(req.ToolChoice),
		Temperature:   req.Temperature,
		TopP:          req.TopP,
		StopSequences: req.StopSequences,
	}
	// Why len rather than a nil check: a non-nil but zero-length schema
	// would be emitted as an empty json.RawMessage, which encoding/json
	// rejects ("unexpected end of JSON input") and which would fail the
	// whole request at marshal time. An empty schema means "no schema",
	// matching how empty tool arguments and tool parameters are detected
	// elsewhere in this file.
	if len(req.Schema) > 0 {
		wr.OutputConfig = &wireOutputConfig{Format: &wireOutputFormat{
			Type:   "json_schema",
			Schema: req.Schema,
		}}
	}
	if req.PromptCache {
		// Top-level auto-placement: the API puts the cache breakpoint on
		// the last cacheable block.
		wr.CacheControl = &wireCacheControl{Type: "ephemeral"}
	}
	return wr
}

// foldSystem builds the top-level system prompt: Request.System first, then
// the text of each RoleSystem message in conversation order. Empty pieces
// are skipped and the remaining ones are separated by "\n\n". Why: the API
// takes the system prompt as a top-level field and rejects system roles
// inside messages, so canonical RoleSystem messages must be folded in here.
func foldSystem(req llm.Request) string {
	var sb strings.Builder

	// appendPart writes one non-empty piece, preceded by the separator once
	// something has already been written.
	appendPart := func(piece string) {
		if piece == "" {
			return
		}
		if sb.Len() > 0 {
			sb.WriteString("\n\n")
		}
		sb.WriteString(piece)
	}

	appendPart(req.System)
	for _, m := range req.Messages {
		if m.Role == llm.RoleSystem {
			appendPart(m.Text())
		}
	}
	return sb.String()
}

// toWireMessages converts canonical messages into request messages,
// preserving their order.
//
//   - RoleSystem messages are dropped here; foldSystem carries their text
//     in the top-level system field instead.
//   - RoleTool messages become one "user" message holding one tool_result
//     block per result.
//   - RoleAssistant messages become an "assistant" message: the converted
//     parts first, then one tool_use block per tool call.
//   - Any other role (RoleUser included) becomes a "user" message built
//     from the converted parts.
func toWireMessages(msgs []llm.Message) []wireMessage {
	wire := make([]wireMessage, 0, len(msgs))
	for _, m := range msgs {
		if m.Role == llm.RoleSystem {
			// Folded into the top-level system field by foldSystem.
			continue
		}

		role := "user"
		var content []wireBlock

		switch m.Role {
		case llm.RoleTool:
			content = make([]wireBlock, len(m.ToolResults))
			for i, result := range m.ToolResults {
				content[i] = wireBlock{
					Type:      "tool_result",
					ToolUseID: result.ID,
					Content:   result.Content,
					IsError:   result.IsError,
				}
			}

		case llm.RoleAssistant:
			role = "assistant"
			content = toWireBlocks(m.Parts)
			for _, tc := range m.ToolCalls {
				input := tc.Arguments
				if len(input) == 0 {
					// The API requires input to be a JSON object.
					input = json.RawMessage("{}")
				}
				content = append(content, wireBlock{
					Type:  "tool_use",
					ID:    tc.ID,
					Name:  tc.Name,
					Input: input,
				})
			}

		default: // llm.RoleUser and anything unrecognized
			content = toWireBlocks(m.Parts)
		}

		wire = append(wire, wireMessage{Role: role, Content: content})
	}
	return wire
}

// toWireBlocks converts canonical parts into request content blocks,
// preserving their order. Text parts become text blocks; image parts become
// image blocks whose bytes are standard-base64 encoded. Parts of any other
// kind are skipped. The result is always non-nil, even for no parts.
func toWireBlocks(parts []llm.Part) []wireBlock {
	out := make([]wireBlock, 0, len(parts))
	for i := range parts {
		if txt, ok := parts[i].(llm.TextPart); ok {
			out = append(out, wireBlock{Type: "text", Text: txt.Text})
			continue
		}
		if img, ok := parts[i].(llm.ImagePart); ok {
			src := &wireImageSource{
				Type:      "base64",
				MediaType: img.MIME,
				Data:      base64.StdEncoding.EncodeToString(img.Data),
			}
			out = append(out, wireBlock{Type: "image", Source: src})
		}
	}
	return out
}

// toWireTools converts canonical tool definitions into the request's tools
// array, preserving order. It returns nil when there are no tools, so the
// omitempty "tools" field is left out of the request entirely.
func toWireTools(tools []llm.Tool) []wireTool {
	if len(tools) == 0 {
		return nil
	}
	wire := make([]wireTool, len(tools))
	for i, tool := range tools {
		entry := wireTool{
			Name:        tool.Name,
			Description: tool.Description,
			InputSchema: tool.Parameters,
		}
		if len(entry.InputSchema) == 0 {
			// Why: input_schema is required by the API; a tool with no
			// arguments still needs an (empty) object schema.
			entry.InputSchema = json.RawMessage(`{"type":"object","properties":{}}`)
		}
		wire[i] = entry
	}
	return wire
}

// toWireToolChoice maps the canonical tool-choice policy onto the request's
// tool_choice object. The empty string yields nil, which omits the field
// (API default is auto). "auto" and "none" pass through unchanged,
// "required" becomes "any", and any other value is taken as the name of the
// one tool the model must call.
func toWireToolChoice(choice string) *wireToolChoice {
	if choice == "" {
		return nil
	}

	wire := &wireToolChoice{}
	switch choice {
	case "auto", "none":
		wire.Type = choice
	case "required":
		wire.Type = "any"
	default:
		wire.Type = "tool"
		wire.Name = choice
	}
	return wire
}

// mapStopReason translates the API's stop_reason string into the canonical
// FinishReason. Natural completion and stop sequences map to FinishStop,
// token or context-window exhaustion to FinishLength, tool use to
// FinishToolCalls, and refusals to FinishContentFilter. Everything else
// falls through to FinishOther.
func mapStopReason(stop string) llm.FinishReason {
	switch {
	case stop == "tool_use":
		return llm.FinishToolCalls
	case stop == "refusal":
		return llm.FinishContentFilter
	case stop == "end_turn" || stop == "stop_sequence":
		return llm.FinishStop
	case stop == "max_tokens" || stop == "model_context_window_exceeded":
		return llm.FinishLength
	}
	// pause_turn and any future provider-specific reasons.
	return llm.FinishOther
}