cbaf41f50c
Introduces an opt-in level-based reasoning toggle (low/medium/high) that each provider translates to its native parameter: - Anthropic: thinking.budget_tokens (1024/8000/24000), with temperature forced to default and MaxTokens auto-grown above the budget. - OpenAI/xAI/Groq via openaicompat: reasoning_effort string, gated by a new Rules.SupportsReasoning predicate so non-reasoning models don't receive the parameter. xAI uses Rules.MapReasoningEffort to remap "medium" to "high" since its API only accepts low|high. - Google: thinking_config.thinking_budget + include_thoughts:true. - DeepSeek: SupportsReasoning=false (reasoner is always-on; the reasoning_content trace was already extracted via openaicompat). Reasoning content is surfaced as Response.Thinking on Complete and as StreamEventThinking deltas during streaming. Provider-side: extracted from Anthropic thinking content blocks, Google's part.Thought=true parts, and the non-standard reasoning_content field that DeepSeek and Groq emit (parsed out of raw JSON since openai-go doesn't type it). Public API: - llm.ReasoningLevel + ReasoningLow/Medium/High constants - llm.WithReasoning(level) request option - Model.WithReasoning(level) for baked-in defaults - provider.Request.Reasoning, provider.Response.Thinking - provider.StreamEventThinking Tests cover Rules-based gating, MapReasoningEffort, reasoning_content extraction (Complete + Stream), Anthropic budget mapping, and temperature suppression when thinking is enabled. Existing behavior is unchanged when Reasoning is the empty string. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
157 lines
5.1 KiB
Go
157 lines
5.1 KiB
Go
// Package provider defines the interface that LLM backend implementations must satisfy.
|
|
package provider
|
|
|
|
import "context"
|
|
|
|
// Message is the provider-level message representation.
type Message struct {
	// Role is the chat role of the message author (the cache-hint docs in
	// this package reference "system"-role messages; other values are
	// provider-agnostic strings such as "user"/"assistant" — confirm
	// against callers).
	Role string

	// Content is the plain-text body of the message.
	Content string

	// Images holds image attachments, if any.
	Images []Image

	// Audio holds audio attachments, if any.
	Audio []Audio

	// ToolCalls lists tool invocations the model requested in this message.
	ToolCalls []ToolCall

	// ToolCallID links a tool-result message back to the ToolCall.ID it
	// answers. Empty for messages that are not tool results.
	ToolCallID string
}
|
|
|
|
// Image represents an image attachment at the provider level.
type Image struct {
	// URL is a remote location for the image. Presumably mutually
	// exclusive with Base64 — confirm against callers.
	URL string

	// Base64 is the inline, base64-encoded image payload.
	Base64 string

	// ContentType is the MIME type of the image (e.g. "image/png").
	ContentType string
}
|
|
|
|
// Audio represents an audio attachment at the provider level.
type Audio struct {
	// URL is a remote location for the audio clip. Presumably mutually
	// exclusive with Base64 — confirm against callers.
	URL string

	// Base64 is the inline, base64-encoded audio payload.
	Base64 string

	// ContentType is the MIME type of the audio (e.g. "audio/mpeg").
	ContentType string
}
|
|
|
|
// ToolCall represents a tool invocation requested by the model.
type ToolCall struct {
	// ID is the provider-assigned identifier for this call; tool-result
	// messages echo it back via Message.ToolCallID.
	ID string

	// Name is the name of the tool being invoked (matches ToolDef.Name).
	Name string

	// Arguments is the call's argument payload as raw JSON.
	Arguments string // raw JSON
}
|
|
|
|
// ToolDef defines a tool available to the model.
type ToolDef struct {
	// Name is the tool's unique identifier, referenced by ToolCall.Name.
	Name string

	// Description tells the model what the tool does and when to use it.
	Description string

	// Schema describes the tool's parameters as a JSON Schema document.
	Schema map[string]any // JSON Schema
}
|
|
|
|
// CacheHints describes where a provider should attach prompt-cache breakpoints
// when the model / provider supports prompt caching. The public `llm` package
// populates this from `WithPromptCaching()`. Providers without cache support
// ignore this field.
//
// Anthropic allows at most 4 cache_control markers per request; this struct
// represents at most 3 (tools, system, last non-system message) to leave one
// breakpoint slot for future use.
type CacheHints struct {
	// CacheTools, when true, requests a cache breakpoint on the final tool
	// definition in Request.Tools. Has no effect when Tools is empty.
	CacheTools bool

	// CacheSystem, when true, requests a cache breakpoint on the final
	// system-role message in Request.Messages. Has no effect when no
	// system message is present.
	CacheSystem bool

	// LastCacheableMessageIndex is the index into Request.Messages at which
	// to place a message-level cache breakpoint. A value of -1 means "no
	// message-level breakpoint". Points at the last non-system message in
	// the conversation; providers that merge consecutive same-role messages
	// must map this index to the correct merged output message.
	//
	// NOTE(review): the zero value is 0 — a valid index pointing at the
	// first message — not the -1 "none" sentinel. Code constructing a
	// CacheHints must set -1 explicitly when no message-level breakpoint
	// is wanted; verify all construction sites do so.
	LastCacheableMessageIndex int
}
|
|
|
|
// Request is a completion request at the provider level.
type Request struct {
	// Model is the provider-specific model identifier.
	Model string

	// Messages is the conversation history, in order.
	Messages []Message

	// Tools lists the tools the model may invoke. Empty means no tools.
	Tools []ToolDef

	// Temperature, MaxTokens, and TopP are sampling parameters; a nil
	// pointer presumably means "use the provider's default" — confirm
	// against provider implementations.
	Temperature *float64
	MaxTokens   *int
	TopP        *float64

	// Stop lists sequences at which the provider should stop generating.
	Stop []string

	// CacheHints requests prompt-cache breakpoints at specified positions
	// on providers that support it (currently Anthropic). nil = no caching.
	CacheHints *CacheHints

	// Reasoning, when non-empty, asks the model to spend extra inference
	// budget reasoning before answering. Each provider translates this to
	// its native parameter (Anthropic thinking.budget_tokens, OpenAI/xAI
	// reasoning_effort, Google thinking_config, etc.). Models that do not
	// support reasoning silently ignore it.
	//
	// Allowed values: "" (no reasoning, default), "low", "medium", "high".
	Reasoning string
}
|
|
|
|
// Response is a completion response at the provider level.
type Response struct {
	// Text is the model's final answer text.
	Text string

	// ToolCalls lists tool invocations the model requested, if any.
	ToolCalls []ToolCall

	// Usage reports token consumption; nil when the provider did not
	// return usage information.
	Usage *Usage

	// Thinking holds the model's reasoning/thinking trace, when one was
	// requested and the provider exposed it. Empty for providers/models
	// that do not surface a thinking trace.
	Thinking string
}
|
|
|
|
// Usage captures token consumption.
type Usage struct {
	// InputTokens is the number of prompt tokens consumed.
	InputTokens int

	// OutputTokens is the number of completion tokens generated.
	OutputTokens int

	// TotalTokens is the provider-reported total. Presumably
	// InputTokens + OutputTokens, but providers may count differently —
	// do not assume the invariant holds.
	TotalTokens int

	// Details is a provider-specific breakdown keyed by the
	// UsageDetail* constants below (e.g., cached, reasoning tokens).
	// nil when no breakdown is available.
	Details map[string]int // provider-specific breakdown (e.g., cached, reasoning tokens)
}
|
|
|
|
// Standardized detail keys for provider-specific token breakdowns.
// These are the well-known keys of Usage.Details; providers populate
// whichever subset they can report.
const (
	// UsageDetailReasoningTokens counts tokens spent on reasoning/thinking.
	UsageDetailReasoningTokens = "reasoning_tokens"
	// UsageDetailCachedInputTokens counts input tokens served from a prompt cache.
	UsageDetailCachedInputTokens = "cached_input_tokens"
	// UsageDetailCacheCreationTokens counts tokens written to a prompt cache.
	UsageDetailCacheCreationTokens = "cache_creation_tokens"
	// UsageDetailAudioInputTokens counts audio tokens in the input.
	UsageDetailAudioInputTokens = "audio_input_tokens"
	// UsageDetailAudioOutputTokens counts audio tokens in the output.
	UsageDetailAudioOutputTokens = "audio_output_tokens"
	// UsageDetailThoughtsTokens counts "thoughts" tokens (Google-style naming —
	// confirm against the Google provider's usage mapping).
	UsageDetailThoughtsTokens = "thoughts_tokens"
)
|
|
|
|
// StreamEventType identifies the kind of stream event. Its values are the
// StreamEvent* constants below.
type StreamEventType int
|
|
|
|
// Stream event kinds. New kinds are appended at the end (see
// StreamEventThinking, added after StreamEventError) so that the iota
// values of existing constants stay stable.
const (
	StreamEventText      StreamEventType = iota // Text content delta
	StreamEventToolStart                        // Tool call begins
	StreamEventToolDelta                        // Tool call argument delta
	StreamEventToolEnd                          // Tool call complete
	StreamEventDone                             // Stream complete
	StreamEventError                            // Error occurred
	StreamEventThinking                         // Reasoning/thinking content delta
)
|
|
|
|
// StreamEvent represents a single event in a streaming response.
// Which fields are populated depends on Type; fields not relevant to the
// event kind are left at their zero values.
type StreamEvent struct {
	// Type identifies the kind of event.
	Type StreamEventType

	// Text carries the content delta for StreamEventText, and presumably
	// the reasoning delta for StreamEventThinking — confirm against
	// provider implementations.
	Text string

	// ToolCall carries tool-call data for the tool events.
	ToolCall *ToolCall

	// ToolIndex identifies which tool call a tool event belongs to when
	// several are streamed in one response.
	ToolIndex int

	// Error carries the failure for StreamEventError.
	Error error

	// Response carries the final aggregated Response, presumably on
	// StreamEventDone — confirm against provider implementations.
	Response *Response
}
|
|
|
|
// Provider is the interface that LLM backends implement.
type Provider interface {
	// Complete performs a non-streaming completion.
	Complete(ctx context.Context, req Request) (Response, error)

	// Stream performs a streaming completion, sending events to the channel.
	// The provider MUST close the channel when done.
	// The provider MUST send exactly one StreamEventDone as the last non-error event.
	//
	// NOTE(review): the contract for how a returned error relates to an
	// emitted StreamEventError (either, both, or one-or-the-other) is not
	// specified here — confirm against implementations and document.
	Stream(ctx context.Context, req Request, events chan<- StreamEvent) error
}
|