cbaf41f50c
Introduces an opt-in level-based reasoning toggle (low/medium/high) that each provider translates to its native parameter: - Anthropic: thinking.budget_tokens (1024/8000/24000), with temperature forced to default and MaxTokens auto-grown above the budget. - OpenAI/xAI/Groq via openaicompat: reasoning_effort string, gated by a new Rules.SupportsReasoning predicate so non-reasoning models don't receive the parameter. xAI uses Rules.MapReasoningEffort to remap "medium" to "high" since its API only accepts low|high. - Google: thinking_config.thinking_budget + include_thoughts:true. - DeepSeek: SupportsReasoning=false (reasoner is always-on; the reasoning_content trace was already extracted via openaicompat). Reasoning content is surfaced as Response.Thinking on Complete and as StreamEventThinking deltas during streaming. Provider-side: extracted from Anthropic thinking content blocks, Google's part.Thought=true parts, and the non-standard reasoning_content field that DeepSeek and Groq emit (parsed out of raw JSON since openai-go doesn't type it). Public API: - llm.ReasoningLevel + ReasoningLow/Medium/High constants - llm.WithReasoning(level) request option - Model.WithReasoning(level) for baked-in defaults - provider.Request.Reasoning, provider.Response.Thinking - provider.StreamEventThinking Tests cover Rules-based gating, MapReasoningEffort, reasoning_content extraction (Complete + Stream), Anthropic budget mapping, and temperature suppression when thinking is enabled. Existing behavior is unchanged when Reasoning is the empty string. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
157 lines
5.1 KiB
Go
157 lines
5.1 KiB
Go
// Package provider defines the interface that LLM backend implementations must satisfy.
|
|
package provider
|
|
|
|
import "context"
|
|
|
|
// Message is the provider-level message representation.
type Message struct {
	// Role is the chat role of the message author (the cache-hint docs in
	// this package reference "system"-role messages; other values are
	// provider-agnostic strings such as "user"/"assistant" — confirm
	// against callers).
	Role string

	// Content is the plain-text body of the message.
	Content string

	// Images holds image attachments, if any.
	Images []Image

	// Audio holds audio attachments, if any.
	Audio []Audio

	// ToolCalls lists tool invocations the model requested in this message.
	ToolCalls []ToolCall

	// ToolCallID links a tool-result message back to the ToolCall.ID it
	// answers. Empty for messages that are not tool results.
	ToolCallID string
}
|
|
|
|
// Image represents an image attachment at the provider level.
type Image struct {
	// URL is a remote location for the image. Presumably mutually
	// exclusive with Base64 — confirm against callers.
	URL string

	// Base64 is the inline, base64-encoded image payload.
	Base64 string

	// ContentType is the MIME type of the image (e.g. "image/png").
	ContentType string
}
|
|
|
|
// Audio represents an audio attachment at the provider level.
type Audio struct {
	// URL is a remote location for the audio clip. Presumably mutually
	// exclusive with Base64 — confirm against callers.
	URL string

	// Base64 is the inline, base64-encoded audio payload.
	Base64 string

	// ContentType is the MIME type of the audio (e.g. "audio/mpeg").
	ContentType string
}
|
|
|
|
// ToolCall represents a tool invocation requested by the model.
type ToolCall struct {
	// ID is the provider-assigned identifier for this call; tool-result
	// messages echo it back via Message.ToolCallID.
	ID string

	// Name is the name of the tool being invoked (matches ToolDef.Name).
	Name string

	// Arguments is the call's argument payload as raw JSON.
	Arguments string // raw JSON
}
|
|
|
|
// ToolDef defines a tool available to the model.
type ToolDef struct {
	// Name is the tool's unique identifier, referenced by ToolCall.Name.
	Name string

	// Description tells the model what the tool does and when to use it.
	Description string

	// Schema describes the tool's parameters as a JSON Schema document.
	Schema map[string]any // JSON Schema
}
|
|
|
|
// CacheHints describes where a provider should attach prompt-cache breakpoints
// when the model / provider supports prompt caching. The public `llm` package
// populates this from `WithPromptCaching()`. Providers without cache support
// ignore this field.
//
// Anthropic allows at most 4 cache_control markers per request; this struct
// represents at most 3 (tools, system, last non-system message) to leave one
// breakpoint slot for future use.
type CacheHints struct {
	// CacheTools, when true, requests a cache breakpoint on the final tool
	// definition in Request.Tools. Has no effect when Tools is empty.
	CacheTools bool

	// CacheSystem, when true, requests a cache breakpoint on the final
	// system-role message in Request.Messages. Has no effect when no
	// system message is present.
	CacheSystem bool

	// LastCacheableMessageIndex is the index into Request.Messages at which
	// to place a message-level cache breakpoint. A value of -1 means "no
	// message-level breakpoint". Points at the last non-system message in
	// the conversation; providers that merge consecutive same-role messages
	// must map this index to the correct merged output message.
	//
	// NOTE(review): the zero value is 0 — a valid index pointing at the
	// first message — not the -1 "none" sentinel. Code constructing a
	// CacheHints must set -1 explicitly when no message-level breakpoint
	// is wanted; verify all construction sites do so.
	LastCacheableMessageIndex int
}
|
|
|
|
// Request is a completion request at the provider level.
type Request struct {
	// Model is the provider-specific model identifier.
	Model string

	// Messages is the conversation history, in order.
	Messages []Message

	// Tools lists the tools the model may invoke. Empty means no tools.
	Tools []ToolDef

	// Temperature, MaxTokens, and TopP are sampling parameters; a nil
	// pointer presumably means "use the provider's default" — confirm
	// against provider implementations.
	Temperature *float64
	MaxTokens   *int
	TopP        *float64

	// Stop lists sequences at which the provider should stop generating.
	Stop []string

	// CacheHints requests prompt-cache breakpoints at specified positions
	// on providers that support it (currently Anthropic). nil = no caching.
	CacheHints *CacheHints

	// Reasoning, when non-empty, asks the model to spend extra inference
	// budget reasoning before answering. Each provider translates this to
	// its native parameter (Anthropic thinking.budget_tokens, OpenAI/xAI
	// reasoning_effort, Google thinking_config, etc.). Models that do not
	// support reasoning silently ignore it.
	//
	// Allowed values: "" (no reasoning, default), "low", "medium", "high".
	Reasoning string
}
|
|
|
|
// Response is a completion response at the provider level.
type Response struct {
	// Text is the model's final answer text.
	Text string

	// ToolCalls lists tool invocations the model requested, if any.
	ToolCalls []ToolCall

	// Usage reports token consumption; nil when the provider did not
	// return usage information.
	Usage *Usage

	// Thinking holds the model's reasoning/thinking trace, when one was
	// requested and the provider exposed it. Empty for providers/models
	// that do not surface a thinking trace.
	Thinking string
}
|
|
|
|
// Usage captures token consumption.
type Usage struct {
	// InputTokens is the number of prompt tokens consumed.
	InputTokens int

	// OutputTokens is the number of completion tokens generated.
	OutputTokens int

	// TotalTokens is the provider-reported total. Presumably
	// InputTokens + OutputTokens, but providers may count differently —
	// do not assume the invariant holds.
	TotalTokens int

	// Details is a provider-specific breakdown keyed by the
	// UsageDetail* constants below (e.g., cached, reasoning tokens).
	// nil when no breakdown is available.
	Details map[string]int // provider-specific breakdown (e.g., cached, reasoning tokens)
}
|
|
|
|
// Standardized detail keys for provider-specific token breakdowns.
// These are the well-known keys of Usage.Details; providers populate
// whichever subset they can report.
const (
	// UsageDetailReasoningTokens counts tokens spent on reasoning/thinking.
	UsageDetailReasoningTokens = "reasoning_tokens"
	// UsageDetailCachedInputTokens counts input tokens served from a prompt cache.
	UsageDetailCachedInputTokens = "cached_input_tokens"
	// UsageDetailCacheCreationTokens counts tokens written to a prompt cache.
	UsageDetailCacheCreationTokens = "cache_creation_tokens"
	// UsageDetailAudioInputTokens counts audio tokens in the input.
	UsageDetailAudioInputTokens = "audio_input_tokens"
	// UsageDetailAudioOutputTokens counts audio tokens in the output.
	UsageDetailAudioOutputTokens = "audio_output_tokens"
	// UsageDetailThoughtsTokens counts "thoughts" tokens (Google-style naming —
	// confirm against the Google provider's usage mapping).
	UsageDetailThoughtsTokens = "thoughts_tokens"
)
|
|
|
|
// StreamEventType identifies the kind of stream event. Its values are the
// StreamEvent* constants below.
type StreamEventType int
|
|
|
|
// Stream event kinds. New kinds are appended at the end (see
// StreamEventThinking, added after StreamEventError) so that the iota
// values of existing constants stay stable.
const (
	StreamEventText      StreamEventType = iota // Text content delta
	StreamEventToolStart                        // Tool call begins
	StreamEventToolDelta                        // Tool call argument delta
	StreamEventToolEnd                          // Tool call complete
	StreamEventDone                             // Stream complete
	StreamEventError                            // Error occurred
	StreamEventThinking                         // Reasoning/thinking content delta
)
|
|
|
|
// StreamEvent represents a single event in a streaming response.
// Which fields are populated depends on Type; fields not relevant to the
// event kind are left at their zero values.
type StreamEvent struct {
	// Type identifies the kind of event.
	Type StreamEventType

	// Text carries the content delta for StreamEventText, and presumably
	// the reasoning delta for StreamEventThinking — confirm against
	// provider implementations.
	Text string

	// ToolCall carries tool-call data for the tool events.
	ToolCall *ToolCall

	// ToolIndex identifies which tool call a tool event belongs to when
	// several are streamed in one response.
	ToolIndex int

	// Error carries the failure for StreamEventError.
	Error error

	// Response carries the final aggregated Response, presumably on
	// StreamEventDone — confirm against provider implementations.
	Response *Response
}
|
|
|
|
// Provider is the interface that LLM backends implement.
type Provider interface {
	// Complete performs a non-streaming completion.
	Complete(ctx context.Context, req Request) (Response, error)

	// Stream performs a streaming completion, sending events to the channel.
	// The provider MUST close the channel when done.
	// The provider MUST send exactly one StreamEventDone as the last non-error event.
	//
	// NOTE(review): the contract for how a returned error relates to an
	// emitted StreamEventError (either, both, or one-or-the-other) is not
	// specified here — confirm against implementations and document.
	Stream(ctx context.Context, req Request, events chan<- StreamEvent) error
}
|