Introduces an opt-in RequestOption that callers can pass to enable automatic prompt-caching markers. The option populates a cacheConfig on requestConfig but has no effect yet — plumbing through to provider.Request and on to the Anthropic provider lands in subsequent commits. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
69 lines
2.2 KiB
Go
69 lines
2.2 KiB
Go
package llm
|
|
|
|
// RequestOption configures a single completion request. Each option is a
// function that mutates the request's internal requestConfig before the
// request is sent.
type RequestOption func(*requestConfig)
|
|
|
|
// requestConfig accumulates per-request settings applied by RequestOption
// values. Every field is optional; a nil pointer (or empty slice) means
// "use the provider's default".
type requestConfig struct {
	tools       *ToolBox     // toolbox attached via WithTools; nil = no tools
	temperature *float64     // sampling temperature via WithTemperature; nil = provider default
	maxTokens   *int         // generation cap via WithMaxTokens; nil = provider default
	topP        *float64     // nucleus sampling via WithTopP; nil = provider default
	stop        []string     // stop sequences via WithStop; empty = none
	cacheConfig *cacheConfig // prompt-caching settings via WithPromptCaching; nil = disabled
}
|
|
|
|
// cacheConfig holds prompt-caching settings. nil = disabled.
//
// A struct (rather than a bare bool on requestConfig) leaves room for
// future caching knobs (e.g. TTL tier, marker budget) without changing
// the requestConfig field's type.
type cacheConfig struct {
	enabled bool // set by WithPromptCaching; currently the only knob
}
|
|
|
|
// WithTools attaches a toolbox to the request.
|
|
func WithTools(tb *ToolBox) RequestOption {
|
|
return func(c *requestConfig) { c.tools = tb }
|
|
}
|
|
|
|
// WithTemperature sets the sampling temperature.
|
|
func WithTemperature(t float64) RequestOption {
|
|
return func(c *requestConfig) { c.temperature = &t }
|
|
}
|
|
|
|
// WithMaxTokens sets the maximum number of tokens to generate.
|
|
func WithMaxTokens(n int) RequestOption {
|
|
return func(c *requestConfig) { c.maxTokens = &n }
|
|
}
|
|
|
|
// WithTopP sets the nucleus sampling parameter.
|
|
func WithTopP(p float64) RequestOption {
|
|
return func(c *requestConfig) { c.topP = &p }
|
|
}
|
|
|
|
// WithStop sets stop sequences.
|
|
func WithStop(sequences ...string) RequestOption {
|
|
return func(c *requestConfig) { c.stop = sequences }
|
|
}
|
|
|
|
// WithPromptCaching enables automatic prompt-caching markers on providers
|
|
// that support it (currently Anthropic). On providers that don't support
|
|
// explicit cache markers (OpenAI, Google), this is a no-op.
|
|
//
|
|
// When enabled, the library places cache breakpoints at natural seams:
|
|
// - the last tool definition (caches all tools)
|
|
// - the last system message (caches tools + system)
|
|
// - the last non-system message in the history (caches tools + system +
|
|
// conversation so far)
|
|
//
|
|
// Breakpoints are placed only when the corresponding section is non-empty.
|
|
// Up to 3 markers are emitted per request, leaving one of Anthropic's 4
|
|
// marker slots for future use.
|
|
//
|
|
// Cache hits give a 90% discount on cached input tokens (5-minute ephemeral
|
|
// tier). Cache writes cost 25% more than normal input tokens, so this option
|
|
// is only worth enabling for prompts whose cacheable prefix exceeds the
|
|
// minimum (1024 tokens on Opus/Sonnet, 2048 tokens on Haiku) AND is re-sent
|
|
// at least twice within the 5-minute TTL.
|
|
func WithPromptCaching() RequestOption {
|
|
return func(c *requestConfig) {
|
|
c.cacheConfig = &cacheConfig{enabled: true}
|
|
}
|
|
}
|