package llm // RequestOption configures a single completion request. type RequestOption func(*requestConfig) type requestConfig struct { tools *ToolBox temperature *float64 maxTokens *int topP *float64 stop []string cacheConfig *cacheConfig } // cacheConfig holds prompt-caching settings. nil = disabled. type cacheConfig struct { enabled bool } // WithTools attaches a toolbox to the request. func WithTools(tb *ToolBox) RequestOption { return func(c *requestConfig) { c.tools = tb } } // WithTemperature sets the sampling temperature. func WithTemperature(t float64) RequestOption { return func(c *requestConfig) { c.temperature = &t } } // WithMaxTokens sets the maximum number of tokens to generate. func WithMaxTokens(n int) RequestOption { return func(c *requestConfig) { c.maxTokens = &n } } // WithTopP sets the nucleus sampling parameter. func WithTopP(p float64) RequestOption { return func(c *requestConfig) { c.topP = &p } } // WithStop sets stop sequences. func WithStop(sequences ...string) RequestOption { return func(c *requestConfig) { c.stop = sequences } } // WithPromptCaching enables automatic prompt-caching markers on providers // that support it (currently Anthropic). On providers that don't support // explicit cache markers (OpenAI, Google), this is a no-op. // // When enabled, the library places cache breakpoints at natural seams: // - the last tool definition (caches all tools) // - the last system message (caches tools + system) // - the last non-system message in the history (caches tools + system + // conversation so far) // // Breakpoints are placed only when the corresponding section is non-empty. // Up to 3 markers are emitted per request, leaving one of Anthropic's 4 // marker slots for future use. // // Cache hits give a 90% discount on cached input tokens (5-minute ephemeral // tier). Cache writes cost 25% more than normal input tokens, so this option // is only worth enabling for prompts whose cacheable prefix exceeds the // minimum (1024 tokens on Opus/Sonnet, 2048 tokens on Haiku) AND is re-sent // at least twice within the 5-minute TTL. func WithPromptCaching() RequestOption { return func(c *requestConfig) { c.cacheConfig = &cacheConfig{enabled: true} } }