feat(v2/provider): add CacheHints to Request for prompt caching
Adds an optional CacheHints field on provider.Request that carries cache-breakpoint placement directives from the public llm package down to individual provider implementations. Anthropic will consume these in a follow-up commit; OpenAI and Google ignore them.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -41,6 +41,32 @@ type ToolDef struct {
|
|||||||
Schema map[string]any // JSON Schema
|
Schema map[string]any // JSON Schema
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NoCacheableMessage is the value of CacheHints.LastCacheableMessageIndex
// that means "do not place a message-level cache breakpoint".
const NoCacheableMessage = -1

// CacheHints describes where a provider should attach prompt-cache breakpoints
// when the model / provider supports prompt caching. The public `llm` package
// populates this from `WithPromptCaching()`. Providers without cache support
// ignore this field.
//
// Anthropic allows at most 4 cache_control markers per request; this struct
// represents at most 3 (tools, system, last non-system message) to leave one
// breakpoint slot for future use.
//
// The zero value is NOT "no caching": LastCacheableMessageIndex defaults to 0,
// which is a valid message index. Code that builds a CacheHints by hand must
// set LastCacheableMessageIndex to NoCacheableMessage when it does not want a
// message-level breakpoint.
type CacheHints struct {
	// CacheTools, when true, requests a cache breakpoint on the final tool
	// definition in Request.Tools. Has no effect when Tools is empty.
	CacheTools bool

	// CacheSystem, when true, requests a cache breakpoint on the final
	// system-role message in Request.Messages. Has no effect when no
	// system message is present.
	CacheSystem bool

	// LastCacheableMessageIndex is the index into Request.Messages at which
	// to place a message-level cache breakpoint. NoCacheableMessage (-1)
	// means "no message-level breakpoint". Points at the last non-system
	// message in the conversation; providers that merge consecutive
	// same-role messages must map this index to the correct merged output
	// message.
	LastCacheableMessageIndex int
}

// MessageBreakpoint returns the message index at which a message-level cache
// breakpoint is requested, given that the request holds numMessages messages.
//
// ok is false, and the returned index is NoCacheableMessage, when h is nil,
// when the stored index is negative, or when it does not address one of the
// numMessages messages. The nil receiver is accepted so callers can invoke it
// directly on an unset *CacheHints without a separate nil check.
func (h *CacheHints) MessageBreakpoint(numMessages int) (index int, ok bool) {
	if h == nil {
		return NoCacheableMessage, false
	}
	idx := h.LastCacheableMessageIndex
	if idx < 0 || idx >= numMessages {
		return NoCacheableMessage, false
	}
	return idx, true
}
|
||||||
|
|
||||||
// Request is a completion request at the provider level.
|
// Request is a completion request at the provider level.
|
||||||
type Request struct {
|
type Request struct {
|
||||||
Model string
|
Model string
|
||||||
@@ -50,6 +76,10 @@ type Request struct {
|
|||||||
MaxTokens *int
|
MaxTokens *int
|
||||||
TopP *float64
|
TopP *float64
|
||||||
Stop []string
|
Stop []string
|
||||||
|
|
||||||
|
// CacheHints requests prompt-cache breakpoints at specified positions
|
||||||
|
// on providers that support it (currently Anthropic). nil = no caching.
|
||||||
|
CacheHints *CacheHints
|
||||||
}
|
}
|
||||||
|
|
||||||
// Response is a completion response at the provider level.
|
// Response is a completion response at the provider level.
|
||||||
|
|||||||
Reference in New Issue
Block a user