From b4bf73136a49805acead73cfa171f7dc6f3bc0ce Mon Sep 17 00:00:00 2001
From: Steve Dudenhoeffer
Date: Thu, 9 Apr 2026 19:14:44 +0000
Subject: [PATCH] feat(v2/provider): add CacheHints to Request for prompt caching

Adds an optional CacheHints field on provider.Request that carries
cache-breakpoint placement directives from the public llm package down
to individual provider implementations. Anthropic will consume these in
a follow-up commit; OpenAI and Google ignore them.

Co-Authored-By: Claude Opus 4.6
---
 v2/provider/provider.go | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/v2/provider/provider.go b/v2/provider/provider.go
index 5a7774a..9e2b5fa 100644
--- a/v2/provider/provider.go
+++ b/v2/provider/provider.go
@@ -41,6 +41,32 @@ type ToolDef struct {
 	Schema map[string]any // JSON Schema
 }
 
+// CacheHints describes where a provider should attach prompt-cache breakpoints
+// when the model / provider supports prompt caching. The public `llm` package
+// populates it from `WithPromptCaching()`. Providers without cache support
+// ignore these hints.
+//
+// Anthropic allows at most 4 cache_control markers per request; this struct
+// can request at most 3 (tools, system, last non-system message), which leaves
+// one breakpoint slot free for future use.
+type CacheHints struct {
+	// CacheTools, when true, requests a cache breakpoint on the final tool
+	// definition in Request.Tools. It has no effect when Tools is empty.
+	CacheTools bool
+
+	// CacheSystem, when true, requests a cache breakpoint on the final
+	// system-role message in Request.Messages. It has no effect when no
+	// system message is present.
+	CacheSystem bool
+
+	// LastCacheableMessageIndex is the index into Request.Messages at which to
+	// place a message-level cache breakpoint, or -1 for none. The zero value 0
+	// is a valid index, so set -1 explicitly to disable. It should point at the
+	// last non-system message; providers that merge consecutive same-role
+	// messages must map this index to the correct merged output message.
+	LastCacheableMessageIndex int
+}
+
 // Request is a completion request at the provider level.
 type Request struct {
 	Model string
@@ -50,6 +76,10 @@ type Request struct {
 	MaxTokens *int
 	TopP *float64
 	Stop []string
+
+	// CacheHints requests prompt-cache breakpoints at specified positions
+	// on providers that support them (currently Anthropic). Nil disables caching.
+	CacheHints *CacheHints
 }
 
 // Response is a completion response at the provider level.