From b4bf73136a49805acead73cfa171f7dc6f3bc0ce Mon Sep 17 00:00:00 2001
From: Steve Dudenhoeffer <steve@stevedudenhoeffer.com>
Date: Thu, 9 Apr 2026 19:14:44 +0000
Subject: [PATCH] feat(v2/provider): add CacheHints to Request for prompt
 caching

Adds an optional CacheHints field on provider.Request that carries
cache-breakpoint placement directives from the public llm package down
to individual provider implementations. Anthropic will consume these in
a follow-up commit; OpenAI and Google ignore them.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 v2/provider/provider.go | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/v2/provider/provider.go b/v2/provider/provider.go
index 5a7774a..9e2b5fa 100644
--- a/v2/provider/provider.go
+++ b/v2/provider/provider.go
@@ -41,6 +41,32 @@ type ToolDef struct {
 	Schema      map[string]any // JSON Schema
 }
 
+// CacheHints describes where a provider should attach prompt-cache breakpoints
+// when the model / provider supports prompt caching. The public `llm` package
+// populates it from `WithPromptCaching()`; providers without cache support
+// ignore it. A nil *CacheHints on Request means "no caching".
+//
+// Anthropic allows at most 4 cache_control markers per request; this struct
+// represents at most 3 (tools, system, last non-system message) to leave one
+// breakpoint slot for future use.
+type CacheHints struct {
+	// CacheTools, when true, requests a cache breakpoint on the final tool
+	// definition in Request.Tools. Has no effect when Tools is empty.
+	CacheTools bool
+
+	// CacheSystem, when true, requests a cache breakpoint on the final
+	// system-role message in Request.Messages. Has no effect when no
+	// system message is present.
+	CacheSystem bool
+
+	// LastCacheableMessageIndex is the index into Request.Messages (normally
+	// the last non-system message) at which to place a message-level cache
+	// breakpoint; -1 means none. The zero value 0 is a valid index, so set
+	// -1 explicitly to opt out. Providers that merge consecutive same-role
+	// messages must map this index to the correct merged output message.
+	LastCacheableMessageIndex int
+}
+
 // Request is a completion request at the provider level.
 type Request struct {
 	Model       string
@@ -50,6 +76,10 @@ type Request struct {
 	MaxTokens   *int
 	TopP        *float64
 	Stop        []string
+
+	// CacheHints requests prompt-cache breakpoints at specified positions
+	// on providers that support it (currently Anthropic). nil = no caching.
+	CacheHints *CacheHints
 }
 
 // Response is a completion response at the provider level.