feat: conversion-driven extensions — resolvers, DefineTool, hooks, ops controls

Phase 9a (ADR-0014): Registry.RegisterResolver for dynamic tiers; DefineTool[Args] typed tools; Usage cache/reasoning detail fields wired through anthropic/openai/google; WithPromptCaching (Anthropic cache_control); agent supervision hooks (WithMaxStepsFunc, WithSteer, WithCompactor, WithToolErrorLimits + ErrToolLoop); health Bench/Unbench/Snapshot; ChainConfig.Observer failover events.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-10 13:30:06 +02:00
parent 04b21fdad2
commit 0147a79d18
21 changed files with 767 additions and 29 deletions
@@ -24,6 +24,13 @@ type wireRequest struct {
 	TopP          *float64          `json:"top_p,omitempty"`
 	StopSequences []string          `json:"stop_sequences,omitempty"`
 	OutputConfig  *wireOutputConfig `json:"output_config,omitempty"`
+	// CacheControl is the top-level auto-placement form of prompt caching:
+	// the API puts the breakpoint on the last cacheable block.
+	CacheControl *wireCacheControl `json:"cache_control,omitempty"`
+}
+
+type wireCacheControl struct {
+	Type string `json:"type"`
 }

 type wireMessage struct {
@@ -109,8 +116,10 @@ type wireUsage struct {
 // real total input is input + cache_creation + cache_read.
 func (u wireUsage) toUsage() llm.Usage {
 	return llm.Usage{
-		InputTokens:  u.InputTokens + u.CacheCreationInputTokens + u.CacheReadInputTokens,
-		OutputTokens: u.OutputTokens,
+		InputTokens:      u.InputTokens + u.CacheCreationInputTokens + u.CacheReadInputTokens,
+		OutputTokens:     u.OutputTokens,
+		CacheReadTokens:  u.CacheReadInputTokens,
+		CacheWriteTokens: u.CacheCreationInputTokens,
 	}
 }

@@ -157,6 +166,11 @@ func buildWireRequest(modelID string, req llm.Request, defaultMax int, stream bo
 			Schema: req.Schema,
 		}}
 	}
+	if req.PromptCache {
+		// Top-level auto-placement: the API puts the cache breakpoint on
+		// the last cacheable block.
+		wr.CacheControl = &wireCacheControl{Type: "ephemeral"}
+	}
 	return wr
 }

@@ -364,8 +364,10 @@ func (m *model) toResponse(resp *genai.GenerateContentResponse) *llm.Response {
 	out := &llm.Response{Model: m.qualified(), Raw: resp}
 	if resp.UsageMetadata != nil {
 		out.Usage = llm.Usage{
-			InputTokens:  int(resp.UsageMetadata.PromptTokenCount),
-			OutputTokens: int(resp.UsageMetadata.CandidatesTokenCount + resp.UsageMetadata.ThoughtsTokenCount),
+			InputTokens:     int(resp.UsageMetadata.PromptTokenCount),
+			OutputTokens:    int(resp.UsageMetadata.CandidatesTokenCount + resp.UsageMetadata.ThoughtsTokenCount),
+			CacheReadTokens: int(resp.UsageMetadata.CachedContentTokenCount),
+			ReasoningTokens: int(resp.UsageMetadata.ThoughtsTokenCount),
 		}
 	}
 	if len(resp.Candidates) == 0 {
@@ -78,8 +78,10 @@ func (s *stream) Next() (llm.StreamEvent, error) {

 		if chunk.UsageMetadata != nil {
 			s.usage = llm.Usage{
-				InputTokens:  int(chunk.UsageMetadata.PromptTokenCount),
-				OutputTokens: int(chunk.UsageMetadata.CandidatesTokenCount + chunk.UsageMetadata.ThoughtsTokenCount),
+				InputTokens:     int(chunk.UsageMetadata.PromptTokenCount),
+				OutputTokens:    int(chunk.UsageMetadata.CandidatesTokenCount + chunk.UsageMetadata.ThoughtsTokenCount),
+				CacheReadTokens: int(chunk.UsageMetadata.CachedContentTokenCount),
+				ReasoningTokens: int(chunk.UsageMetadata.ThoughtsTokenCount),
 			}
 		}
 		if len(chunk.Candidates) == 0 {
@@ -130,10 +130,7 @@ func (m *model) apiError(httpResp *http.Response) error {
 func (m *model) toResponse(wire *chatResponse) *llm.Response {
 	resp := &llm.Response{Model: m.p.name + "/" + m.id, Raw: wire}
 	if wire.Usage != nil {
-		resp.Usage = llm.Usage{
-			InputTokens:  wire.Usage.PromptTokens,
-			OutputTokens: wire.Usage.CompletionTokens,
-		}
+		resp.Usage = wire.Usage.toUsage()
 	}
 	if len(wire.Choices) == 0 {
 		resp.FinishReason = llm.FinishOther
@@ -104,10 +104,7 @@ func (s *stream) handleChunk(data []byte) error {
 		return apiErr
 	}
 	if chunk.Usage != nil {
-		s.usage = llm.Usage{
-			InputTokens:  chunk.Usage.PromptTokens,
-			OutputTokens: chunk.Usage.CompletionTokens,
-		}
+		s.usage = chunk.Usage.toUsage()
 	}
 	// Why the guard: the include_usage chunk arrives with an EMPTY choices
 	// array; indexing choices[0] unconditionally would panic on it.
@@ -125,9 +125,32 @@ type wireRespMessage struct {
 }

 type wireUsage struct {
-	PromptTokens     int `json:"prompt_tokens"`
-	CompletionTokens int `json:"completion_tokens"`
-	TotalTokens      int `json:"total_tokens"`
+	PromptTokens            int               `json:"prompt_tokens"`
+	CompletionTokens        int               `json:"completion_tokens"`
+	TotalTokens             int               `json:"total_tokens"`
+	PromptTokensDetails     *wirePromptDetail `json:"prompt_tokens_details"`
+	CompletionTokensDetails *wireOutputDetail `json:"completion_tokens_details"`
+}
+
+type wirePromptDetail struct {
+	CachedTokens int `json:"cached_tokens"`
+}
+
+type wireOutputDetail struct {
+	ReasoningTokens int `json:"reasoning_tokens"`
+}
+
+// toUsage maps wire usage (with optional detail objects — absent on many
+// compat servers) onto the canonical Usage.
+func (u *wireUsage) toUsage() llm.Usage {
+	out := llm.Usage{InputTokens: u.PromptTokens, OutputTokens: u.CompletionTokens}
+	if u.PromptTokensDetails != nil {
+		out.CacheReadTokens = u.PromptTokensDetails.CachedTokens
+	}
+	if u.CompletionTokensDetails != nil {
+		out.ReasoningTokens = u.CompletionTokensDetails.ReasoningTokens
+	}
+	return out
 }

 type errorEnvelope struct {