feat: conversion-driven extensions — resolvers, DefineTool, hooks, ops controls
CI / Tidy (push) Successful in 9m31s
CI / Build & Test (push) Successful in 10m13s

Phase 9a (ADR-0014): Registry.RegisterResolver for dynamic tiers;
DefineTool[Args] typed tools; Usage cache/reasoning detail fields wired
through anthropic/openai/google; WithPromptCaching (Anthropic
cache_control); agent supervision hooks (WithMaxStepsFunc, WithSteer,
WithCompactor, WithToolErrorLimits + ErrToolLoop); health
Bench/Unbench/Snapshot; ChainConfig.Observer failover events.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
2026-06-10 13:30:06 +02:00
parent 04b21fdad2
commit 0147a79d18
21 changed files with 767 additions and 29 deletions
+16 -2
View File
@@ -24,6 +24,13 @@ type wireRequest struct {
TopP *float64 `json:"top_p,omitempty"`
StopSequences []string `json:"stop_sequences,omitempty"`
OutputConfig *wireOutputConfig `json:"output_config,omitempty"`
// CacheControl is the top-level auto-placement form of prompt caching:
// the API puts the breakpoint on the last cacheable block.
CacheControl *wireCacheControl `json:"cache_control,omitempty"`
}
// wireCacheControl is the JSON body of a cache_control object. In this file
// it is attached to the top level of the request (wireRequest.CacheControl)
// rather than to an individual content block.
type wireCacheControl struct {
// Type is the cache kind, serialized as "type"; buildWireRequest sets it
// to "ephemeral" when the request has PromptCache enabled.
Type string `json:"type"`
}
type wireMessage struct {
@@ -109,8 +116,10 @@ type wireUsage struct {
// real total input is input + cache_creation + cache_read.
func (u wireUsage) toUsage() llm.Usage {
	// Each field is listed exactly once: a struct literal that repeats
	// InputTokens/OutputTokens does not compile ("duplicate field name").
	//
	// InputTokens is the full input total (plain input + cache writes +
	// cache reads); the two cache counters are additionally surfaced on
	// their own so callers can see how much of that total was cached.
	return llm.Usage{
		InputTokens:      u.InputTokens + u.CacheCreationInputTokens + u.CacheReadInputTokens,
		OutputTokens:     u.OutputTokens,
		CacheReadTokens:  u.CacheReadInputTokens,
		CacheWriteTokens: u.CacheCreationInputTokens,
	}
}
@@ -157,6 +166,11 @@ func buildWireRequest(modelID string, req llm.Request, defaultMax int, stream bo
Schema: req.Schema,
}}
}
if req.PromptCache {
// Top-level auto-placement: the API puts the cache breakpoint on
// the last cacheable block.
wr.CacheControl = &wireCacheControl{Type: "ephemeral"}
}
return wr
}
+4 -2
View File
@@ -364,8 +364,10 @@ func (m *model) toResponse(resp *genai.GenerateContentResponse) *llm.Response {
out := &llm.Response{Model: m.qualified(), Raw: resp}
if resp.UsageMetadata != nil {
out.Usage = llm.Usage{
InputTokens: int(resp.UsageMetadata.PromptTokenCount),
OutputTokens: int(resp.UsageMetadata.CandidatesTokenCount + resp.UsageMetadata.ThoughtsTokenCount),
InputTokens: int(resp.UsageMetadata.PromptTokenCount),
OutputTokens: int(resp.UsageMetadata.CandidatesTokenCount + resp.UsageMetadata.ThoughtsTokenCount),
CacheReadTokens: int(resp.UsageMetadata.CachedContentTokenCount),
ReasoningTokens: int(resp.UsageMetadata.ThoughtsTokenCount),
}
}
if len(resp.Candidates) == 0 {
+4 -2
View File
@@ -78,8 +78,10 @@ func (s *stream) Next() (llm.StreamEvent, error) {
if chunk.UsageMetadata != nil {
s.usage = llm.Usage{
InputTokens: int(chunk.UsageMetadata.PromptTokenCount),
OutputTokens: int(chunk.UsageMetadata.CandidatesTokenCount + chunk.UsageMetadata.ThoughtsTokenCount),
InputTokens: int(chunk.UsageMetadata.PromptTokenCount),
OutputTokens: int(chunk.UsageMetadata.CandidatesTokenCount + chunk.UsageMetadata.ThoughtsTokenCount),
CacheReadTokens: int(chunk.UsageMetadata.CachedContentTokenCount),
ReasoningTokens: int(chunk.UsageMetadata.ThoughtsTokenCount),
}
}
if len(chunk.Candidates) == 0 {
+1 -4
View File
@@ -130,10 +130,7 @@ func (m *model) apiError(httpResp *http.Response) error {
func (m *model) toResponse(wire *chatResponse) *llm.Response {
resp := &llm.Response{Model: m.p.name + "/" + m.id, Raw: wire}
if wire.Usage != nil {
resp.Usage = llm.Usage{
InputTokens: wire.Usage.PromptTokens,
OutputTokens: wire.Usage.CompletionTokens,
}
resp.Usage = wire.Usage.toUsage()
}
if len(wire.Choices) == 0 {
resp.FinishReason = llm.FinishOther
+1 -4
View File
@@ -104,10 +104,7 @@ func (s *stream) handleChunk(data []byte) error {
return apiErr
}
if chunk.Usage != nil {
s.usage = llm.Usage{
InputTokens: chunk.Usage.PromptTokens,
OutputTokens: chunk.Usage.CompletionTokens,
}
s.usage = chunk.Usage.toUsage()
}
// Why the guard: the include_usage chunk arrives with an EMPTY choices
// array; indexing choices[0] unconditionally would panic on it.
+26 -3
View File
@@ -125,9 +125,32 @@ type wireRespMessage struct {
}
type wireUsage struct {
PromptTokens int `json:"prompt_tokens"`
CompletionTokens int `json:"completion_tokens"`
TotalTokens int `json:"total_tokens"`
PromptTokens int `json:"prompt_tokens"`
CompletionTokens int `json:"completion_tokens"`
TotalTokens int `json:"total_tokens"`
PromptTokensDetails *wirePromptDetail `json:"prompt_tokens_details"`
CompletionTokensDetails *wireOutputDetail `json:"completion_tokens_details"`
}
// wirePromptDetail is the optional "prompt_tokens_details" object nested in
// wireUsage.
type wirePromptDetail struct {
// CachedTokens is decoded from "cached_tokens"; toUsage copies it into
// Usage.CacheReadTokens.
CachedTokens int `json:"cached_tokens"`
}
// wireOutputDetail is the optional "completion_tokens_details" object nested
// in wireUsage.
type wireOutputDetail struct {
// ReasoningTokens is decoded from "reasoning_tokens"; toUsage copies it
// into Usage.ReasoningTokens.
ReasoningTokens int `json:"reasoning_tokens"`
}
// toUsage converts the wire-level usage counters into the canonical
// llm.Usage. The prompt/completion detail objects are optional (many
// compatible servers never send them), so each is read only when present;
// otherwise the corresponding canonical field stays at zero.
func (u *wireUsage) toUsage() llm.Usage {
	var cachedRead, reasoning int
	if detail := u.PromptTokensDetails; detail != nil {
		cachedRead = detail.CachedTokens
	}
	if detail := u.CompletionTokensDetails; detail != nil {
		reasoning = detail.ReasoningTokens
	}
	return llm.Usage{
		InputTokens:     u.PromptTokens,
		OutputTokens:    u.CompletionTokens,
		CacheReadTokens: cachedRead,
		ReasoningTokens: reasoning,
	}
}
type errorEnvelope struct {