feat: conversion-driven extensions — resolvers, DefineTool, hooks, ops controls
Phase 9a (ADR-0014): Registry.RegisterResolver for dynamic tiers; DefineTool[Args] typed tools; Usage cache/reasoning detail fields wired through anthropic/openai/google; WithPromptCaching (Anthropic cache_control); agent supervision hooks (WithMaxStepsFunc, WithSteer, WithCompactor, WithToolErrorLimits + ErrToolLoop); health Bench/Unbench/Snapshot; ChainConfig.Observer failover events. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -24,6 +24,13 @@ type wireRequest struct {
|
||||
TopP *float64 `json:"top_p,omitempty"`
|
||||
StopSequences []string `json:"stop_sequences,omitempty"`
|
||||
OutputConfig *wireOutputConfig `json:"output_config,omitempty"`
|
||||
// CacheControl is the top-level auto-placement form of prompt caching:
|
||||
// the API puts the breakpoint on the last cacheable block.
|
||||
CacheControl *wireCacheControl `json:"cache_control,omitempty"`
|
||||
}
|
||||
|
||||
// wireCacheControl is the JSON shape of a prompt-caching directive.
// buildWireRequest sets Type to "ephemeral" when the request opts into
// prompt caching.
type wireCacheControl struct {
	// Type names the cache kind. It has no omitempty, so it is always
	// serialized, even when empty.
	Type string `json:"type"`
}
|
||||
|
||||
type wireMessage struct {
|
||||
@@ -109,8 +116,10 @@ type wireUsage struct {
|
||||
// real total input is input + cache_creation + cache_read.
|
||||
func (u wireUsage) toUsage() llm.Usage {
|
||||
return llm.Usage{
|
||||
InputTokens: u.InputTokens + u.CacheCreationInputTokens + u.CacheReadInputTokens,
|
||||
OutputTokens: u.OutputTokens,
|
||||
InputTokens: u.InputTokens + u.CacheCreationInputTokens + u.CacheReadInputTokens,
|
||||
OutputTokens: u.OutputTokens,
|
||||
CacheReadTokens: u.CacheReadInputTokens,
|
||||
CacheWriteTokens: u.CacheCreationInputTokens,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -157,6 +166,11 @@ func buildWireRequest(modelID string, req llm.Request, defaultMax int, stream bo
|
||||
Schema: req.Schema,
|
||||
}}
|
||||
}
|
||||
if req.PromptCache {
|
||||
// Top-level auto-placement: the API puts the cache breakpoint on
|
||||
// the last cacheable block.
|
||||
wr.CacheControl = &wireCacheControl{Type: "ephemeral"}
|
||||
}
|
||||
return wr
|
||||
}
|
||||
|
||||
|
||||
@@ -364,8 +364,10 @@ func (m *model) toResponse(resp *genai.GenerateContentResponse) *llm.Response {
|
||||
out := &llm.Response{Model: m.qualified(), Raw: resp}
|
||||
if resp.UsageMetadata != nil {
|
||||
out.Usage = llm.Usage{
|
||||
InputTokens: int(resp.UsageMetadata.PromptTokenCount),
|
||||
OutputTokens: int(resp.UsageMetadata.CandidatesTokenCount + resp.UsageMetadata.ThoughtsTokenCount),
|
||||
InputTokens: int(resp.UsageMetadata.PromptTokenCount),
|
||||
OutputTokens: int(resp.UsageMetadata.CandidatesTokenCount + resp.UsageMetadata.ThoughtsTokenCount),
|
||||
CacheReadTokens: int(resp.UsageMetadata.CachedContentTokenCount),
|
||||
ReasoningTokens: int(resp.UsageMetadata.ThoughtsTokenCount),
|
||||
}
|
||||
}
|
||||
if len(resp.Candidates) == 0 {
|
||||
|
||||
@@ -78,8 +78,10 @@ func (s *stream) Next() (llm.StreamEvent, error) {
|
||||
|
||||
if chunk.UsageMetadata != nil {
|
||||
s.usage = llm.Usage{
|
||||
InputTokens: int(chunk.UsageMetadata.PromptTokenCount),
|
||||
OutputTokens: int(chunk.UsageMetadata.CandidatesTokenCount + chunk.UsageMetadata.ThoughtsTokenCount),
|
||||
InputTokens: int(chunk.UsageMetadata.PromptTokenCount),
|
||||
OutputTokens: int(chunk.UsageMetadata.CandidatesTokenCount + chunk.UsageMetadata.ThoughtsTokenCount),
|
||||
CacheReadTokens: int(chunk.UsageMetadata.CachedContentTokenCount),
|
||||
ReasoningTokens: int(chunk.UsageMetadata.ThoughtsTokenCount),
|
||||
}
|
||||
}
|
||||
if len(chunk.Candidates) == 0 {
|
||||
|
||||
@@ -130,10 +130,7 @@ func (m *model) apiError(httpResp *http.Response) error {
|
||||
func (m *model) toResponse(wire *chatResponse) *llm.Response {
|
||||
resp := &llm.Response{Model: m.p.name + "/" + m.id, Raw: wire}
|
||||
if wire.Usage != nil {
|
||||
resp.Usage = llm.Usage{
|
||||
InputTokens: wire.Usage.PromptTokens,
|
||||
OutputTokens: wire.Usage.CompletionTokens,
|
||||
}
|
||||
resp.Usage = wire.Usage.toUsage()
|
||||
}
|
||||
if len(wire.Choices) == 0 {
|
||||
resp.FinishReason = llm.FinishOther
|
||||
|
||||
@@ -104,10 +104,7 @@ func (s *stream) handleChunk(data []byte) error {
|
||||
return apiErr
|
||||
}
|
||||
if chunk.Usage != nil {
|
||||
s.usage = llm.Usage{
|
||||
InputTokens: chunk.Usage.PromptTokens,
|
||||
OutputTokens: chunk.Usage.CompletionTokens,
|
||||
}
|
||||
s.usage = chunk.Usage.toUsage()
|
||||
}
|
||||
// Why the guard: the include_usage chunk arrives with an EMPTY choices
|
||||
// array; indexing choices[0] unconditionally would panic on it.
|
||||
|
||||
+26
-3
@@ -125,9 +125,32 @@ type wireRespMessage struct {
|
||||
}
|
||||
|
||||
type wireUsage struct {
|
||||
PromptTokens int `json:"prompt_tokens"`
|
||||
CompletionTokens int `json:"completion_tokens"`
|
||||
TotalTokens int `json:"total_tokens"`
|
||||
PromptTokens int `json:"prompt_tokens"`
|
||||
CompletionTokens int `json:"completion_tokens"`
|
||||
TotalTokens int `json:"total_tokens"`
|
||||
PromptTokensDetails *wirePromptDetail `json:"prompt_tokens_details"`
|
||||
CompletionTokensDetails *wireOutputDetail `json:"completion_tokens_details"`
|
||||
}
|
||||
|
||||
// wirePromptDetail is the optional "prompt_tokens_details" object nested
// in wireUsage.
type wirePromptDetail struct {
	// CachedTokens is copied into Usage.CacheReadTokens by toUsage.
	CachedTokens int `json:"cached_tokens"`
}
|
||||
|
||||
// wireOutputDetail is the optional "completion_tokens_details" object
// nested in wireUsage.
type wireOutputDetail struct {
	// ReasoningTokens is copied into Usage.ReasoningTokens by toUsage.
	ReasoningTokens int `json:"reasoning_tokens"`
}
|
||||
|
||||
// toUsage maps wire usage (with optional detail objects — absent on many
|
||||
// compat servers) onto the canonical Usage.
|
||||
func (u *wireUsage) toUsage() llm.Usage {
|
||||
out := llm.Usage{InputTokens: u.PromptTokens, OutputTokens: u.CompletionTokens}
|
||||
if u.PromptTokensDetails != nil {
|
||||
out.CacheReadTokens = u.PromptTokensDetails.CachedTokens
|
||||
}
|
||||
if u.CompletionTokensDetails != nil {
|
||||
out.ReasoningTokens = u.CompletionTokensDetails.ReasoningTokens
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
type errorEnvelope struct {
|
||||
|
||||
Reference in New Issue
Block a user