feat: conversion-driven extensions — resolvers, DefineTool, hooks, ops controls

Phase 9a (ADR-0014): Registry.RegisterResolver for dynamic tiers; DefineTool[Args] typed tools; Usage cache/reasoning detail fields wired through anthropic/openai/google; WithPromptCaching (Anthropic cache_control); agent supervision hooks (WithMaxStepsFunc, WithSteer, WithCompactor, WithToolErrorLimits + ErrToolLoop); health Bench/Unbench/Snapshot; ChainConfig.Observer failover events.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-10 13:30:06 +02:00
parent 04b21fdad2
commit 0147a79d18
21 changed files with 767 additions and 29 deletions
@@ -43,6 +43,11 @@ type Request struct {
 	// Providers map it to their native knob (OpenAI reasoning_effort,
 	// Ollama think levels) and ignore it where no mapping exists.
 	ReasoningEffort string
+
+	// PromptCache opts the request into the provider's prompt caching
+	// (Anthropic cache_control; ignored by providers that cache
+	// automatically or not at all).
+	PromptCache bool
 }

 // Option mutates a Request before it is sent. Options passed to Generate or
@@ -100,6 +105,12 @@ func WithReasoningEffort(level string) Option {
 	return func(r *Request) { r.ReasoningEffort = level }
 }

+// WithPromptCaching opts into provider prompt caching where it is an
+// explicit feature (Anthropic); a no-op elsewhere.
+func WithPromptCaching() Option {
+	return func(r *Request) { r.PromptCache = true }
+}
+
 // Apply returns a copy of the request with all options applied. Providers
 // and wrappers call this once at the top of Generate/Stream.
 func (r Request) Apply(opts ...Option) Request {
@@ -18,10 +18,22 @@ const (
 	FinishOther FinishReason = "other"
 )

-// Usage reports token accounting for one request.
+// Usage reports token accounting for one request. InputTokens and
+// OutputTokens are always totals; the detail fields break out portions of
+// those totals where the provider reports them (0 = not reported).
 type Usage struct {
 	InputTokens  int
 	OutputTokens int
+
+	// CacheReadTokens is the portion of InputTokens served from the
+	// provider's prompt cache.
+	CacheReadTokens int
+	// CacheWriteTokens is the portion of InputTokens written to the
+	// provider's prompt cache.
+	CacheWriteTokens int
+	// ReasoningTokens is the portion of OutputTokens spent on
+	// thinking/reasoning.
+	ReasoningTokens int
 }

 // Total returns input plus output tokens.
@@ -31,6 +43,9 @@ func (u Usage) Total() int { return u.InputTokens + u.OutputTokens }
 func (u *Usage) Add(o Usage) {
 	u.InputTokens += o.InputTokens
 	u.OutputTokens += o.OutputTokens
+	u.CacheReadTokens += o.CacheReadTokens
+	u.CacheWriteTokens += o.CacheWriteTokens
+	u.ReasoningTokens += o.ReasoningTokens
 }

 // Response is the canonical generation result.
@@ -22,6 +22,40 @@ type Tool struct {
 	Handler func(ctx context.Context, args json.RawMessage) (any, error)
 }

+// DefineTool builds a typed tool: the parameter schema is derived from
+// Args (see SchemaFor) and the raw JSON arguments are unmarshaled into an
+// Args value before fn runs; empty arguments leave it at its zero value.
+//
+//	weather := llm.DefineTool("get_weather", "Current weather for a city",
+//	    func(ctx context.Context, args struct {
+//	        City string `json:"city" description:"city name"`
+//	    }) (any, error) {
+//	        return lookup(args.City)
+//	    })
+//
+// Schema derivation failures panic: tools are defined at startup and an
+// unschematizable Args type is a programming error worth failing loudly on.
+func DefineTool[Args any](name, description string, fn func(ctx context.Context, args Args) (any, error)) Tool {
+	schema, err := SchemaFor[Args]()
+	if err != nil {
+		panic(fmt.Sprintf("llm: DefineTool(%q): %v", name, err))
+	}
+	return Tool{
+		Name:        name,
+		Description: description,
+		Parameters:  schema,
+		Handler: func(ctx context.Context, raw json.RawMessage) (any, error) {
+			var args Args
+			if len(raw) > 0 {
+				if err := json.Unmarshal(raw, &args); err != nil {
+					return nil, fmt.Errorf("invalid arguments for %s: %w", name, err)
+				}
+			}
+			return fn(ctx, args)
+		},
+	}
+}
+
 // ToolCall is a model's request to invoke a tool.
 type ToolCall struct {
 	// ID is the provider-assigned call id; majordomo synthesizes one for