0147a79d18
Phase 9a (ADR-0014): Registry.RegisterResolver for dynamic tiers; DefineTool[Args] typed tools; Usage cache/reasoning detail fields wired through anthropic/openai/google; WithPromptCaching (Anthropic cache_control); agent supervision hooks (WithMaxStepsFunc, WithSteer, WithCompactor, WithToolErrorLimits + ErrToolLoop); health Bench/Unbench/Snapshot; ChainConfig.Observer failover events. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
122 lines
4.1 KiB
Go
122 lines
4.1 KiB
Go
package llm
|
|
|
|
import "encoding/json"
|
|
|
|
// Request is the canonical generation request. Providers translate it to
|
|
// their wire format and enforce their declared Capabilities against it.
|
|
type Request struct {
|
|
// System is the system prompt. Providers map it to their native system
|
|
// mechanism (top-level system field, system message, SystemInstruction).
|
|
// Any RoleSystem messages in Messages are folded in after this field.
|
|
System string
|
|
|
|
// Messages is the conversation so far, oldest first.
|
|
Messages []Message
|
|
|
|
// Tools the model may call.
|
|
Tools []Tool
|
|
|
|
// ToolChoice constrains tool use: "" or "auto" lets the model decide,
|
|
// "none" forbids tool calls, "required" forces some tool call, and any
|
|
// other value names the one tool the model must call.
|
|
ToolChoice string
|
|
|
|
// Schema, when non-nil, is a JSON Schema object the response must
|
|
// conform to (structured output). Providers map it to their native
|
|
// mechanism. SchemaName names the schema for providers that require one.
|
|
Schema json.RawMessage
|
|
SchemaName string
|
|
|
|
// Sampling and limit knobs. Pointer fields distinguish "unset" (provider
|
|
// default) from an explicit zero.
|
|
Temperature *float64
|
|
TopP *float64
|
|
|
|
// MaxTokens caps the response length; 0 means provider default.
|
|
MaxTokens int
|
|
|
|
// StopSequences halt generation when emitted.
|
|
StopSequences []string
|
|
|
|
// ReasoningEffort requests a reasoning/thinking level from models that
|
|
// support one: "low", "medium", or "high" (empty = provider default).
|
|
// Providers map it to their native knob (OpenAI reasoning_effort,
|
|
// Ollama think levels) and ignore it where no mapping exists.
|
|
ReasoningEffort string
|
|
|
|
// PromptCache opts the request into the provider's prompt caching
|
|
// (Anthropic cache_control; ignored by providers that cache
|
|
// automatically or not at all).
|
|
PromptCache bool
|
|
}
|
|
|
|
// Option mutates a Request before it is sent. Options passed to Generate or
|
|
// Stream are applied to a copy of the request, so a Request value can be
|
|
// safely reused across calls.
|
|
type Option func(*Request)
|
|
|
|
// WithSystem sets the system prompt.
|
|
func WithSystem(s string) Option { return func(r *Request) { r.System = s } }
|
|
|
|
// WithTools appends tools to the request.
|
|
func WithTools(tools ...Tool) Option {
|
|
return func(r *Request) { r.Tools = append(r.Tools, tools...) }
|
|
}
|
|
|
|
// WithToolbox appends every tool in the toolbox to the request.
|
|
func WithToolbox(b *Toolbox) Option {
|
|
return func(r *Request) { r.Tools = append(r.Tools, b.Tools()...) }
|
|
}
|
|
|
|
// WithToolChoice sets the tool-choice policy ("auto", "none", "required",
|
|
// or a specific tool name).
|
|
func WithToolChoice(choice string) Option {
|
|
return func(r *Request) { r.ToolChoice = choice }
|
|
}
|
|
|
|
// WithSchema requests structured output conforming to the given JSON Schema.
|
|
// name is optional; providers that require a schema name fall back to
|
|
// "response" when it is empty.
|
|
func WithSchema(schema json.RawMessage, name string) Option {
|
|
return func(r *Request) { r.Schema = schema; r.SchemaName = name }
|
|
}
|
|
|
|
// WithTemperature sets the sampling temperature.
|
|
func WithTemperature(t float64) Option {
|
|
return func(r *Request) { r.Temperature = &t }
|
|
}
|
|
|
|
// WithTopP sets nucleus-sampling top-p.
|
|
func WithTopP(p float64) Option {
|
|
return func(r *Request) { r.TopP = &p }
|
|
}
|
|
|
|
// WithMaxTokens caps the response length.
|
|
func WithMaxTokens(n int) Option { return func(r *Request) { r.MaxTokens = n } }
|
|
|
|
// WithStopSequences sets stop sequences.
|
|
func WithStopSequences(stops ...string) Option {
|
|
return func(r *Request) { r.StopSequences = stops }
|
|
}
|
|
|
|
// WithReasoningEffort requests a reasoning/thinking level ("low", "medium",
|
|
// "high") from models that support one.
|
|
func WithReasoningEffort(level string) Option {
|
|
return func(r *Request) { r.ReasoningEffort = level }
|
|
}
|
|
|
|
// WithPromptCaching opts into provider prompt caching where it is an
|
|
// explicit feature (Anthropic); a no-op elsewhere.
|
|
func WithPromptCaching() Option {
|
|
return func(r *Request) { r.PromptCache = true }
|
|
}
|
|
|
|
// Apply returns a copy of the request with all options applied. Providers
|
|
// and wrappers call this once at the top of Generate/Stream.
|
|
func (r Request) Apply(opts ...Option) Request {
|
|
for _, opt := range opts {
|
|
opt(&r)
|
|
}
|
|
return r
|
|
}
|