323558ed72
Groundwork for the provider phase: reasoning levels map to native knobs (OpenAI reasoning_effort, Ollama think); ErrUnsupported marks declared capability mismatches that chains advance past without health penalty. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
111 lines
3.7 KiB
Go
111 lines
3.7 KiB
Go
package llm
|
|
|
|
import "encoding/json"
|
|
|
|
// Request is the canonical generation request. Providers translate it to
|
|
// their wire format and enforce their declared Capabilities against it.
|
|
type Request struct {
|
|
// System is the system prompt. Providers map it to their native system
|
|
// mechanism (top-level system field, system message, SystemInstruction).
|
|
// Any RoleSystem messages in Messages are folded in after this field.
|
|
System string
|
|
|
|
// Messages is the conversation so far, oldest first.
|
|
Messages []Message
|
|
|
|
// Tools the model may call.
|
|
Tools []Tool
|
|
|
|
// ToolChoice constrains tool use: "" or "auto" lets the model decide,
|
|
// "none" forbids tool calls, "required" forces some tool call, and any
|
|
// other value names the one tool the model must call.
|
|
ToolChoice string
|
|
|
|
// Schema, when non-nil, is a JSON Schema object the response must
|
|
// conform to (structured output). Providers map it to their native
|
|
// mechanism. SchemaName names the schema for providers that require one.
|
|
Schema json.RawMessage
|
|
SchemaName string
|
|
|
|
// Sampling and limit knobs. Pointer fields distinguish "unset" (provider
|
|
// default) from an explicit zero.
|
|
Temperature *float64
|
|
TopP *float64
|
|
|
|
// MaxTokens caps the response length; 0 means provider default.
|
|
MaxTokens int
|
|
|
|
// StopSequences halt generation when emitted.
|
|
StopSequences []string
|
|
|
|
// ReasoningEffort requests a reasoning/thinking level from models that
|
|
// support one: "low", "medium", or "high" (empty = provider default).
|
|
// Providers map it to their native knob (OpenAI reasoning_effort,
|
|
// Ollama think levels) and ignore it where no mapping exists.
|
|
ReasoningEffort string
|
|
}
|
|
|
|
// Option mutates a Request before it is sent. Options passed to Generate or
|
|
// Stream are applied to a copy of the request, so a Request value can be
|
|
// safely reused across calls.
|
|
type Option func(*Request)
|
|
|
|
// WithSystem sets the system prompt.
|
|
func WithSystem(s string) Option { return func(r *Request) { r.System = s } }
|
|
|
|
// WithTools appends tools to the request.
|
|
func WithTools(tools ...Tool) Option {
|
|
return func(r *Request) { r.Tools = append(r.Tools, tools...) }
|
|
}
|
|
|
|
// WithToolbox appends every tool in the toolbox to the request.
|
|
func WithToolbox(b *Toolbox) Option {
|
|
return func(r *Request) { r.Tools = append(r.Tools, b.Tools()...) }
|
|
}
|
|
|
|
// WithToolChoice sets the tool-choice policy ("auto", "none", "required",
|
|
// or a specific tool name).
|
|
func WithToolChoice(choice string) Option {
|
|
return func(r *Request) { r.ToolChoice = choice }
|
|
}
|
|
|
|
// WithSchema requests structured output conforming to the given JSON Schema.
|
|
// name is optional; providers that require a schema name fall back to
|
|
// "response" when it is empty.
|
|
func WithSchema(schema json.RawMessage, name string) Option {
|
|
return func(r *Request) { r.Schema = schema; r.SchemaName = name }
|
|
}
|
|
|
|
// WithTemperature sets the sampling temperature.
|
|
func WithTemperature(t float64) Option {
|
|
return func(r *Request) { r.Temperature = &t }
|
|
}
|
|
|
|
// WithTopP sets nucleus-sampling top-p.
|
|
func WithTopP(p float64) Option {
|
|
return func(r *Request) { r.TopP = &p }
|
|
}
|
|
|
|
// WithMaxTokens caps the response length.
|
|
func WithMaxTokens(n int) Option { return func(r *Request) { r.MaxTokens = n } }
|
|
|
|
// WithStopSequences sets stop sequences.
|
|
func WithStopSequences(stops ...string) Option {
|
|
return func(r *Request) { r.StopSequences = stops }
|
|
}
|
|
|
|
// WithReasoningEffort requests a reasoning/thinking level ("low", "medium",
|
|
// "high") from models that support one.
|
|
func WithReasoningEffort(level string) Option {
|
|
return func(r *Request) { r.ReasoningEffort = level }
|
|
}
|
|
|
|
// Apply returns a copy of the request with all options applied. Providers
|
|
// and wrappers call this once at the top of Generate/Stream.
|
|
func (r Request) Apply(opts ...Option) Request {
|
|
for _, opt := range opts {
|
|
opt(&r)
|
|
}
|
|
return r
|
|
}
|