// majordomo/llm/request.go

package llm

import "encoding/json"

// Request is the canonical generation request. Providers translate it to
// their wire format and enforce their declared Capabilities against it.
//
// Copying a Request by value is shallow: the Messages, Tools, StopSequences
// and Schema slices of the copy share their backing arrays with the
// original, and Temperature/TopP point at the same float64 values.
type Request struct {
	// System is the system prompt. Providers map it to their native system
	// mechanism (top-level system field, system message, SystemInstruction).
	// Any RoleSystem messages in Messages are folded in after this field.
	System string

	// Messages is the conversation so far, oldest first.
	Messages []Message

	// Tools is the set of tools the model may call.
	Tools []Tool

	// ToolChoice constrains tool use: "" or "auto" lets the model decide,
	// "none" forbids tool calls, "required" forces some tool call, and any
	// other value names the one tool the model must call.
	ToolChoice string

	// Schema, when non-nil, is a JSON Schema object the response must
	// conform to (structured output). Providers map it to their native
	// mechanism. SchemaName names the schema for providers that require one;
	// it may be left empty (see WithSchema for the fallback name).
	Schema     json.RawMessage
	SchemaName string

	// Sampling knobs: Temperature is the sampling temperature and TopP the
	// nucleus-sampling top-p. Both are pointers so that "unset" (nil, meaning
	// provider default) can be told apart from an explicit zero.
	Temperature *float64
	TopP        *float64

	// MaxTokens caps the response length; 0 means provider default.
	MaxTokens int

	// StopSequences halt generation when emitted.
	StopSequences []string

	// ReasoningEffort requests a reasoning/thinking level from models that
	// support one: "low", "medium", or "high" (empty = provider default).
	// Providers map it to their native knob (OpenAI reasoning_effort,
	// Ollama think levels) and ignore it where no mapping exists.
	ReasoningEffort string

	// PromptCache opts the request into the provider's prompt caching
	// (Anthropic cache_control; ignored by providers that cache
	// automatically or not at all).
	PromptCache bool
}

// Option mutates a Request before it is sent. Options passed to Generate or
// Stream are applied to a copy of the request, so a Request value can be
// safely reused across calls.
//
// That copy is a shallow struct copy (Apply takes its receiver by value):
// slice fields still share their backing arrays with the caller's Request,
// so an Option must not modify existing slice elements in place.
type Option func(*Request)

// WithSystem returns an Option that replaces the request's system prompt
// with s.
func WithSystem(s string) Option {
	return func(req *Request) {
		req.System = s
	}
}

// WithTools returns an Option that appends tools to the request's Tools,
// after any tools already present.
//
// The existing slice is capped at its current length before appending, so
// the append always lands in a freshly allocated backing array when there is
// something to add. Without the cap, a Request whose Tools slice has spare
// capacity would have that capacity written to by every copy derived from
// it, letting one derived request overwrite another's tools.
func WithTools(tools ...Tool) Option {
	return func(r *Request) {
		n := len(r.Tools)
		r.Tools = append(r.Tools[:n:n], tools...)
	}
}

// WithToolbox returns an Option that appends every tool in the toolbox to the
// request's Tools. b.Tools() is evaluated each time the Option is applied,
// not when WithToolbox is called.
//
// The existing slice is capped at its current length before appending, so
// the append never writes into spare capacity shared with another copy of
// the same Request.
//
// NOTE(review): b is dereferenced via b.Tools() without a nil check; whether
// a nil *Toolbox is acceptable depends on Toolbox.Tools — confirm.
func WithToolbox(b *Toolbox) Option {
	return func(r *Request) {
		n := len(r.Tools)
		r.Tools = append(r.Tools[:n:n], b.Tools()...)
	}
}

// WithToolChoice returns an Option that stores choice as the request's
// tool-choice policy: "auto", "none", "required", or the name of a specific
// tool.
func WithToolChoice(choice string) Option {
	setChoice := func(req *Request) {
		req.ToolChoice = choice
	}
	return setChoice
}

// WithSchema returns an Option that asks for structured output matching the
// given JSON Schema by setting both Schema and SchemaName on the request.
// name may be empty; providers that require a schema name fall back to
// "response" in that case.
func WithSchema(schema json.RawMessage, name string) Option {
	return func(req *Request) {
		req.SchemaName = name
		req.Schema = schema
	}
}

// WithTemperature returns an Option that sets the sampling temperature to t.
// An explicit 0 is preserved: the field is set to a non-nil pointer to 0.
//
// Each application of the Option stores a pointer to its own copy of t.
// Taking the address of the captured parameter instead would make every
// Request the Option is applied to share a single float64, so a write
// through one request's Temperature would silently change all the others.
func WithTemperature(t float64) Option {
	return func(r *Request) {
		v := t
		r.Temperature = &v
	}
}

// WithTopP returns an Option that sets nucleus-sampling top-p to p. An
// explicit 0 is preserved: the field is set to a non-nil pointer to 0.
//
// Each application of the Option stores a pointer to its own copy of p.
// Taking the address of the captured parameter instead would make every
// Request the Option is applied to share a single float64, so a write
// through one request's TopP would silently change all the others.
func WithTopP(p float64) Option {
	return func(r *Request) {
		v := p
		r.TopP = &v
	}
}

// WithMaxTokens returns an Option that sets the response-length cap to n.
// Per the Request.MaxTokens contract, 0 means provider default.
func WithMaxTokens(n int) Option {
	return func(req *Request) {
		req.MaxTokens = n
	}
}

// WithStopSequences returns an Option that replaces the request's stop
// sequences with stops. The slice is stored as given, not copied; calling
// it with no arguments clears the field to nil.
func WithStopSequences(stops ...string) Option {
	return func(req *Request) {
		req.StopSequences = stops
	}
}

// WithReasoningEffort returns an Option that stores level as the requested
// reasoning/thinking level ("low", "medium", or "high") for models that
// support one. The value is stored as given, without validation.
func WithReasoningEffort(level string) Option {
	setEffort := func(req *Request) {
		req.ReasoningEffort = level
	}
	return setEffort
}

// WithPromptCaching returns an Option that sets PromptCache to true, opting
// the request into provider prompt caching where that is an explicit feature
// (Anthropic); it is a no-op elsewhere.
func WithPromptCaching() Option {
	return func(req *Request) {
		req.PromptCache = true
	}
}

// Apply returns a copy of the request with all options applied, in order.
// Providers and wrappers call this once at the top of Generate/Stream.
//
// The receiver is taken by value, so the caller's Request struct is never
// modified. Because that copy is shallow, the Messages, Tools and
// StopSequences slices are first capped at their current length: an Option
// that appends to one of them then allocates a new backing array instead of
// writing into spare capacity shared with the caller's Request (or with the
// result of another Apply on the same Request). Existing elements are still
// shared, so Options must not modify them in place.
//
// Nil options are skipped, which lets callers pass conditionally-set Options.
func (r Request) Apply(opts ...Option) Request {
	// Full slice expressions keep contents and nil-ness; only cap changes.
	r.Messages = r.Messages[:len(r.Messages):len(r.Messages)]
	r.Tools = r.Tools[:len(r.Tools):len(r.Tools)]
	r.StopSequences = r.StopSequences[:len(r.StopSequences):len(r.StopSequences)]

	for _, opt := range opts {
		if opt == nil {
			continue
		}
		opt(&r)
	}
	return r
}