feat(v2): add ReasoningLevel option; thinking/reasoning across providers
Introduces an opt-in level-based reasoning toggle (low/medium/high) that each provider translates to its native parameter:

- Anthropic: thinking.budget_tokens (1024/8000/24000), with temperature forced to default and MaxTokens auto-grown above the budget.
- OpenAI/xAI/Groq via openaicompat: reasoning_effort string, gated by a new Rules.SupportsReasoning predicate so non-reasoning models don't receive the parameter. xAI uses Rules.MapReasoningEffort to remap "medium" to "high" since its API only accepts low|high.
- Google: thinking_config.thinking_budget + include_thoughts:true.
- DeepSeek: SupportsReasoning=false (reasoner is always-on; the reasoning_content trace was already extracted via openaicompat).

Reasoning content is surfaced as Response.Thinking on Complete and as StreamEventThinking deltas during streaming. Provider-side it is extracted from Anthropic thinking content blocks, Google's part.Thought=true parts, and the non-standard reasoning_content field that DeepSeek and Groq emit (parsed out of raw JSON since openai-go doesn't type it).

Public API:
- llm.ReasoningLevel + ReasoningLow/Medium/High constants
- llm.WithReasoning(level) request option
- Model.WithReasoning(level) for baked-in defaults
- provider.Request.Reasoning, provider.Response.Thinking
- provider.StreamEventThinking

Tests cover Rules-based gating, MapReasoningEffort, reasoning_content extraction (Complete + Stream), Anthropic budget mapping, and temperature suppression when thinking is enabled. Existing behavior is unchanged when Reasoning is the empty string.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -38,17 +38,24 @@ func (c *Client) WithMiddleware(mw ...Middleware) *Client {
|
||||
|
||||
// Model represents a specific model from a provider, ready for completions.
|
||||
type Model struct {
|
||||
provider provider.Provider
|
||||
model string
|
||||
middleware []Middleware
|
||||
provider provider.Provider
|
||||
model string
|
||||
middleware []Middleware
|
||||
defaultReasoning ReasoningLevel
|
||||
}
|
||||
|
||||
// WithReasoning returns a copy of the Model that uses the given reasoning
|
||||
// level by default on every Complete/Stream/Chat call. Per-request use of the
|
||||
// WithReasoning request option still takes precedence.
|
||||
func (m *Model) WithReasoning(level ReasoningLevel) *Model {
|
||||
c := *m
|
||||
c.defaultReasoning = level
|
||||
return &c
|
||||
}
|
||||
|
||||
// Complete sends a non-streaming completion request.
|
||||
func (m *Model) Complete(ctx context.Context, messages []Message, opts ...RequestOption) (Response, error) {
|
||||
cfg := &requestConfig{}
|
||||
for _, opt := range opts {
|
||||
opt(cfg)
|
||||
}
|
||||
cfg := m.newRequestConfig(opts)
|
||||
|
||||
chain := m.buildChain()
|
||||
return chain(ctx, m.model, messages, cfg)
|
||||
@@ -56,15 +63,24 @@ func (m *Model) Complete(ctx context.Context, messages []Message, opts ...Reques
|
||||
|
||||
// Stream sends a streaming completion request, returning a StreamReader.
|
||||
func (m *Model) Stream(ctx context.Context, messages []Message, opts ...RequestOption) (*StreamReader, error) {
|
||||
cfg := &requestConfig{}
|
||||
for _, opt := range opts {
|
||||
opt(cfg)
|
||||
}
|
||||
cfg := m.newRequestConfig(opts)
|
||||
|
||||
req := buildProviderRequest(m.model, messages, cfg)
|
||||
return newStreamReader(ctx, m.provider, req)
|
||||
}
|
||||
|
||||
// newRequestConfig builds a requestConfig pre-populated with the Model's
|
||||
// defaults, then applies per-call options on top.
|
||||
func (m *Model) newRequestConfig(opts []RequestOption) *requestConfig {
|
||||
cfg := &requestConfig{
|
||||
reasoning: m.defaultReasoning,
|
||||
}
|
||||
for _, opt := range opts {
|
||||
opt(cfg)
|
||||
}
|
||||
return cfg
|
||||
}
|
||||
|
||||
// WithMiddleware returns a new Model with additional middleware applied.
|
||||
func (m *Model) WithMiddleware(mw ...Middleware) *Model {
|
||||
return &Model{
|
||||
@@ -111,6 +127,9 @@ func buildProviderRequest(model string, messages []Message, cfg *requestConfig)
|
||||
if len(cfg.stop) > 0 {
|
||||
req.Stop = cfg.stop
|
||||
}
|
||||
if cfg.reasoning != "" {
|
||||
req.Reasoning = string(cfg.reasoning)
|
||||
}
|
||||
|
||||
if cfg.tools != nil {
|
||||
for _, tool := range cfg.tools.AllTools() {
|
||||
@@ -181,7 +200,8 @@ func convertMessages(msgs []Message) []provider.Message {
|
||||
|
||||
func convertProviderResponse(resp provider.Response) Response {
|
||||
r := Response{
|
||||
Text: resp.Text,
|
||||
Text: resp.Text,
|
||||
Thinking: resp.Thinking,
|
||||
}
|
||||
|
||||
for _, tc := range resp.ToolCalls {
|
||||
|
||||
Reference in New Issue
Block a user