feat(v2): add ReasoningLevel option; thinking/reasoning across providers
CI / Root Module (push) Failing after 1m30s
CI / Lint (push) Failing after 1m1s
CI / V2 Module (push) Successful in 3m41s

Introduces an opt-in level-based reasoning toggle (low/medium/high) that
each provider translates to its native parameter:

- Anthropic: thinking.budget_tokens (1024/8000/24000), with temperature
  forced to default and MaxTokens auto-grown above the budget.
- OpenAI/xAI/Groq via openaicompat: reasoning_effort string, gated by a
  new Rules.SupportsReasoning predicate so non-reasoning models don't
  receive the parameter. xAI uses Rules.MapReasoningEffort to remap
  "medium" to "high" since its API only accepts low|high.
- Google: thinking_config.thinking_budget + include_thoughts:true.
- DeepSeek: SupportsReasoning=false (reasoner is always-on; the
  reasoning_content trace was already extracted via openaicompat).

Reasoning content is surfaced as Response.Thinking on Complete and as
StreamEventThinking deltas during streaming. Provider-side: extracted
from Anthropic thinking content blocks, Google's part.Thought=true
parts, and the non-standard reasoning_content field that DeepSeek and
Groq emit (parsed out of raw JSON since openai-go doesn't type it).

Public API:
  - llm.ReasoningLevel + ReasoningLow/Medium/High constants
  - llm.WithReasoning(level) request option
  - Model.WithReasoning(level) for baked-in defaults
  - provider.Request.Reasoning, provider.Response.Thinking
  - provider.StreamEventThinking

Tests cover Rules-based gating, MapReasoningEffort, reasoning_content
extraction (Complete + Stream), Anthropic budget mapping, and
temperature suppression when thinking is enabled. Existing behavior is
unchanged when Reasoning is the empty string.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-04-25 03:58:42 +00:00
parent 34119e5a00
commit cbaf41f50c
16 changed files with 602 additions and 32 deletions
+58
View File
@@ -12,6 +12,7 @@ package openaicompat
import (
"context"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"net/http"
@@ -66,6 +67,19 @@ type Rules struct {
// parameters and may mutate them freely (add headers, flip flags, tweak
// response_format, etc.).
CustomizeRequest func(params *openai.ChatCompletionNewParams)
// SupportsReasoning, when non-nil and returning false for the request's
// model, causes the request's Reasoning field to be silently dropped
// from the outgoing request. Used by providers (e.g., OpenAI) where
// reasoning_effort is rejected on non-reasoning models. nil = always
// pass reasoning_effort through when set.
SupportsReasoning func(model string) bool
// MapReasoningEffort, when non-nil, maps the standardized go-llm
// ReasoningLevel ("low"|"medium"|"high") to the provider's wire-level
// effort string. Used by xAI which only accepts "low"|"high" (callers
// remap "medium" to "high"). nil = pass-through unchanged.
MapReasoningEffort func(level string) string
}
// FeatureUnsupportedError is returned when a Rules predicate rejects a request
@@ -130,6 +144,7 @@ func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan
stream := cl.Chat.Completions.NewStreaming(ctx, oaiReq)
var fullText strings.Builder
var fullThinking strings.Builder
var toolCalls []provider.ToolCall
toolCallArgs := map[int]*strings.Builder{}
var usage *provider.Usage
@@ -157,6 +172,18 @@ func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan
}
}
// Reasoning/thinking delta — DeepSeek and Groq use a non-standard
// "reasoning_content" field on the delta. Extract it from the
// raw JSON since the OpenAI SDK doesn't surface it as a typed
// field.
if rc := extractReasoningContent(choice.Delta.RawJSON()); rc != "" {
fullThinking.WriteString(rc)
events <- provider.StreamEvent{
Type: provider.StreamEventThinking,
Text: rc,
}
}
// Tool call deltas
for _, tc := range choice.Delta.ToolCalls {
idx := int(tc.Index)
@@ -216,6 +243,7 @@ func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan
Type: provider.StreamEventDone,
Response: &provider.Response{
Text: fullText.String(),
Thinking: fullThinking.String(),
ToolCalls: toolCalls,
Usage: usage,
},
@@ -303,6 +331,16 @@ func (p *Provider) buildRequest(req provider.Request) openai.ChatCompletionNewPa
oaiReq.Stop = openai.ChatCompletionNewParamsStopUnion{OfString: openai.String(req.Stop[0])}
}
if req.Reasoning != "" {
if p.rules.SupportsReasoning == nil || p.rules.SupportsReasoning(req.Model) {
effort := req.Reasoning
if p.rules.MapReasoningEffort != nil {
effort = p.rules.MapReasoningEffort(effort)
}
oaiReq.ReasoningEffort = shared.ReasoningEffort(effort)
}
}
return oaiReq
}
@@ -468,6 +506,7 @@ func (p *Provider) convertResponse(resp *openai.ChatCompletion) provider.Respons
choice := resp.Choices[0]
res.Text = choice.Message.Content
res.Thinking = extractReasoningContent(choice.Message.RawJSON())
for _, tc := range choice.Message.ToolCalls {
res.ToolCalls = append(res.ToolCalls, provider.ToolCall{
@@ -523,6 +562,25 @@ func extractUsageDetails(usage openai.CompletionUsage) map[string]int {
return details
}
// extractReasoningContent pulls the non-standard "reasoning_content" string
// from the raw JSON of a message or delta. DeepSeek's reasoner and several
// Groq-hosted reasoning models put their thinking trace in this field rather
// than in OpenAI's standard "reasoning_summary" blocks; the OpenAI Go SDK
// doesn't surface it as a typed field, so we re-parse the raw JSON. Returns
// empty string when the field is absent or unparseable.
func extractReasoningContent(rawJSON string) string {
	// Fast path: skip the JSON parse entirely when the key cannot be present
	// (also covers the empty-string case, since Contains is then false).
	if !strings.Contains(rawJSON, "reasoning_content") {
		return ""
	}
	var payload struct {
		ReasoningContent string `json:"reasoning_content"`
	}
	// Malformed JSON (or a non-string value) degrades to "no thinking trace".
	if json.Unmarshal([]byte(rawJSON), &payload) != nil {
		return ""
	}
	return payload.ReasoningContent
}
// audioFormatFromURL guesses the audio format from a URL's file extension.
func audioFormatFromURL(u string) string {
ext := strings.ToLower(path.Ext(u))