feat(v2): add ReasoningLevel option; thinking/reasoning across providers

Introduces an opt-in level-based reasoning toggle (low/medium/high) that each provider translates to its native parameter:

- Anthropic: thinking.budget_tokens (1024/8000/24000), with temperature forced to default and MaxTokens auto-grown above the budget.
- OpenAI/xAI/Groq via openaicompat: reasoning_effort string, gated by a new Rules.SupportsReasoning predicate so non-reasoning models don't receive the parameter. xAI uses Rules.MapReasoningEffort to remap "medium" to "high" since its API only accepts low|high.
- Google: thinking_config.thinking_budget + include_thoughts:true.
- DeepSeek: SupportsReasoning=false (reasoner is always-on; the reasoning_content trace was already extracted via openaicompat).

Reasoning content is surfaced as Response.Thinking on Complete and as StreamEventThinking deltas during streaming. Provider-side: extracted from Anthropic thinking content blocks, Google's part.Thought=true parts, and the non-standard reasoning_content field that DeepSeek and Groq emit (parsed out of raw JSON since openai-go doesn't type it).

Public API:

- llm.ReasoningLevel + ReasoningLow/Medium/High constants
- llm.WithReasoning(level) request option
- Model.WithReasoning(level) for baked-in defaults
- provider.Request.Reasoning, provider.Response.Thinking
- provider.StreamEventThinking

Tests cover Rules-based gating, MapReasoningEffort, reasoning_content extraction (Complete + Stream), Anthropic budget mapping, and temperature suppression when thinking is enabled. Existing behavior is unchanged when Reasoning is the empty string.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-25 03:58:42 +00:00
parent 34119e5a00
commit cbaf41f50c
16 changed files with 602 additions and 32 deletions
@@ -58,6 +58,7 @@ func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan
 	contents, cfg := p.buildRequest(req)

 	var fullText strings.Builder
+	var fullThinking strings.Builder
 	var toolCalls []provider.ToolCall
 	var usage *provider.Usage

@@ -91,10 +92,18 @@ func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan
 			}
 			for _, part := range c.Content.Parts {
 				if part.Text != "" {
-					fullText.WriteString(part.Text)
-					events <- provider.StreamEvent{
-						Type: provider.StreamEventText,
-						Text: part.Text,
+					if part.Thought {
+						fullThinking.WriteString(part.Text)
+						events <- provider.StreamEvent{
+							Type: provider.StreamEventThinking,
+							Text: part.Text,
+						}
+					} else {
+						fullText.WriteString(part.Text)
+						events <- provider.StreamEvent{
+							Type: provider.StreamEventText,
+							Text: part.Text,
+						}
 					}
 				}
 				if part.FunctionCall != nil {
@@ -124,6 +133,7 @@ func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan
 		Type: provider.StreamEventDone,
 		Response: &provider.Response{
 			Text:      fullText.String(),
+			Thinking:  fullThinking.String(),
 			ToolCalls: toolCalls,
 			Usage:     usage,
 		},
@@ -166,6 +176,18 @@ func (p *Provider) buildRequest(req provider.Request) ([]*genai.Content, *genai.
 		cfg.StopSequences = req.Stop
 	}

+	// Extended thinking via thinking_config. Models that don't support
+	// thinking are expected to ignore this field (NOTE(review): confirm
+	// the API does not reject it). Budgets mirror the Anthropic mapping so
+	// a single ReasoningLevel produces comparable behavior across providers.
+	if budget := thinkingBudget(req.Reasoning); budget > 0 {
+		b := int32(budget)
+		cfg.ThinkingConfig = &genai.ThinkingConfig{
+			ThinkingBudget:  &b,
+			IncludeThoughts: true,
+		}
+	}
+
 	for _, msg := range req.Messages {
 		var role genai.Role
 		switch msg.Role {
@@ -286,7 +308,11 @@ func (p *Provider) convertResponse(resp *genai.GenerateContentResponse) (provide
 		}
 		for _, part := range c.Content.Parts {
 			if part.Text != "" {
-				res.Text += part.Text
+				if part.Thought {
+					res.Thinking += part.Text
+				} else {
+					res.Text += part.Text
+				}
 			}
 			if part.FunctionCall != nil {
 				args, _ := json.Marshal(part.FunctionCall.Args)
@@ -320,6 +346,29 @@ func (p *Provider) convertResponse(resp *genai.GenerateContentResponse) (provide
 	return res, nil
 }

+// Thinking budgets the Google provider sends as thinking_budget for the
+// low/medium/high reasoning levels. Values mirror the Anthropic mapping so a
+// single go-llm ReasoningLevel produces comparable behavior across providers.
+const (
+	thinkingBudgetLow    = 1024
+	thinkingBudgetMedium = 8000
+	thinkingBudgetHigh   = 24000
+)
+
+// thinkingBudget maps a ReasoningLevel ("low", "medium", "high") to a genai
+// thinking_budget; any other level returns 0 and ThinkingConfig stays unset.
+func thinkingBudget(level string) int {
+	switch level {
+	case "low":
+		return thinkingBudgetLow
+	case "medium":
+		return thinkingBudgetMedium
+	case "high":
+		return thinkingBudgetHigh
+	}
+	return 0
+}
+
 // schemaToGenai converts a JSON Schema map to a genai.Schema.
 func schemaToGenai(s map[string]any) *genai.Schema {
 	if s == nil {