feat(v2): add ReasoningLevel option; thinking/reasoning across providers
CI / Root Module (push) Failing after 1m30s
CI / Lint (push) Failing after 1m1s
CI / V2 Module (push) Successful in 3m41s

Introduces an opt-in level-based reasoning toggle (low/medium/high) that
each provider translates to its native parameter:

- Anthropic: thinking.budget_tokens (1024/8000/24000), with temperature
  forced to default and MaxTokens auto-grown above the budget.
- OpenAI/xAI/Groq via openaicompat: reasoning_effort string, gated by a
  new Rules.SupportsReasoning predicate so non-reasoning models don't
  receive the parameter. xAI uses Rules.MapReasoningEffort to remap
  "medium" to "high" since its API only accepts low|high.
- Google: thinking_config.thinking_budget + include_thoughts:true.
- DeepSeek: SupportsReasoning=false (reasoner is always-on; the
  reasoning_content trace was already extracted via openaicompat).

Reasoning content is surfaced as Response.Thinking on Complete and as
StreamEventThinking deltas during streaming. On the provider side it is
extracted from Anthropic thinking content blocks, Google parts with
part.Thought=true, and the non-standard reasoning_content field that
DeepSeek and Groq emit (parsed out of raw JSON since openai-go doesn't
type it).

Public API:
  - llm.ReasoningLevel + ReasoningLow/Medium/High constants
  - llm.WithReasoning(level) request option
  - Model.WithReasoning(level) for baked-in defaults
  - provider.Request.Reasoning, provider.Response.Thinking
  - provider.StreamEventThinking

Tests cover Rules-based gating, MapReasoningEffort, reasoning_content
extraction (Complete + Stream), Anthropic budget mapping, and
temperature suppression when thinking is enabled. Existing behavior is
unchanged when Reasoning is the empty string.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-04-25 03:58:42 +00:00
parent 34119e5a00
commit cbaf41f50c
16 changed files with 602 additions and 32 deletions
+54 -5
View File
@@ -58,6 +58,7 @@ func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan
contents, cfg := p.buildRequest(req)
var fullText strings.Builder
var fullThinking strings.Builder
var toolCalls []provider.ToolCall
var usage *provider.Usage
@@ -91,10 +92,18 @@ func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan
}
for _, part := range c.Content.Parts {
if part.Text != "" {
fullText.WriteString(part.Text)
events <- provider.StreamEvent{
Type: provider.StreamEventText,
Text: part.Text,
if part.Thought {
fullThinking.WriteString(part.Text)
events <- provider.StreamEvent{
Type: provider.StreamEventThinking,
Text: part.Text,
}
} else {
fullText.WriteString(part.Text)
events <- provider.StreamEvent{
Type: provider.StreamEventText,
Text: part.Text,
}
}
}
if part.FunctionCall != nil {
@@ -124,6 +133,7 @@ func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan
Type: provider.StreamEventDone,
Response: &provider.Response{
Text: fullText.String(),
Thinking: fullThinking.String(),
ToolCalls: toolCalls,
Usage: usage,
},
@@ -166,6 +176,18 @@ func (p *Provider) buildRequest(req provider.Request) ([]*genai.Content, *genai.
cfg.StopSequences = req.Stop
}
// Extended thinking via thinking_config. Models that don't support
// thinking ignore this field; budgets here mirror the Anthropic
// mapping so a single ReasoningLevel produces comparable behavior
// across providers.
if budget := thinkingBudget(req.Reasoning); budget > 0 {
b := int32(budget)
cfg.ThinkingConfig = &genai.ThinkingConfig{
ThinkingBudget: &b,
IncludeThoughts: true,
}
}
for _, msg := range req.Messages {
var role genai.Role
switch msg.Role {
@@ -286,7 +308,11 @@ func (p *Provider) convertResponse(resp *genai.GenerateContentResponse) (provide
}
for _, part := range c.Content.Parts {
if part.Text != "" {
res.Text += part.Text
if part.Thought {
res.Thinking += part.Text
} else {
res.Text += part.Text
}
}
if part.FunctionCall != nil {
args, _ := json.Marshal(part.FunctionCall.Args)
@@ -320,6 +346,29 @@ func (p *Provider) convertResponse(resp *genai.GenerateContentResponse) (provide
return res, nil
}
// Token budgets sent to Google as thinking_budget, one per reasoning level.
// The values are kept equal to the Anthropic provider's mapping so that a
// given go-llm ReasoningLevel behaves comparably on either provider.
const (
	thinkingBudgetLow    = 1024
	thinkingBudgetMedium = 8000
	thinkingBudgetHigh   = 24000
)

// thinkingBudget translates a go-llm ReasoningLevel string into the genai
// thinking_budget token count.
//
// It recognizes exactly "low", "medium" and "high" (case-sensitive). Any
// other value, including the empty string, yields 0; buildRequest treats a
// non-positive result as "do not set ThinkingConfig".
func thinkingBudget(level string) int {
	var tokens int
	switch level {
	case "high":
		tokens = thinkingBudgetHigh
	case "medium":
		tokens = thinkingBudgetMedium
	case "low":
		tokens = thinkingBudgetLow
	default:
		// Unrecognized or empty level: leave tokens at its zero value.
	}
	return tokens
}
// schemaToGenai converts a JSON Schema map to a genai.Schema.
func schemaToGenai(s map[string]any) *genai.Schema {
if s == nil {