feat(v2): add ReasoningLevel option; thinking/reasoning across providers

Introduces an opt-in level-based reasoning toggle (low/medium/high) that
each provider translates to its native parameter:

- Anthropic: thinking.budget_tokens (1024/8000/24000), with temperature
  forced to default and MaxTokens auto-grown above the budget.
- OpenAI/xAI/Groq via openaicompat: reasoning_effort string, gated by a
  new Rules.SupportsReasoning predicate so non-reasoning models don't
  receive the parameter. xAI uses Rules.MapReasoningEffort to remap
  "medium" to "high" since its API only accepts low|high (see the sketch
  after this list).
- Google: thinking_config.thinking_budget + include_thoughts:true.
- DeepSeek: SupportsReasoning=false (reasoner is always-on; the
  reasoning_content trace was already extracted via openaicompat).
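
A rough sketch of how an openaicompat-backed provider might wire the two
new Rules hooks for xAI; the hook names come from this change, while the
function signatures and the model-name check are illustrative assumptions:

    rules := openaicompat.Rules{
        // Only attach reasoning_effort for models known to accept it
        // (hypothetical prefix check).
        SupportsReasoning: func(model string) bool {
            return strings.HasPrefix(model, "grok-3-mini")
        },
        // xAI's API accepts only low|high, so remap medium upward.
        MapReasoningEffort: func(level string) string {
            if level == "medium" {
                return "high"
            }
            return level
        },
    }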

Reasoning content is surfaced as Response.Thinking on Complete and as
StreamEventThinking deltas during streaming. Provider-side: extracted
from Anthropic thinking content blocks, Google's part.Thought=true
parts, and the non-standard reasoning_content field that DeepSeek and
Groq emit (parsed out of raw JSON since openai-go doesn't type it).
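
For illustration, a consumer-side sketch of the streaming surface. The
Stream signature and StreamEvent fields appear in the diff below; the
model id, channel ownership, and error handling here are assumptions:

    events := make(chan provider.StreamEvent)
    go func() {
        defer close(events) // assumes the caller owns the channel
        _ = p.Stream(ctx, provider.Request{
            Model:     "claude-sonnet-4-5", // illustrative model id
            Reasoning: "high",
        }, events)
    }()
    for ev := range events {
        switch ev.Type {
        case provider.StreamEventThinking:
            fmt.Print("[thinking] ", ev.Text)
        case provider.StreamEventText:
            fmt.Print(ev.Text)
        }
    }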

Public API (usage sketched below):
  - llm.ReasoningLevel + ReasoningLow/Medium/High constants
  - llm.WithReasoning(level) request option
  - Model.WithReasoning(level) for baked-in defaults
  - provider.Request.Reasoning, provider.Response.Thinking
  - provider.StreamEventThinking
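
A usage sketch of that surface; the client construction, Complete
signature, and response shape are assumed, and only the option names and
constants come from this commit:

    // Per-request: opt into deep reasoning for a single call.
    resp, err := client.Complete(ctx, prompt, llm.WithReasoning(llm.ReasoningHigh))
    if err != nil {
        return err
    }
    log.Println("thinking:", resp.Thinking) // reasoning trace, if any
    log.Println("answer:", resp.Text)

    // Per-model: bake a default level into the model handle.
    m := baseModel.WithReasoning(llm.ReasoningMedium)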

Tests cover Rules-based gating, MapReasoningEffort, reasoning_content
extraction (Complete + Stream), Anthropic budget mapping, and
temperature suppression when thinking is enabled. Existing behavior is
unchanged when Reasoning is the empty string.
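
The Anthropic budget mapping, for instance, reduces to a small table test
along these lines (a hypothetical shape; the real tests may be organized
differently):

    func TestThinkingBudget(t *testing.T) {
        cases := map[string]int{
            "":       0, // empty string means reasoning disabled
            "low":    1024,
            "medium": 8000,
            "high":   24000,
        }
        for level, want := range cases {
            if got := thinkingBudget(level); got != want {
                t.Errorf("thinkingBudget(%q) = %d, want %d", level, got, want)
            }
        }
    }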

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-25 03:58:42 +00:00
parent 34119e5a00
commit cbaf41f50c
16 changed files with 602 additions and 32 deletions
@@ -49,10 +49,20 @@ func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan
 	resp, err := cl.CreateMessagesStream(ctx, anth.MessagesStreamRequest{
 		MessagesRequest: anthReq,
 		OnContentBlockDelta: func(data anth.MessagesEventContentBlockDeltaData) {
-			if data.Delta.Type == "text_delta" && data.Delta.Text != nil {
-				events <- provider.StreamEvent{
-					Type: provider.StreamEventText,
-					Text: *data.Delta.Text,
-				}
+			switch data.Delta.Type {
+			case anth.MessagesContentTypeTextDelta:
+				if data.Delta.Text != nil {
+					events <- provider.StreamEvent{
+						Type: provider.StreamEventText,
+						Text: *data.Delta.Text,
+					}
+				}
+			case anth.MessagesContentTypeThinkingDelta:
+				if data.Delta.MessageContentThinking != nil {
+					events <- provider.StreamEvent{
+						Type: provider.StreamEventThinking,
+						Text: data.Delta.Thinking,
+					}
+				}
 			}
 		},
@@ -71,6 +81,28 @@ func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan
 	return nil
 }
 
+// Thinking budgets used by Anthropic for low/medium/high reasoning levels.
+// Must each be >= 1024 (Anthropic minimum) and strictly less than MaxTokens.
+const (
+	thinkingBudgetLow    = 1024
+	thinkingBudgetMedium = 8000
+	thinkingBudgetHigh   = 24000
+)
+
+// thinkingBudget returns the Anthropic budget_tokens value for a go-llm
+// ReasoningLevel string. Returns 0 to mean "no thinking" / pass-through.
+func thinkingBudget(level string) int {
+	switch level {
+	case "low":
+		return thinkingBudgetLow
+	case "medium":
+		return thinkingBudgetMedium
+	case "high":
+		return thinkingBudgetHigh
+	}
+	return 0
+}
+
 func (p *Provider) buildRequest(req provider.Request) anth.MessagesRequest {
 	anthReq := anth.MessagesRequest{
 		Model: anth.Model(req.Model),
@@ -81,6 +113,20 @@ func (p *Provider) buildRequest(req provider.Request) anth.MessagesRequest {
 		anthReq.MaxTokens = *req.MaxTokens
 	}
 
+	// Extended thinking. Setting Thinking forces temperature to be unset
+	// (Anthropic only allows the default of 1.0) and requires MaxTokens to
+	// strictly exceed BudgetTokens. We grow MaxTokens if the caller's value
+	// is too small, so callers don't have to reason about budget arithmetic.
+	if budget := thinkingBudget(req.Reasoning); budget > 0 {
+		anthReq.Thinking = &anth.Thinking{
+			Type:         anth.ThinkingTypeEnabled,
+			BudgetTokens: budget,
+		}
+		if anthReq.MaxTokens <= budget {
+			anthReq.MaxTokens = budget + 4096
+		}
+	}
+
 	var msgs []anth.Message
 	var systemText string
@@ -259,7 +305,11 @@ func (p *Provider) buildRequest(req provider.Request) anth.MessagesRequest {
 		}
 	}
 
-	if req.Temperature != nil {
+	// Anthropic rejects a non-default temperature when extended thinking is
+	// enabled. Drop the caller's value silently in that case rather than
+	// erroring — the alternative is forcing every caller to reset
+	// temperature when they enable thinking.
+	if req.Temperature != nil && anthReq.Thinking == nil {
 		f := float32(*req.Temperature)
 		anthReq.Temperature = &f
 	}
@@ -307,6 +357,7 @@ func (p *Provider) buildRequest(req provider.Request) anth.MessagesRequest {
 func (p *Provider) convertResponse(resp anth.MessagesResponse) provider.Response {
 	var res provider.Response
 	var textParts []string
+	var thinkingParts []string
 
 	for _, block := range resp.Content {
 		switch block.Type {
@@ -314,6 +365,10 @@ func (p *Provider) convertResponse(resp anth.MessagesResponse) provider.Response
 			if block.Text != nil {
 				textParts = append(textParts, *block.Text)
 			}
+		case anth.MessagesContentTypeThinking:
+			if block.MessageContentThinking != nil {
+				thinkingParts = append(thinkingParts, block.Thinking)
+			}
 		case anth.MessagesContentTypeToolUse:
 			if block.MessageContentToolUse != nil {
 				args, _ := json.Marshal(block.MessageContentToolUse.Input)
@@ -327,6 +382,7 @@ func (p *Provider) convertResponse(resp anth.MessagesResponse) provider.Response
 	}
 	res.Text = strings.Join(textParts, "")
+	res.Thinking = strings.Join(thinkingParts, "")
 	res.Usage = &provider.Usage{
 		InputTokens: resp.Usage.InputTokens,