feat(v2): add ReasoningLevel option; thinking/reasoning across providers
CI / Root Module (push) Failing after 1m30s
CI / Lint (push) Failing after 1m1s
CI / V2 Module (push) Successful in 3m41s

Introduces an opt-in level-based reasoning toggle (low/medium/high) that
each provider translates to its native parameter:

- Anthropic: thinking.budget_tokens (1024/8000/24000), with temperature
  forced to default and MaxTokens auto-grown above the budget.
- OpenAI/xAI/Groq via openaicompat: reasoning_effort string, gated by a
  new Rules.SupportsReasoning predicate so non-reasoning models don't
  receive the parameter. xAI uses Rules.MapReasoningEffort to remap
  "medium" to "high" since its API only accepts low|high.
- Google: thinking_config.thinking_budget + include_thoughts:true.
- DeepSeek: SupportsReasoning=false (reasoner is always-on; the
  reasoning_content trace was already extracted via openaicompat).

Reasoning content is surfaced as Response.Thinking on Complete and as
StreamEventThinking deltas during streaming. Provider-side: extracted
from Anthropic thinking content blocks, Google's part.Thought=true
parts, and the non-standard reasoning_content field that DeepSeek and
Groq emit (parsed out of raw JSON since openai-go doesn't type it).

Public API:
  - llm.ReasoningLevel + ReasoningLow/Medium/High constants
  - llm.WithReasoning(level) request option
  - Model.WithReasoning(level) for baked-in defaults
  - provider.Request.Reasoning, provider.Response.Thinking
  - provider.StreamEventThinking

Tests cover Rules-based gating, MapReasoningEffort, reasoning_content
extraction (Complete + Stream), Anthropic budget mapping, and
temperature suppression when thinking is enabled. Existing behavior is
unchanged when Reasoning is the empty string.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-04-25 03:58:42 +00:00
parent 34119e5a00
commit cbaf41f50c
16 changed files with 602 additions and 32 deletions
+61 -5
View File
@@ -49,10 +49,20 @@ func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan
resp, err := cl.CreateMessagesStream(ctx, anth.MessagesStreamRequest{
MessagesRequest: anthReq,
OnContentBlockDelta: func(data anth.MessagesEventContentBlockDeltaData) {
if data.Delta.Type == "text_delta" && data.Delta.Text != nil {
events <- provider.StreamEvent{
Type: provider.StreamEventText,
Text: *data.Delta.Text,
switch data.Delta.Type {
case anth.MessagesContentTypeTextDelta:
if data.Delta.Text != nil {
events <- provider.StreamEvent{
Type: provider.StreamEventText,
Text: *data.Delta.Text,
}
}
case anth.MessagesContentTypeThinkingDelta:
if data.Delta.MessageContentThinking != nil {
events <- provider.StreamEvent{
Type: provider.StreamEventThinking,
Text: data.Delta.Thinking,
}
}
}
},
@@ -71,6 +81,28 @@ func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan
return nil
}
// Thinking budgets used by Anthropic for low/medium/high reasoning levels.
// Must each be >= 1024 (Anthropic minimum) and strictly less than MaxTokens.
const (
	thinkingBudgetLow    = 1024
	thinkingBudgetMedium = 8000
	thinkingBudgetHigh   = 24000
)

// thinkingBudget translates a go-llm ReasoningLevel string into the
// Anthropic budget_tokens value. A return of 0 means "no thinking" /
// pass-through; any unrecognized level (including "") maps to 0.
func thinkingBudget(level string) int {
	if level == "low" {
		return thinkingBudgetLow
	}
	if level == "medium" {
		return thinkingBudgetMedium
	}
	if level == "high" {
		return thinkingBudgetHigh
	}
	return 0
}
func (p *Provider) buildRequest(req provider.Request) anth.MessagesRequest {
anthReq := anth.MessagesRequest{
Model: anth.Model(req.Model),
@@ -81,6 +113,20 @@ func (p *Provider) buildRequest(req provider.Request) anth.MessagesRequest {
anthReq.MaxTokens = *req.MaxTokens
}
// Extended thinking. Setting Thinking forces temperature to be unset
// (Anthropic only allows the default of 1.0) and requires MaxTokens to
// strictly exceed BudgetTokens. We grow MaxTokens if the caller's value
// is too small, so callers don't have to reason about budget arithmetic.
if budget := thinkingBudget(req.Reasoning); budget > 0 {
anthReq.Thinking = &anth.Thinking{
Type: anth.ThinkingTypeEnabled,
BudgetTokens: budget,
}
if anthReq.MaxTokens <= budget {
anthReq.MaxTokens = budget + 4096
}
}
var msgs []anth.Message
var systemText string
@@ -259,7 +305,11 @@ func (p *Provider) buildRequest(req provider.Request) anth.MessagesRequest {
}
}
if req.Temperature != nil {
// Anthropic rejects a non-default temperature when extended thinking is
// enabled. Drop the caller's value silently in that case rather than
// erroring — the alternative is forcing every caller to reset
// temperature when they enable thinking.
if req.Temperature != nil && anthReq.Thinking == nil {
f := float32(*req.Temperature)
anthReq.Temperature = &f
}
@@ -307,6 +357,7 @@ func (p *Provider) buildRequest(req provider.Request) anth.MessagesRequest {
func (p *Provider) convertResponse(resp anth.MessagesResponse) provider.Response {
var res provider.Response
var textParts []string
var thinkingParts []string
for _, block := range resp.Content {
switch block.Type {
@@ -314,6 +365,10 @@ func (p *Provider) convertResponse(resp anth.MessagesResponse) provider.Response
if block.Text != nil {
textParts = append(textParts, *block.Text)
}
case anth.MessagesContentTypeThinking:
if block.MessageContentThinking != nil {
thinkingParts = append(thinkingParts, block.Thinking)
}
case anth.MessagesContentTypeToolUse:
if block.MessageContentToolUse != nil {
args, _ := json.Marshal(block.MessageContentToolUse.Input)
@@ -327,6 +382,7 @@ func (p *Provider) convertResponse(resp anth.MessagesResponse) provider.Response
}
res.Text = strings.Join(textParts, "")
res.Thinking = strings.Join(thinkingParts, "")
res.Usage = &provider.Usage{
InputTokens: resp.Usage.InputTokens,
+83
View File
@@ -0,0 +1,83 @@
package anthropic
import (
"testing"
"gitea.stevedudenhoeffer.com/steve/go-llm/v2/provider"
anth "github.com/liushuangls/go-anthropic/v2"
)
// TestBuildRequest_ThinkingByLevel verifies that each reasoning level string
// maps to the expected Anthropic thinking budget, that the empty level leaves
// Thinking unset, and that MaxTokens always strictly exceeds the budget.
func TestBuildRequest_ThinkingByLevel(t *testing.T) {
	p := New("k")

	table := []struct {
		level  string
		budget int
	}{
		{level: "", budget: 0},
		{level: "low", budget: thinkingBudgetLow},
		{level: "medium", budget: thinkingBudgetMedium},
		{level: "high", budget: thinkingBudgetHigh},
	}

	for _, row := range table {
		t.Run("level="+row.level, func(t *testing.T) {
			out := p.buildRequest(provider.Request{
				Model:     "claude-opus-4-7",
				Reasoning: row.level,
				Messages:  []provider.Message{{Role: "user", Content: "hi"}},
			})

			// A zero budget means thinking must stay disabled entirely.
			if row.budget == 0 {
				if out.Thinking != nil {
					t.Fatalf("Thinking should be nil for level=%q, got %+v", row.level, out.Thinking)
				}
				return
			}

			if out.Thinking == nil {
				t.Fatalf("Thinking should be set for level=%q", row.level)
			}
			if out.Thinking.Type != anth.ThinkingTypeEnabled {
				t.Errorf("Thinking.Type = %q, want enabled", out.Thinking.Type)
			}
			if out.Thinking.BudgetTokens != row.budget {
				t.Errorf("BudgetTokens = %d, want %d", out.Thinking.BudgetTokens, row.budget)
			}
			if out.MaxTokens <= row.budget {
				t.Errorf("MaxTokens (%d) must exceed BudgetTokens (%d)", out.MaxTokens, row.budget)
			}
		})
	}
}
// TestBuildRequest_ThinkingDropsTemperature checks that a caller-supplied
// temperature is discarded when extended thinking is enabled, since the
// Anthropic API only accepts the default temperature in that mode.
func TestBuildRequest_ThinkingDropsTemperature(t *testing.T) {
	p := New("k")
	callerTemp := 0.7

	out := p.buildRequest(provider.Request{
		Model:       "claude-opus-4-7",
		Reasoning:   "high",
		Temperature: &callerTemp,
		Messages:    []provider.Message{{Role: "user", Content: "hi"}},
	})

	if out.Temperature != nil {
		t.Errorf("Temperature should be dropped when thinking is enabled, got %v", *out.Temperature)
	}
}
// TestBuildRequest_NoThinkingPreservesTemperature checks that the caller's
// temperature survives request translation when no reasoning level is set
// (i.e. extended thinking stays disabled).
func TestBuildRequest_NoThinkingPreservesTemperature(t *testing.T) {
	p := New("k")
	callerTemp := 0.7

	out := p.buildRequest(provider.Request{
		Model:       "claude-opus-4-7",
		Temperature: &callerTemp,
		Messages:    []provider.Message{{Role: "user", Content: "hi"}},
	})

	if out.Temperature == nil {
		t.Fatal("Temperature should be set when thinking is disabled")
	}
	// Tolerance window absorbs the float64 -> float32 conversion done by
	// buildRequest.
	if got := float64(*out.Temperature); got < 0.69 || got > 0.71 {
		t.Errorf("Temperature should be ~0.7 when thinking is disabled, got %v", got)
	}
}