feat(v2): add ReasoningLevel option; thinking/reasoning across providers
Introduces an opt-in level-based reasoning toggle (low/medium/high) that each provider translates to its native parameter: - Anthropic: thinking.budget_tokens (1024/8000/24000), with temperature forced to default and MaxTokens auto-grown above the budget. - OpenAI/xAI/Groq via openaicompat: reasoning_effort string, gated by a new Rules.SupportsReasoning predicate so non-reasoning models don't receive the parameter. xAI uses Rules.MapReasoningEffort to remap "medium" to "high" since its API only accepts low|high. - Google: thinking_config.thinking_budget + include_thoughts:true. - DeepSeek: SupportsReasoning=false (reasoner is always-on; the reasoning_content trace was already extracted via openaicompat). Reasoning content is surfaced as Response.Thinking on Complete and as StreamEventThinking deltas during streaming. Provider-side: extracted from Anthropic thinking content blocks, Google's part.Thought=true parts, and the non-standard reasoning_content field that DeepSeek and Groq emit (parsed out of raw JSON since openai-go doesn't type it). Public API: - llm.ReasoningLevel + ReasoningLow/Medium/High constants - llm.WithReasoning(level) request option - Model.WithReasoning(level) for baked-in defaults - provider.Request.Reasoning, provider.Response.Thinking - provider.StreamEventThinking Tests cover Rules-based gating, MapReasoningEffort, reasoning_content extraction (Complete + Stream), Anthropic budget mapping, and temperature suppression when thinking is enabled. Existing behavior is unchanged when Reasoning is the empty string. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -49,10 +49,20 @@ func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan
|
||||
resp, err := cl.CreateMessagesStream(ctx, anth.MessagesStreamRequest{
|
||||
MessagesRequest: anthReq,
|
||||
OnContentBlockDelta: func(data anth.MessagesEventContentBlockDeltaData) {
|
||||
if data.Delta.Type == "text_delta" && data.Delta.Text != nil {
|
||||
events <- provider.StreamEvent{
|
||||
Type: provider.StreamEventText,
|
||||
Text: *data.Delta.Text,
|
||||
switch data.Delta.Type {
|
||||
case anth.MessagesContentTypeTextDelta:
|
||||
if data.Delta.Text != nil {
|
||||
events <- provider.StreamEvent{
|
||||
Type: provider.StreamEventText,
|
||||
Text: *data.Delta.Text,
|
||||
}
|
||||
}
|
||||
case anth.MessagesContentTypeThinkingDelta:
|
||||
if data.Delta.MessageContentThinking != nil {
|
||||
events <- provider.StreamEvent{
|
||||
Type: provider.StreamEventThinking,
|
||||
Text: data.Delta.Thinking,
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
@@ -71,6 +81,28 @@ func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan
|
||||
return nil
|
||||
}
|
||||
|
||||
// Thinking budgets used by Anthropic for low/medium/high reasoning levels.
// Must each be >= 1024 (Anthropic minimum) and strictly less than MaxTokens.
const (
	thinkingBudgetLow    = 1024
	thinkingBudgetMedium = 8000
	thinkingBudgetHigh   = 24000
)

// thinkingBudget returns the Anthropic budget_tokens value for a go-llm
// ReasoningLevel string. Unknown levels (including the empty string) yield 0,
// which callers treat as "no thinking" / pass-through.
func thinkingBudget(level string) int {
	budget := 0
	switch level {
	case "low":
		budget = thinkingBudgetLow
	case "medium":
		budget = thinkingBudgetMedium
	case "high":
		budget = thinkingBudgetHigh
	}
	return budget
}
|
||||
|
||||
func (p *Provider) buildRequest(req provider.Request) anth.MessagesRequest {
|
||||
anthReq := anth.MessagesRequest{
|
||||
Model: anth.Model(req.Model),
|
||||
@@ -81,6 +113,20 @@ func (p *Provider) buildRequest(req provider.Request) anth.MessagesRequest {
|
||||
anthReq.MaxTokens = *req.MaxTokens
|
||||
}
|
||||
|
||||
// Extended thinking. Setting Thinking forces temperature to be unset
|
||||
// (Anthropic only allows the default of 1.0) and requires MaxTokens to
|
||||
// strictly exceed BudgetTokens. We grow MaxTokens if the caller's value
|
||||
// is too small, so callers don't have to reason about budget arithmetic.
|
||||
if budget := thinkingBudget(req.Reasoning); budget > 0 {
|
||||
anthReq.Thinking = &anth.Thinking{
|
||||
Type: anth.ThinkingTypeEnabled,
|
||||
BudgetTokens: budget,
|
||||
}
|
||||
if anthReq.MaxTokens <= budget {
|
||||
anthReq.MaxTokens = budget + 4096
|
||||
}
|
||||
}
|
||||
|
||||
var msgs []anth.Message
|
||||
var systemText string
|
||||
|
||||
@@ -259,7 +305,11 @@ func (p *Provider) buildRequest(req provider.Request) anth.MessagesRequest {
|
||||
}
|
||||
}
|
||||
|
||||
if req.Temperature != nil {
|
||||
// Anthropic rejects a non-default temperature when extended thinking is
|
||||
// enabled. Drop the caller's value silently in that case rather than
|
||||
// erroring — the alternative is forcing every caller to reset
|
||||
// temperature when they enable thinking.
|
||||
if req.Temperature != nil && anthReq.Thinking == nil {
|
||||
f := float32(*req.Temperature)
|
||||
anthReq.Temperature = &f
|
||||
}
|
||||
@@ -307,6 +357,7 @@ func (p *Provider) buildRequest(req provider.Request) anth.MessagesRequest {
|
||||
func (p *Provider) convertResponse(resp anth.MessagesResponse) provider.Response {
|
||||
var res provider.Response
|
||||
var textParts []string
|
||||
var thinkingParts []string
|
||||
|
||||
for _, block := range resp.Content {
|
||||
switch block.Type {
|
||||
@@ -314,6 +365,10 @@ func (p *Provider) convertResponse(resp anth.MessagesResponse) provider.Response
|
||||
if block.Text != nil {
|
||||
textParts = append(textParts, *block.Text)
|
||||
}
|
||||
case anth.MessagesContentTypeThinking:
|
||||
if block.MessageContentThinking != nil {
|
||||
thinkingParts = append(thinkingParts, block.Thinking)
|
||||
}
|
||||
case anth.MessagesContentTypeToolUse:
|
||||
if block.MessageContentToolUse != nil {
|
||||
args, _ := json.Marshal(block.MessageContentToolUse.Input)
|
||||
@@ -327,6 +382,7 @@ func (p *Provider) convertResponse(resp anth.MessagesResponse) provider.Response
|
||||
}
|
||||
|
||||
res.Text = strings.Join(textParts, "")
|
||||
res.Thinking = strings.Join(thinkingParts, "")
|
||||
|
||||
res.Usage = &provider.Usage{
|
||||
InputTokens: resp.Usage.InputTokens,
|
||||
|
||||
@@ -0,0 +1,83 @@
|
||||
package anthropic
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/go-llm/v2/provider"
|
||||
|
||||
anth "github.com/liushuangls/go-anthropic/v2"
|
||||
)
|
||||
|
||||
func TestBuildRequest_ThinkingByLevel(t *testing.T) {
|
||||
p := New("k")
|
||||
cases := []struct {
|
||||
level string
|
||||
wantBudget int
|
||||
}{
|
||||
{"", 0},
|
||||
{"low", thinkingBudgetLow},
|
||||
{"medium", thinkingBudgetMedium},
|
||||
{"high", thinkingBudgetHigh},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run("level="+tc.level, func(t *testing.T) {
|
||||
req := provider.Request{
|
||||
Model: "claude-opus-4-7",
|
||||
Reasoning: tc.level,
|
||||
Messages: []provider.Message{{Role: "user", Content: "hi"}},
|
||||
}
|
||||
out := p.buildRequest(req)
|
||||
if tc.wantBudget == 0 {
|
||||
if out.Thinking != nil {
|
||||
t.Fatalf("Thinking should be nil for level=%q, got %+v", tc.level, out.Thinking)
|
||||
}
|
||||
return
|
||||
}
|
||||
if out.Thinking == nil {
|
||||
t.Fatalf("Thinking should be set for level=%q", tc.level)
|
||||
}
|
||||
if out.Thinking.Type != anth.ThinkingTypeEnabled {
|
||||
t.Errorf("Thinking.Type = %q, want enabled", out.Thinking.Type)
|
||||
}
|
||||
if out.Thinking.BudgetTokens != tc.wantBudget {
|
||||
t.Errorf("BudgetTokens = %d, want %d", out.Thinking.BudgetTokens, tc.wantBudget)
|
||||
}
|
||||
if out.MaxTokens <= tc.wantBudget {
|
||||
t.Errorf("MaxTokens (%d) must exceed BudgetTokens (%d)", out.MaxTokens, tc.wantBudget)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildRequest_ThinkingDropsTemperature(t *testing.T) {
|
||||
p := New("k")
|
||||
temp := 0.7
|
||||
req := provider.Request{
|
||||
Model: "claude-opus-4-7",
|
||||
Reasoning: "high",
|
||||
Temperature: &temp,
|
||||
Messages: []provider.Message{{Role: "user", Content: "hi"}},
|
||||
}
|
||||
out := p.buildRequest(req)
|
||||
if out.Temperature != nil {
|
||||
t.Errorf("Temperature should be dropped when thinking is enabled, got %v", *out.Temperature)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildRequest_NoThinkingPreservesTemperature(t *testing.T) {
|
||||
p := New("k")
|
||||
temp := 0.7
|
||||
req := provider.Request{
|
||||
Model: "claude-opus-4-7",
|
||||
Temperature: &temp,
|
||||
Messages: []provider.Message{{Role: "user", Content: "hi"}},
|
||||
}
|
||||
out := p.buildRequest(req)
|
||||
if out.Temperature == nil {
|
||||
t.Fatal("Temperature should be set when thinking is disabled")
|
||||
}
|
||||
got := float64(*out.Temperature)
|
||||
if got < 0.69 || got > 0.71 {
|
||||
t.Errorf("Temperature should be ~0.7 when thinking is disabled, got %v", got)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user