feat(v2): add ReasoningLevel option; thinking/reasoning across providers
Introduces an opt-in, level-based reasoning toggle (low/medium/high) that each provider translates to its native parameter:

- Anthropic: thinking.budget_tokens (1024/8000/24000), with temperature forced to default and MaxTokens auto-grown above the budget.
- OpenAI/xAI/Groq via openaicompat: reasoning_effort string, gated by a new Rules.SupportsReasoning predicate so non-reasoning models don't receive the parameter. xAI uses Rules.MapReasoningEffort to remap "medium" to "high" since its API only accepts low|high.
- Google: thinking_config.thinking_budget + include_thoughts:true.
- DeepSeek: SupportsReasoning=false (reasoner is always-on; the reasoning_content trace was already extracted via openaicompat).

Reasoning content is surfaced as Response.Thinking on Complete and as StreamEventThinking deltas during streaming. Provider-side, it is extracted from Anthropic thinking content blocks, Google's part.Thought=true parts, and the non-standard reasoning_content field that DeepSeek and Groq emit (parsed out of raw JSON since openai-go doesn't type it).

Public API:

- llm.ReasoningLevel + ReasoningLow/Medium/High constants
- llm.WithReasoning(level) request option
- Model.WithReasoning(level) for baked-in defaults
- provider.Request.Reasoning, provider.Response.Thinking
- provider.StreamEventThinking

Tests cover Rules-based gating, MapReasoningEffort, reasoning_content extraction (Complete + Stream), Anthropic budget mapping, and temperature suppression when thinking is enabled. Existing behavior is unchanged when Reasoning is the empty string.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -2,7 +2,11 @@ package xai_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/go-llm/v2/openaicompat"
|
||||
@@ -10,12 +14,75 @@ import (
|
||||
"gitea.stevedudenhoeffer.com/steve/go-llm/v2/xai"
|
||||
)
|
||||
|
||||
// newReasoningServer starts an httptest server that records the body of the
// most recent request and replies with a minimal valid chat completion. It is
// used to assert the reasoning_effort field that lands on the wire.
//
// It returns the server, which the caller is responsible for closing, and a
// pointer to the recorded request body. A failure to read the request body is
// reported through t so that an empty recording cannot be mistaken for a
// request that simply omitted a field.
func newReasoningServer(t *testing.T) (*httptest.Server, *[]byte) {
	t.Helper()
	var body []byte
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		b, err := io.ReadAll(r.Body)
		if err != nil {
			// The handler runs on a server goroutine, so report with Errorf:
			// Fatalf must only be called from the goroutine running the test.
			t.Errorf("read request body: %v", err)
		}
		body = b
		w.Header().Set("Content-Type", "application/json")
		// Best-effort write: if it fails, the client call in the test surfaces
		// the problem as a request error.
		_, _ = io.WriteString(w, `{"id":"x","object":"chat.completion","choices":[{"index":0,"message":{"role":"assistant","content":"ok"},"finish_reason":"stop"}],"usage":{"prompt_tokens":1,"completion_tokens":1,"total_tokens":2}}`)
	}))
	return srv, &body
}
|
||||
|
||||
// readEffort extracts the top-level "reasoning_effort" string from a JSON
// request body. It returns "" when the body is empty, when the field is
// missing, or when the field holds a non-string value. A body that cannot be
// unmarshalled into a JSON object fails the test immediately.
func readEffort(t *testing.T, body []byte) string {
	t.Helper()
	if len(body) == 0 {
		return ""
	}

	var fields map[string]any
	err := json.Unmarshal(body, &fields)
	if err != nil {
		t.Fatalf("unmarshal body: %v", err)
	}

	// A missing key yields a nil interface, for which the comma-ok assertion
	// produces the zero string, the same result as a non-string value.
	effort, _ := fields["reasoning_effort"].(string)
	return effort
}
|
||||
|
||||
func TestNew_Basic(t *testing.T) {
|
||||
if p := xai.New("key", ""); p == nil {
|
||||
t.Fatal("New returned nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRules_ReasoningGate(t *testing.T) {
|
||||
srv, body := newReasoningServer(t)
|
||||
defer srv.Close()
|
||||
|
||||
// grok-3-mini: reasoning supported, medium maps to high.
|
||||
p := xai.New("k", srv.URL)
|
||||
req := provider.Request{
|
||||
Model: "grok-3-mini",
|
||||
Messages: []provider.Message{{Role: "user", Content: "?"}},
|
||||
Reasoning: "medium",
|
||||
}
|
||||
if _, err := p.Complete(context.Background(), req); err != nil {
|
||||
t.Fatalf("Complete: %v", err)
|
||||
}
|
||||
if effort := readEffort(t, *body); effort != "high" {
|
||||
t.Errorf("grok-3-mini medium → effort=%q, want \"high\"", effort)
|
||||
}
|
||||
|
||||
// grok-2 (no reasoning): effort must NOT be sent.
|
||||
req.Model = "grok-2"
|
||||
*body = nil
|
||||
if _, err := p.Complete(context.Background(), req); err != nil {
|
||||
t.Fatalf("Complete: %v", err)
|
||||
}
|
||||
if effort := readEffort(t, *body); effort != "" {
|
||||
t.Errorf("grok-2 → effort=%q, want absent", effort)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRules_Grok2RejectsImages(t *testing.T) {
|
||||
p := xai.New("key", "")
|
||||
req := provider.Request{
|
||||
|
||||
Reference in New Issue
Block a user