feat(v2): add ReasoningLevel option; thinking/reasoning across providers

Introduces an opt-in level-based reasoning toggle (low/medium/high) that each provider translates to its native parameter:

- Anthropic: thinking.budget_tokens (1024/8000/24000), with temperature forced to default and MaxTokens auto-grown above the budget.
- OpenAI/xAI/Groq via openaicompat: reasoning_effort string, gated by a new Rules.SupportsReasoning predicate so non-reasoning models don't receive the parameter. xAI uses Rules.MapReasoningEffort to remap "medium" to "high" since its API only accepts low|high.
- Google: thinking_config.thinking_budget + include_thoughts:true.
- DeepSeek: SupportsReasoning=false (reasoner is always-on; the reasoning_content trace was already extracted via openaicompat).

Reasoning content is surfaced as Response.Thinking on Complete and as StreamEventThinking deltas during streaming. On the provider side it is extracted from Anthropic thinking content blocks, Google's part.Thought=true parts, and the non-standard reasoning_content field that DeepSeek and Groq emit (parsed out of raw JSON since openai-go doesn't type it).

Public API:

- llm.ReasoningLevel + ReasoningLow/Medium/High constants
- llm.WithReasoning(level) request option
- Model.WithReasoning(level) for baked-in defaults
- provider.Request.Reasoning, provider.Response.Thinking
- provider.StreamEventThinking

Tests cover Rules-based gating, MapReasoningEffort, reasoning_content extraction (Complete + Stream), Anthropic budget mapping, and temperature suppression when thinking is enabled. Existing behavior is unchanged when Reasoning is the empty string.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-25 03:58:42 +00:00
parent 34119e5a00
commit cbaf41f50c
16 changed files with 602 additions and 32 deletions
@@ -2,7 +2,11 @@ package xai_test

 import (
 	"context"
+	"encoding/json"
 	"errors"
+	"io"
+	"net/http"
+	"net/http/httptest"
 	"testing"

 	"gitea.stevedudenhoeffer.com/steve/go-llm/v2/openaicompat"
@@ -10,12 +14,75 @@ import (
 	"gitea.stevedudenhoeffer.com/steve/go-llm/v2/xai"
 )

+// newReasoningServer starts an httptest server that answers every request with
+// a minimal chat completion and returns it plus a pointer to the last request
+// body, so tests can assert the reasoning_effort sent on the wire. Caller closes.
+func newReasoningServer(t *testing.T) (*httptest.Server, *[]byte) {
+	t.Helper()
+	var body []byte
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		b, _ := io.ReadAll(r.Body) // read error ignored; b keeps any bytes read before it
+		body = b
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = io.WriteString(w, `{"id":"x","object":"chat.completion","choices":[{"index":0,"message":{"role":"assistant","content":"ok"},"finish_reason":"stop"}],"usage":{"prompt_tokens":1,"completion_tokens":1,"total_tokens":2}}`)
+	}))
+	return srv, &body
+}
+
+// readEffort returns the string "reasoning_effort" field of the JSON body, or ""
+// when body is empty or the field is missing/non-string; bad JSON fails the test.
+func readEffort(t *testing.T, body []byte) string {
+	t.Helper()
+	if len(body) == 0 {
+		return ""
+	}
+	var parsed map[string]any
+	if err := json.Unmarshal(body, &parsed); err != nil {
+		t.Fatalf("unmarshal body: %v", err)
+	}
+	if v, ok := parsed["reasoning_effort"]; ok {
+		if s, ok := v.(string); ok {
+			return s
+		}
+	}
+	return ""
+}
+
 func TestNew_Basic(t *testing.T) {
 	if p := xai.New("key", ""); p == nil {
 		t.Fatal("New returned nil")
 	}
 }

+func TestRules_ReasoningGate(t *testing.T) {
+	srv, body := newReasoningServer(t)
+	defer srv.Close()
+
+	// grok-3-mini: "medium" is requested and must reach the wire as reasoning_effort "high".
+	p := xai.New("k", srv.URL)
+	req := provider.Request{
+		Model:     "grok-3-mini",
+		Messages:  []provider.Message{{Role: "user", Content: "?"}},
+		Reasoning: "medium",
+	}
+	if _, err := p.Complete(context.Background(), req); err != nil {
+		t.Fatalf("Complete: %v", err)
+	}
+	if effort := readEffort(t, *body); effort != "high" {
+		t.Errorf("grok-3-mini medium → effort=%q, want \"high\"", effort)
+	}
+
+	// grok-2 (no reasoning): same request, but reasoning_effort must be absent from the body.
+	req.Model = "grok-2"
+	*body = nil // discard the body captured from the grok-3-mini request
+	if _, err := p.Complete(context.Background(), req); err != nil {
+		t.Fatalf("Complete: %v", err)
+	}
+	if effort := readEffort(t, *body); effort != "" {
+		t.Errorf("grok-2 → effort=%q, want absent", effort)
+	}
+}
+
 func TestRules_Grok2RejectsImages(t *testing.T) {
 	p := xai.New("key", "")
 	req := provider.Request{