feat(v2): add ReasoningLevel option; thinking/reasoning across providers

Introduces an opt-in level-based reasoning toggle (low/medium/high) that each provider translates to its native parameter:

- Anthropic: thinking.budget_tokens (1024/8000/24000), with temperature forced to default and MaxTokens auto-grown above the budget.
- OpenAI/xAI/Groq via openaicompat: reasoning_effort string, gated by a new Rules.SupportsReasoning predicate so non-reasoning models don't receive the parameter. xAI uses Rules.MapReasoningEffort to remap "medium" to "high" since its API only accepts low|high.
- Google: thinking_config.thinking_budget + include_thoughts:true.
- DeepSeek: SupportsReasoning=false (reasoner is always-on; the reasoning_content trace was already extracted via openaicompat).

Reasoning content is surfaced as Response.Thinking on Complete and as StreamEventThinking deltas during streaming. Provider-side: extracted from Anthropic thinking content blocks, Google's part.Thought=true parts, and the non-standard reasoning_content field that DeepSeek and Groq emit (parsed out of raw JSON since openai-go doesn't type it).

Public API:

- llm.ReasoningLevel + ReasoningLow/Medium/High constants
- llm.WithReasoning(level) request option
- Model.WithReasoning(level) for baked-in defaults
- provider.Request.Reasoning, provider.Response.Thinking
- provider.StreamEventThinking

Tests cover Rules-based gating, MapReasoningEffort, reasoning_content extraction (Complete + Stream), Anthropic budget mapping, and temperature suppression when thinking is enabled. Existing behavior is unchanged when Reasoning is the empty string.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-25 03:58:42 +00:00
parent 34119e5a00
commit cbaf41f50c
16 changed files with 602 additions and 32 deletions
@@ -282,6 +282,162 @@ func TestStream_EmitsDoneAndText(t *testing.T) {
 	}
 }

+// TestComplete_ReasoningEffortPassthrough checks that, with zero-value Rules,
+// the request's Reasoning level shows up unchanged as the "reasoning_effort"
+// field of the body captured by newTestServer (presumably the outgoing request
+// payload — confirm against the helper).
+func TestComplete_ReasoningEffortPassthrough(t *testing.T) {
+	server, captured := newTestServer(t)
+	defer server.Close()
+
+	request := textReq("o3-mini", "hi")
+	request.Reasoning = "high"
+
+	client := openaicompat.New("test-key", server.URL, openaicompat.Rules{})
+	_, err := client.Complete(context.Background(), request)
+	if err != nil {
+		t.Fatalf("Complete: %v", err)
+	}
+
+	var payload map[string]any
+	err = json.Unmarshal(*captured, &payload)
+	if err != nil {
+		t.Fatalf("unmarshal: %v", err)
+	}
+
+	// The level must arrive verbatim: no gating or remapping is configured.
+	if effort := payload["reasoning_effort"]; effort != "high" {
+		t.Errorf("reasoning_effort = %v, want \"high\"; body: %s", effort, *captured)
+	}
+}
+
+// TestComplete_SupportsReasoningGate checks that no "reasoning_effort" field
+// appears in the captured body when Rules.SupportsReasoning reports false, even
+// though the request asks for a reasoning level.
+func TestComplete_SupportsReasoningGate(t *testing.T) {
+	server, captured := newTestServer(t)
+	defer server.Close()
+
+	request := textReq("gpt-4o", "hi")
+	request.Reasoning = "high"
+
+	// A predicate that rejects every model: the parameter must be dropped.
+	neverReasons := func(string) bool { return false }
+	client := openaicompat.New("test-key", server.URL, openaicompat.Rules{
+		SupportsReasoning: neverReasons,
+	})
+	_, err := client.Complete(context.Background(), request)
+	if err != nil {
+		t.Fatalf("Complete: %v", err)
+	}
+
+	var payload map[string]any
+	err = json.Unmarshal(*captured, &payload)
+	if err != nil {
+		t.Fatalf("unmarshal: %v", err)
+	}
+
+	// Presence alone is a failure, whatever value the key carries.
+	_, present := payload["reasoning_effort"]
+	if present {
+		t.Errorf("reasoning_effort should be absent when SupportsReasoning=false; body: %s", *captured)
+	}
+}
+
+// TestComplete_MapReasoningEffort checks that Rules.MapReasoningEffort is applied
+// to the requested level before it is written to the "reasoning_effort" field:
+// a "medium" request must reach the server as "high".
+func TestComplete_MapReasoningEffort(t *testing.T) {
+	server, captured := newTestServer(t)
+	defer server.Close()
+
+	request := textReq("grok-3-mini", "hi")
+	request.Reasoning = "medium"
+
+	// xAI-style mapping: every level passes through except medium, which
+	// is promoted to high.
+	remap := func(level string) string {
+		if level != "medium" {
+			return level
+		}
+		return "high"
+	}
+	client := openaicompat.New("test-key", server.URL, openaicompat.Rules{
+		MapReasoningEffort: remap,
+	})
+	_, err := client.Complete(context.Background(), request)
+	if err != nil {
+		t.Fatalf("Complete: %v", err)
+	}
+
+	var payload map[string]any
+	err = json.Unmarshal(*captured, &payload)
+	if err != nil {
+		t.Fatalf("unmarshal: %v", err)
+	}
+
+	if effort := payload["reasoning_effort"]; effort != "high" {
+		t.Errorf("reasoning_effort = %v, want \"high\" after medium→high remap; body: %s", effort, *captured)
+	}
+}
+
+// TestComplete_ReasoningContentExtracted serves a canned chat-completion response
+// whose message carries a non-standard "reasoning_content" field next to
+// "content", then checks that Complete returns the content as Response.Text and
+// the reasoning trace as Response.Thinking.
+func TestComplete_ReasoningContentExtracted(t *testing.T) {
+	// DeepSeek-style response: reasoning_content sits alongside content.
+	handler := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		// Write errors are deliberately ignored: a failed write surfaces as a
+		// Complete error or a failed assertion below.
+		_, _ = io.WriteString(w, `{
+			"id": "cmpl-1",
+			"object": "chat.completion",
+			"choices": [{
+				"index": 0,
+				"message": {
+					"role":"assistant",
+					"content":"42",
+					"reasoning_content":"the user asked for the answer..."
+				},
+				"finish_reason": "stop"
+			}],
+			"usage": {"prompt_tokens":1,"completion_tokens":2,"total_tokens":3}
+		}`)
+	})
+	server := httptest.NewServer(handler)
+	defer server.Close()
+
+	client := openaicompat.New("test-key", server.URL, openaicompat.Rules{})
+	resp, err := client.Complete(context.Background(), textReq("deepseek-reasoner", "?"))
+	if err != nil {
+		t.Fatalf("Complete: %v", err)
+	}
+
+	if got, want := resp.Text, "42"; got != want {
+		t.Errorf("Text = %q, want %q", got, want)
+	}
+	if trace := resp.Thinking; !strings.Contains(trace, "the user asked for") {
+		t.Errorf("Thinking = %q, want it to contain the reasoning trace", trace)
+	}
+}
+
+// TestStream_ReasoningContentEmitsThinkingEvents serves a canned SSE stream whose
+// deltas carry "reasoning_content" and checks two things: the deltas are
+// forwarded as StreamEventThinking events in order, and the Done event's
+// Response.Thinking holds the concatenated trace.
+func TestStream_ReasoningContentEmitsThinkingEvents(t *testing.T) {
+	// Two SSE chunks, each with a reasoning_content delta, then a final done chunk.
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "text/event-stream")
+		flusher, _ := w.(http.Flusher)
+		for _, line := range []string{
+			`data: {"id":"1","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"reasoning_content":"think "}}]}`,
+			`data: {"id":"1","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"reasoning_content":"hard","content":"42"}}]}`,
+			`data: {"id":"1","object":"chat.completion.chunk","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":1,"completion_tokens":2,"total_tokens":3}}`,
+			`data: [DONE]`,
+		} {
+			// Each SSE event is terminated by a blank line; write errors are
+			// ignored because a broken stream fails the assertions below.
+			_, _ = io.WriteString(w, line+"\n\n")
+			if flusher != nil {
+				flusher.Flush()
+			}
+		}
+	}))
+	defer srv.Close()
+
+	p := openaicompat.New("test-key", srv.URL, openaicompat.Rules{})
+	events := make(chan provider.StreamEvent, 32)
+	go func() {
+		// Close only after Stream has returned and its error has been recorded,
+		// so the range loop below ends strictly after any t.Errorf call here.
+		defer close(events)
+		if err := p.Stream(context.Background(), textReq("deepseek-reasoner", "?"), events); err != nil {
+			// t.Errorf (not Fatalf) because this runs off the test goroutine.
+			t.Errorf("Stream: %v", err)
+		}
+	}()
+
+	var thinking strings.Builder
+	var sawDone bool
+	var doneThinking string
+	for ev := range events {
+		switch ev.Type {
+		case provider.StreamEventThinking:
+			thinking.WriteString(ev.Text)
+		case provider.StreamEventDone:
+			sawDone = true
+			if ev.Response != nil {
+				doneThinking = ev.Response.Thinking
+			}
+		}
+	}
+	if thinking.String() != "think hard" {
+		t.Errorf("streamed thinking = %q, want %q", thinking.String(), "think hard")
+	}
+	if !sawDone {
+		t.Fatal("no Done event")
+	}
+	if doneThinking != "think hard" {
+		t.Errorf("Done.Response.Thinking = %q, want %q", doneThinking, "think hard")
+	}
+}
+
 func TestStream_RulesCheckedBeforeNetwork(t *testing.T) {
 	// Server should never be hit when rules reject up front.
 	hit := false