feat(v2): add ReasoningLevel option; thinking/reasoning across providers
Introduces an opt-in level-based reasoning toggle (low/medium/high) that
each provider translates to its native parameter:

- Anthropic: thinking.budget_tokens (1024/8000/24000), with temperature
  forced to default and MaxTokens auto-grown above the budget.
- OpenAI/xAI/Groq via openaicompat: a reasoning_effort string, gated by a
  new Rules.SupportsReasoning predicate so non-reasoning models don't
  receive the parameter. xAI uses Rules.MapReasoningEffort to remap
  "medium" to "high" since its API only accepts low|high.
- Google: thinking_config.thinking_budget + include_thoughts:true.
- DeepSeek: SupportsReasoning=false (the reasoner is always-on; its
  reasoning_content trace was already extracted via openaicompat).

Reasoning content is surfaced as Response.Thinking on Complete and as
StreamEventThinking deltas during streaming. On the provider side it is
extracted from Anthropic thinking content blocks, Google's
part.Thought=true parts, and the non-standard reasoning_content field
that DeepSeek and Groq emit (parsed out of the raw JSON, since openai-go
doesn't type it).

Public API:

- llm.ReasoningLevel + ReasoningLow/Medium/High constants
- llm.WithReasoning(level) request option
- Model.WithReasoning(level) for baked-in defaults
- provider.Request.Reasoning, provider.Response.Thinking
- provider.StreamEventThinking

Tests cover Rules-based gating, MapReasoningEffort, reasoning_content
extraction (Complete + Stream), Anthropic budget mapping, and
temperature suppression when thinking is enabled. Existing behavior is
unchanged when Reasoning is the empty string.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
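Usage sketch (the client construction and Complete call here are
illustrative; only the reasoning option and field names come from this
commit's public API):

    // Per-request opt-in:
    resp, err := client.Complete(ctx, req, llm.WithReasoning(llm.ReasoningHigh))
    if err != nil {
        return err
    }
    fmt.Println(resp.Thinking) // reasoning trace; empty when the model emits none

    // Or bake a default level into a model:
    m := model.WithReasoning(llm.ReasoningMedium)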
@@ -12,6 +12,7 @@ package openaicompat
import (
	"context"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
@@ -66,6 +67,19 @@ type Rules struct {
	// parameters and may mutate them freely (add headers, flip flags, tweak
	// response_format, etc.).
	CustomizeRequest func(params *openai.ChatCompletionNewParams)

	// SupportsReasoning, when non-nil and returning false for the request's
	// model, causes the request's Reasoning field to be silently dropped
	// from the outgoing request. Used by providers (e.g., OpenAI) where
	// reasoning_effort is rejected on non-reasoning models. nil = always
	// pass reasoning_effort through when set.
	SupportsReasoning func(model string) bool

	// MapReasoningEffort, when non-nil, maps the standardized go-llm
	// ReasoningLevel ("low"|"medium"|"high") to the provider's wire-level
	// effort string. Used by xAI, which only accepts "low"|"high" (callers
	// remap "medium" to "high"). nil = pass-through unchanged.
	MapReasoningEffort func(level string) string
}
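// A minimal sketch of an xAI-flavored Rules value wiring both hooks
// together (the model-prefix check is illustrative, not part of this
// change):
//
//	rules := Rules{
//		SupportsReasoning: func(model string) bool {
//			// Only reasoning-capable models accept reasoning_effort.
//			return strings.HasPrefix(model, "grok-3-mini")
//		},
//		MapReasoningEffort: func(level string) string {
//			if level == "medium" {
//				return "high" // the xAI API accepts only low|high
//			}
//			return level
//		},
//	}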

// FeatureUnsupportedError is returned when a Rules predicate rejects a request
@@ -130,6 +144,7 @@ func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan
	stream := cl.Chat.Completions.NewStreaming(ctx, oaiReq)

	var fullText strings.Builder
	var fullThinking strings.Builder
	var toolCalls []provider.ToolCall
	toolCallArgs := map[int]*strings.Builder{}
	var usage *provider.Usage
@@ -157,6 +172,18 @@ func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan
			}
		}

		// Reasoning/thinking delta — DeepSeek and Groq use a non-standard
		// "reasoning_content" field on the delta. Extract it from the
		// raw JSON since the OpenAI SDK doesn't surface it as a typed
		// field.
		if rc := extractReasoningContent(choice.Delta.RawJSON()); rc != "" {
			fullThinking.WriteString(rc)
			events <- provider.StreamEvent{
				Type: provider.StreamEventThinking,
				Text: rc,
			}
		}

		// Tool call deltas
		for _, tc := range choice.Delta.ToolCalls {
			idx := int(tc.Index)
@@ -216,6 +243,7 @@ func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan
		Type: provider.StreamEventDone,
		Response: &provider.Response{
			Text:      fullText.String(),
			Thinking:  fullThinking.String(),
			ToolCalls: toolCalls,
			Usage:     usage,
		},
@@ -303,6 +331,16 @@ func (p *Provider) buildRequest(req provider.Request) openai.ChatCompletionNewPa
		oaiReq.Stop = openai.ChatCompletionNewParamsStopUnion{OfString: openai.String(req.Stop[0])}
	}

	if req.Reasoning != "" {
		if p.rules.SupportsReasoning == nil || p.rules.SupportsReasoning(req.Model) {
			effort := req.Reasoning
			if p.rules.MapReasoningEffort != nil {
				effort = p.rules.MapReasoningEffort(effort)
			}
			oaiReq.ReasoningEffort = shared.ReasoningEffort(effort)
		}
	}

	return oaiReq
}

@@ -468,6 +506,7 @@ func (p *Provider) convertResponse(resp *openai.ChatCompletion) provider.Respons

	choice := resp.Choices[0]
	res.Text = choice.Message.Content
	res.Thinking = extractReasoningContent(choice.Message.RawJSON())

	for _, tc := range choice.Message.ToolCalls {
		res.ToolCalls = append(res.ToolCalls, provider.ToolCall{
@@ -523,6 +562,25 @@ func extractUsageDetails(usage openai.CompletionUsage) map[string]int {
	return details
}

// extractReasoningContent pulls the non-standard "reasoning_content" string
// from the raw JSON of a message or delta. DeepSeek's reasoner and several
// Groq-hosted reasoning models put their thinking trace in this field rather
// than in OpenAI's standard "reasoning_summary" blocks; the OpenAI Go SDK
// doesn't surface it as a typed field, so we re-parse the raw JSON. Returns
// empty string when the field is absent or unparseable.
func extractReasoningContent(rawJSON string) string {
	if rawJSON == "" || !strings.Contains(rawJSON, "reasoning_content") {
		return ""
	}
	var d struct {
		ReasoningContent string `json:"reasoning_content"`
	}
	if err := json.Unmarshal([]byte(rawJSON), &d); err != nil {
		return ""
	}
	return d.ReasoningContent
}
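// For instance, fed the DeepSeek-style message body exercised in the
// tests below, it recovers the trace:
//
//	extractReasoningContent(`{"role":"assistant","content":"42","reasoning_content":"the user asked..."}`)
//	// => "the user asked..."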

// audioFormatFromURL guesses the audio format from a URL's file extension.
func audioFormatFromURL(u string) string {
	ext := strings.ToLower(path.Ext(u))
@@ -282,6 +282,162 @@ func TestStream_EmitsDoneAndText(t *testing.T) {
	}
}

func TestComplete_ReasoningEffortPassthrough(t *testing.T) {
	srv, body := newTestServer(t)
	defer srv.Close()

	req := textReq("o3-mini", "hi")
	req.Reasoning = "high"

	p := openaicompat.New("test-key", srv.URL, openaicompat.Rules{})
	if _, err := p.Complete(context.Background(), req); err != nil {
		t.Fatalf("Complete: %v", err)
	}
	var parsed map[string]any
	if err := json.Unmarshal(*body, &parsed); err != nil {
		t.Fatalf("unmarshal: %v", err)
	}
	if parsed["reasoning_effort"] != "high" {
		t.Errorf("reasoning_effort = %v, want \"high\"; body: %s", parsed["reasoning_effort"], *body)
	}
}

func TestComplete_SupportsReasoningGate(t *testing.T) {
	srv, body := newTestServer(t)
	defer srv.Close()

	req := textReq("gpt-4o", "hi")
	req.Reasoning = "high"

	// SupportsReasoning returns false → reasoning_effort must NOT be sent.
	p := openaicompat.New("test-key", srv.URL, openaicompat.Rules{
		SupportsReasoning: func(string) bool { return false },
	})
	if _, err := p.Complete(context.Background(), req); err != nil {
		t.Fatalf("Complete: %v", err)
	}
	var parsed map[string]any
	if err := json.Unmarshal(*body, &parsed); err != nil {
		t.Fatalf("unmarshal: %v", err)
	}
	if _, ok := parsed["reasoning_effort"]; ok {
		t.Errorf("reasoning_effort should be absent when SupportsReasoning=false; body: %s", *body)
	}
}

func TestComplete_MapReasoningEffort(t *testing.T) {
	srv, body := newTestServer(t)
	defer srv.Close()

	req := textReq("grok-3-mini", "hi")
	req.Reasoning = "medium"

	// xAI-style mapping: medium → high.
	p := openaicompat.New("test-key", srv.URL, openaicompat.Rules{
		MapReasoningEffort: func(level string) string {
			if level == "medium" {
				return "high"
			}
			return level
		},
	})
	if _, err := p.Complete(context.Background(), req); err != nil {
		t.Fatalf("Complete: %v", err)
	}
	var parsed map[string]any
	if err := json.Unmarshal(*body, &parsed); err != nil {
		t.Fatalf("unmarshal: %v", err)
	}
	if parsed["reasoning_effort"] != "high" {
		t.Errorf("reasoning_effort = %v, want \"high\" after medium→high remap; body: %s", parsed["reasoning_effort"], *body)
	}
}

func TestComplete_ReasoningContentExtracted(t *testing.T) {
	// Server returns a DeepSeek-style response with reasoning_content alongside content.
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		_, _ = io.WriteString(w, `{
			"id": "cmpl-1",
			"object": "chat.completion",
			"choices": [{
				"index": 0,
				"message": {
					"role":"assistant",
					"content":"42",
					"reasoning_content":"the user asked for the answer..."
				},
				"finish_reason": "stop"
			}],
			"usage": {"prompt_tokens":1,"completion_tokens":2,"total_tokens":3}
		}`)
	}))
	defer srv.Close()

	p := openaicompat.New("test-key", srv.URL, openaicompat.Rules{})
	resp, err := p.Complete(context.Background(), textReq("deepseek-reasoner", "?"))
	if err != nil {
		t.Fatalf("Complete: %v", err)
	}
	if resp.Text != "42" {
		t.Errorf("Text = %q, want %q", resp.Text, "42")
	}
	if !strings.Contains(resp.Thinking, "the user asked for") {
		t.Errorf("Thinking = %q, want it to contain the reasoning trace", resp.Thinking)
	}
}

func TestStream_ReasoningContentEmitsThinkingEvents(t *testing.T) {
	// Two SSE chunks, each with a reasoning_content delta, then a final done chunk.
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "text/event-stream")
		flusher, _ := w.(http.Flusher)
		for _, line := range []string{
			`data: {"id":"1","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"reasoning_content":"think "}}]}`,
			`data: {"id":"1","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"reasoning_content":"hard","content":"42"}}]}`,
			`data: {"id":"1","object":"chat.completion.chunk","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":1,"completion_tokens":2,"total_tokens":3}}`,
			`data: [DONE]`,
		} {
			_, _ = io.WriteString(w, line+"\n\n")
			if flusher != nil {
				flusher.Flush()
			}
		}
	}))
	defer srv.Close()

	p := openaicompat.New("test-key", srv.URL, openaicompat.Rules{})
	events := make(chan provider.StreamEvent, 32)
	go func() {
		_ = p.Stream(context.Background(), textReq("deepseek-reasoner", "?"), events)
		close(events)
	}()

	var thinking strings.Builder
	var sawDone bool
	var doneThinking string
	for ev := range events {
		switch ev.Type {
		case provider.StreamEventThinking:
			thinking.WriteString(ev.Text)
		case provider.StreamEventDone:
			sawDone = true
			if ev.Response != nil {
				doneThinking = ev.Response.Thinking
			}
		}
	}
	if thinking.String() != "think hard" {
		t.Errorf("streamed thinking = %q, want %q", thinking.String(), "think hard")
	}
	if !sawDone {
		t.Fatal("no Done event")
	}
	if doneThinking != "think hard" {
		t.Errorf("Done.Response.Thinking = %q, want %q", doneThinking, "think hard")
	}
}

func TestStream_RulesCheckedBeforeNetwork(t *testing.T) {
	// Server should never be hit when rules reject up front.
	hit := false