From cbaf41f50c1ffe624b03469cf927d85848e8a625 Mon Sep 17 00:00:00 2001
From: Steve Dudenhoeffer
Date: Sat, 25 Apr 2026 03:58:42 +0000
Subject: [PATCH] feat(v2): add ReasoningLevel option; thinking/reasoning
 across providers

Introduces an opt-in, level-based reasoning setting (low/medium/high)
that each provider translates to its native parameter:

- Anthropic: thinking.budget_tokens (1024/8000/24000), with temperature
  forced to default and MaxTokens auto-grown above the budget.
- OpenAI/xAI/Groq via openaicompat: reasoning_effort string, gated by a
  new Rules.SupportsReasoning predicate so non-reasoning models don't
  receive the parameter. xAI uses Rules.MapReasoningEffort to remap
  "medium" to "high" since its API only accepts low|high.
- Google: thinking_config.thinking_budget + include_thoughts:true.
- DeepSeek: SupportsReasoning=false (the reasoner is always-on; the
  reasoning_content trace was already extracted via openaicompat).

Reasoning content is surfaced as Response.Thinking on Complete and as
StreamEventThinking deltas during streaming. Provider-side, it is
extracted from Anthropic thinking content blocks, Google's
part.Thought=true parts, and the non-standard reasoning_content field
that DeepSeek and Groq emit (parsed out of the raw JSON, since
openai-go doesn't expose it as a typed field).

Public API:

- llm.ReasoningLevel + ReasoningLow/Medium/High constants
- llm.WithReasoning(level) request option
- Model.WithReasoning(level) for baked-in defaults
- provider.Request.Reasoning, provider.Response.Thinking
- provider.StreamEventThinking

Tests cover Rules-based gating, MapReasoningEffort, reasoning_content
extraction (Complete + Stream), Anthropic budget mapping, and
temperature suppression when thinking is enabled.
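A minimal usage sketch (illustrative only; assumes an existing
*llm.Model m plus ctx and msgs already in scope):

    m = m.WithReasoning(llm.ReasoningMedium) // model-wide default
    resp, err := m.Complete(ctx, msgs,
        llm.WithReasoning(llm.ReasoningHigh)) // per-call option wins
    if err != nil {
        return err
    }
    _ = resp.Text     // final answer, as before
    _ = resp.Thinking // reasoning trace; empty when none was surfaced

When streaming, the same trace arrives incrementally as
StreamEventThinking events alongside the usual StreamEventText deltas.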
Co-Authored-By: Claude Opus 4.7 --- v2/anthropic/anthropic.go | 66 +++++++++++- v2/anthropic/thinking_test.go | 83 ++++++++++++++ v2/chat.go | 7 +- v2/deepseek/deepseek.go | 5 + v2/google/google.go | 59 +++++++++- v2/groq/groq.go | 8 ++ v2/llm.go | 44 +++++--- v2/openai/openai.go | 11 +- v2/openaicompat/openaicompat.go | 58 ++++++++++ v2/openaicompat/openaicompat_test.go | 156 +++++++++++++++++++++++++++ v2/provider/provider.go | 15 +++ v2/request.go | 39 +++++++ v2/response.go | 4 + v2/stream.go | 1 + v2/xai/xai.go | 11 ++ v2/xai/xai_test.go | 67 ++++++++++++ 16 files changed, 602 insertions(+), 32 deletions(-) create mode 100644 v2/anthropic/thinking_test.go diff --git a/v2/anthropic/anthropic.go b/v2/anthropic/anthropic.go index 138ffbf..5b56d76 100644 --- a/v2/anthropic/anthropic.go +++ b/v2/anthropic/anthropic.go @@ -49,10 +49,20 @@ func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan resp, err := cl.CreateMessagesStream(ctx, anth.MessagesStreamRequest{ MessagesRequest: anthReq, OnContentBlockDelta: func(data anth.MessagesEventContentBlockDeltaData) { - if data.Delta.Type == "text_delta" && data.Delta.Text != nil { - events <- provider.StreamEvent{ - Type: provider.StreamEventText, - Text: *data.Delta.Text, + switch data.Delta.Type { + case anth.MessagesContentTypeTextDelta: + if data.Delta.Text != nil { + events <- provider.StreamEvent{ + Type: provider.StreamEventText, + Text: *data.Delta.Text, + } + } + case anth.MessagesContentTypeThinkingDelta: + if data.Delta.MessageContentThinking != nil { + events <- provider.StreamEvent{ + Type: provider.StreamEventThinking, + Text: data.Delta.Thinking, + } } } }, @@ -71,6 +81,28 @@ func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan return nil } +// Thinking budgets used by Anthropic for low/medium/high reasoning levels. +// Must each be >= 1024 (Anthropic minimum) and strictly less than MaxTokens. +const ( + thinkingBudgetLow = 1024 + thinkingBudgetMedium = 8000 + thinkingBudgetHigh = 24000 +) + +// thinkingBudget returns the Anthropic budget_tokens value for a go-llm +// ReasoningLevel string. Returns 0 to mean "no thinking" / pass-through. +func thinkingBudget(level string) int { + switch level { + case "low": + return thinkingBudgetLow + case "medium": + return thinkingBudgetMedium + case "high": + return thinkingBudgetHigh + } + return 0 +} + func (p *Provider) buildRequest(req provider.Request) anth.MessagesRequest { anthReq := anth.MessagesRequest{ Model: anth.Model(req.Model), @@ -81,6 +113,20 @@ func (p *Provider) buildRequest(req provider.Request) anth.MessagesRequest { anthReq.MaxTokens = *req.MaxTokens } + // Extended thinking. Setting Thinking forces temperature to be unset + // (Anthropic only allows the default of 1.0) and requires MaxTokens to + // strictly exceed BudgetTokens. We grow MaxTokens if the caller's value + // is too small, so callers don't have to reason about budget arithmetic. + if budget := thinkingBudget(req.Reasoning); budget > 0 { + anthReq.Thinking = &anth.Thinking{ + Type: anth.ThinkingTypeEnabled, + BudgetTokens: budget, + } + if anthReq.MaxTokens <= budget { + anthReq.MaxTokens = budget + 4096 + } + } + var msgs []anth.Message var systemText string @@ -259,7 +305,11 @@ func (p *Provider) buildRequest(req provider.Request) anth.MessagesRequest { } } - if req.Temperature != nil { + // Anthropic rejects a non-default temperature when extended thinking is + // enabled. 
Drop the caller's value silently in that case rather than + // erroring — the alternative is forcing every caller to reset + // temperature when they enable thinking. + if req.Temperature != nil && anthReq.Thinking == nil { f := float32(*req.Temperature) anthReq.Temperature = &f } @@ -307,6 +357,7 @@ func (p *Provider) buildRequest(req provider.Request) anth.MessagesRequest { func (p *Provider) convertResponse(resp anth.MessagesResponse) provider.Response { var res provider.Response var textParts []string + var thinkingParts []string for _, block := range resp.Content { switch block.Type { @@ -314,6 +365,10 @@ func (p *Provider) convertResponse(resp anth.MessagesResponse) provider.Response if block.Text != nil { textParts = append(textParts, *block.Text) } + case anth.MessagesContentTypeThinking: + if block.MessageContentThinking != nil { + thinkingParts = append(thinkingParts, block.Thinking) + } case anth.MessagesContentTypeToolUse: if block.MessageContentToolUse != nil { args, _ := json.Marshal(block.MessageContentToolUse.Input) @@ -327,6 +382,7 @@ func (p *Provider) convertResponse(resp anth.MessagesResponse) provider.Response } res.Text = strings.Join(textParts, "") + res.Thinking = strings.Join(thinkingParts, "") res.Usage = &provider.Usage{ InputTokens: resp.Usage.InputTokens, diff --git a/v2/anthropic/thinking_test.go b/v2/anthropic/thinking_test.go new file mode 100644 index 0000000..d3adcae --- /dev/null +++ b/v2/anthropic/thinking_test.go @@ -0,0 +1,83 @@ +package anthropic + +import ( + "testing" + + "gitea.stevedudenhoeffer.com/steve/go-llm/v2/provider" + + anth "github.com/liushuangls/go-anthropic/v2" +) + +func TestBuildRequest_ThinkingByLevel(t *testing.T) { + p := New("k") + cases := []struct { + level string + wantBudget int + }{ + {"", 0}, + {"low", thinkingBudgetLow}, + {"medium", thinkingBudgetMedium}, + {"high", thinkingBudgetHigh}, + } + for _, tc := range cases { + t.Run("level="+tc.level, func(t *testing.T) { + req := provider.Request{ + Model: "claude-opus-4-7", + Reasoning: tc.level, + Messages: []provider.Message{{Role: "user", Content: "hi"}}, + } + out := p.buildRequest(req) + if tc.wantBudget == 0 { + if out.Thinking != nil { + t.Fatalf("Thinking should be nil for level=%q, got %+v", tc.level, out.Thinking) + } + return + } + if out.Thinking == nil { + t.Fatalf("Thinking should be set for level=%q", tc.level) + } + if out.Thinking.Type != anth.ThinkingTypeEnabled { + t.Errorf("Thinking.Type = %q, want enabled", out.Thinking.Type) + } + if out.Thinking.BudgetTokens != tc.wantBudget { + t.Errorf("BudgetTokens = %d, want %d", out.Thinking.BudgetTokens, tc.wantBudget) + } + if out.MaxTokens <= tc.wantBudget { + t.Errorf("MaxTokens (%d) must exceed BudgetTokens (%d)", out.MaxTokens, tc.wantBudget) + } + }) + } +} + +func TestBuildRequest_ThinkingDropsTemperature(t *testing.T) { + p := New("k") + temp := 0.7 + req := provider.Request{ + Model: "claude-opus-4-7", + Reasoning: "high", + Temperature: &temp, + Messages: []provider.Message{{Role: "user", Content: "hi"}}, + } + out := p.buildRequest(req) + if out.Temperature != nil { + t.Errorf("Temperature should be dropped when thinking is enabled, got %v", *out.Temperature) + } +} + +func TestBuildRequest_NoThinkingPreservesTemperature(t *testing.T) { + p := New("k") + temp := 0.7 + req := provider.Request{ + Model: "claude-opus-4-7", + Temperature: &temp, + Messages: []provider.Message{{Role: "user", Content: "hi"}}, + } + out := p.buildRequest(req) + if out.Temperature == nil { + t.Fatal("Temperature should be set 
when thinking is disabled") + } + got := float64(*out.Temperature) + if got < 0.69 || got > 0.71 { + t.Errorf("Temperature should be ~0.7 when thinking is disabled, got %v", got) + } +} diff --git a/v2/chat.go b/v2/chat.go index 4b3a92e..d9cbf05 100644 --- a/v2/chat.go +++ b/v2/chat.go @@ -108,12 +108,7 @@ func (c *Chat) SendRaw(ctx context.Context, msg Message) (Response, error) { func (c *Chat) SendStream(ctx context.Context, text string) (*StreamReader, error) { c.messages = append(c.messages, UserMessage(text)) - opts := c.buildOpts() - - cfg := &requestConfig{} - for _, opt := range opts { - opt(cfg) - } + cfg := c.model.newRequestConfig(c.buildOpts()) req := buildProviderRequest(c.model.model, c.messages, cfg) return newStreamReader(ctx, c.model.provider, req) diff --git a/v2/deepseek/deepseek.go b/v2/deepseek/deepseek.go index be67913..af3a1c2 100644 --- a/v2/deepseek/deepseek.go +++ b/v2/deepseek/deepseek.go @@ -32,5 +32,10 @@ func New(apiKey, baseURL string) *Provider { RestrictTemperature: func(m string) bool { return strings.Contains(m, "reasoner") }, + // DeepSeek's reasoner thinks unconditionally; the API rejects an + // explicit reasoning_effort parameter. The thinking trace is + // surfaced via openaicompat's reasoning_content extraction without + // any opt-in. + SupportsReasoning: func(string) bool { return false }, }) } diff --git a/v2/google/google.go b/v2/google/google.go index 5003bf2..60128b9 100644 --- a/v2/google/google.go +++ b/v2/google/google.go @@ -58,6 +58,7 @@ func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan contents, cfg := p.buildRequest(req) var fullText strings.Builder + var fullThinking strings.Builder var toolCalls []provider.ToolCall var usage *provider.Usage @@ -91,10 +92,18 @@ func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan } for _, part := range c.Content.Parts { if part.Text != "" { - fullText.WriteString(part.Text) - events <- provider.StreamEvent{ - Type: provider.StreamEventText, - Text: part.Text, + if part.Thought { + fullThinking.WriteString(part.Text) + events <- provider.StreamEvent{ + Type: provider.StreamEventThinking, + Text: part.Text, + } + } else { + fullText.WriteString(part.Text) + events <- provider.StreamEvent{ + Type: provider.StreamEventText, + Text: part.Text, + } } } if part.FunctionCall != nil { @@ -124,6 +133,7 @@ func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan Type: provider.StreamEventDone, Response: &provider.Response{ Text: fullText.String(), + Thinking: fullThinking.String(), ToolCalls: toolCalls, Usage: usage, }, @@ -166,6 +176,18 @@ func (p *Provider) buildRequest(req provider.Request) ([]*genai.Content, *genai. cfg.StopSequences = req.Stop } + // Extended thinking via thinking_config. Models that don't support + // thinking ignore this field; budgets here mirror the Anthropic + // mapping so a single ReasoningLevel produces comparable behavior + // across providers. 
+ if budget := thinkingBudget(req.Reasoning); budget > 0 { + b := int32(budget) + cfg.ThinkingConfig = &genai.ThinkingConfig{ + ThinkingBudget: &b, + IncludeThoughts: true, + } + } + for _, msg := range req.Messages { var role genai.Role switch msg.Role { @@ -286,7 +308,11 @@ func (p *Provider) convertResponse(resp *genai.GenerateContentResponse) (provide } for _, part := range c.Content.Parts { if part.Text != "" { - res.Text += part.Text + if part.Thought { + res.Thinking += part.Text + } else { + res.Text += part.Text + } } if part.FunctionCall != nil { args, _ := json.Marshal(part.FunctionCall.Args) @@ -320,6 +346,29 @@ func (p *Provider) convertResponse(resp *genai.GenerateContentResponse) (provide return res, nil } +// Thinking budgets used by Google for low/medium/high reasoning levels. +// Mirrors the Anthropic mapping so a single go-llm ReasoningLevel produces +// comparable behavior across providers. +const ( + thinkingBudgetLow = 1024 + thinkingBudgetMedium = 8000 + thinkingBudgetHigh = 24000 +) + +// thinkingBudget returns the genai thinking_budget for a go-llm +// ReasoningLevel, or 0 to disable thinking. +func thinkingBudget(level string) int { + switch level { + case "low": + return thinkingBudgetLow + case "medium": + return thinkingBudgetMedium + case "high": + return thinkingBudgetHigh + } + return 0 +} + // schemaToGenai converts a JSON Schema map to a genai.Schema. func schemaToGenai(s map[string]any) *genai.Schema { if s == nil { diff --git a/v2/groq/groq.go b/v2/groq/groq.go index cd1d9a7..fe7e32e 100644 --- a/v2/groq/groq.go +++ b/v2/groq/groq.go @@ -29,5 +29,13 @@ func New(apiKey, baseURL string) *Provider { // Chat completions endpoint does not accept audio input; audio is via // dedicated transcription endpoints, which go-llm doesn't cover here. SupportsAudio: func(string) bool { return false }, + // Reasoning models hosted on Groq (DeepSeek R1 distill family, qwen + // reasoning variants, gpt-oss) accept reasoning_effort. Vanilla + // Llama / Mixtral don't. + SupportsReasoning: func(m string) bool { + return strings.Contains(m, "deepseek-r1") || + strings.Contains(m, "qwen") || + strings.Contains(m, "gpt-oss") + }, }) } diff --git a/v2/llm.go b/v2/llm.go index df94905..4b46b5c 100644 --- a/v2/llm.go +++ b/v2/llm.go @@ -38,17 +38,24 @@ func (c *Client) WithMiddleware(mw ...Middleware) *Client { // Model represents a specific model from a provider, ready for completions. type Model struct { - provider provider.Provider - model string - middleware []Middleware + provider provider.Provider + model string + middleware []Middleware + defaultReasoning ReasoningLevel +} + +// WithReasoning returns a copy of the Model that uses the given reasoning +// level by default on every Complete/Stream/Chat call. Per-request use of the +// WithReasoning request option still takes precedence. +func (m *Model) WithReasoning(level ReasoningLevel) *Model { + c := *m + c.defaultReasoning = level + return &c } // Complete sends a non-streaming completion request. func (m *Model) Complete(ctx context.Context, messages []Message, opts ...RequestOption) (Response, error) { - cfg := &requestConfig{} - for _, opt := range opts { - opt(cfg) - } + cfg := m.newRequestConfig(opts) chain := m.buildChain() return chain(ctx, m.model, messages, cfg) @@ -56,15 +63,24 @@ func (m *Model) Complete(ctx context.Context, messages []Message, opts ...Reques // Stream sends a streaming completion request, returning a StreamReader. 
func (m *Model) Stream(ctx context.Context, messages []Message, opts ...RequestOption) (*StreamReader, error) { - cfg := &requestConfig{} - for _, opt := range opts { - opt(cfg) - } + cfg := m.newRequestConfig(opts) req := buildProviderRequest(m.model, messages, cfg) return newStreamReader(ctx, m.provider, req) } +// newRequestConfig builds a requestConfig pre-populated with the Model's +// defaults, then applies per-call options on top. +func (m *Model) newRequestConfig(opts []RequestOption) *requestConfig { + cfg := &requestConfig{ + reasoning: m.defaultReasoning, + } + for _, opt := range opts { + opt(cfg) + } + return cfg +} + // WithMiddleware returns a new Model with additional middleware applied. func (m *Model) WithMiddleware(mw ...Middleware) *Model { return &Model{ @@ -111,6 +127,9 @@ func buildProviderRequest(model string, messages []Message, cfg *requestConfig) if len(cfg.stop) > 0 { req.Stop = cfg.stop } + if cfg.reasoning != "" { + req.Reasoning = string(cfg.reasoning) + } if cfg.tools != nil { for _, tool := range cfg.tools.AllTools() { @@ -181,7 +200,8 @@ func convertMessages(msgs []Message) []provider.Message { func convertProviderResponse(resp provider.Response) Response { r := Response{ - Text: resp.Text, + Text: resp.Text, + Thinking: resp.Thinking, } for _, tc := range resp.ToolCalls { diff --git a/v2/openai/openai.go b/v2/openai/openai.go index 5b40964..23b6c9e 100644 --- a/v2/openai/openai.go +++ b/v2/openai/openai.go @@ -24,12 +24,15 @@ func New(apiKey string, baseURL string) *Provider { baseURL = DefaultBaseURL } return openaicompat.New(apiKey, baseURL, openaicompat.Rules{ - RestrictTemperature: restrictTemperature, + RestrictTemperature: isReasoningModel, + SupportsReasoning: isReasoningModel, }) } -// restrictTemperature reports whether OpenAI rejects a user-supplied -// temperature for this model. o-series reasoning models and gpt-5* both do. -func restrictTemperature(model string) bool { +// isReasoningModel reports whether the named OpenAI model is a reasoning +// model (o-series or gpt-5*). Reasoning models reject a user-supplied +// temperature and accept a reasoning_effort parameter; everything else +// rejects reasoning_effort. +func isReasoningModel(model string) bool { return strings.HasPrefix(model, "o") || strings.HasPrefix(model, "gpt-5") } diff --git a/v2/openaicompat/openaicompat.go b/v2/openaicompat/openaicompat.go index 690b358..45867ad 100644 --- a/v2/openaicompat/openaicompat.go +++ b/v2/openaicompat/openaicompat.go @@ -12,6 +12,7 @@ package openaicompat import ( "context" "encoding/base64" + "encoding/json" "fmt" "io" "net/http" @@ -66,6 +67,19 @@ type Rules struct { // parameters and may mutate them freely (add headers, flip flags, tweak // response_format, etc.). CustomizeRequest func(params *openai.ChatCompletionNewParams) + + // SupportsReasoning, when non-nil and returning false for the request's + // model, causes the request's Reasoning field to be silently dropped + // from the outgoing request. Used by providers (e.g., OpenAI) where + // reasoning_effort is rejected on non-reasoning models. nil = always + // pass reasoning_effort through when set. + SupportsReasoning func(model string) bool + + // MapReasoningEffort, when non-nil, maps the standardized go-llm + // ReasoningLevel ("low"|"medium"|"high") to the provider's wire-level + // effort string. Used by xAI which only accepts "low"|"high" (callers + // remap "medium" to "high"). nil = pass-through unchanged. 
+ MapReasoningEffort func(level string) string } // FeatureUnsupportedError is returned when a Rules predicate rejects a request @@ -130,6 +144,7 @@ func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan stream := cl.Chat.Completions.NewStreaming(ctx, oaiReq) var fullText strings.Builder + var fullThinking strings.Builder var toolCalls []provider.ToolCall toolCallArgs := map[int]*strings.Builder{} var usage *provider.Usage @@ -157,6 +172,18 @@ func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan } } + // Reasoning/thinking delta — DeepSeek and Groq use a non-standard + // "reasoning_content" field on the delta. Extract it from the + // raw JSON since the OpenAI SDK doesn't surface it as a typed + // field. + if rc := extractReasoningContent(choice.Delta.RawJSON()); rc != "" { + fullThinking.WriteString(rc) + events <- provider.StreamEvent{ + Type: provider.StreamEventThinking, + Text: rc, + } + } + // Tool call deltas for _, tc := range choice.Delta.ToolCalls { idx := int(tc.Index) @@ -216,6 +243,7 @@ func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan Type: provider.StreamEventDone, Response: &provider.Response{ Text: fullText.String(), + Thinking: fullThinking.String(), ToolCalls: toolCalls, Usage: usage, }, @@ -303,6 +331,16 @@ func (p *Provider) buildRequest(req provider.Request) openai.ChatCompletionNewPa oaiReq.Stop = openai.ChatCompletionNewParamsStopUnion{OfString: openai.String(req.Stop[0])} } + if req.Reasoning != "" { + if p.rules.SupportsReasoning == nil || p.rules.SupportsReasoning(req.Model) { + effort := req.Reasoning + if p.rules.MapReasoningEffort != nil { + effort = p.rules.MapReasoningEffort(effort) + } + oaiReq.ReasoningEffort = shared.ReasoningEffort(effort) + } + } + return oaiReq } @@ -468,6 +506,7 @@ func (p *Provider) convertResponse(resp *openai.ChatCompletion) provider.Respons choice := resp.Choices[0] res.Text = choice.Message.Content + res.Thinking = extractReasoningContent(choice.Message.RawJSON()) for _, tc := range choice.Message.ToolCalls { res.ToolCalls = append(res.ToolCalls, provider.ToolCall{ @@ -523,6 +562,25 @@ func extractUsageDetails(usage openai.CompletionUsage) map[string]int { return details } +// extractReasoningContent pulls the non-standard "reasoning_content" string +// from the raw JSON of a message or delta. DeepSeek's reasoner and several +// Groq-hosted reasoning models put their thinking trace in this field rather +// than in OpenAI's standard "reasoning_summary" blocks; the OpenAI Go SDK +// doesn't surface it as a typed field, so we re-parse the raw JSON. Returns +// empty string when the field is absent or unparseable. +func extractReasoningContent(rawJSON string) string { + if rawJSON == "" || !strings.Contains(rawJSON, "reasoning_content") { + return "" + } + var d struct { + ReasoningContent string `json:"reasoning_content"` + } + if err := json.Unmarshal([]byte(rawJSON), &d); err != nil { + return "" + } + return d.ReasoningContent +} + // audioFormatFromURL guesses the audio format from a URL's file extension. 
func audioFormatFromURL(u string) string { ext := strings.ToLower(path.Ext(u)) diff --git a/v2/openaicompat/openaicompat_test.go b/v2/openaicompat/openaicompat_test.go index efd412a..ae76683 100644 --- a/v2/openaicompat/openaicompat_test.go +++ b/v2/openaicompat/openaicompat_test.go @@ -282,6 +282,162 @@ func TestStream_EmitsDoneAndText(t *testing.T) { } } +func TestComplete_ReasoningEffortPassthrough(t *testing.T) { + srv, body := newTestServer(t) + defer srv.Close() + + req := textReq("o3-mini", "hi") + req.Reasoning = "high" + + p := openaicompat.New("test-key", srv.URL, openaicompat.Rules{}) + if _, err := p.Complete(context.Background(), req); err != nil { + t.Fatalf("Complete: %v", err) + } + var parsed map[string]any + if err := json.Unmarshal(*body, &parsed); err != nil { + t.Fatalf("unmarshal: %v", err) + } + if parsed["reasoning_effort"] != "high" { + t.Errorf("reasoning_effort = %v, want \"high\"; body: %s", parsed["reasoning_effort"], *body) + } +} + +func TestComplete_SupportsReasoningGate(t *testing.T) { + srv, body := newTestServer(t) + defer srv.Close() + + req := textReq("gpt-4o", "hi") + req.Reasoning = "high" + + // SupportsReasoning returns false → reasoning_effort must NOT be sent. + p := openaicompat.New("test-key", srv.URL, openaicompat.Rules{ + SupportsReasoning: func(string) bool { return false }, + }) + if _, err := p.Complete(context.Background(), req); err != nil { + t.Fatalf("Complete: %v", err) + } + var parsed map[string]any + if err := json.Unmarshal(*body, &parsed); err != nil { + t.Fatalf("unmarshal: %v", err) + } + if _, ok := parsed["reasoning_effort"]; ok { + t.Errorf("reasoning_effort should be absent when SupportsReasoning=false; body: %s", *body) + } +} + +func TestComplete_MapReasoningEffort(t *testing.T) { + srv, body := newTestServer(t) + defer srv.Close() + + req := textReq("grok-3-mini", "hi") + req.Reasoning = "medium" + + // xAI-style mapping: medium → high. + p := openaicompat.New("test-key", srv.URL, openaicompat.Rules{ + MapReasoningEffort: func(level string) string { + if level == "medium" { + return "high" + } + return level + }, + }) + if _, err := p.Complete(context.Background(), req); err != nil { + t.Fatalf("Complete: %v", err) + } + var parsed map[string]any + if err := json.Unmarshal(*body, &parsed); err != nil { + t.Fatalf("unmarshal: %v", err) + } + if parsed["reasoning_effort"] != "high" { + t.Errorf("reasoning_effort = %v, want \"high\" after medium→high remap; body: %s", parsed["reasoning_effort"], *body) + } +} + +func TestComplete_ReasoningContentExtracted(t *testing.T) { + // Server returns a DeepSeek-style response with reasoning_content alongside content. + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = io.WriteString(w, `{ + "id": "cmpl-1", + "object": "chat.completion", + "choices": [{ + "index": 0, + "message": { + "role":"assistant", + "content":"42", + "reasoning_content":"the user asked for the answer..." 
+ }, + "finish_reason": "stop" + }], + "usage": {"prompt_tokens":1,"completion_tokens":2,"total_tokens":3} + }`) + })) + defer srv.Close() + + p := openaicompat.New("test-key", srv.URL, openaicompat.Rules{}) + resp, err := p.Complete(context.Background(), textReq("deepseek-reasoner", "?")) + if err != nil { + t.Fatalf("Complete: %v", err) + } + if resp.Text != "42" { + t.Errorf("Text = %q, want %q", resp.Text, "42") + } + if !strings.Contains(resp.Thinking, "the user asked for") { + t.Errorf("Thinking = %q, want it to contain the reasoning trace", resp.Thinking) + } +} + +func TestStream_ReasoningContentEmitsThinkingEvents(t *testing.T) { + // Two SSE chunks, each with a reasoning_content delta, then a final done chunk. + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/event-stream") + flusher, _ := w.(http.Flusher) + for _, line := range []string{ + `data: {"id":"1","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"reasoning_content":"think "}}]}`, + `data: {"id":"1","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"reasoning_content":"hard","content":"42"}}]}`, + `data: {"id":"1","object":"chat.completion.chunk","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":1,"completion_tokens":2,"total_tokens":3}}`, + `data: [DONE]`, + } { + _, _ = io.WriteString(w, line+"\n\n") + if flusher != nil { + flusher.Flush() + } + } + })) + defer srv.Close() + + p := openaicompat.New("test-key", srv.URL, openaicompat.Rules{}) + events := make(chan provider.StreamEvent, 32) + go func() { + _ = p.Stream(context.Background(), textReq("deepseek-reasoner", "?"), events) + close(events) + }() + + var thinking strings.Builder + var sawDone bool + var doneThinking string + for ev := range events { + switch ev.Type { + case provider.StreamEventThinking: + thinking.WriteString(ev.Text) + case provider.StreamEventDone: + sawDone = true + if ev.Response != nil { + doneThinking = ev.Response.Thinking + } + } + } + if thinking.String() != "think hard" { + t.Errorf("streamed thinking = %q, want %q", thinking.String(), "think hard") + } + if !sawDone { + t.Fatal("no Done event") + } + if doneThinking != "think hard" { + t.Errorf("Done.Response.Thinking = %q, want %q", doneThinking, "think hard") + } +} + func TestStream_RulesCheckedBeforeNetwork(t *testing.T) { // Server should never be hit when rules reject up front. hit := false diff --git a/v2/provider/provider.go b/v2/provider/provider.go index 9e2b5fa..75b26dc 100644 --- a/v2/provider/provider.go +++ b/v2/provider/provider.go @@ -80,6 +80,15 @@ type Request struct { // CacheHints requests prompt-cache breakpoints at specified positions // on providers that support it (currently Anthropic). nil = no caching. CacheHints *CacheHints + + // Reasoning, when non-empty, asks the model to spend extra inference + // budget reasoning before answering. Each provider translates this to + // its native parameter (Anthropic thinking.budget_tokens, OpenAI/xAI + // reasoning_effort, Google thinking_config, etc.). Models that do not + // support reasoning silently ignore it. + // + // Allowed values: "" (no reasoning, default), "low", "medium", "high". + Reasoning string } // Response is a completion response at the provider level. @@ -87,6 +96,11 @@ type Response struct { Text string ToolCalls []ToolCall Usage *Usage + + // Thinking holds the model's reasoning/thinking trace, when one was + // requested and the provider exposed it. 
Empty for providers/models + // that do not surface a thinking trace. + Thinking string } // Usage captures token consumption. @@ -117,6 +131,7 @@ const ( StreamEventToolEnd // Tool call complete StreamEventDone // Stream complete StreamEventError // Error occurred + StreamEventThinking // Reasoning/thinking content delta ) // StreamEvent represents a single event in a streaming response. diff --git a/v2/request.go b/v2/request.go index 61912dc..5d15f19 100644 --- a/v2/request.go +++ b/v2/request.go @@ -10,8 +10,32 @@ type requestConfig struct { topP *float64 stop []string cacheConfig *cacheConfig + reasoning ReasoningLevel } +// ReasoningLevel selects how much reasoning effort/budget the provider should +// spend before answering. Empty string is the default (no reasoning, identical +// to historical behavior). Each provider translates this to its native +// parameter; models that don't support reasoning silently ignore it. +type ReasoningLevel string + +const ( + // ReasoningLow asks for a small amount of extra reasoning. Maps to + // reasoning_effort="low" on OpenAI/xAI, ~1k thinking budget on + // Anthropic/Google. + ReasoningLow ReasoningLevel = "low" + + // ReasoningMedium asks for a moderate amount. Maps to reasoning_effort + // ="medium" on OpenAI, ~8k thinking budget on Anthropic/Google. xAI + // remaps medium to its only-other-option, "high". + ReasoningMedium ReasoningLevel = "medium" + + // ReasoningHigh asks for the most reasoning the provider exposes. + // Maps to reasoning_effort="high" on OpenAI/xAI, ~24k thinking budget + // on Anthropic/Google. + ReasoningHigh ReasoningLevel = "high" +) + // cacheConfig holds prompt-caching settings. nil = disabled. type cacheConfig struct { enabled bool @@ -42,6 +66,21 @@ func WithStop(sequences ...string) RequestOption { return func(c *requestConfig) { c.stop = sequences } } +// WithReasoning asks the model to spend extra reasoning budget on the +// response. Each provider maps the level to its native shape: +// +// - Anthropic: thinking.budget_tokens (low ~ 1024, medium ~ 8000, high ~ 24000) +// - OpenAI / xAI / Groq: reasoning_effort string (xAI remaps medium to high) +// - Google: thinking_config.thinking_budget (same budget as Anthropic) +// - DeepSeek (reasoner): always-on regardless; this option is a no-op +// - Models without reasoning support: silently ignored +// +// Reasoning content (when surfaced by the provider) appears on +// Response.Thinking, and is also streamed as StreamEventThinking events. +func WithReasoning(level ReasoningLevel) RequestOption { + return func(c *requestConfig) { c.reasoning = level } +} + // WithPromptCaching enables automatic prompt-caching markers on providers // that support it (currently Anthropic). On providers that don't support // explicit cache markers (OpenAI, Google), this is a no-op. diff --git a/v2/response.go b/v2/response.go index e7ae0ad..ebff420 100644 --- a/v2/response.go +++ b/v2/response.go @@ -7,6 +7,10 @@ type Response struct { // Text is the assistant's text content. Empty if only tool calls. Text string + // Thinking is the assistant's reasoning/thinking trace, when reasoning + // was requested and the provider exposed it. Empty otherwise. + Thinking string + // ToolCalls contains any tool invocations the assistant requested. 
ToolCalls []ToolCall diff --git a/v2/stream.go b/v2/stream.go index 8caf8a0..636cf4f 100644 --- a/v2/stream.go +++ b/v2/stream.go @@ -18,6 +18,7 @@ const ( StreamEventToolEnd = provider.StreamEventToolEnd StreamEventDone = provider.StreamEventDone StreamEventError = provider.StreamEventError + StreamEventThinking = provider.StreamEventThinking ) // StreamEvent represents a single event in a streaming response. diff --git a/v2/xai/xai.go b/v2/xai/xai.go index 500fd4c..ab739a6 100644 --- a/v2/xai/xai.go +++ b/v2/xai/xai.go @@ -25,5 +25,16 @@ func New(apiKey, baseURL string) *Provider { SupportsVision: func(m string) bool { return strings.Contains(m, "vision") }, + // Reasoning is supported on grok-3-mini and grok-4 family. The xAI + // API only accepts low|high (no medium); we map medium up to high. + SupportsReasoning: func(m string) bool { + return strings.Contains(m, "grok-3-mini") || strings.Contains(m, "grok-4") + }, + MapReasoningEffort: func(level string) string { + if level == "medium" { + return "high" + } + return level + }, }) } diff --git a/v2/xai/xai_test.go b/v2/xai/xai_test.go index bed5b6b..607ec3e 100644 --- a/v2/xai/xai_test.go +++ b/v2/xai/xai_test.go @@ -2,7 +2,11 @@ package xai_test import ( "context" + "encoding/json" "errors" + "io" + "net/http" + "net/http/httptest" "testing" "gitea.stevedudenhoeffer.com/steve/go-llm/v2/openaicompat" @@ -10,12 +14,75 @@ import ( "gitea.stevedudenhoeffer.com/steve/go-llm/v2/xai" ) +// newReasoningServer is a httptest server that records the request body and +// returns a minimal valid completion. Used to assert the reasoning_effort +// field that lands on the wire. +func newReasoningServer(t *testing.T) (*httptest.Server, *[]byte) { + t.Helper() + var body []byte + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + b, _ := io.ReadAll(r.Body) + body = b + w.Header().Set("Content-Type", "application/json") + _, _ = io.WriteString(w, `{"id":"x","object":"chat.completion","choices":[{"index":0,"message":{"role":"assistant","content":"ok"},"finish_reason":"stop"}],"usage":{"prompt_tokens":1,"completion_tokens":1,"total_tokens":2}}`) + })) + return srv, &body +} + +// readEffort returns the value of the "reasoning_effort" field in the JSON +// body, or "" if absent. +func readEffort(t *testing.T, body []byte) string { + t.Helper() + if len(body) == 0 { + return "" + } + var parsed map[string]any + if err := json.Unmarshal(body, &parsed); err != nil { + t.Fatalf("unmarshal body: %v", err) + } + if v, ok := parsed["reasoning_effort"]; ok { + if s, ok := v.(string); ok { + return s + } + } + return "" +} + func TestNew_Basic(t *testing.T) { if p := xai.New("key", ""); p == nil { t.Fatal("New returned nil") } } +func TestRules_ReasoningGate(t *testing.T) { + srv, body := newReasoningServer(t) + defer srv.Close() + + // grok-3-mini: reasoning supported, medium maps to high. + p := xai.New("k", srv.URL) + req := provider.Request{ + Model: "grok-3-mini", + Messages: []provider.Message{{Role: "user", Content: "?"}}, + Reasoning: "medium", + } + if _, err := p.Complete(context.Background(), req); err != nil { + t.Fatalf("Complete: %v", err) + } + if effort := readEffort(t, *body); effort != "high" { + t.Errorf("grok-3-mini medium → effort=%q, want \"high\"", effort) + } + + // grok-2 (no reasoning): effort must NOT be sent. 
+ req.Model = "grok-2" + *body = nil + if _, err := p.Complete(context.Background(), req); err != nil { + t.Fatalf("Complete: %v", err) + } + if effort := readEffort(t, *body); effort != "" { + t.Errorf("grok-2 → effort=%q, want absent", effort) + } +} + func TestRules_Grok2RejectsImages(t *testing.T) { p := xai.New("key", "") req := provider.Request{