feat(v2): add ReasoningLevel option; thinking/reasoning across providers
Introduces an opt-in level-based reasoning toggle (low/medium/high) that
each provider translates to its native parameter:

- Anthropic: thinking.budget_tokens (1024/8000/24000), with temperature
  forced to default and MaxTokens auto-grown above the budget.
- OpenAI/xAI/Groq via openaicompat: reasoning_effort string, gated by a
  new Rules.SupportsReasoning predicate so non-reasoning models don't
  receive the parameter. xAI uses Rules.MapReasoningEffort to remap
  "medium" to "high" since its API only accepts low|high.
- Google: thinking_config.thinking_budget + include_thoughts:true.
- DeepSeek: SupportsReasoning=false (the reasoner is always-on; the
  reasoning_content trace was already extracted via openaicompat).

Reasoning content is surfaced as Response.Thinking on Complete and as
StreamEventThinking deltas during streaming. Provider-side it is extracted
from Anthropic thinking content blocks, Google's part.Thought=true parts,
and the non-standard reasoning_content field that DeepSeek and Groq emit
(parsed out of raw JSON since openai-go doesn't type it).

Public API:

- llm.ReasoningLevel + ReasoningLow/Medium/High constants
- llm.WithReasoning(level) request option
- Model.WithReasoning(level) for baked-in defaults
- provider.Request.Reasoning, provider.Response.Thinking
- provider.StreamEventThinking

Tests cover Rules-based gating, MapReasoningEffort, reasoning_content
extraction (Complete + Stream), Anthropic budget mapping, and temperature
suppression when thinking is enabled. Existing behavior is unchanged when
Reasoning is the empty string.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
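Usage, as a minimal sketch: this assumes an already-constructed *llm.Model
(client construction is outside this diff) and uses only names the commit
message above introduces (WithReasoning, ReasoningLevel constants,
Model.WithReasoning, Response.Thinking); the import alias and the ask()
scaffolding are illustrative, not part of the change.

    package main

    import (
        "context"
        "fmt"

        llm "gitea.stevedudenhoeffer.com/steve/go-llm/v2"
    )

    func ask(ctx context.Context, model *llm.Model) error {
        // Per-request: ask for the highest reasoning level on this call only.
        resp, err := model.Complete(ctx,
            []llm.Message{llm.UserMessage("prove it")},
            llm.WithReasoning(llm.ReasoningHigh))
        if err != nil {
            return err
        }
        fmt.Println(resp.Thinking) // reasoning trace; empty when not surfaced
        fmt.Println(resp.Text)     // final answer

        // Baked-in default: every later call on m reasons at "medium"
        // unless a per-call llm.WithReasoning overrides it.
        m := model.WithReasoning(llm.ReasoningMedium)
        _ = m
        return nil
    }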
@@ -49,10 +49,20 @@ func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan
 	resp, err := cl.CreateMessagesStream(ctx, anth.MessagesStreamRequest{
 		MessagesRequest: anthReq,
 		OnContentBlockDelta: func(data anth.MessagesEventContentBlockDeltaData) {
-			if data.Delta.Type == "text_delta" && data.Delta.Text != nil {
-				events <- provider.StreamEvent{
-					Type: provider.StreamEventText,
-					Text: *data.Delta.Text,
-				}
+			switch data.Delta.Type {
+			case anth.MessagesContentTypeTextDelta:
+				if data.Delta.Text != nil {
+					events <- provider.StreamEvent{
+						Type: provider.StreamEventText,
+						Text: *data.Delta.Text,
+					}
+				}
+			case anth.MessagesContentTypeThinkingDelta:
+				if data.Delta.MessageContentThinking != nil {
+					events <- provider.StreamEvent{
+						Type: provider.StreamEventThinking,
+						Text: data.Delta.Thinking,
+					}
+				}
 			}
 		},
@@ -71,6 +81,28 @@ func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan
 	return nil
 }
 
+// Thinking budgets used by Anthropic for low/medium/high reasoning levels.
+// Must each be >= 1024 (Anthropic minimum) and strictly less than MaxTokens.
+const (
+	thinkingBudgetLow    = 1024
+	thinkingBudgetMedium = 8000
+	thinkingBudgetHigh   = 24000
+)
+
+// thinkingBudget returns the Anthropic budget_tokens value for a go-llm
+// ReasoningLevel string. Returns 0 to mean "no thinking" / pass-through.
+func thinkingBudget(level string) int {
+	switch level {
+	case "low":
+		return thinkingBudgetLow
+	case "medium":
+		return thinkingBudgetMedium
+	case "high":
+		return thinkingBudgetHigh
+	}
+	return 0
+}
+
 func (p *Provider) buildRequest(req provider.Request) anth.MessagesRequest {
 	anthReq := anth.MessagesRequest{
 		Model: anth.Model(req.Model),
@@ -81,6 +113,20 @@ func (p *Provider) buildRequest(req provider.Request) anth.MessagesRequest {
 		anthReq.MaxTokens = *req.MaxTokens
 	}
 
+	// Extended thinking. Setting Thinking forces temperature to be unset
+	// (Anthropic only allows the default of 1.0) and requires MaxTokens to
+	// strictly exceed BudgetTokens. We grow MaxTokens if the caller's value
+	// is too small, so callers don't have to reason about budget arithmetic.
+	if budget := thinkingBudget(req.Reasoning); budget > 0 {
+		anthReq.Thinking = &anth.Thinking{
+			Type:         anth.ThinkingTypeEnabled,
+			BudgetTokens: budget,
+		}
+		if anthReq.MaxTokens <= budget {
+			anthReq.MaxTokens = budget + 4096
+		}
+	}
+
 	var msgs []anth.Message
 	var systemText string
 
@@ -259,7 +305,11 @@ func (p *Provider) buildRequest(req provider.Request) anth.MessagesRequest {
 		}
 	}
 
-	if req.Temperature != nil {
+	// Anthropic rejects a non-default temperature when extended thinking is
+	// enabled. Drop the caller's value silently in that case rather than
+	// erroring — the alternative is forcing every caller to reset
+	// temperature when they enable thinking.
+	if req.Temperature != nil && anthReq.Thinking == nil {
 		f := float32(*req.Temperature)
 		anthReq.Temperature = &f
 	}
@@ -307,6 +357,7 @@ func (p *Provider) buildRequest(req provider.Request) anth.MessagesRequest {
 func (p *Provider) convertResponse(resp anth.MessagesResponse) provider.Response {
 	var res provider.Response
 	var textParts []string
+	var thinkingParts []string
 
 	for _, block := range resp.Content {
 		switch block.Type {
@@ -314,6 +365,10 @@ func (p *Provider) convertResponse(resp anth.MessagesResponse) provider.Response
 			if block.Text != nil {
 				textParts = append(textParts, *block.Text)
 			}
+		case anth.MessagesContentTypeThinking:
+			if block.MessageContentThinking != nil {
+				thinkingParts = append(thinkingParts, block.Thinking)
+			}
 		case anth.MessagesContentTypeToolUse:
 			if block.MessageContentToolUse != nil {
 				args, _ := json.Marshal(block.MessageContentToolUse.Input)
@@ -327,6 +382,7 @@ func (p *Provider) convertResponse(resp anth.MessagesResponse) provider.Response
 	}
 
 	res.Text = strings.Join(textParts, "")
+	res.Thinking = strings.Join(thinkingParts, "")
 
 	res.Usage = &provider.Usage{
 		InputTokens: resp.Usage.InputTokens,
@@ -0,0 +1,83 @@
+package anthropic
+
+import (
+	"testing"
+
+	"gitea.stevedudenhoeffer.com/steve/go-llm/v2/provider"
+
+	anth "github.com/liushuangls/go-anthropic/v2"
+)
+
+func TestBuildRequest_ThinkingByLevel(t *testing.T) {
+	p := New("k")
+	cases := []struct {
+		level      string
+		wantBudget int
+	}{
+		{"", 0},
+		{"low", thinkingBudgetLow},
+		{"medium", thinkingBudgetMedium},
+		{"high", thinkingBudgetHigh},
+	}
+	for _, tc := range cases {
+		t.Run("level="+tc.level, func(t *testing.T) {
+			req := provider.Request{
+				Model:     "claude-opus-4-7",
+				Reasoning: tc.level,
+				Messages:  []provider.Message{{Role: "user", Content: "hi"}},
+			}
+			out := p.buildRequest(req)
+			if tc.wantBudget == 0 {
+				if out.Thinking != nil {
+					t.Fatalf("Thinking should be nil for level=%q, got %+v", tc.level, out.Thinking)
+				}
+				return
+			}
+			if out.Thinking == nil {
+				t.Fatalf("Thinking should be set for level=%q", tc.level)
+			}
+			if out.Thinking.Type != anth.ThinkingTypeEnabled {
+				t.Errorf("Thinking.Type = %q, want enabled", out.Thinking.Type)
+			}
+			if out.Thinking.BudgetTokens != tc.wantBudget {
+				t.Errorf("BudgetTokens = %d, want %d", out.Thinking.BudgetTokens, tc.wantBudget)
+			}
+			if out.MaxTokens <= tc.wantBudget {
+				t.Errorf("MaxTokens (%d) must exceed BudgetTokens (%d)", out.MaxTokens, tc.wantBudget)
+			}
+		})
+	}
+}
+
+func TestBuildRequest_ThinkingDropsTemperature(t *testing.T) {
+	p := New("k")
+	temp := 0.7
+	req := provider.Request{
+		Model:       "claude-opus-4-7",
+		Reasoning:   "high",
+		Temperature: &temp,
+		Messages:    []provider.Message{{Role: "user", Content: "hi"}},
+	}
+	out := p.buildRequest(req)
+	if out.Temperature != nil {
+		t.Errorf("Temperature should be dropped when thinking is enabled, got %v", *out.Temperature)
+	}
+}
+
+func TestBuildRequest_NoThinkingPreservesTemperature(t *testing.T) {
+	p := New("k")
+	temp := 0.7
+	req := provider.Request{
+		Model:       "claude-opus-4-7",
+		Temperature: &temp,
+		Messages:    []provider.Message{{Role: "user", Content: "hi"}},
+	}
+	out := p.buildRequest(req)
+	if out.Temperature == nil {
+		t.Fatal("Temperature should be set when thinking is disabled")
+	}
+	got := float64(*out.Temperature)
+	if got < 0.69 || got > 0.71 {
+		t.Errorf("Temperature should be ~0.7 when thinking is disabled, got %v", got)
+	}
+}
+1 -6
@@ -108,12 +108,7 @@ func (c *Chat) SendRaw(ctx context.Context, msg Message) (Response, error) {
 func (c *Chat) SendStream(ctx context.Context, text string) (*StreamReader, error) {
 	c.messages = append(c.messages, UserMessage(text))
 
-	opts := c.buildOpts()
-
-	cfg := &requestConfig{}
-	for _, opt := range opts {
-		opt(cfg)
-	}
+	cfg := c.model.newRequestConfig(c.buildOpts())
 
 	req := buildProviderRequest(c.model.model, c.messages, cfg)
 	return newStreamReader(ctx, c.model.provider, req)
@@ -32,5 +32,10 @@ func New(apiKey, baseURL string) *Provider {
 		RestrictTemperature: func(m string) bool {
 			return strings.Contains(m, "reasoner")
 		},
+		// DeepSeek's reasoner thinks unconditionally; the API rejects an
+		// explicit reasoning_effort parameter. The thinking trace is
+		// surfaced via openaicompat's reasoning_content extraction without
+		// any opt-in.
+		SupportsReasoning: func(string) bool { return false },
 	})
 }
+54 -5
@@ -58,6 +58,7 @@ func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan
 	contents, cfg := p.buildRequest(req)
 
 	var fullText strings.Builder
+	var fullThinking strings.Builder
 	var toolCalls []provider.ToolCall
 	var usage *provider.Usage
 
@@ -91,10 +92,18 @@ func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan
 		}
 		for _, part := range c.Content.Parts {
 			if part.Text != "" {
-				fullText.WriteString(part.Text)
-				events <- provider.StreamEvent{
-					Type: provider.StreamEventText,
-					Text: part.Text,
+				if part.Thought {
+					fullThinking.WriteString(part.Text)
+					events <- provider.StreamEvent{
+						Type: provider.StreamEventThinking,
+						Text: part.Text,
+					}
+				} else {
+					fullText.WriteString(part.Text)
+					events <- provider.StreamEvent{
+						Type: provider.StreamEventText,
+						Text: part.Text,
+					}
 				}
 			}
 			if part.FunctionCall != nil {
@@ -124,6 +133,7 @@ func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan
 		Type: provider.StreamEventDone,
 		Response: &provider.Response{
 			Text:      fullText.String(),
+			Thinking:  fullThinking.String(),
 			ToolCalls: toolCalls,
 			Usage:     usage,
 		},
@@ -166,6 +176,18 @@ func (p *Provider) buildRequest(req provider.Request) ([]*genai.Content, *genai.
 		cfg.StopSequences = req.Stop
 	}
 
+	// Extended thinking via thinking_config. Models that don't support
+	// thinking ignore this field; budgets here mirror the Anthropic
+	// mapping so a single ReasoningLevel produces comparable behavior
+	// across providers.
+	if budget := thinkingBudget(req.Reasoning); budget > 0 {
+		b := int32(budget)
+		cfg.ThinkingConfig = &genai.ThinkingConfig{
+			ThinkingBudget:  &b,
+			IncludeThoughts: true,
+		}
+	}
+
 	for _, msg := range req.Messages {
 		var role genai.Role
 		switch msg.Role {
@@ -286,7 +308,11 @@ func (p *Provider) convertResponse(resp *genai.GenerateContentResponse) (provide
 		}
 		for _, part := range c.Content.Parts {
 			if part.Text != "" {
-				res.Text += part.Text
+				if part.Thought {
+					res.Thinking += part.Text
+				} else {
+					res.Text += part.Text
+				}
 			}
 			if part.FunctionCall != nil {
 				args, _ := json.Marshal(part.FunctionCall.Args)
@@ -320,6 +346,29 @@ func (p *Provider) convertResponse(resp *genai.GenerateContentResponse) (provide
 	return res, nil
 }
 
+// Thinking budgets used by Google for low/medium/high reasoning levels.
+// Mirrors the Anthropic mapping so a single go-llm ReasoningLevel produces
+// comparable behavior across providers.
+const (
+	thinkingBudgetLow    = 1024
+	thinkingBudgetMedium = 8000
+	thinkingBudgetHigh   = 24000
+)
+
+// thinkingBudget returns the genai thinking_budget for a go-llm
+// ReasoningLevel, or 0 to disable thinking.
+func thinkingBudget(level string) int {
+	switch level {
+	case "low":
+		return thinkingBudgetLow
+	case "medium":
+		return thinkingBudgetMedium
+	case "high":
+		return thinkingBudgetHigh
+	}
+	return 0
+}
+
 // schemaToGenai converts a JSON Schema map to a genai.Schema.
 func schemaToGenai(s map[string]any) *genai.Schema {
 	if s == nil {
@@ -29,5 +29,13 @@ func New(apiKey, baseURL string) *Provider {
 		// Chat completions endpoint does not accept audio input; audio is via
 		// dedicated transcription endpoints, which go-llm doesn't cover here.
 		SupportsAudio: func(string) bool { return false },
+		// Reasoning models hosted on Groq (DeepSeek R1 distill family, qwen
+		// reasoning variants, gpt-oss) accept reasoning_effort. Vanilla
+		// Llama / Mixtral don't.
+		SupportsReasoning: func(m string) bool {
+			return strings.Contains(m, "deepseek-r1") ||
+				strings.Contains(m, "qwen") ||
+				strings.Contains(m, "gpt-oss")
+		},
 	})
 }
@@ -38,17 +38,24 @@ func (c *Client) WithMiddleware(mw ...Middleware) *Client {
 
 // Model represents a specific model from a provider, ready for completions.
 type Model struct {
-	provider   provider.Provider
-	model      string
-	middleware []Middleware
+	provider         provider.Provider
+	model            string
+	middleware       []Middleware
+	defaultReasoning ReasoningLevel
 }
 
+// WithReasoning returns a copy of the Model that uses the given reasoning
+// level by default on every Complete/Stream/Chat call. Per-request use of the
+// WithReasoning request option still takes precedence.
+func (m *Model) WithReasoning(level ReasoningLevel) *Model {
+	c := *m
+	c.defaultReasoning = level
+	return &c
+}
+
 // Complete sends a non-streaming completion request.
 func (m *Model) Complete(ctx context.Context, messages []Message, opts ...RequestOption) (Response, error) {
-	cfg := &requestConfig{}
-	for _, opt := range opts {
-		opt(cfg)
-	}
+	cfg := m.newRequestConfig(opts)
 
 	chain := m.buildChain()
 	return chain(ctx, m.model, messages, cfg)
@@ -56,15 +63,24 @@ func (m *Model) Complete(ctx context.Context, messages []Message, opts ...Reques
 
 // Stream sends a streaming completion request, returning a StreamReader.
 func (m *Model) Stream(ctx context.Context, messages []Message, opts ...RequestOption) (*StreamReader, error) {
-	cfg := &requestConfig{}
-	for _, opt := range opts {
-		opt(cfg)
-	}
+	cfg := m.newRequestConfig(opts)
 
 	req := buildProviderRequest(m.model, messages, cfg)
 	return newStreamReader(ctx, m.provider, req)
 }
 
+// newRequestConfig builds a requestConfig pre-populated with the Model's
+// defaults, then applies per-call options on top.
+func (m *Model) newRequestConfig(opts []RequestOption) *requestConfig {
+	cfg := &requestConfig{
+		reasoning: m.defaultReasoning,
+	}
+	for _, opt := range opts {
+		opt(cfg)
+	}
+	return cfg
+}
+
 // WithMiddleware returns a new Model with additional middleware applied.
 func (m *Model) WithMiddleware(mw ...Middleware) *Model {
 	return &Model{
@@ -111,6 +127,9 @@ func buildProviderRequest(model string, messages []Message, cfg *requestConfig)
 	if len(cfg.stop) > 0 {
 		req.Stop = cfg.stop
 	}
+	if cfg.reasoning != "" {
+		req.Reasoning = string(cfg.reasoning)
+	}
 
 	if cfg.tools != nil {
 		for _, tool := range cfg.tools.AllTools() {
@@ -181,7 +200,8 @@ func convertMessages(msgs []Message) []provider.Message {
 
 func convertProviderResponse(resp provider.Response) Response {
 	r := Response{
-		Text: resp.Text,
+		Text:     resp.Text,
+		Thinking: resp.Thinking,
 	}
 
 	for _, tc := range resp.ToolCalls {
+7 -4
@@ -24,12 +24,15 @@ func New(apiKey string, baseURL string) *Provider {
 		baseURL = DefaultBaseURL
 	}
 	return openaicompat.New(apiKey, baseURL, openaicompat.Rules{
-		RestrictTemperature: restrictTemperature,
+		RestrictTemperature: isReasoningModel,
+		SupportsReasoning:   isReasoningModel,
 	})
 }
 
-// restrictTemperature reports whether OpenAI rejects a user-supplied
-// temperature for this model. o-series reasoning models and gpt-5* both do.
-func restrictTemperature(model string) bool {
+// isReasoningModel reports whether the named OpenAI model is a reasoning
+// model (o-series or gpt-5*). Reasoning models reject a user-supplied
+// temperature and accept a reasoning_effort parameter; everything else
+// rejects reasoning_effort.
+func isReasoningModel(model string) bool {
 	return strings.HasPrefix(model, "o") || strings.HasPrefix(model, "gpt-5")
 }
@@ -12,6 +12,7 @@ package openaicompat
 import (
 	"context"
 	"encoding/base64"
+	"encoding/json"
 	"fmt"
 	"io"
 	"net/http"
@@ -66,6 +67,19 @@ type Rules struct {
 	// parameters and may mutate them freely (add headers, flip flags, tweak
 	// response_format, etc.).
 	CustomizeRequest func(params *openai.ChatCompletionNewParams)
+
+	// SupportsReasoning, when non-nil and returning false for the request's
+	// model, causes the request's Reasoning field to be silently dropped
+	// from the outgoing request. Used by providers (e.g., OpenAI) where
+	// reasoning_effort is rejected on non-reasoning models. nil = always
+	// pass reasoning_effort through when set.
+	SupportsReasoning func(model string) bool
+
+	// MapReasoningEffort, when non-nil, maps the standardized go-llm
+	// ReasoningLevel ("low"|"medium"|"high") to the provider's wire-level
+	// effort string. Used by xAI which only accepts "low"|"high" (callers
+	// remap "medium" to "high"). nil = pass-through unchanged.
+	MapReasoningEffort func(level string) string
 }
 
 // FeatureUnsupportedError is returned when a Rules predicate rejects a request
@@ -130,6 +144,7 @@ func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan
 	stream := cl.Chat.Completions.NewStreaming(ctx, oaiReq)
 
 	var fullText strings.Builder
+	var fullThinking strings.Builder
 	var toolCalls []provider.ToolCall
 	toolCallArgs := map[int]*strings.Builder{}
 	var usage *provider.Usage
@@ -157,6 +172,18 @@ func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan
 			}
 		}
 
+		// Reasoning/thinking delta — DeepSeek and Groq use a non-standard
+		// "reasoning_content" field on the delta. Extract it from the
+		// raw JSON since the OpenAI SDK doesn't surface it as a typed
+		// field.
+		if rc := extractReasoningContent(choice.Delta.RawJSON()); rc != "" {
+			fullThinking.WriteString(rc)
+			events <- provider.StreamEvent{
+				Type: provider.StreamEventThinking,
+				Text: rc,
+			}
+		}
+
 		// Tool call deltas
 		for _, tc := range choice.Delta.ToolCalls {
 			idx := int(tc.Index)
@@ -216,6 +243,7 @@ func (p *Provider) Stream(ctx context.Context, req provider.Request, events chan
 		Type: provider.StreamEventDone,
 		Response: &provider.Response{
 			Text:      fullText.String(),
+			Thinking:  fullThinking.String(),
 			ToolCalls: toolCalls,
 			Usage:     usage,
 		},
@@ -303,6 +331,16 @@ func (p *Provider) buildRequest(req provider.Request) openai.ChatCompletionNewPa
 		oaiReq.Stop = openai.ChatCompletionNewParamsStopUnion{OfString: openai.String(req.Stop[0])}
 	}
 
+	if req.Reasoning != "" {
+		if p.rules.SupportsReasoning == nil || p.rules.SupportsReasoning(req.Model) {
+			effort := req.Reasoning
+			if p.rules.MapReasoningEffort != nil {
+				effort = p.rules.MapReasoningEffort(effort)
+			}
+			oaiReq.ReasoningEffort = shared.ReasoningEffort(effort)
+		}
+	}
+
 	return oaiReq
 }
 
@@ -468,6 +506,7 @@ func (p *Provider) convertResponse(resp *openai.ChatCompletion) provider.Respons
 
 	choice := resp.Choices[0]
 	res.Text = choice.Message.Content
+	res.Thinking = extractReasoningContent(choice.Message.RawJSON())
 
 	for _, tc := range choice.Message.ToolCalls {
 		res.ToolCalls = append(res.ToolCalls, provider.ToolCall{
@@ -523,6 +562,25 @@ func (p *Provider) convertResponse(resp *openai.ChatCompletion) provider.Respons
 	return details
 }
 
+// extractReasoningContent pulls the non-standard "reasoning_content" string
+// from the raw JSON of a message or delta. DeepSeek's reasoner and several
+// Groq-hosted reasoning models put their thinking trace in this field rather
+// than in OpenAI's standard "reasoning_summary" blocks; the OpenAI Go SDK
+// doesn't surface it as a typed field, so we re-parse the raw JSON. Returns
+// empty string when the field is absent or unparseable.
+func extractReasoningContent(rawJSON string) string {
+	if rawJSON == "" || !strings.Contains(rawJSON, "reasoning_content") {
+		return ""
+	}
+	var d struct {
+		ReasoningContent string `json:"reasoning_content"`
+	}
+	if err := json.Unmarshal([]byte(rawJSON), &d); err != nil {
+		return ""
+	}
+	return d.ReasoningContent
+}
+
 // audioFormatFromURL guesses the audio format from a URL's file extension.
 func audioFormatFromURL(u string) string {
 	ext := strings.ToLower(path.Ext(u))
@@ -282,6 +282,162 @@ func TestStream_EmitsDoneAndText(t *testing.T) {
 	}
 }
 
+func TestComplete_ReasoningEffortPassthrough(t *testing.T) {
+	srv, body := newTestServer(t)
+	defer srv.Close()
+
+	req := textReq("o3-mini", "hi")
+	req.Reasoning = "high"
+
+	p := openaicompat.New("test-key", srv.URL, openaicompat.Rules{})
+	if _, err := p.Complete(context.Background(), req); err != nil {
+		t.Fatalf("Complete: %v", err)
+	}
+	var parsed map[string]any
+	if err := json.Unmarshal(*body, &parsed); err != nil {
+		t.Fatalf("unmarshal: %v", err)
+	}
+	if parsed["reasoning_effort"] != "high" {
+		t.Errorf("reasoning_effort = %v, want \"high\"; body: %s", parsed["reasoning_effort"], *body)
+	}
+}
+
+func TestComplete_SupportsReasoningGate(t *testing.T) {
+	srv, body := newTestServer(t)
+	defer srv.Close()
+
+	req := textReq("gpt-4o", "hi")
+	req.Reasoning = "high"
+
+	// SupportsReasoning returns false → reasoning_effort must NOT be sent.
+	p := openaicompat.New("test-key", srv.URL, openaicompat.Rules{
+		SupportsReasoning: func(string) bool { return false },
+	})
+	if _, err := p.Complete(context.Background(), req); err != nil {
+		t.Fatalf("Complete: %v", err)
+	}
+	var parsed map[string]any
+	if err := json.Unmarshal(*body, &parsed); err != nil {
+		t.Fatalf("unmarshal: %v", err)
+	}
+	if _, ok := parsed["reasoning_effort"]; ok {
+		t.Errorf("reasoning_effort should be absent when SupportsReasoning=false; body: %s", *body)
+	}
+}
+
+func TestComplete_MapReasoningEffort(t *testing.T) {
+	srv, body := newTestServer(t)
+	defer srv.Close()
+
+	req := textReq("grok-3-mini", "hi")
+	req.Reasoning = "medium"
+
+	// xAI-style mapping: medium → high.
+	p := openaicompat.New("test-key", srv.URL, openaicompat.Rules{
+		MapReasoningEffort: func(level string) string {
+			if level == "medium" {
+				return "high"
+			}
+			return level
+		},
+	})
+	if _, err := p.Complete(context.Background(), req); err != nil {
+		t.Fatalf("Complete: %v", err)
+	}
+	var parsed map[string]any
+	if err := json.Unmarshal(*body, &parsed); err != nil {
+		t.Fatalf("unmarshal: %v", err)
+	}
+	if parsed["reasoning_effort"] != "high" {
+		t.Errorf("reasoning_effort = %v, want \"high\" after medium→high remap; body: %s", parsed["reasoning_effort"], *body)
+	}
+}
+
+func TestComplete_ReasoningContentExtracted(t *testing.T) {
+	// Server returns a DeepSeek-style response with reasoning_content alongside content.
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = io.WriteString(w, `{
+			"id": "cmpl-1",
+			"object": "chat.completion",
+			"choices": [{
+				"index": 0,
+				"message": {
+					"role":"assistant",
+					"content":"42",
+					"reasoning_content":"the user asked for the answer..."
+				},
+				"finish_reason": "stop"
+			}],
+			"usage": {"prompt_tokens":1,"completion_tokens":2,"total_tokens":3}
+		}`)
+	}))
+	defer srv.Close()
+
+	p := openaicompat.New("test-key", srv.URL, openaicompat.Rules{})
+	resp, err := p.Complete(context.Background(), textReq("deepseek-reasoner", "?"))
+	if err != nil {
+		t.Fatalf("Complete: %v", err)
+	}
+	if resp.Text != "42" {
+		t.Errorf("Text = %q, want %q", resp.Text, "42")
+	}
+	if !strings.Contains(resp.Thinking, "the user asked for") {
+		t.Errorf("Thinking = %q, want it to contain the reasoning trace", resp.Thinking)
+	}
+}
+
+func TestStream_ReasoningContentEmitsThinkingEvents(t *testing.T) {
+	// Two SSE chunks, each with a reasoning_content delta, then a final done chunk.
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "text/event-stream")
+		flusher, _ := w.(http.Flusher)
+		for _, line := range []string{
+			`data: {"id":"1","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"reasoning_content":"think "}}]}`,
+			`data: {"id":"1","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"reasoning_content":"hard","content":"42"}}]}`,
+			`data: {"id":"1","object":"chat.completion.chunk","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":1,"completion_tokens":2,"total_tokens":3}}`,
+			`data: [DONE]`,
+		} {
+			_, _ = io.WriteString(w, line+"\n\n")
+			if flusher != nil {
+				flusher.Flush()
+			}
+		}
+	}))
+	defer srv.Close()
+
+	p := openaicompat.New("test-key", srv.URL, openaicompat.Rules{})
+	events := make(chan provider.StreamEvent, 32)
+	go func() {
+		_ = p.Stream(context.Background(), textReq("deepseek-reasoner", "?"), events)
+		close(events)
+	}()
+
+	var thinking strings.Builder
+	var sawDone bool
+	var doneThinking string
+	for ev := range events {
+		switch ev.Type {
+		case provider.StreamEventThinking:
+			thinking.WriteString(ev.Text)
+		case provider.StreamEventDone:
+			sawDone = true
+			if ev.Response != nil {
+				doneThinking = ev.Response.Thinking
+			}
+		}
+	}
+	if thinking.String() != "think hard" {
+		t.Errorf("streamed thinking = %q, want %q", thinking.String(), "think hard")
+	}
+	if !sawDone {
+		t.Fatal("no Done event")
+	}
+	if doneThinking != "think hard" {
+		t.Errorf("Done.Response.Thinking = %q, want %q", doneThinking, "think hard")
+	}
+}
+
 func TestStream_RulesCheckedBeforeNetwork(t *testing.T) {
 	// Server should never be hit when rules reject up front.
 	hit := false
@@ -80,6 +80,15 @@ type Request struct {
 	// CacheHints requests prompt-cache breakpoints at specified positions
 	// on providers that support it (currently Anthropic). nil = no caching.
 	CacheHints *CacheHints
+
+	// Reasoning, when non-empty, asks the model to spend extra inference
+	// budget reasoning before answering. Each provider translates this to
+	// its native parameter (Anthropic thinking.budget_tokens, OpenAI/xAI
+	// reasoning_effort, Google thinking_config, etc.). Models that do not
+	// support reasoning silently ignore it.
+	//
+	// Allowed values: "" (no reasoning, default), "low", "medium", "high".
+	Reasoning string
 }
 
 // Response is a completion response at the provider level.
@@ -87,6 +96,11 @@ type Response struct {
 	Text      string
 	ToolCalls []ToolCall
 	Usage     *Usage
+
+	// Thinking holds the model's reasoning/thinking trace, when one was
+	// requested and the provider exposed it. Empty for providers/models
+	// that do not surface a thinking trace.
+	Thinking string
 }
 
 // Usage captures token consumption.
@@ -117,6 +131,7 @@ const (
 	StreamEventToolEnd  // Tool call complete
 	StreamEventDone     // Stream complete
 	StreamEventError    // Error occurred
+	StreamEventThinking // Reasoning/thinking content delta
 )
 
 // StreamEvent represents a single event in a streaming response.
@@ -10,8 +10,32 @@ type requestConfig struct {
 	topP        *float64
 	stop        []string
 	cacheConfig *cacheConfig
+	reasoning   ReasoningLevel
 }
 
+// ReasoningLevel selects how much reasoning effort/budget the provider should
+// spend before answering. Empty string is the default (no reasoning, identical
+// to historical behavior). Each provider translates this to its native
+// parameter; models that don't support reasoning silently ignore it.
+type ReasoningLevel string
+
+const (
+	// ReasoningLow asks for a small amount of extra reasoning. Maps to
+	// reasoning_effort="low" on OpenAI/xAI, ~1k thinking budget on
+	// Anthropic/Google.
+	ReasoningLow ReasoningLevel = "low"
+
+	// ReasoningMedium asks for a moderate amount. Maps to reasoning_effort
+	// ="medium" on OpenAI, ~8k thinking budget on Anthropic/Google. xAI
+	// remaps medium to its only-other-option, "high".
+	ReasoningMedium ReasoningLevel = "medium"
+
+	// ReasoningHigh asks for the most reasoning the provider exposes.
+	// Maps to reasoning_effort="high" on OpenAI/xAI, ~24k thinking budget
+	// on Anthropic/Google.
+	ReasoningHigh ReasoningLevel = "high"
+)
+
 // cacheConfig holds prompt-caching settings. nil = disabled.
 type cacheConfig struct {
 	enabled bool
@@ -42,6 +66,21 @@ func WithStop(sequences ...string) RequestOption {
 	return func(c *requestConfig) { c.stop = sequences }
 }
 
+// WithReasoning asks the model to spend extra reasoning budget on the
+// response. Each provider maps the level to its native shape:
+//
+//   - Anthropic: thinking.budget_tokens (low ~ 1024, medium ~ 8000, high ~ 24000)
+//   - OpenAI / xAI / Groq: reasoning_effort string (xAI remaps medium to high)
+//   - Google: thinking_config.thinking_budget (same budget as Anthropic)
+//   - DeepSeek (reasoner): always-on regardless; this option is a no-op
+//   - Models without reasoning support: silently ignored
+//
+// Reasoning content (when surfaced by the provider) appears on
+// Response.Thinking, and is also streamed as StreamEventThinking events.
+func WithReasoning(level ReasoningLevel) RequestOption {
+	return func(c *requestConfig) { c.reasoning = level }
+}
+
 // WithPromptCaching enables automatic prompt-caching markers on providers
 // that support it (currently Anthropic). On providers that don't support
 // explicit cache markers (OpenAI, Google), this is a no-op.
@@ -7,6 +7,10 @@ type Response struct {
 	// Text is the assistant's text content. Empty if only tool calls.
 	Text string
 
+	// Thinking is the assistant's reasoning/thinking trace, when reasoning
+	// was requested and the provider exposed it. Empty otherwise.
+	Thinking string
+
 	// ToolCalls contains any tool invocations the assistant requested.
 	ToolCalls []ToolCall
 
@@ -18,6 +18,7 @@ const (
 	StreamEventToolEnd = provider.StreamEventToolEnd
 	StreamEventDone    = provider.StreamEventDone
 	StreamEventError   = provider.StreamEventError
+	StreamEventThinking = provider.StreamEventThinking
 )
 
 // StreamEvent represents a single event in a streaming response.
@@ -25,5 +25,16 @@ func New(apiKey, baseURL string) *Provider {
 		SupportsVision: func(m string) bool {
 			return strings.Contains(m, "vision")
 		},
+		// Reasoning is supported on grok-3-mini and grok-4 family. The xAI
+		// API only accepts low|high (no medium); we map medium up to high.
+		SupportsReasoning: func(m string) bool {
+			return strings.Contains(m, "grok-3-mini") || strings.Contains(m, "grok-4")
+		},
+		MapReasoningEffort: func(level string) string {
+			if level == "medium" {
+				return "high"
+			}
+			return level
+		},
 	})
 }
@@ -2,7 +2,11 @@ package xai_test
 
 import (
 	"context"
+	"encoding/json"
 	"errors"
+	"io"
+	"net/http"
+	"net/http/httptest"
 	"testing"
 
 	"gitea.stevedudenhoeffer.com/steve/go-llm/v2/openaicompat"
@@ -10,12 +14,75 @@ import (
 	"gitea.stevedudenhoeffer.com/steve/go-llm/v2/xai"
 )
 
+// newReasoningServer is a httptest server that records the request body and
+// returns a minimal valid completion. Used to assert the reasoning_effort
+// field that lands on the wire.
+func newReasoningServer(t *testing.T) (*httptest.Server, *[]byte) {
+	t.Helper()
+	var body []byte
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		b, _ := io.ReadAll(r.Body)
+		body = b
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = io.WriteString(w, `{"id":"x","object":"chat.completion","choices":[{"index":0,"message":{"role":"assistant","content":"ok"},"finish_reason":"stop"}],"usage":{"prompt_tokens":1,"completion_tokens":1,"total_tokens":2}}`)
+	}))
+	return srv, &body
+}
+
+// readEffort returns the value of the "reasoning_effort" field in the JSON
+// body, or "" if absent.
+func readEffort(t *testing.T, body []byte) string {
+	t.Helper()
+	if len(body) == 0 {
+		return ""
+	}
+	var parsed map[string]any
+	if err := json.Unmarshal(body, &parsed); err != nil {
+		t.Fatalf("unmarshal body: %v", err)
+	}
+	if v, ok := parsed["reasoning_effort"]; ok {
+		if s, ok := v.(string); ok {
+			return s
+		}
+	}
+	return ""
+}
+
 func TestNew_Basic(t *testing.T) {
 	if p := xai.New("key", ""); p == nil {
 		t.Fatal("New returned nil")
 	}
 }
 
+func TestRules_ReasoningGate(t *testing.T) {
+	srv, body := newReasoningServer(t)
+	defer srv.Close()
+
+	// grok-3-mini: reasoning supported, medium maps to high.
+	p := xai.New("k", srv.URL)
+	req := provider.Request{
+		Model:     "grok-3-mini",
+		Messages:  []provider.Message{{Role: "user", Content: "?"}},
+		Reasoning: "medium",
+	}
+	if _, err := p.Complete(context.Background(), req); err != nil {
+		t.Fatalf("Complete: %v", err)
+	}
+	if effort := readEffort(t, *body); effort != "high" {
+		t.Errorf("grok-3-mini medium → effort=%q, want \"high\"", effort)
+	}
+
+	// grok-2 (no reasoning): effort must NOT be sent.
+	req.Model = "grok-2"
+	*body = nil
+	if _, err := p.Complete(context.Background(), req); err != nil {
+		t.Fatalf("Complete: %v", err)
+	}
+	if effort := readEffort(t, *body); effort != "" {
+		t.Errorf("grok-2 → effort=%q, want absent", effort)
+	}
+}
+
 func TestRules_Grok2RejectsImages(t *testing.T) {
 	p := xai.New("key", "")
 	req := provider.Request{