go-llm/v2/llm.go

feat(v2): add ReasoningLevel option; thinking/reasoning across providers
Introduces an opt-in, level-based reasoning toggle (low/medium/high)
that each provider translates to its native parameter (a sketch follows
the list):

- Anthropic: thinking.budget_tokens (1024/8000/24000), with temperature
  forced to default and MaxTokens auto-grown above the budget.
- OpenAI/xAI/Groq via openaicompat: reasoning_effort string, gated by a
  new Rules.SupportsReasoning predicate so non-reasoning models don't
  receive the parameter. xAI uses Rules.MapReasoningEffort to remap
  "medium" to "high" since its API only accepts low|high.
- Google: thinking_config.thinking_budget + include_thoughts:true.
- DeepSeek: SupportsReasoning=false (reasoner is always-on; the
  reasoning_content trace was already extracted via openaicompat).
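
As a concrete illustration, the Anthropic side of this mapping amounts
to something like the sketch below (the helper name is illustrative,
not the actual function; the real code also grows MaxTokens above the
chosen budget):

  func anthropicThinkingBudget(level string) int {
      switch level {
      case "low":
          return 1024
      case "medium":
          return 8000
      default: // "high"
          return 24000
      }
  }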

Reasoning content is surfaced as Response.Thinking on Complete and as
StreamEventThinking deltas during streaming. On the provider side it is
extracted from Anthropic thinking content blocks, from Google's
part.Thought=true parts, and from the non-standard reasoning_content
field that DeepSeek and Groq emit (parsed out of the raw JSON, since
openai-go doesn't type it).
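
On Complete this means the trace is available directly on the response.
A minimal sketch, assuming ctx, msgs, and a configured model are in
scope (error handling elided):

  resp, err := model.Complete(ctx, msgs)
  if err == nil && resp.Thinking != "" {
      fmt.Println("reasoning trace:", resp.Thinking)
  }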

Public API (usage sketch after the list):
  - llm.ReasoningLevel + ReasoningLow/Medium/High constants
  - llm.WithReasoning(level) request option
  - Model.WithReasoning(level) for baked-in defaults
  - provider.Request.Reasoning, provider.Response.Thinking
  - provider.StreamEventThinking
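
Usage sketch (the model name is illustrative; ctx and msgs assumed in
scope):

  model := client.Model("example-model").WithReasoning(llm.ReasoningMedium)
  // The per-request option overrides the baked-in default:
  resp, err := model.Complete(ctx, msgs, llm.WithReasoning(llm.ReasoningHigh))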

Tests cover Rules-based gating, MapReasoningEffort, reasoning_content
extraction (Complete + Stream), Anthropic budget mapping, and
temperature suppression when thinking is enabled. Existing behavior is
unchanged when Reasoning is the empty string.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

package llm

import (
    "context"

    "gitea.stevedudenhoeffer.com/steve/go-llm/v2/provider"
)
// Client represents an LLM provider. Create with OpenAI(), Anthropic(), Google().
type Client struct {
    p          provider.Provider
    middleware []Middleware
}

// NewClient creates a Client backed by the given provider.
// Use this to integrate custom provider implementations or for testing.
func NewClient(p provider.Provider) *Client {
    return &Client{p: p}
}
// Model returns a Model for the specified model version.
func (c *Client) Model(modelVersion string) *Model {
    return &Model{
        provider:   c.p,
        model:      modelVersion,
        middleware: c.middleware,
    }
}
// WithMiddleware returns a new Client with additional middleware applied to all models.
func (c *Client) WithMiddleware(mw ...Middleware) *Client {
    c2 := &Client{
        p:          c.p,
        middleware: append(append([]Middleware{}, c.middleware...), mw...),
    }
    return c2
}
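
// A Middleware wraps the CompletionFunc chain (see buildChain below).
// A minimal logging sketch, for illustration only:
//
//     logged := func(next CompletionFunc) CompletionFunc {
//         return func(ctx context.Context, model string, msgs []Message, cfg *requestConfig) (Response, error) {
//             // inspect or annotate the request here before delegating
//             return next(ctx, model, msgs, cfg)
//         }
//     }
//     client = client.WithMiddleware(logged)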
// Model represents a specific model from a provider, ready for completions.
type Model struct {
    provider         provider.Provider
    model            string
    middleware       []Middleware
    defaultReasoning ReasoningLevel
}

// WithReasoning returns a copy of the Model that uses the given reasoning
// level by default on every Complete/Stream/Chat call. Per-request use of the
// WithReasoning request option still takes precedence.
func (m *Model) WithReasoning(level ReasoningLevel) *Model {
    c := *m
    c.defaultReasoning = level
    return &c
}
// Complete sends a non-streaming completion request.
func (m *Model) Complete(ctx context.Context, messages []Message, opts ...RequestOption) (Response, error) {
    cfg := m.newRequestConfig(opts)
    chain := m.buildChain()
    return chain(ctx, m.model, messages, cfg)
}

// Stream sends a streaming completion request, returning a StreamReader.
// Note that the streaming path calls the provider directly; middleware is
// not applied here.
func (m *Model) Stream(ctx context.Context, messages []Message, opts ...RequestOption) (*StreamReader, error) {
    cfg := m.newRequestConfig(opts)
    req := buildProviderRequest(m.model, messages, cfg)
    return newStreamReader(ctx, m.provider, req)
}
// newRequestConfig builds a requestConfig pre-populated with the Model's
// defaults, then applies per-call options on top.
func (m *Model) newRequestConfig(opts []RequestOption) *requestConfig {
    cfg := &requestConfig{
        reasoning: m.defaultReasoning,
    }
    for _, opt := range opts {
        opt(cfg)
    }
    return cfg
}
// WithMiddleware returns a new Model with additional middleware applied.
func (m *Model) WithMiddleware(mw ...Middleware) *Model {
    return &Model{
        provider:         m.provider,
        model:            m.model,
        middleware:       append(append([]Middleware{}, m.middleware...), mw...),
        defaultReasoning: m.defaultReasoning,
    }
}
func (m *Model) buildChain() CompletionFunc {
    // Base handler that calls the provider.
    base := func(ctx context.Context, model string, messages []Message, cfg *requestConfig) (Response, error) {
        req := buildProviderRequest(model, messages, cfg)
        resp, err := m.provider.Complete(ctx, req)
        if err != nil {
            return Response{}, err
        }
        return convertProviderResponse(resp), nil
    }
    // Apply middleware in reverse order so the first middleware wraps
    // outermost: with middleware [A, B], the chain is A(B(base)).
    chain := base
    for i := len(m.middleware) - 1; i >= 0; i-- {
        chain = m.middleware[i](chain)
    }
    return chain
}
// buildProviderRequest translates a model name, messages, and per-request
// config into a provider.Request.
func buildProviderRequest(model string, messages []Message, cfg *requestConfig) provider.Request {
    req := provider.Request{
        Model:    model,
        Messages: convertMessages(messages),
    }
    if cfg.temperature != nil {
        req.Temperature = cfg.temperature
    }
    if cfg.maxTokens != nil {
        req.MaxTokens = cfg.maxTokens
    }
    if cfg.topP != nil {
        req.TopP = cfg.topP
    }
    if len(cfg.stop) > 0 {
        req.Stop = cfg.stop
    }
    if cfg.reasoning != "" {
        req.Reasoning = string(cfg.reasoning)
    }
    if cfg.tools != nil {
        for _, tool := range cfg.tools.AllTools() {
            req.Tools = append(req.Tools, provider.ToolDef{
                Name:        tool.Name,
                Description: tool.Description,
                Schema:      tool.Schema,
            })
        }
    }
    if cfg.cacheConfig != nil && cfg.cacheConfig.enabled {
        hints := &provider.CacheHints{LastCacheableMessageIndex: -1}
        // Cache the tool definitions whenever any are present.
        if len(req.Tools) > 0 {
            hints.CacheTools = true
        }
        // Cache the system prompt if one exists.
        for _, m := range messages {
            if m.Role == RoleSystem {
                hints.CacheSystem = true
                break
            }
        }
        // Mark the last non-system message as the cache boundary.
        for i := len(messages) - 1; i >= 0; i-- {
            if messages[i].Role != RoleSystem {
                hints.LastCacheableMessageIndex = i
                break
            }
        }
        req.CacheHints = hints
    }
    return req
}
// convertMessages maps the public Message type onto provider.Message,
// carrying over text, images, audio, and tool calls.
func convertMessages(msgs []Message) []provider.Message {
    out := make([]provider.Message, len(msgs))
    for i, m := range msgs {
        pm := provider.Message{
            Role:       string(m.Role),
            Content:    m.Content.Text,
            ToolCallID: m.ToolCallID,
        }
        for _, img := range m.Content.Images {
            pm.Images = append(pm.Images, provider.Image{
                URL:         img.URL,
                Base64:      img.Base64,
                ContentType: img.ContentType,
            })
        }
        for _, aud := range m.Content.Audio {
            pm.Audio = append(pm.Audio, provider.Audio{
                URL:         aud.URL,
                Base64:      aud.Base64,
                ContentType: aud.ContentType,
            })
        }
        for _, tc := range m.ToolCalls {
            pm.ToolCalls = append(pm.ToolCalls, provider.ToolCall{
                ID:        tc.ID,
                Name:      tc.Name,
                Arguments: tc.Arguments,
            })
        }
        out[i] = pm
    }
    return out
}
// convertProviderResponse maps a provider.Response onto the public Response,
// including the thinking trace, tool calls, and token usage.
func convertProviderResponse(resp provider.Response) Response {
    r := Response{
        Text:     resp.Text,
        Thinking: resp.Thinking,
    }
    for _, tc := range resp.ToolCalls {
        r.ToolCalls = append(r.ToolCalls, ToolCall{
            ID:        tc.ID,
            Name:      tc.Name,
            Arguments: tc.Arguments,
        })
    }
    if resp.Usage != nil {
        r.Usage = &Usage{
            InputTokens:  resp.Usage.InputTokens,
            OutputTokens: resp.Usage.OutputTokens,
            TotalTokens:  resp.Usage.TotalTokens,
            Details:      resp.Usage.Details,
        }
    }
    // Build the assistant message for conversation history.
    r.message = Message{
        Role:      RoleAssistant,
        Content:   Content{Text: resp.Text},
        ToolCalls: r.ToolCalls,
    }
    return r
}
// --- Provider constructors ---
// These are defined here and delegate to provider-specific packages.
// They are set up via init() in the provider packages, or defined directly.

// ClientOption configures a client.
type ClientOption func(*clientConfig)

type clientConfig struct {
    baseURL string
}

// WithBaseURL overrides the API base URL.
func WithBaseURL(url string) ClientOption {
    return func(c *clientConfig) { c.baseURL = url }
}
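
// Usage sketch (the constructor signature here is illustrative, not
// confirmed by this file; see the Client doc above for the available
// constructors):
//
//     client := llm.OpenAI(apiKey, llm.WithBaseURL("https://proxy.internal/v1"))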