Files
go-llm/v2/pricing.go
Steve Dudenhoeffer 5b687839b2
All checks were successful
CI / Lint (pull_request) Successful in 10m18s
CI / Root Module (pull_request) Successful in 11m4s
CI / V2 Module (pull_request) Successful in 11m5s
feat: comprehensive token usage tracking for V2
Add provider-specific usage details, fix streaming usage, and return
usage from all high-level APIs (Chat.Send, Generate[T], Agent.Run).

Breaking changes:
- Chat.Send/SendMessage/SendWithImages now return (string, *Usage, error)
- Generate[T]/GenerateWith[T] now return (T, *Usage, error)
- Agent.Run/RunMessages now return (string, *Usage, error)

New features:
- Usage.Details map for provider-specific token breakdowns
  (reasoning, cached, audio, thoughts tokens)
- OpenAI streaming now captures usage via StreamOptions.IncludeUsage
- Google streaming now captures UsageMetadata from final chunk
- UsageTracker.Details() for accumulated detail totals
- ModelPricing and PricingRegistry for cost computation

Closes #2

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-02 04:33:18 +00:00

84 lines
2.2 KiB
Go

package llm
import "sync"
// ModelPricing holds the per-token USD rates used to price a model's usage.
type ModelPricing struct {
	// InputPricePerToken is the USD price of one input token.
	InputPricePerToken float64

	// OutputPricePerToken is the USD price of one output token.
	OutputPricePerToken float64

	// CachedInputPricePerToken is the USD price of one cached input token.
	// A value of 0 means cached tokens are priced the same as regular input.
	CachedInputPricePerToken float64
}
// Cost returns the USD cost of the token counts recorded in u.
//
// A nil u costs 0. Output tokens are always billed at OutputPricePerToken.
// Input tokens are billed at InputPricePerToken, except that when
// CachedInputPricePerToken is positive and u.Details reports a positive
// UsageDetailCachedInputTokens count, that many tokens are billed at the
// cached rate and only the remainder (never less than zero) is billed at the
// regular input rate.
func (mp ModelPricing) Cost(u *Usage) float64 {
	if u == nil {
		return 0
	}

	// Indexing a nil map or a missing key yields 0, so Details needs no guard.
	cached := u.Details[UsageDetailCachedInputTokens]
	fullRate := u.InputTokens

	if mp.CachedInputPricePerToken > 0 && cached > 0 {
		// Carve the cached tokens out of the input count, clamping at zero in
		// case the reported cached count exceeds the input count.
		if fullRate -= cached; fullRate < 0 {
			fullRate = 0
		}
	} else {
		// No cached rate applies: every input token is billed at the regular rate.
		cached = 0
	}

	var total float64
	total += float64(fullRate) * mp.InputPricePerToken
	if cached > 0 {
		total += float64(cached) * mp.CachedInputPricePerToken
	}
	total += float64(u.OutputTokens) * mp.OutputPricePerToken
	return total
}
// PricingRegistry associates model names with their ModelPricing.
// It starts out empty; callers register whichever models and prices matter
// to them. Create one with NewPricingRegistry.
type PricingRegistry struct {
	// mu guards models.
	mu sync.RWMutex

	// models holds the registered pricing, keyed by model name.
	models map[string]ModelPricing
}
// NewPricingRegistry returns a new PricingRegistry with no models registered.
// Its internal map is already allocated, so the result is ready for use.
func NewPricingRegistry() *PricingRegistry {
	registry := new(PricingRegistry)
	registry.models = map[string]ModelPricing{}
	return registry
}
// Set registers pricing for model, replacing any pricing previously
// registered under the same name.
//
// Set also works on a zero-value PricingRegistry (one not created through
// NewPricingRegistry): the underlying map is allocated on first use instead
// of panicking on a write to a nil map.
func (pr *PricingRegistry) Set(model string, pricing ModelPricing) {
	pr.mu.Lock()
	defer pr.mu.Unlock()

	// Writing to a nil map panics, so allocate it lazily under the write lock.
	if pr.models == nil {
		pr.models = make(map[string]ModelPricing)
	}
	pr.models[model] = pricing
}
// Has reports whether pricing has been registered for model.
// The lookup is performed while holding the registry's read lock.
func (pr *PricingRegistry) Has(model string) bool {
	pr.mu.RLock()
	_, registered := pr.models[model]
	pr.mu.RUnlock()
	return registered
}
// Cost returns the USD cost of u priced with the rates registered for model.
// If no pricing is registered for model, it returns 0.
func (pr *PricingRegistry) Cost(model string, u *Usage) float64 {
	// Hold the read lock only for the map lookup; the price calculation runs
	// on the copied ModelPricing value after the lock is released.
	pr.mu.RLock()
	pricing, found := pr.models[model]
	pr.mu.RUnlock()

	if found {
		return pricing.Cost(u)
	}
	return 0
}