feat: comprehensive token usage tracking for V2
Add provider-specific usage details, fix streaming usage, and return usage from all high-level APIs (Chat.Send, Generate[T], Agent.Run).

Breaking changes:
- Chat.Send/SendMessage/SendWithImages now return (string, *Usage, error)
- Generate[T]/GenerateWith[T] now return (T, *Usage, error)
- Agent.Run/RunMessages now return (string, *Usage, error)

New features:
- Usage.Details map for provider-specific token breakdowns (reasoning, cached, audio, thoughts tokens)
- OpenAI streaming now captures usage via StreamOptions.IncludeUsage
- Google streaming now captures UsageMetadata from final chunk
- UsageTracker.Details() for accumulated detail totals
- ModelPricing and PricingRegistry for cost computation

Closes #2

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
83
v2/pricing.go
Normal file
83
v2/pricing.go
Normal file
@@ -0,0 +1,83 @@
|
||||
package llm
|
||||
|
||||
import "sync"
|
||||
|
||||
// ModelPricing holds the per-token USD rates used to price a model's usage.
type ModelPricing struct {
	// InputPricePerToken is the USD price of a single input token.
	InputPricePerToken float64
	// OutputPricePerToken is the USD price of a single output token.
	OutputPricePerToken float64
	// CachedInputPricePerToken is the USD price of a single cached input
	// token. Zero means cached tokens are priced like regular input tokens.
	CachedInputPricePerToken float64
}
|
||||
|
||||
// Cost computes the total USD cost from a Usage.
|
||||
// When CachedInputPricePerToken is set and the usage includes cached_input_tokens,
|
||||
// those tokens are charged at the cached rate instead of the regular input rate.
|
||||
func (mp ModelPricing) Cost(u *Usage) float64 {
|
||||
if u == nil {
|
||||
return 0
|
||||
}
|
||||
|
||||
inputTokens := u.InputTokens
|
||||
cachedTokens := 0
|
||||
if u.Details != nil {
|
||||
cachedTokens = u.Details[UsageDetailCachedInputTokens]
|
||||
}
|
||||
|
||||
var cost float64
|
||||
|
||||
if mp.CachedInputPricePerToken > 0 && cachedTokens > 0 {
|
||||
regularInput := inputTokens - cachedTokens
|
||||
if regularInput < 0 {
|
||||
regularInput = 0
|
||||
}
|
||||
cost += float64(regularInput) * mp.InputPricePerToken
|
||||
cost += float64(cachedTokens) * mp.CachedInputPricePerToken
|
||||
} else {
|
||||
cost += float64(inputTokens) * mp.InputPricePerToken
|
||||
}
|
||||
|
||||
cost += float64(u.OutputTokens) * mp.OutputPricePerToken
|
||||
|
||||
return cost
|
||||
}
|
||||
|
||||
// PricingRegistry maps model names to their pricing.
|
||||
// Callers populate it with the models and prices relevant to their use case.
|
||||
type PricingRegistry struct {
|
||||
mu sync.RWMutex
|
||||
models map[string]ModelPricing
|
||||
}
|
||||
|
||||
// NewPricingRegistry creates an empty pricing registry.
|
||||
func NewPricingRegistry() *PricingRegistry {
|
||||
return &PricingRegistry{
|
||||
models: make(map[string]ModelPricing),
|
||||
}
|
||||
}
|
||||
|
||||
// Set registers pricing for a model.
|
||||
func (pr *PricingRegistry) Set(model string, pricing ModelPricing) {
|
||||
pr.mu.Lock()
|
||||
defer pr.mu.Unlock()
|
||||
pr.models[model] = pricing
|
||||
}
|
||||
|
||||
// Has returns true if pricing is registered for the given model.
|
||||
func (pr *PricingRegistry) Has(model string) bool {
|
||||
pr.mu.RLock()
|
||||
defer pr.mu.RUnlock()
|
||||
_, ok := pr.models[model]
|
||||
return ok
|
||||
}
|
||||
|
||||
// Cost computes the USD cost for the given model and usage.
|
||||
// Returns 0 if the model is not registered.
|
||||
func (pr *PricingRegistry) Cost(model string, u *Usage) float64 {
|
||||
pr.mu.RLock()
|
||||
pricing, ok := pr.models[model]
|
||||
pr.mu.RUnlock()
|
||||
if !ok {
|
||||
return 0
|
||||
}
|
||||
return pricing.Cost(u)
|
||||
}
|
||||
Reference in New Issue
Block a user