// majordomo/llm/response.go

package llm

import "strings"

// FinishReason records why a generation ended.
type FinishReason string

// The finish reasons below are the canonical set; anything a provider
// reports that does not fit one of the specific values is FinishOther.
const (
	// FinishStop means the model completed its answer or hit a stop
	// sequence.
	FinishStop = FinishReason("stop")
	// FinishLength means the MaxTokens (or context) limit was hit.
	FinishLength = FinishReason("length")
	// FinishToolCalls means the model stopped to request tool invocations.
	FinishToolCalls = FinishReason("tool_calls")
	// FinishContentFilter means the provider suppressed content.
	FinishContentFilter = FinishReason("content_filter")
	// FinishOther covers any provider-specific reason not mapped above.
	FinishOther = FinishReason("other")
)

// Usage is the token accounting for a single request.
//
// InputTokens and OutputTokens are always totals. The remaining fields
// break out portions of those totals where the provider reports them; a
// zero value means the provider did not report that detail.
type Usage struct {
	// Total tokens consumed as input and produced as output.
	InputTokens  int
	OutputTokens int

	// CacheReadTokens counts the InputTokens that were served from the
	// provider's prompt cache.
	CacheReadTokens int
	// CacheWriteTokens counts the InputTokens that were written to the
	// provider's prompt cache.
	CacheWriteTokens int
	// ReasoningTokens counts the OutputTokens that were spent on
	// thinking/reasoning.
	ReasoningTokens int
}

// Total returns InputTokens plus OutputTokens. The detail fields are not
// added: they are portions of those two totals.
func (u Usage) Total() int {
	return u.InputTokens + u.OutputTokens
}

// Add sums o into u, field by field, updating u in place (used by agents
// summing steps).
func (u *Usage) Add(o Usage) {
	*u = Usage{
		InputTokens:      u.InputTokens + o.InputTokens,
		OutputTokens:     u.OutputTokens + o.OutputTokens,
		CacheReadTokens:  u.CacheReadTokens + o.CacheReadTokens,
		CacheWriteTokens: u.CacheWriteTokens + o.CacheWriteTokens,
		ReasoningTokens:  u.ReasoningTokens + o.ReasoningTokens,
	}
}

// Response is the canonical result of a generation request.
type Response struct {
	// Parts holds the response content: text and, for models with
	// multimodal output, other media.
	Parts []Part

	// ToolCalls lists the tool invocations the model requested, if any.
	ToolCalls []ToolCall

	// FinishReason explains why generation stopped; Usage reports the
	// token accounting for the request.
	FinishReason FinishReason
	Usage        Usage

	// Model names the resolved target that produced this response, in the
	// form "provider/model-id". With failover chains it is the element
	// that actually served the request.
	Model string

	// Raw is the provider-native response object, kept as an escape hatch
	// for provider-specific fields. It may be nil and is never required
	// for normal use.
	Raw any
}

// Text joins the text of every TextPart in Parts, in order, with no
// separator. Parts whose dynamic type is not TextPart are skipped; the
// result is "" when there are no text parts.
func (r *Response) Text() string {
	var sb strings.Builder
	for _, part := range r.Parts {
		text, isText := part.(TextPart)
		if !isText {
			continue
		}
		sb.WriteString(text.Text)
	}
	return sb.String()
}

// Message wraps the response as an assistant-role Message suitable for
// appending to a conversation history. The returned message shares the
// Parts and ToolCalls slices with r; they are not copied.
func (r *Response) Message() Message {
	msg := Message{Role: RoleAssistant}
	msg.Parts = r.Parts
	msg.ToolCalls = r.ToolCalls
	return msg
}