74474c6da0
A failover chain previously treated a successful-but-empty completion (no content parts and no tool calls — a "stop with nothing") as a valid result and returned it. The agent loop then ended the run with empty output, and the configured backup models were never tried because no error was raised. This let a single flaky model silently terminate an agent/skill run with no answer (observed in the wild with ollama-cloud/glm-5.2 returning empty completions right after a large tool/think turn). - Add llm.ErrEmptyResponse (classified transient) and Response.IsEmpty(): true only when there are no tool calls and no meaningful content (no parts, or whitespace-only text). A media/image part counts as content, so image-only responses are NOT empty. - chain.Generate converts an empty completion into ErrEmptyResponse so the chain fails over to the next target. Unlike an ordinary transient it is NOT retried on the same target (the model just produced it; these calls are expensive) — the chain penalizes health (so a persistently-empty target benches) and advances immediately. - When every target returns empty the call fails with ErrChainExhausted joined to ErrEmptyResponse — a visible error instead of a hollow success. Single-element chains therefore also surface empties as errors. Stream path is unchanged (can't inspect content before the consumer reads it). Tests: Response.IsEmpty table; chain fails over past an empty head; all-empty chain returns ErrChainExhausted/ErrEmptyResponse; repeated empties bench the target across requests. Full suite green. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
115 lines
3.5 KiB
Go
115 lines
3.5 KiB
Go
package llm
|
|
|
|
import "strings"
|
|
|
|
// FinishReason explains why generation stopped.
type FinishReason string

// Canonical finish reasons. Provider-specific reasons that have no direct
// equivalent here are reported as FinishOther.
const (
	// FinishStop: the model completed its answer (or hit a stop sequence).
	FinishStop FinishReason = "stop"
	// FinishLength: the MaxTokens (or context) limit was hit.
	FinishLength FinishReason = "length"
	// FinishToolCalls: the model stopped to request tool invocations.
	FinishToolCalls FinishReason = "tool_calls"
	// FinishContentFilter: the provider suppressed content.
	FinishContentFilter FinishReason = "content_filter"
	// FinishOther: any provider-specific reason not mapped above.
	FinishOther FinishReason = "other"
)
|
|
|
|
// Usage reports token accounting for one request. InputTokens and
// OutputTokens are always totals; the detail fields break out portions of
// those totals where the provider reports them (0 = not reported).
type Usage struct {
	InputTokens  int
	OutputTokens int

	// CacheReadTokens is the portion of InputTokens served from the
	// provider's prompt cache.
	CacheReadTokens int
	// CacheWriteTokens is the portion of InputTokens written to the
	// provider's prompt cache.
	CacheWriteTokens int
	// ReasoningTokens is the portion of OutputTokens spent on
	// thinking/reasoning.
	ReasoningTokens int
}

// Total returns input plus output tokens. The detail fields are not added:
// they are portions of the two totals, so including them would double count.
func (u Usage) Total() int { return u.InputTokens + u.OutputTokens }

// Add accumulates another usage record (used by agents summing steps).
// Every counter of o is added field-wise to the receiver, which is
// modified in place.
func (u *Usage) Add(o Usage) {
	u.InputTokens += o.InputTokens
	u.OutputTokens += o.OutputTokens
	u.CacheReadTokens += o.CacheReadTokens
	u.CacheWriteTokens += o.CacheWriteTokens
	u.ReasoningTokens += o.ReasoningTokens
}
|
|
|
|
// Response is the canonical generation result.
|
|
type Response struct {
|
|
// Parts is the response content (text, and for multimodal-output models,
|
|
// other media).
|
|
Parts []Part
|
|
|
|
// ToolCalls are the tool invocations the model requested, if any.
|
|
ToolCalls []ToolCall
|
|
|
|
FinishReason FinishReason
|
|
Usage Usage
|
|
|
|
// Model identifies the resolved target that produced this response as
|
|
// "provider/model-id". With failover chains this names the element that
|
|
// actually served the request.
|
|
Model string
|
|
|
|
// Raw is the provider-native response object, an escape hatch for
|
|
// provider-specific fields. May be nil; never required for normal use.
|
|
Raw any
|
|
}
|
|
|
|
// Text returns the concatenation of all text parts in the response.
|
|
func (r *Response) Text() string {
|
|
var b strings.Builder
|
|
for _, p := range r.Parts {
|
|
if t, ok := p.(TextPart); ok {
|
|
b.WriteString(t.Text)
|
|
}
|
|
}
|
|
return b.String()
|
|
}
|
|
|
|
// Message converts the response into an assistant message suitable for
|
|
// appending to a conversation history.
|
|
func (r *Response) Message() Message {
|
|
return Message{Role: RoleAssistant, Parts: r.Parts, ToolCalls: r.ToolCalls}
|
|
}
|
|
|
|
// IsEmpty reports whether the response carries no usable output: no tool
|
|
// calls and no meaningful content (no parts at all, or only whitespace
|
|
// text). A media/image part — or any non-text part — counts as content, so
|
|
// an image-only response is NOT empty. A "stop with nothing" like this is
|
|
// never a valid completion for an agent step or a Generate call; failover
|
|
// chains treat it as a per-target failure (see ErrEmptyResponse).
|
|
func (r *Response) IsEmpty() bool {
|
|
if r == nil {
|
|
return true
|
|
}
|
|
if len(r.ToolCalls) > 0 {
|
|
return false
|
|
}
|
|
for _, p := range r.Parts {
|
|
if t, ok := p.(TextPart); ok {
|
|
if strings.TrimSpace(t.Text) != "" {
|
|
return false
|
|
}
|
|
continue
|
|
}
|
|
// Any non-text part (image/media) is meaningful output.
|
|
return false
|
|
}
|
|
return true
|
|
}
|