Files
majordomo/chain.go
T
steve 74474c6da0
CI / Tidy (push) Successful in 9m26s
CI / Build & Test (push) Successful in 10m29s
feat(chain): fail over on empty/degenerate responses
A failover chain previously treated a successful-but-empty completion (no
content parts and no tool calls — a "stop with nothing") as a valid result
and returned it. The agent loop then ended the run with empty output, and
the configured backup models were never tried because no error was raised.
This let a single flaky model silently terminate an agent/skill run with
no answer (observed in the wild with ollama-cloud/glm-5.2 returning empty
completions right after a large tool/think turn).

- Add llm.ErrEmptyResponse (classified transient) and Response.IsEmpty():
  true only when there are no tool calls and no meaningful content (no
  parts, or whitespace-only text). A media/image part counts as content,
  so image-only responses are NOT empty.
- chain.Generate converts an empty completion into ErrEmptyResponse so the
  chain fails over to the next target. Unlike an ordinary transient it is
  NOT retried on the same target (the model just produced it; these calls
  are expensive) — the chain penalizes health (so a persistently-empty
  target benches) and advances immediately.
- When every target returns empty the call fails with ErrChainExhausted
  joined to ErrEmptyResponse — a visible error instead of a hollow success.
  Single-element chains therefore also surface empties as errors.

Stream path is unchanged (can't inspect content before the consumer reads
it). Tests: Response.IsEmpty table; chain fails over past an empty head;
all-empty chain returns ErrChainExhausted/ErrEmptyResponse; repeated
empties bench the target across requests. Full suite green.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-26 10:35:07 -04:00

182 lines
6.9 KiB
Go

package majordomo
import (
"context"
"errors"
"fmt"
"gitea.stevedudenhoeffer.com/steve/majordomo/health"
"gitea.stevedudenhoeffer.com/steve/majordomo/llm"
"gitea.stevedudenhoeffer.com/steve/majordomo/media"
)
// ErrChainExhausted reports that every element of a failover chain failed
// (or was skipped while backed off). It is always joined with the
// per-target errors via errors.Join, so callers should match it with
// errors.Is rather than comparing with ==.
var ErrChainExhausted = errors.New("all chain targets failed")
// chainTarget is one resolved element of a failover chain: a model paired
// with the key its health is tracked under.
type chainTarget struct {
// key identifies the target for health tracking: "provider/model-id".
// It is also the prefix of every per-target error the chain reports.
key string
// model is the backend that Generate and Stream calls are forwarded to
// when this target is attempted; its Capabilities drive per-target
// media normalization.
model llm.Model
}
// chain implements llm.Model over an ordered list of targets with
// health-tracked failover. A single-element spec is a chain of one — the
// behavior (retry-on-transient, backoff bookkeeping) is identical, so
// callers never branch on what Parse returned.
//
// Semantics (ADR-0006, ADR-0008):
// - Targets are tried head-to-tail; targets currently backed off are
// skipped.
// - Before a target is tried, the request's media is normalized against
// that target's capabilities; a request that cannot be made to fit
// advances to the next target without a health penalty.
// - A transient error is retried on the same target (ChainConfig
// TransientRetries, default 1). Every failed attempt counts toward the
// target's consecutive-failure threshold; when the tracker benches the
// target (default: 2 consecutive transient failures → exponential
// capped cooldown) the chain stops retrying it and advances.
// - An empty response (llm.ErrEmptyResponse) counts as a failure toward
// the target's health but is never retried on the same target; the
// chain advances immediately.
// - Permanent errors that are model-not-found or unsupported advance
// without penalizing health. Other permanent errors fail fast by
// default (AdvanceOnPermanent flips this).
// - Any success resets the target's health.
// - A context that is already done is detected before each attempt and
// aborts the whole chain with the context's error.
// - When every target fails or is skipped, the returned error joins
// ErrChainExhausted with each target's reason.
type chain struct {
// targets is the failover order; index 0 is the head (preferred) element.
targets []chainTarget
// tracker records per-target successes and failures and answers whether
// a target is currently available or backed off.
tracker *health.Tracker
// cfg supplies the retry count, the error classifier, the
// AdvanceOnPermanent policy and the optional failover Observer.
cfg ChainConfig
}
// Targets lists the chain's resolved "provider/model" keys, head first.
// The returned slice is freshly allocated, so callers may modify it
// freely. Intended for diagnostics and tests.
func (c *chain) Targets() []string {
	out := make([]string, 0, len(c.targets))
	for i := range c.targets {
		out = append(out, c.targets[i].key)
	}
	return out
}
// Capabilities returns what the head element — the chain's preferred
// target (ADR-0008) — advertises. It is only a summary for callers: when a
// request is actually dispatched, media normalization consults the
// capabilities of the specific target being attempted, not this value.
//
// The chain must hold at least one target; indexing an empty chain panics.
func (c *chain) Capabilities() llm.Capabilities {
	head := c.targets[0]
	return head.model.Capabilities()
}
// Generate applies opts to req and then tries each target in chain order,
// returning the first usable response.
//
// A target that returns, without error, an empty/degenerate response (no
// content and no tool calls — see Response.IsEmpty) is treated as a
// per-target failure (llm.ErrEmptyResponse): the chain penalizes the
// target's health and advances to the next element without retrying it.
// A nil response returned together with a nil error is handled the same
// way, so a misbehaving target can neither crash the chain nor hand the
// caller a nil pointer dressed up as success.
//
// This stops a single flaky model from silently ending an agent run with
// nothing; if every target comes back empty the call fails with
// ErrChainExhausted (joined with llm.ErrEmptyResponse per target) rather
// than a hollow success.
func (c *chain) Generate(ctx context.Context, req llm.Request, opts ...llm.Option) (*llm.Response, error) {
	req = req.Apply(opts...)
	return chainDo(ctx, c, req, func(ctx context.Context, t chainTarget, nreq llm.Request) (*llm.Response, error) {
		resp, err := t.model.Generate(ctx, nreq)
		if err != nil {
			return nil, err
		}
		// Check for nil before IsEmpty: a (nil, nil) result carries even
		// less than an empty completion and must fail over the same way.
		if resp == nil || resp.IsEmpty() {
			return nil, llm.ErrEmptyResponse
		}
		return resp, nil
	})
}
// Stream applies opts to req and opens a stream on the first target that
// accepts it, following the chain's failover semantics.
//
// Failover covers only the act of establishing the stream. Once a stream
// has been handed back, mid-stream errors surface to the consumer instead
// of triggering a restart on another target — replaying half-delivered
// output would duplicate content.
func (c *chain) Stream(ctx context.Context, req llm.Request, opts ...llm.Option) (llm.Stream, error) {
	open := func(ctx context.Context, t chainTarget, nreq llm.Request) (llm.Stream, error) {
		return t.model.Stream(ctx, nreq)
	}
	return chainDo(ctx, c, req.Apply(opts...), open)
}
// chainDo is the failover engine shared by Generate and Stream. It is
// generic over the result type T (response vs stream) that attempt
// produces.
//
// Targets are visited head-to-tail. For each target:
//   - if the tracker reports it unavailable it is skipped, and the observer
//     receives a Skipped event;
//   - the request's media is normalized against THAT target's capabilities
//     (ADR-0008/0009); a request that cannot be made to fit advances to the
//     next target without a health penalty;
//   - attempt is invoked, and invoked again after a transient failure while
//     retries remain and the tracker has not benched the target.
//
// The first success is reported to the tracker and returned. A permanent
// error is returned immediately unless it is model-not-found/unsupported
// or AdvanceOnPermanent is set. A done context is detected before each
// attempt and returned as ctx.Err(). When no target succeeds, the error is
// ErrChainExhausted joined with one reason per target, in chain order.
func chainDo[T any](ctx context.Context, c *chain, req llm.Request, attempt func(context.Context, chainTarget, llm.Request) (T, error)) (T, error) {
	var none T

	// Seeded with the sentinel so the final errors.Join yields
	// ErrChainExhausted followed by every target's reason.
	reasons := []error{ErrChainExhausted}

	// notify forwards an event to the configured observer, if there is one.
	notify := func(ev FailoverEvent) {
		if c.cfg.Observer == nil {
			return
		}
		c.cfg.Observer(ev)
	}

nextTarget:
	for _, target := range c.targets {
		key := target.key

		if !c.tracker.Available(key) {
			resumeAt := c.tracker.BackedOffUntil(key)
			reasons = append(reasons, fmt.Errorf("%s: skipped (backed off until %s)", key, resumeAt.Format("15:04:05.000")))
			notify(FailoverEvent{Target: key, Skipped: true})
			continue
		}

		fitted, normErr := media.Normalize(req, target.model.Capabilities())
		if normErr != nil {
			// Always ErrUnsupported-wrapped: by its own declaration this
			// target cannot take the request. Move on, no health penalty.
			reasons = append(reasons, fmt.Errorf("%s: %w", key, normErr))
			continue
		}

		budget := c.cfg.retries()
		for n := 0; ; n++ {
			if done := ctx.Err(); done != nil {
				return none, done
			}

			out, failure := attempt(ctx, target, fitted)
			if failure == nil {
				c.tracker.ReportSuccess(key)
				return out, nil
			}

			if errors.Is(failure, llm.ErrEmptyResponse) {
				// The call succeeded but produced nothing usable. A
				// same-target retry is expensive and the model just did
				// exactly this, so only record the failure (letting a
				// persistently-empty target bench) and advance at once.
				benched := c.tracker.ReportFailure(key)
				notify(FailoverEvent{Target: key, Err: failure, Class: llm.ClassTransient, Attempt: n, Benched: benched})
				reasons = append(reasons, fmt.Errorf("%s: %w", key, failure))
				continue nextTarget
			}

			class := c.cfg.classify(failure)
			if class == llm.ClassPermanent {
				notify(FailoverEvent{Target: key, Err: failure, Class: class, Attempt: n})
				keepGoing := errors.Is(failure, llm.ErrModelNotFound) || errors.Is(failure, llm.ErrUnsupported) || c.cfg.AdvanceOnPermanent
				if !keepGoing {
					// Another target cannot repair a bad request or bad
					// credentials: fail fast.
					return none, fmt.Errorf("%s: %w", key, failure)
				}
				// Not a health problem (or policy says carry on): advance
				// without penalizing the target.
				reasons = append(reasons, fmt.Errorf("%s: %w", key, failure))
				continue nextTarget
			}

			// Transient: each failed attempt counts toward the target's
			// consecutive-failure threshold.
			benched := c.tracker.ReportFailure(key)
			notify(FailoverEvent{Target: key, Err: failure, Class: class, Attempt: n, Benched: benched})
			if benched || n >= budget {
				// Out of retries, or freshly benched and therefore not
				// worth another try: record the reason and advance.
				reasons = append(reasons, fmt.Errorf("%s: %w", key, failure))
				continue nextTarget
			}
			// Otherwise loop around and retry the same target.
		}
	}

	return none, errors.Join(reasons...)
}