feat(chain): fail over on empty/degenerate responses

A failover chain previously treated a successful-but-empty completion (no content parts and no tool calls — a "stop with nothing") as a valid result and returned it. The agent loop then ended the run with empty output, and the configured backup models were never tried because no error was raised. This let a single flaky model silently terminate an agent/skill run with no answer (observed in the wild with ollama-cloud/glm-5.2 returning empty completions right after a large tool/think turn).

- Add llm.ErrEmptyResponse (classified transient) and Response.IsEmpty(): true only when there are no tool calls and no meaningful content (no parts, or whitespace-only text). A media/image part counts as content, so image-only responses are NOT empty.
- chain.Generate converts an empty completion into ErrEmptyResponse so the chain fails over to the next target. Unlike an ordinary transient error, it is NOT retried on the same target (the model just produced it; these calls are expensive) — the chain penalizes health (so a persistently-empty target benches) and advances immediately.
- When every target returns empty, the call fails with ErrChainExhausted joined to ErrEmptyResponse — a visible error instead of a hollow success. Single-element chains therefore also surface empties as errors.

The Stream path is unchanged (the chain can't inspect content before the consumer reads it).

Tests: Response.IsEmpty table; chain fails over past an empty head; all-empty chain returns ErrChainExhausted/ErrEmptyResponse; repeated empties bench the target across requests. Full suite green.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-26 10:35:07 -04:00
parent 3e81fbd540
commit 74474c6da0
6 changed files with 217 additions and 1 deletions
@@ -64,10 +64,24 @@ func (c *chain) Capabilities() llm.Capabilities {
 }

 // Generate tries each target per the chain semantics above.
+//
+// A target that returns, without error, an empty/degenerate response (no
+// content and no tool calls — see Response.IsEmpty) is treated as a
+// per-target failure (llm.ErrEmptyResponse): the chain reports the failure
+// to its health tracker and advances to the next element. This stops a
+// single flaky model from silently ending an agent run with nothing; if every
+// target comes back empty the call fails with ErrChainExhausted, not success.
 func (c *chain) Generate(ctx context.Context, req llm.Request, opts ...llm.Option) (*llm.Response, error) {
 	req = req.Apply(opts...)
 	return chainDo(ctx, c, req, func(ctx context.Context, t chainTarget, nreq llm.Request) (*llm.Response, error) {
-		return t.model.Generate(ctx, nreq)
+		resp, err := t.model.Generate(ctx, nreq)
+		if err != nil {
+			return nil, err // real errors pass through untouched for chainDo to classify
+		}
+		if resp.IsEmpty() { // NOTE(review): resp may be nil if a target returns (nil, nil) — confirm IsEmpty is nil-safe
+			return nil, llm.ErrEmptyResponse // the hollow response is dropped; chainDo special-cases this sentinel
+		}
+		return resp, nil
 	})
 }

@@ -124,6 +138,18 @@ func chainDo[T any](ctx context.Context, c *chain, req llm.Request, attempt func
 				return result, nil
 			}

+			if errors.Is(err, llm.ErrEmptyResponse) {
+				// The target returned successfully but with nothing usable.
+				// Don't spend an (expensive) same-target retry — it just did
+				// this. Penalize health so a persistently-empty target
+				// benches and is skipped next time, then advance to the next
+				// element immediately.
+				benched := c.tracker.ReportFailure(t.key)
+				observe(FailoverEvent{Target: t.key, Err: err, Class: llm.ClassTransient, Attempt: attemptN, Benched: benched})
+				failures = append(failures, fmt.Errorf("%s: %w", t.key, err))
+				break
+			}
+
 			class := c.cfg.classify(err)
 			if class == llm.ClassPermanent {
 				observe(FailoverEvent{Target: t.key, Err: err, Class: class, Attempt: attemptN})