ae8e194fad
Parse("a,b,c") now returns one composite *llm.Model that tries each model
in order, retrying transient failures, benching dead models, and failing
over to the next. Comma-free specs are completely unchanged.
- classify.go: Classify(err) ErrKind + IsTransient(err) error classifier
mapping anthropic (typed Is*Err helpers + RequestError status),
openai-go (*openai.Error status), openaicompat.FeatureUnsupportedError,
context errors, and ollama "HTTP <code>" strings to
transient/auth-dead/request-specific/unknown.
- failover.go: failoverProvider (satisfies provider.Provider) wrapped into a
*Model via NewClient. Process-wide mutex-guarded modelHealth bench
registry keyed by concrete spec, with cooldowns and a control API
(ListBenched/BenchModel/UnbenchModel/IsBenched). NewFailoverModel +
ParseChain constructors, FailoverOption config, FailoverObserver (carries
the full request), and configurable package-level defaults.
- parse.go: comma-aware Parse splits into a failover chain; alias/resolver
targets that expand to comma chains are routed through the comma-aware
path and flattened.
All access to global health is mutex-guarded; tests reset it via
resetHealthForTest and pass under go test -race.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
208 lines
6.7 KiB
Go
208 lines
6.7 KiB
Go
package llm
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"strings"
|
|
|
|
anth "github.com/liushuangls/go-anthropic/v2"
|
|
"github.com/openai/openai-go"
|
|
|
|
"gitea.stevedudenhoeffer.com/steve/go-llm/v2/openaicompat"
|
|
)
|
|
|
|
// ErrKind is the failover algorithm's verdict on a provider error.
//
// Why: each failure calls for a different reaction — retry the same model
// (transient), bench it as broken (auth/model dead), or move on to the next
// model without benching (the request itself was at fault). Without this
// distinction failover would either keep hammering a dead model or sideline a
// healthy one.
// What: a four-value enum, one value per outcome failover tells apart.
// Test: see classify_test.go — every branch is table-tested with faked SDK errors.
type ErrKind int

const (
	// ErrUnknown means no classifier branch recognized the error. It is the
	// zero value of ErrKind. Failover handles it conservatively, like a
	// transient failure (retry, then fail over), with one exception:
	// context.Canceled, which the caller special-cases as an abort.
	ErrUnknown ErrKind = iota

	// ErrTransient means the failure is temporary (429/5xx/timeout). Retry the
	// same model; once retries are exhausted, bench it and fail over.
	ErrTransient

	// ErrAuthDead means authentication failed or the model was not found
	// (401/403/404). The model is unusable: bench it immediately and fail over.
	ErrAuthDead

	// ErrRequestSpecific means THIS request is at fault (400/413/422, or an
	// unsupported feature). Fail over so a more capable model can try, but do
	// NOT bench — the model itself is healthy.
	ErrRequestSpecific
)
|
|
|
|
// classifyStatus maps an HTTP status code to an ErrKind.
|
|
//
|
|
// Why: openai-go and anthropic RequestError both expose a numeric StatusCode;
|
|
// centralizing the mapping keeps the per-SDK branches thin and consistent.
|
|
// What: 408/409/429/5xx → transient, 401/403/404 → auth-dead, 400/413/422 →
|
|
// request-specific, anything else → unknown.
|
|
// Test: covered indirectly via Classify table tests for each SDK.
|
|
func classifyStatus(code int) ErrKind {
|
|
switch code {
|
|
case 408, 409, 429, 500, 502, 503, 504:
|
|
return ErrTransient
|
|
case 401, 403, 404:
|
|
return ErrAuthDead
|
|
case 400, 413, 422:
|
|
return ErrRequestSpecific
|
|
default:
|
|
return ErrUnknown
|
|
}
|
|
}
|
|
|
|
// Classify inspects a provider error and returns its ErrKind.
|
|
//
|
|
// Why: the failover composite needs typed, status-code-aware classification to
|
|
// retry/bench/skip correctly across the anthropic, openai-compat, and ollama
|
|
// providers, each of which surfaces errors differently.
|
|
// What: prefers anthropic's typed Is*Err helpers, falls back to numeric status
|
|
// codes (openai-go, anthropic RequestError), then the openaicompat
|
|
// FeatureUnsupportedError, context errors, and finally an ollama HTTP-string
|
|
// fallback; unrecognized errors are ErrUnknown.
|
|
// Test: classify_test.go faked SDK errors exercise every branch.
|
|
func Classify(err error) ErrKind {
|
|
if err == nil {
|
|
return ErrUnknown
|
|
}
|
|
|
|
// context.Canceled is reported as ErrUnknown here; the failover algorithm
|
|
// special-cases it as an abort before consulting the kind.
|
|
if errors.Is(err, context.Canceled) {
|
|
return ErrUnknown
|
|
}
|
|
if errors.Is(err, context.DeadlineExceeded) {
|
|
return ErrTransient
|
|
}
|
|
|
|
// FeatureUnsupportedError is a permanent, request-shaped failure.
|
|
var featErr *openaicompat.FeatureUnsupportedError
|
|
if errors.As(err, &featErr) {
|
|
return ErrRequestSpecific
|
|
}
|
|
|
|
// Anthropic APIError: prefer the typed helpers (no StatusCode available).
|
|
var apiErr *anth.APIError
|
|
if errors.As(err, &apiErr) {
|
|
switch {
|
|
case apiErr.IsRateLimitErr(), apiErr.IsOverloadedErr(), apiErr.IsApiErr():
|
|
return ErrTransient
|
|
case apiErr.IsAuthenticationErr(), apiErr.IsPermissionErr(), apiErr.IsNotFoundErr():
|
|
return ErrAuthDead
|
|
case apiErr.IsTooLargeErr(), apiErr.IsInvalidRequestErr():
|
|
return ErrRequestSpecific
|
|
default:
|
|
return ErrUnknown
|
|
}
|
|
}
|
|
|
|
// Anthropic RequestError: status-code based.
|
|
var anthReqErr *anth.RequestError
|
|
if errors.As(err, &anthReqErr) {
|
|
return classifyStatus(anthReqErr.StatusCode)
|
|
}
|
|
|
|
// openai-go (openai/deepseek/moonshot/xai/groq): status-code based.
|
|
var oaiErr *openai.Error
|
|
if errors.As(err, &oaiErr) {
|
|
return classifyStatus(oaiErr.StatusCode)
|
|
}
|
|
|
|
// Ollama: no typed status — fall back to its "ollama: HTTP <code>:" string.
|
|
if k := classifyOllamaString(err.Error()); k != ErrUnknown {
|
|
return k
|
|
}
|
|
|
|
return ErrUnknown
|
|
}
|
|
|
|
// classifyOllamaString extracts an HTTP status from ollama's error string
|
|
// format ("ollama: HTTP <code>: ...") and classifies it.
|
|
//
|
|
// Why: the ollama provider stringifies errors without a typed status code, so
|
|
// failover can only classify by parsing the message.
|
|
// What: looks for "HTTP <code>" in the message and maps the code; returns
|
|
// ErrUnknown when no recognizable status is present.
|
|
// Test: classify_test.go ollama cases cover 5xx/429/401/404/400.
|
|
func classifyOllamaString(msg string) ErrKind {
|
|
const marker = "HTTP "
|
|
idx := strings.Index(msg, marker)
|
|
if idx < 0 {
|
|
return ErrUnknown
|
|
}
|
|
rest := msg[idx+len(marker):]
|
|
// Read up to 3 leading digits.
|
|
end := 0
|
|
for end < len(rest) && end < 3 && rest[end] >= '0' && rest[end] <= '9' {
|
|
end++
|
|
}
|
|
if end == 0 {
|
|
return ErrUnknown
|
|
}
|
|
code := 0
|
|
for i := 0; i < end; i++ {
|
|
code = code*10 + int(rest[i]-'0')
|
|
}
|
|
return classifyStatus(code)
|
|
}
|
|
|
|
// extractStatus best-effort pulls an HTTP status code out of a provider error
|
|
// for structured logging. Returns 0 when none is available.
|
|
//
|
|
// Why: log lines benefit from the numeric status even though classification
|
|
// may use typed helpers; this keeps that detail out of the hot path.
|
|
// What: checks anthropic RequestError and openai-go Error StatusCode fields,
|
|
// then parses ollama's "HTTP <code>" string; returns 0 otherwise.
|
|
// Test: covered indirectly via failover log assertions / manual inspection.
|
|
func extractStatus(err error) int {
|
|
if err == nil {
|
|
return 0
|
|
}
|
|
var anthReqErr *anth.RequestError
|
|
if errors.As(err, &anthReqErr) {
|
|
return anthReqErr.StatusCode
|
|
}
|
|
var oaiErr *openai.Error
|
|
if errors.As(err, &oaiErr) {
|
|
return oaiErr.StatusCode
|
|
}
|
|
const marker = "HTTP "
|
|
msg := err.Error()
|
|
if idx := strings.Index(msg, marker); idx >= 0 {
|
|
rest := msg[idx+len(marker):]
|
|
code, n := 0, 0
|
|
for n < len(rest) && n < 3 && rest[n] >= '0' && rest[n] <= '9' {
|
|
code = code*10 + int(rest[n]-'0')
|
|
n++
|
|
}
|
|
if n > 0 {
|
|
return code
|
|
}
|
|
}
|
|
return 0
|
|
}
|
|
|
|
// IsTransient reports whether an error should be retried/failed-over rather
|
|
// than treated as a hard, model-specific failure.
|
|
//
|
|
// Why: callers (and failover) want a one-call "is this worth retrying?" check
|
|
// that is conservative about unknown errors.
|
|
// What: returns true for ErrTransient and ErrUnknown (conservative), false for
|
|
// ErrAuthDead and ErrRequestSpecific.
|
|
// Test: TestIsTransient asserts 503→true, unknown→true, 401→false, 400→false.
|
|
func IsTransient(err error) bool {
|
|
switch Classify(err) {
|
|
case ErrTransient, ErrUnknown:
|
|
return true
|
|
default:
|
|
return false
|
|
}
|
|
}
|