feat(failover): model failover chains via comma-separated specs
Parse("a,b,c") now returns one composite *llm.Model that tries each model
in order, retrying transient failures, benching dead models, and failing
over to the next. Comma-free specs are completely unchanged.
- classify.go: Classify(err) ErrKind + IsTransient(err) error classifier
mapping anthropic (typed Is*Err helpers + RequestError status),
openai-go (*openai.Error status), openaicompat.FeatureUnsupportedError,
context errors, and ollama "HTTP <code>" strings to
transient/auth-dead/request-specific/unknown.
- failover.go: failoverProvider (satisfies provider.Provider) wrapped into a
*Model via NewClient. Process-wide mutex-guarded modelHealth bench
registry keyed by concrete spec, with cooldowns and a control API
(ListBenched/BenchModel/UnbenchModel/IsBenched). NewFailoverModel +
ParseChain constructors, FailoverOption config, FailoverObserver (carries
the full request), and configurable package-level defaults.
- parse.go: comma-aware Parse splits into a failover chain; alias/resolver
targets that expand to comma chains are routed through the comma-aware
path and flattened.
All access to global health is mutex-guarded; tests reset it via
resetHealthForTest and pass under go test -race.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,115 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
anth "github.com/liushuangls/go-anthropic/v2"
|
||||
"github.com/openai/openai-go"
|
||||
|
||||
"gitea.stevedudenhoeffer.com/steve/go-llm/v2/openaicompat"
|
||||
)
|
||||
|
||||
func TestClassify(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
err error
|
||||
want ErrKind
|
||||
}{
|
||||
// nil
|
||||
{"nil", nil, ErrUnknown},
|
||||
|
||||
// openai-go status codes (transient)
|
||||
{"openai 408", &openai.Error{StatusCode: 408}, ErrTransient},
|
||||
{"openai 409", &openai.Error{StatusCode: 409}, ErrTransient},
|
||||
{"openai 429", &openai.Error{StatusCode: 429}, ErrTransient},
|
||||
{"openai 500", &openai.Error{StatusCode: 500}, ErrTransient},
|
||||
{"openai 502", &openai.Error{StatusCode: 502}, ErrTransient},
|
||||
{"openai 503", &openai.Error{StatusCode: 503}, ErrTransient},
|
||||
{"openai 504", &openai.Error{StatusCode: 504}, ErrTransient},
|
||||
|
||||
// openai-go status codes (auth dead)
|
||||
{"openai 401", &openai.Error{StatusCode: 401}, ErrAuthDead},
|
||||
{"openai 403", &openai.Error{StatusCode: 403}, ErrAuthDead},
|
||||
{"openai 404", &openai.Error{StatusCode: 404}, ErrAuthDead},
|
||||
|
||||
// openai-go status codes (request specific)
|
||||
{"openai 400", &openai.Error{StatusCode: 400}, ErrRequestSpecific},
|
||||
{"openai 413", &openai.Error{StatusCode: 413}, ErrRequestSpecific},
|
||||
{"openai 422", &openai.Error{StatusCode: 422}, ErrRequestSpecific},
|
||||
|
||||
// openai unrecognized status -> unknown
|
||||
{"openai 418", &openai.Error{StatusCode: 418}, ErrUnknown},
|
||||
|
||||
// wrapped openai error (providers wrap with %w)
|
||||
{"wrapped openai 503", fmt.Errorf("openai completion error: %w", &openai.Error{StatusCode: 503}), ErrTransient},
|
||||
|
||||
// FeatureUnsupportedError -> request specific
|
||||
{"feature unsupported", &openaicompat.FeatureUnsupportedError{Feature: "tools", Model: "m"}, ErrRequestSpecific},
|
||||
{"wrapped feature unsupported", fmt.Errorf("x: %w", &openaicompat.FeatureUnsupportedError{Feature: "vision", Model: "m"}), ErrRequestSpecific},
|
||||
|
||||
// anthropic RequestError (status-code based)
|
||||
{"anth req 503", &anth.RequestError{StatusCode: 503}, ErrTransient},
|
||||
{"anth req 429", &anth.RequestError{StatusCode: 429}, ErrTransient},
|
||||
{"anth req 401", &anth.RequestError{StatusCode: 401}, ErrAuthDead},
|
||||
{"anth req 400", &anth.RequestError{StatusCode: 400}, ErrRequestSpecific},
|
||||
{"wrapped anth req 502", fmt.Errorf("anthropic completion error: %w", &anth.RequestError{StatusCode: 502}), ErrTransient},
|
||||
|
||||
// anthropic APIError (helper based)
|
||||
{"anth rate limit", &anth.APIError{Type: anth.ErrTypeRateLimit}, ErrTransient},
|
||||
{"anth overloaded", &anth.APIError{Type: anth.ErrTypeOverloaded}, ErrTransient},
|
||||
{"anth api", &anth.APIError{Type: anth.ErrTypeApi}, ErrTransient},
|
||||
{"anth auth", &anth.APIError{Type: anth.ErrTypeAuthentication}, ErrAuthDead},
|
||||
{"anth permission", &anth.APIError{Type: anth.ErrTypePermission}, ErrAuthDead},
|
||||
{"anth not found", &anth.APIError{Type: anth.ErrTypeNotFound}, ErrAuthDead},
|
||||
{"anth too large", &anth.APIError{Type: anth.ErrTypeTooLarge}, ErrRequestSpecific},
|
||||
{"anth invalid request", &anth.APIError{Type: anth.ErrTypeInvalidRequest}, ErrRequestSpecific},
|
||||
{"wrapped anth api error", fmt.Errorf("error, status code: 529, message: %w", &anth.APIError{Type: anth.ErrTypeOverloaded}), ErrTransient},
|
||||
|
||||
// context errors
|
||||
{"context canceled", context.Canceled, ErrUnknown},
|
||||
{"context deadline", context.DeadlineExceeded, ErrTransient},
|
||||
{"wrapped deadline", fmt.Errorf("call failed: %w", context.DeadlineExceeded), ErrTransient},
|
||||
|
||||
// ollama string-based
|
||||
{"ollama HTTP 503", errors.New("ollama: HTTP 503: service unavailable"), ErrTransient},
|
||||
{"ollama HTTP 500", errors.New("ollama: HTTP 500: internal"), ErrTransient},
|
||||
{"ollama HTTP 502", errors.New("ollama: HTTP 502: bad gateway"), ErrTransient},
|
||||
{"ollama HTTP 504", errors.New("ollama: HTTP 504: timeout"), ErrTransient},
|
||||
{"ollama HTTP 429", errors.New("ollama: HTTP 429: too many requests"), ErrTransient},
|
||||
{"ollama HTTP 401", errors.New("ollama: HTTP 401: unauthorized"), ErrAuthDead},
|
||||
{"ollama HTTP 404", errors.New("ollama: HTTP 404: not found"), ErrAuthDead},
|
||||
{"ollama HTTP 400", errors.New("ollama: HTTP 400: bad request"), ErrRequestSpecific},
|
||||
|
||||
// unknown
|
||||
{"random error", errors.New("something weird"), ErrUnknown},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := Classify(tt.err)
|
||||
if got != tt.want {
|
||||
t.Errorf("Classify(%v) = %v, want %v", tt.err, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsTransient(t *testing.T) {
|
||||
// IsTransient treats both ErrTransient and ErrUnknown as "should retry"
|
||||
// (conservative). Auth/request-specific are not transient.
|
||||
if !IsTransient(&openai.Error{StatusCode: 503}) {
|
||||
t.Error("503 should be transient")
|
||||
}
|
||||
if !IsTransient(errors.New("mystery")) {
|
||||
t.Error("unknown should be treated as transient (conservative)")
|
||||
}
|
||||
if IsTransient(&openai.Error{StatusCode: 401}) {
|
||||
t.Error("401 (auth dead) should NOT be transient")
|
||||
}
|
||||
if IsTransient(&openai.Error{StatusCode: 400}) {
|
||||
t.Error("400 (request specific) should NOT be transient")
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user