feat: foundations — canonical types, Parse grammar, env DSNs, health, chains
Phase 1 of the majordomo build: - llm/ canonical contract (messages, parts, tools, capabilities, streaming, Model/Provider, error classification) - health/ clock-injected tracker (threshold bench, exponential capped cooldown, reset-on-success) - root Registry + Parse (verbatim model ids, inline recursive alias expansion with cycle detection, chain dedup), LLM_* env-DSN providers (go-llm parity: lazy fallback + eager LoadEnv), health-aware chain executor behind the Model interface - provider/fake scriptable test provider; hermetic test suite incl. the trailing-thinking chain and foreman:// env loading - ADRs 0001-0008, CLAUDE.md, README (honest matrix), CI workflow, docs/phase-1-design.md Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
+119
@@ -0,0 +1,119 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/http"
|
||||
"strings"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
// ErrorClass is the coarse category an error is sorted into when deciding
// whether a request should be retried or failed over to another target.
type ErrorClass int

// The available error classes. ClassTransient is the zero value of ErrorClass.
const (
	// ClassTransient marks a failure that may go away on a retry or on a
	// different target: rate limits, server errors, timeouts, connection
	// failures.
	ClassTransient ErrorClass = iota

	// ClassPermanent marks a failure that retrying the same request will not
	// fix: malformed requests, auth failures, model-not-found.
	ClassPermanent
)
|
||||
|
||||
// ErrModelNotFound is the sentinel for the permanent condition "this target
// does not know this model". Match it with errors.Is. Chains are meant to move
// on to the next target when they see it, without counting the failure
// against the target's health.
var ErrModelNotFound = errors.New("model not found")
|
||||
|
||||
// APIError is the structured error reported for a failed provider call. It
// carries enough detail both to classify the failure and to debug it.
type APIError struct {
	// Provider and Model name the target that failed.
	Provider, Model string

	// Status is the HTTP status code of the response. It is 0 when the
	// failure did not come from an HTTP response (connection error, decode
	// error, ...).
	Status int

	// Code is the provider-specific error code, if the provider sent one.
	Code string

	// Message is the human-readable error text from the provider.
	Message string

	// Err is the underlying cause being wrapped; nil when there is none.
	Err error
}
|
||||
|
||||
func (e *APIError) Error() string {
|
||||
var b strings.Builder
|
||||
fmt.Fprintf(&b, "%s/%s", e.Provider, e.Model)
|
||||
if e.Status != 0 {
|
||||
fmt.Fprintf(&b, ": HTTP %d", e.Status)
|
||||
}
|
||||
if e.Code != "" {
|
||||
fmt.Fprintf(&b, " [%s]", e.Code)
|
||||
}
|
||||
if e.Message != "" {
|
||||
fmt.Fprintf(&b, ": %s", e.Message)
|
||||
}
|
||||
if e.Err != nil {
|
||||
fmt.Fprintf(&b, ": %v", e.Err)
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func (e *APIError) Unwrap() error {
|
||||
if e.Err != nil {
|
||||
return e.Err
|
||||
}
|
||||
if e.Status == http.StatusNotFound {
|
||||
return ErrModelNotFound
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Classify buckets an error as transient or permanent.
|
||||
//
|
||||
// The default policy (overridable via health configuration):
|
||||
// - context.Canceled is permanent — the caller gave up; retrying defies
|
||||
// their intent. context.DeadlineExceeded is transient.
|
||||
// - Network timeouts, refused/reset connections, and DNS failures are
|
||||
// transient ("high demand" conditions).
|
||||
// - HTTP 400/401/403/404/405/422 (and ErrModelNotFound) are permanent;
|
||||
// 408/429 and all 5xx are transient.
|
||||
// - Anything unrecognized is transient: when in doubt, failing over to the
|
||||
// next target in a chain can only help availability.
|
||||
func Classify(err error) ErrorClass {
|
||||
if err == nil {
|
||||
return ClassTransient
|
||||
}
|
||||
if errors.Is(err, context.Canceled) {
|
||||
return ClassPermanent
|
||||
}
|
||||
if errors.Is(err, context.DeadlineExceeded) {
|
||||
return ClassTransient
|
||||
}
|
||||
if errors.Is(err, ErrModelNotFound) {
|
||||
return ClassPermanent
|
||||
}
|
||||
if errors.Is(err, syscall.ECONNREFUSED) || errors.Is(err, syscall.ECONNRESET) {
|
||||
return ClassTransient
|
||||
}
|
||||
if _, ok := errors.AsType[net.Error](err); ok {
|
||||
return ClassTransient
|
||||
}
|
||||
if apiErr, ok := errors.AsType[*APIError](err); ok && apiErr.Status != 0 {
|
||||
switch {
|
||||
case apiErr.Status == http.StatusRequestTimeout, // 408
|
||||
apiErr.Status == http.StatusTooManyRequests, // 429
|
||||
apiErr.Status >= 500:
|
||||
return ClassTransient
|
||||
case apiErr.Status >= 400:
|
||||
return ClassPermanent
|
||||
}
|
||||
}
|
||||
return ClassTransient
|
||||
}
|
||||
Reference in New Issue
Block a user