dcd004289f
Phase 1 of the majordomo build: - llm/ canonical contract (messages, parts, tools, capabilities, streaming, Model/Provider, error classification) - health/ clock-injected tracker (threshold bench, exponential capped cooldown, reset-on-success) - root Registry + Parse (verbatim model ids, inline recursive alias expansion with cycle detection, chain dedup), LLM_* env-DSN providers (go-llm parity: lazy fallback + eager LoadEnv), health-aware chain executor behind the Model interface - provider/fake scriptable test provider; hermetic test suite incl. the trailing-thinking chain and foreman:// env loading - ADRs 0001-0008, CLAUDE.md, README (honest matrix), CI workflow, docs/phase-1-design.md Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
166 lines
4.0 KiB
Go
166 lines
4.0 KiB
Go
package health
|
|
|
|
import (
|
|
"sync"
|
|
"testing"
|
|
"time"
|
|
)
|
|
|
|
// fakeClock is a hand-driven time source for the backoff tests: its time
// changes only when a test calls Advance, which keeps cooldown arithmetic
// deterministic. The mutex guards now, so Now and Advance may be called from
// different goroutines.
type fakeClock struct {
	mu  sync.Mutex
	now time.Time
}

// newFakeClock returns a clock frozen at 2026-06-10 12:00:00 UTC.
func newFakeClock() *fakeClock {
	start := time.Date(2026, 6, 10, 12, 0, 0, 0, time.UTC)
	return &fakeClock{now: start}
}

// Now reports the instant the clock currently shows.
func (fc *fakeClock) Now() time.Time {
	fc.mu.Lock()
	current := fc.now
	fc.mu.Unlock()
	return current
}

// Advance shifts the clock by d.
func (fc *fakeClock) Advance(d time.Duration) {
	fc.mu.Lock()
	fc.now = fc.now.Add(d)
	fc.mu.Unlock()
}
|
|
|
|
func newTestTracker(clock *fakeClock) *Tracker {
|
|
return NewTracker(Config{
|
|
FailureThreshold: 2,
|
|
BaseCooldown: 5 * time.Second,
|
|
MaxCooldown: 5 * time.Minute,
|
|
Multiplier: 2,
|
|
Clock: clock.Now,
|
|
})
|
|
}
|
|
|
|
func TestSingleFailureStaysAvailable(t *testing.T) {
|
|
clock := newFakeClock()
|
|
tr := newTestTracker(clock)
|
|
if backedOff := tr.ReportFailure("k"); backedOff {
|
|
t.Error("first failure must not back off")
|
|
}
|
|
if !tr.Available("k") {
|
|
t.Error("key should remain available after one failure")
|
|
}
|
|
}
|
|
|
|
func TestThresholdTriggersBackoff(t *testing.T) {
|
|
clock := newFakeClock()
|
|
tr := newTestTracker(clock)
|
|
tr.ReportFailure("k")
|
|
if backedOff := tr.ReportFailure("k"); !backedOff {
|
|
t.Error("second consecutive failure should back off")
|
|
}
|
|
if tr.Available("k") {
|
|
t.Error("key should be unavailable during backoff")
|
|
}
|
|
if until := tr.BackedOffUntil("k"); !until.Equal(clock.Now().Add(5 * time.Second)) {
|
|
t.Errorf("BackedOffUntil = %v, want now+5s", until)
|
|
}
|
|
}
|
|
|
|
func TestCooldownExpiryReadmits(t *testing.T) {
|
|
clock := newFakeClock()
|
|
tr := newTestTracker(clock)
|
|
tr.ReportFailure("k")
|
|
tr.ReportFailure("k")
|
|
clock.Advance(5*time.Second - time.Millisecond)
|
|
if tr.Available("k") {
|
|
t.Error("still inside cooldown")
|
|
}
|
|
clock.Advance(time.Millisecond)
|
|
if !tr.Available("k") {
|
|
t.Error("cooldown expiry should re-admit the key")
|
|
}
|
|
}
|
|
|
|
func TestExponentialCooldownWithCap(t *testing.T) {
|
|
clock := newFakeClock()
|
|
tr := newTestTracker(clock)
|
|
|
|
// Consecutive backoffs: 5s, 10s, 20s, ... capped at 5m.
|
|
wantCooldowns := []time.Duration{
|
|
5 * time.Second, 10 * time.Second, 20 * time.Second, 40 * time.Second,
|
|
80 * time.Second, 160 * time.Second, 5 * time.Minute, 5 * time.Minute,
|
|
}
|
|
for i, want := range wantCooldowns {
|
|
tr.ReportFailure("k")
|
|
tr.ReportFailure("k")
|
|
until := tr.BackedOffUntil("k")
|
|
if got := until.Sub(clock.Now()); got != want {
|
|
t.Fatalf("backoff #%d cooldown = %v, want %v", i+1, got, want)
|
|
}
|
|
clock.Advance(want)
|
|
}
|
|
}
|
|
|
|
func TestSuccessResetsEverything(t *testing.T) {
|
|
clock := newFakeClock()
|
|
tr := newTestTracker(clock)
|
|
|
|
// Build up to a long cooldown...
|
|
for range 3 {
|
|
tr.ReportFailure("k")
|
|
tr.ReportFailure("k")
|
|
clock.Advance(tr.BackedOffUntil("k").Sub(clock.Now()))
|
|
}
|
|
// ...then a success resets both the count and the exponent.
|
|
tr.ReportSuccess("k")
|
|
tr.ReportFailure("k")
|
|
if !tr.Available("k") {
|
|
t.Error("one failure after success must not back off")
|
|
}
|
|
tr.ReportFailure("k")
|
|
if got := tr.BackedOffUntil("k").Sub(clock.Now()); got != 5*time.Second {
|
|
t.Errorf("post-reset cooldown = %v, want base 5s", got)
|
|
}
|
|
}
|
|
|
|
func TestKeysAreIndependent(t *testing.T) {
|
|
clock := newFakeClock()
|
|
tr := newTestTracker(clock)
|
|
tr.ReportFailure("a")
|
|
tr.ReportFailure("a")
|
|
if tr.Available("a") {
|
|
t.Error("a should be backed off")
|
|
}
|
|
if !tr.Available("b") {
|
|
t.Error("b must be unaffected")
|
|
}
|
|
}
|
|
|
|
func TestDefaultsApplied(t *testing.T) {
|
|
tr := NewTracker(Config{})
|
|
if tr.cfg.FailureThreshold != DefaultFailureThreshold ||
|
|
tr.cfg.BaseCooldown != DefaultBaseCooldown ||
|
|
tr.cfg.MaxCooldown != DefaultMaxCooldown ||
|
|
tr.cfg.Multiplier != DefaultMultiplier ||
|
|
tr.cfg.Clock == nil {
|
|
t.Errorf("defaults not applied: %+v", tr.cfg)
|
|
}
|
|
}
|
|
|
|
func TestTrackerConcurrency(t *testing.T) {
|
|
clock := newFakeClock()
|
|
tr := newTestTracker(clock)
|
|
var wg sync.WaitGroup
|
|
for i := range 8 {
|
|
wg.Add(1)
|
|
go func(n int) {
|
|
defer wg.Done()
|
|
key := []string{"a", "b"}[n%2]
|
|
for range 200 {
|
|
tr.ReportFailure(key)
|
|
tr.Available(key)
|
|
tr.ReportSuccess(key)
|
|
}
|
|
}(i)
|
|
}
|
|
wg.Wait()
|
|
}
|