// majordomo/health/health_test.go

package health

import (
	"sync"
	"testing"
	"time"
)

// fakeClock is a test clock whose time only moves when Advance is called,
// which keeps backoff tests deterministic. The mutex guards now so the clock
// can be read and advanced from multiple goroutines.
type fakeClock struct {
	mu  sync.Mutex
	now time.Time
}

// newFakeClock returns a fakeClock pinned to 2026-06-10 12:00:00 UTC.
func newFakeClock() *fakeClock {
	start := time.Date(2026, time.June, 10, 12, 0, 0, 0, time.UTC)
	return &fakeClock{now: start}
}

// Now reports the clock's current fake time.
func (c *fakeClock) Now() time.Time {
	c.mu.Lock()
	current := c.now
	c.mu.Unlock()
	return current
}

// Advance shifts the fake time by d.
func (c *fakeClock) Advance(d time.Duration) {
	c.mu.Lock()
	c.now = c.now.Add(d)
	c.mu.Unlock()
}

// newTestTracker builds a Tracker driven by the given fake clock, using the
// settings the tests in this file rely on: a failure threshold of 2, a 5s base
// cooldown, a 5m maximum cooldown and a multiplier of 2.
func newTestTracker(clock *fakeClock) *Tracker {
	cfg := Config{
		Clock:            clock.Now,
		FailureThreshold: 2,
		Multiplier:       2,
		BaseCooldown:     5 * time.Second,
		MaxCooldown:      5 * time.Minute,
	}
	return NewTracker(cfg)
}

func TestSingleFailureStaysAvailable(t *testing.T) {
	clock := newFakeClock()
	tr := newTestTracker(clock)
	if backedOff := tr.ReportFailure("k"); backedOff {
		t.Error("first failure must not back off")
	}
	if !tr.Available("k") {
		t.Error("key should remain available after one failure")
	}
}

// TestThresholdTriggersBackoff checks that the second consecutive failure
// reports a backoff, makes the key unavailable, and sets the backoff deadline
// to the current fake time plus the 5s base cooldown.
func TestThresholdTriggersBackoff(t *testing.T) {
	clock := newFakeClock()
	tr := newTestTracker(clock)

	// The first failure only primes the counter; its result is not under test here.
	tr.ReportFailure("k")
	if !tr.ReportFailure("k") {
		t.Error("second consecutive failure should back off")
	}
	if tr.Available("k") {
		t.Error("key should be unavailable during backoff")
	}

	// The fake clock has not been advanced, so "now" is still the failure time.
	until := tr.BackedOffUntil("k")
	want := clock.Now().Add(5 * time.Second)
	if !until.Equal(want) {
		t.Errorf("BackedOffUntil = %v, want now+5s", until)
	}
}

// TestCooldownExpiryReadmits checks both sides of the cooldown boundary: the
// key is still unavailable 1ms before the 5s cooldown ends and available again
// exactly when it ends.
func TestCooldownExpiryReadmits(t *testing.T) {
	const cooldown = 5 * time.Second

	clock := newFakeClock()
	tr := newTestTracker(clock)

	// Two failures reach the threshold and start the first cooldown.
	for range 2 {
		tr.ReportFailure("k")
	}

	clock.Advance(cooldown - time.Millisecond)
	if tr.Available("k") {
		t.Error("still inside cooldown")
	}

	// Step the final millisecond so the clock sits exactly on the deadline.
	clock.Advance(time.Millisecond)
	if !tr.Available("k") {
		t.Error("cooldown expiry should re-admit the key")
	}
}

// TestExponentialCooldownWithCap drives the same key through eight consecutive
// backoffs and checks that each cooldown doubles from the 5s base until it is
// clamped at the 5m maximum.
func TestExponentialCooldownWithCap(t *testing.T) {
	clock := newFakeClock()
	tr := newTestTracker(clock)

	// Expected cooldown per backoff round: doubling from 5s, then held at the cap.
	schedule := []time.Duration{
		5 * time.Second,
		10 * time.Second,
		20 * time.Second,
		40 * time.Second,
		80 * time.Second,
		160 * time.Second,
		5 * time.Minute, // 320s would exceed the cap
		5 * time.Minute,
	}

	for round, expected := range schedule {
		// Two failures reach the threshold and trigger the next backoff.
		for range 2 {
			tr.ReportFailure("k")
		}

		deadline := tr.BackedOffUntil("k")
		got := deadline.Sub(clock.Now())
		if got != expected {
			t.Fatalf("backoff #%d cooldown = %v, want %v", round+1, got, expected)
		}

		// Let this cooldown run out before provoking the next one.
		clock.Advance(expected)
	}
}

// TestSuccessResetsEverything checks that ReportSuccess clears both the
// failure count and the cooldown growth: after a success, one failure leaves
// the key available and the next backoff starts again from the 5s base.
func TestSuccessResetsEverything(t *testing.T) {
	clock := newFakeClock()
	tr := newTestTracker(clock)

	// Run three backoff rounds, each time moving the clock to the reported
	// deadline, so the tracker has escalated before the reset.
	for round := 0; round < 3; round++ {
		tr.ReportFailure("k")
		tr.ReportFailure("k")
		remaining := tr.BackedOffUntil("k").Sub(clock.Now())
		clock.Advance(remaining)
	}

	// A success should wipe the accumulated state.
	tr.ReportSuccess("k")

	tr.ReportFailure("k")
	if available := tr.Available("k"); !available {
		t.Error("one failure after success must not back off")
	}

	tr.ReportFailure("k")
	got := tr.BackedOffUntil("k").Sub(clock.Now())
	if got != 5*time.Second {
		t.Errorf("post-reset cooldown = %v, want base 5s", got)
	}
}

func TestKeysAreIndependent(t *testing.T) {
	clock := newFakeClock()
	tr := newTestTracker(clock)
	tr.ReportFailure("a")
	tr.ReportFailure("a")
	if tr.Available("a") {
		t.Error("a should be backed off")
	}
	if !tr.Available("b") {
		t.Error("b must be unaffected")
	}
}

// TestDefaultsApplied checks that a Tracker built from a zero Config ends up
// with every package default filled in and a non-nil Clock.
func TestDefaultsApplied(t *testing.T) {
	tr := NewTracker(Config{})

	// Any single mismatch is enough to fail; the cases are tried in order.
	switch {
	case tr.cfg.FailureThreshold != DefaultFailureThreshold,
		tr.cfg.BaseCooldown != DefaultBaseCooldown,
		tr.cfg.MaxCooldown != DefaultMaxCooldown,
		tr.cfg.Multiplier != DefaultMultiplier,
		tr.cfg.Clock == nil:
		t.Errorf("defaults not applied: %+v", tr.cfg)
	}
}

func TestTrackerConcurrency(t *testing.T) {
	clock := newFakeClock()
	tr := newTestTracker(clock)
	var wg sync.WaitGroup
	for i := range 8 {
		wg.Add(1)
		go func(n int) {
			defer wg.Done()
			key := []string{"a", "b"}[n%2]
			for range 200 {
				tr.ReportFailure(key)
				tr.Available(key)
				tr.ReportSuccess(key)
			}
		}(i)
	}
	wg.Wait()
}