fix(failover): preserve manual bench against automatic cooldown downgrade
CI / Build, Test & Lint (push) Successful in 10m48s
CI / Build, Test & Lint (push) Successful in 10m48s
recordTransientFailure and benchNow previously set manual=false and reset until to now+cooldown without checking for an existing manual bench. When the best-effort all-benched failover path retried a model that an operator had manually benched via BenchModel, a subsequent failure downgraded manual=true -> false and could shorten the operator's window to the short automatic cooldown. Both functions now read the existing state first: if it is an active manual bench (manual && now.Before(until)), they bump consecutiveFails but keep manual=true and leave the operator's until unchanged. Non-manual or expired-manual state still gets the automatic cooldown. Adds TestFailover_ManualBenchSurvivesAutomaticDowngrade covering the no-prior, prior-auto, active-manual, and expired-manual cases. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -127,7 +127,14 @@ func (h *modelHealth) recordTransientFailure(key string, cooldown time.Duration,
|
||||
h.mu.Lock()
|
||||
defer h.mu.Unlock()
|
||||
st := h.disabled[key]
|
||||
// Preserve an active manual bench: automatic logic must not clear the
|
||||
// operator's manual flag or shorten their window. Still count the failure.
|
||||
manualActive := st.manual && now.Before(st.until)
|
||||
st.consecutiveFails++
|
||||
if manualActive {
|
||||
h.disabled[key] = st
|
||||
return true, st.until
|
||||
}
|
||||
if st.consecutiveFails >= benchThreshold {
|
||||
st.until = now.Add(cooldown)
|
||||
st.manual = false
|
||||
@@ -143,7 +150,14 @@ func (h *modelHealth) benchNow(key string, cooldown time.Duration, now time.Time
|
||||
h.mu.Lock()
|
||||
defer h.mu.Unlock()
|
||||
st := h.disabled[key]
|
||||
// Preserve an active manual bench: automatic logic must not clear the
|
||||
// operator's manual flag or shorten their window. Still count the failure.
|
||||
manualActive := st.manual && now.Before(st.until)
|
||||
st.consecutiveFails++
|
||||
if manualActive {
|
||||
h.disabled[key] = st
|
||||
return st.until
|
||||
}
|
||||
st.until = now.Add(cooldown)
|
||||
st.manual = false
|
||||
h.disabled[key] = st
|
||||
|
||||
Reference in New Issue
Block a user