From 231e62291c9a0c989c7a565adf684878736484a4 Mon Sep 17 00:00:00 2001 From: Benson Wong Date: Mon, 20 Apr 2026 00:21:11 -0700 Subject: [PATCH] proxy: fix matrix race and process stop bug (#677) - matrix.go: change logic to treat any proxy.Process not in StateStopped or StateShutdown as running - process.StopImmediately(), and Stop() which calls it, had a subtle bug where only the StateReady -> StateStopping transition was handled; StateStarting -> StateStopping was ignored completely. fix: #670 --- proxy/matrix.go | 4 ++-- proxy/process.go | 15 +++++++++++---- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/proxy/matrix.go b/proxy/matrix.go index 70fc7738..16ed202a 100644 --- a/proxy/matrix.go +++ b/proxy/matrix.go @@ -297,7 +297,7 @@ func (m *Matrix) Shutdown() { wg.Wait() } -// RunningModels returns model names currently in StateReady. +// RunningModels returns model names currently in an active (non-stopped) state. func (m *Matrix) RunningModels() []string { m.Lock() defer m.Unlock() @@ -308,7 +308,7 @@ func (m *Matrix) RunningModels() []string { func (m *Matrix) runningModels() []string { var running []string for id, process := range m.processes { - if process.CurrentState() == StateReady { + if process.CurrentState() != StateStopped && process.CurrentState() != StateShutdown { running = append(running, id) } } diff --git a/proxy/process.go b/proxy/process.go index 1025d18e..06bfb577 100644 --- a/proxy/process.go +++ b/proxy/process.go @@ -432,7 +432,10 @@ func (p *Process) start() error { // Stop will wait for inflight requests to complete before stopping the process. 
func (p *Process) Stop() { + + // guard to prevent multiple goroutines from stopping if !isValidTransition(p.CurrentState(), StateStopping) { + p.proxyLogger.Debugf("<%s> Stop() suppressing invalid transition from %s to StateStopping", p.ID, p.CurrentState()) return } @@ -445,13 +448,17 @@ func (p *Process) Stop() { // StopImmediately will transition the process to the stopping state and stop the process with a SIGTERM. // If the process does not stop within the specified timeout, it will be forcefully stopped with a SIGKILL. func (p *Process) StopImmediately() { - if !isValidTransition(p.CurrentState(), StateStopping) { + + // guard to prevent multiple goroutines from stopping the process + enterState := p.CurrentState() + if !isValidTransition(enterState, StateStopping) { + p.proxyLogger.Debugf("<%s> StopImmediate() suppressing invalid transition from %s to StateStopping", p.ID, p.CurrentState()) return } - p.proxyLogger.Debugf("<%s> Stopping process, current state: %s", p.ID, p.CurrentState()) - if curState, err := p.swapState(StateReady, StateStopping); err != nil { - p.proxyLogger.Infof("<%s> Stop() Ready -> StateStopping err: %v, current state: %v", p.ID, err, curState) + p.proxyLogger.Debugf("<%s> Stopping process, enter state: %s", p.ID, enterState) + if curState, err := p.swapState(enterState, StateStopping); err != nil { + p.proxyLogger.Infof("<%s> Stop() %s -> StateStopping err: %v, current state: %v", p.ID, enterState, err, curState) return }