c3f0d43e6e
I pointed Opus 4.7 (high effort) at proxy.ProcessGroup to identify any race conditions in the swapping code. It found a race condition where there is a small window in the fast path for routing a request to a loaded model. There is a very small window where: - model M1 is loaded and ready for requests - a request, R1, for M1 comes in - a request, R2, for M2 comes in almost immediately after - R1 acquires the lock, sees M1 is loaded (fast path), releases the lock `[race window]` and the request is ready to be forwarded - the race window occurs between the release of the lock and the request being forwarded - the lock is released so requests can be handled concurrently - R2 comes in within the `[race window]`, acquires the lock, triggers a model swap to M2. stopping M1 - R1 is forwarded to a model that is unloaded or in the process of shutting down creating an error response In deployed systems the race window is very small and doesn't happen often. However with #635 and PR #656 I though this deserved a bit more attention. It is not concluded that this race is the cause of #635 but the race is likely to happen more often under sustained or high load. AI Note: Opus 4.7 x-high effort took about an hour to write the original patch. With the pattern discovered the fix to matrix.go was very quick. GLM 5.1 using the previous established patterns was able to easily write the fix for ProcessGroup.StopProcesses(). Supersedes: #656 Updates: #277, #635
194 lines
4.8 KiB
Go
194 lines
4.8 KiB
Go
package proxy
|
|
|
|
import (
|
|
"fmt"
|
|
"net/http"
|
|
"slices"
|
|
"sync"
|
|
|
|
"github.com/mostlygeek/llama-swap/proxy/config"
|
|
)
|
|
|
|
type ProcessGroup struct {
|
|
sync.Mutex
|
|
|
|
config config.Config
|
|
id string
|
|
swap bool
|
|
exclusive bool
|
|
persistent bool
|
|
|
|
proxyLogger *LogMonitor
|
|
upstreamLogger *LogMonitor
|
|
|
|
// map of current processes
|
|
processes map[string]*Process
|
|
lastUsedProcess string
|
|
|
|
// inflight tracks fast-path requests (requests for the already-selected
|
|
// model in a swap group). Fast-path requests Add(1) while holding pg.Lock
|
|
// and Done() on completion; a concurrent swap request calls inflight.Wait()
|
|
// under pg.Lock before stopping the current process. Without this tracking,
|
|
// a fast-path request that has released pg.Lock but has not yet called
|
|
// Process.inFlightRequests.Add(1) races with Stop()'s Wait() and can be
|
|
// killed mid-request.
|
|
inflight sync.WaitGroup
|
|
|
|
// testDelayFastPath is a test-only hook that, when non-nil, is invoked in
|
|
// the fast path after pg.Lock is released but before the request is
|
|
// dispatched to Process.ProxyRequest. Tests use it to park a fast-path
|
|
// request at the exact race window to deterministically reproduce the
|
|
// fast-path vs swap race.
|
|
testDelayFastPath func()
|
|
}
|
|
|
|
func NewProcessGroup(id string, config config.Config, proxyLogger *LogMonitor, upstreamLogger *LogMonitor) *ProcessGroup {
|
|
groupConfig, ok := config.Groups[id]
|
|
if !ok {
|
|
panic("Unable to find configuration for group id: " + id)
|
|
}
|
|
|
|
pg := &ProcessGroup{
|
|
id: id,
|
|
config: config,
|
|
swap: groupConfig.Swap,
|
|
exclusive: groupConfig.Exclusive,
|
|
persistent: groupConfig.Persistent,
|
|
proxyLogger: proxyLogger,
|
|
upstreamLogger: upstreamLogger,
|
|
processes: make(map[string]*Process),
|
|
}
|
|
|
|
// Create a Process for each member in the group
|
|
for _, modelID := range groupConfig.Members {
|
|
modelConfig, modelID, _ := pg.config.FindConfig(modelID)
|
|
processLogger := NewLogMonitorWriter(upstreamLogger)
|
|
process := NewProcess(modelID, pg.config.HealthCheckTimeout, modelConfig, processLogger, pg.proxyLogger)
|
|
pg.processes[modelID] = process
|
|
}
|
|
|
|
return pg
|
|
}
|
|
|
|
// ProxyRequest proxies a request to the specified model
|
|
func (pg *ProcessGroup) ProxyRequest(modelID string, writer http.ResponseWriter, request *http.Request) error {
|
|
if !pg.HasMember(modelID) {
|
|
return fmt.Errorf("model %s not part of group %s", modelID, pg.id)
|
|
}
|
|
|
|
if pg.swap {
|
|
pg.Lock()
|
|
if pg.lastUsedProcess != modelID {
|
|
|
|
// Wait for in-flight fast-path requests to drain before stopping
|
|
// the previous process. Without this, a fast-path request that has
|
|
// released pg.Lock but has not yet incremented
|
|
// Process.inFlightRequests races with Stop() and can be killed
|
|
// mid-request.
|
|
pg.inflight.Wait()
|
|
|
|
// is there something already running?
|
|
if pg.lastUsedProcess != "" {
|
|
pg.processes[pg.lastUsedProcess].Stop()
|
|
}
|
|
|
|
// wait for the request to the new model to be fully handled
|
|
// and prevent race conditions see issue #277
|
|
pg.processes[modelID].ProxyRequest(writer, request)
|
|
pg.lastUsedProcess = modelID
|
|
|
|
// short circuit and exit
|
|
pg.Unlock()
|
|
return nil
|
|
}
|
|
|
|
// Fast path: register this request in inflight before releasing
|
|
// pg.Lock so a concurrent swap will wait for it to complete.
|
|
pg.inflight.Add(1)
|
|
defer pg.inflight.Done()
|
|
pg.Unlock()
|
|
|
|
if pg.testDelayFastPath != nil {
|
|
pg.testDelayFastPath()
|
|
}
|
|
}
|
|
|
|
pg.processes[modelID].ProxyRequest(writer, request)
|
|
return nil
|
|
}
|
|
|
|
func (pg *ProcessGroup) HasMember(modelName string) bool {
|
|
return slices.Contains(pg.config.Groups[pg.id].Members, modelName)
|
|
}
|
|
|
|
func (pg *ProcessGroup) GetMember(modelName string) (*Process, bool) {
|
|
if pg.HasMember(modelName) {
|
|
return pg.processes[modelName], true
|
|
}
|
|
return nil, false
|
|
}
|
|
|
|
func (pg *ProcessGroup) StopProcess(modelID string, strategy StopStrategy) error {
|
|
pg.Lock()
|
|
|
|
process, exists := pg.processes[modelID]
|
|
if !exists {
|
|
pg.Unlock()
|
|
return fmt.Errorf("process not found for %s", modelID)
|
|
}
|
|
|
|
if pg.lastUsedProcess == modelID {
|
|
pg.lastUsedProcess = ""
|
|
}
|
|
pg.Unlock()
|
|
|
|
switch strategy {
|
|
case StopImmediately:
|
|
process.StopImmediately()
|
|
default:
|
|
process.Stop()
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (pg *ProcessGroup) StopProcesses(strategy StopStrategy) {
|
|
pg.Lock()
|
|
defer pg.Unlock()
|
|
|
|
if strategy != StopImmediately {
|
|
pg.inflight.Wait()
|
|
}
|
|
|
|
if len(pg.processes) == 0 {
|
|
return
|
|
}
|
|
|
|
// stop Processes in parallel
|
|
var wg sync.WaitGroup
|
|
for _, process := range pg.processes {
|
|
wg.Add(1)
|
|
go func(process *Process) {
|
|
defer wg.Done()
|
|
switch strategy {
|
|
case StopImmediately:
|
|
process.StopImmediately()
|
|
default:
|
|
process.Stop()
|
|
}
|
|
}(process)
|
|
}
|
|
wg.Wait()
|
|
}
|
|
|
|
func (pg *ProcessGroup) Shutdown() {
|
|
var wg sync.WaitGroup
|
|
for _, process := range pg.processes {
|
|
wg.Add(1)
|
|
go func(process *Process) {
|
|
defer wg.Done()
|
|
process.Shutdown()
|
|
}(process)
|
|
}
|
|
wg.Wait()
|
|
}
|