proxy: add swap matrix with solver-based model swapping (#646)

Add a new swap matrix to supersede groups for running concurrent models.
The matrix uses a solver that picks the lowest-cost evictions to make a
requested model available. This simple approach, along with a very basic
DSL grammar, can enable very complex swapping scenarios.

- add DSL parser for set expressions with & (AND), | (OR), (), +ref
- add MatrixConfig structs, validation, and topological sort for +ref
- add MatrixSolver with cost-minimizing swap decisions
- add Matrix runtime integrating solver with Process lifecycle
- integrate matrix into ProxyManager with if-branches at all endpoints
- update config.example.yaml and config-schema.json with matrix schema
- config enforces groups XOR matrix (cannot use both)

fixes #643
This commit is contained in:
Benson Wong
2026-04-14 21:55:30 -07:00
committed by GitHub
parent 40e39f7a86
commit 35193f82f1
13 changed files with 2080 additions and 186 deletions
+34 -28
View File
@@ -55,27 +55,28 @@ func (pm *ProxyManager) getModelStatus() []Model {
// Iterate over sorted keys
for _, modelID := range modelIDs {
// Get process state
processGroup := pm.findGroupByModelName(modelID)
state := "unknown"
if processGroup != nil {
process := processGroup.processes[modelID]
if process != nil {
var stateStr string
switch process.CurrentState() {
case StateReady:
stateStr = "ready"
case StateStarting:
stateStr = "starting"
case StateStopping:
stateStr = "stopping"
case StateShutdown:
stateStr = "shutdown"
case StateStopped:
stateStr = "stopped"
default:
stateStr = "unknown"
}
state = stateStr
var process *Process
if pm.matrix != nil {
process, _ = pm.matrix.GetProcess(modelID)
} else {
processGroup := pm.findGroupByModelName(modelID)
if processGroup != nil {
process = processGroup.processes[modelID]
}
}
if process != nil {
switch process.CurrentState() {
case StateReady:
state = "ready"
case StateStarting:
state = "starting"
case StateStopping:
state = "stopping"
case StateShutdown:
state = "shutdown"
case StateStopped:
state = "stopped"
}
}
models = append(models, Model{
@@ -254,18 +255,23 @@ func (pm *ProxyManager) apiUnloadSingleModelHandler(c *gin.Context) {
return
}
processGroup := pm.findGroupByModelName(realModelName)
if processGroup == nil {
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("process group not found for model %s", requestedModel))
return
var stopErr error
if pm.matrix != nil {
stopErr = pm.matrix.StopProcess(realModelName, StopImmediately)
} else {
processGroup := pm.findGroupByModelName(realModelName)
if processGroup == nil {
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("process group not found for model %s", requestedModel))
return
}
stopErr = processGroup.StopProcess(realModelName, StopImmediately)
}
if err := processGroup.StopProcess(realModelName, StopImmediately); err != nil {
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error stopping process: %s", err.Error()))
if stopErr != nil {
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error stopping process: %s", stopErr.Error()))
return
} else {
c.String(http.StatusOK, "OK")
}
c.String(http.StatusOK, "OK")
}
func (pm *ProxyManager) apiGetVersion(c *gin.Context) {