From 44e1501e8118588279e3874f3b8ee848548af808 Mon Sep 17 00:00:00 2001 From: Benson Wong <83972+mostlygeek@users.noreply.github.com> Date: Tue, 9 Jun 2026 20:49:58 -0700 Subject: [PATCH] internal/process,server: fix unload regression (#828) In v221 the shutdown path was refactored to stop a process group more reliably. This exposed an existing bug: the unload API was passing a timeout of 0, which had snuck in during the big refactor. - set a default timeout of 10 seconds for unloads called via the API - add logging around shutdown routine updates: #807, #808 fixes: #827 --- internal/process/process_command.go | 7 ++++++- internal/server/api.go | 5 ++++- internal/server/apigroup.go | 4 ++-- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/internal/process/process_command.go b/internal/process/process_command.go index 1390b6b0..c55762da 100644 --- a/internal/process/process_command.go +++ b/internal/process/process_command.go @@ -585,7 +585,12 @@ func (p *ProcessCommand) killProcess(cmd *exec.Cmd, cancel context.CancelFunc, c // cannot block the run() goroutine; the gracefulTimeout + Process.Kill // path below still guarantees teardown. if cmd != nil { - go func() { _ = p.sendStopSignal(cmd) }() + go func() { + p.proxyLogger.Debugf("[%s] sending stop signal with timeout %v", p.id, gracefulTimeout) + if err := p.sendStopSignal(cmd); err != nil { + p.proxyLogger.Warnf("[%s] stop signal failed: %v", p.id, err) + } + }() } timer := time.NewTimer(gracefulTimeout) diff --git a/internal/server/api.go b/internal/server/api.go index b1df34f6..d3723c63 100644 --- a/internal/server/api.go +++ b/internal/server/api.go @@ -13,6 +13,9 @@ import ( "github.com/mostlygeek/llama-swap/internal/shared" ) +// apiUnloadTimeout is used by the API endpoints to stop processes +const apiUnloadTimeout = 10 * time.Second + // modelRecord is one entry in the OpenAI-compatible /v1/models listing. 
type modelRecord struct { ID string `json:"id"` @@ -94,7 +97,7 @@ type runningModel struct { // handleUnload stops every running local process. Peer models are remote and // unaffected. func (s *Server) handleUnload(w http.ResponseWriter, r *http.Request) { - s.local.Unload(0) + s.local.Unload(apiUnloadTimeout) w.WriteHeader(http.StatusOK) w.Write([]byte("OK")) } diff --git a/internal/server/apigroup.go b/internal/server/apigroup.go index e71b29cd..1e3131bb 100644 --- a/internal/server/apigroup.go +++ b/internal/server/apigroup.go @@ -66,7 +66,7 @@ func (s *Server) modelStatus() []apiModel { // handleAPIUnloadAll stops every running local process. func (s *Server) handleAPIUnloadAll(w http.ResponseWriter, r *http.Request) { - s.local.Unload(0) + s.local.Unload(apiUnloadTimeout) w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(map[string]string{"msg": "ok"}) } @@ -83,7 +83,7 @@ func (s *Server) handleAPIUnloadModel(w http.ResponseWriter, r *http.Request) { router.SendResponse(w, r, http.StatusNotFound, "no local server found for requested model") return } - s.local.Unload(0, realName) + s.local.Unload(apiUnloadTimeout, realName) w.WriteHeader(http.StatusOK) w.Write([]byte("OK")) }