From 44e1501e8118588279e3874f3b8ee848548af808 Mon Sep 17 00:00:00 2001
From: Benson Wong <83972+mostlygeek@users.noreply.github.com>
Date: Tue, 9 Jun 2026 20:49:58 -0700
Subject: [PATCH] internal/process,server: fix unload regression (#828)

In v221 the shutdown behaviour was refactored to stop process groups
more reliably. This exposed an existing bug: the unload API was passing
a timeout of 0, which had snuck in during the big refactor.

- set a default timeout of 10 seconds for unloads called via the API
- add logging around the shutdown routine

updates: #807, #808
fixes: #827
---
 internal/process/process_command.go | 7 ++++++-
 internal/server/api.go              | 5 ++++-
 internal/server/apigroup.go         | 4 ++--
 3 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/internal/process/process_command.go b/internal/process/process_command.go
index 1390b6b0..c55762da 100644
--- a/internal/process/process_command.go
+++ b/internal/process/process_command.go
@@ -585,7 +585,12 @@ func (p *ProcessCommand) killProcess(cmd *exec.Cmd, cancel context.CancelFunc, c
 	// cannot block the run() goroutine; the gracefulTimeout + Process.Kill
 	// path below still guarantees teardown.
 	if cmd != nil {
-		go func() { _ = p.sendStopSignal(cmd) }()
+		go func() {
+			p.proxyLogger.Debugf("[%s] sending stop signal with timeout %v", p.id, gracefulTimeout)
+			if err := p.sendStopSignal(cmd); err != nil {
+				p.proxyLogger.Warnf("[%s] stop signal failed: %v", p.id, err)
+			}
+		}()
 	}
 
 	timer := time.NewTimer(gracefulTimeout)
diff --git a/internal/server/api.go b/internal/server/api.go
index b1df34f6..d3723c63 100644
--- a/internal/server/api.go
+++ b/internal/server/api.go
@@ -13,6 +13,9 @@ import (
 	"github.com/mostlygeek/llama-swap/internal/shared"
 )
 
+// apiUnloadTimeout is used by the API endpoints to stop processes
+const apiUnloadTimeout = 10 * time.Second
+
 // modelRecord is one entry in the OpenAI-compatible /v1/models listing.
 type modelRecord struct {
 	ID          string         `json:"id"`
@@ -94,7 +97,7 @@ type runningModel struct {
 // handleUnload stops every running local process. Peer models are remote and
 // unaffected.
 func (s *Server) handleUnload(w http.ResponseWriter, r *http.Request) {
-	s.local.Unload(0)
+	s.local.Unload(apiUnloadTimeout)
 	w.WriteHeader(http.StatusOK)
 	w.Write([]byte("OK"))
 }
diff --git a/internal/server/apigroup.go b/internal/server/apigroup.go
index e71b29cd..1e3131bb 100644
--- a/internal/server/apigroup.go
+++ b/internal/server/apigroup.go
@@ -66,7 +66,7 @@ func (s *Server) modelStatus() []apiModel {
 
 // handleAPIUnloadAll stops every running local process.
 func (s *Server) handleAPIUnloadAll(w http.ResponseWriter, r *http.Request) {
-	s.local.Unload(0)
+	s.local.Unload(apiUnloadTimeout)
 	w.Header().Set("Content-Type", "application/json")
 	json.NewEncoder(w).Encode(map[string]string{"msg": "ok"})
 }
@@ -83,7 +83,7 @@ func (s *Server) handleAPIUnloadModel(w http.ResponseWriter, r *http.Request) {
 		router.SendResponse(w, r, http.StatusNotFound, "no local server found for requested model")
 		return
 	}
-	s.local.Unload(0, realName)
+	s.local.Unload(apiUnloadTimeout, realName)
 	w.WriteHeader(http.StatusOK)
 	w.Write([]byte("OK"))
 }