internal/process,server: fix unload regression (#828)

In v221 the shutdown behaviour was refactored to make stopping a process
group more reliable. This exposed an existing bug: the unload API passed
a timeout of 0, which had snuck in during the big refactor.

- set a default timeout of 10 seconds for unloads called via the API
- add logging around shutdown routine

updates: #807, #808
fixes: #827
This commit is contained in:
Benson Wong
2026-06-09 20:49:58 -07:00
committed by GitHub
parent 46cea36bc2
commit 44e1501e81
3 changed files with 12 additions and 4 deletions
+4 -1
View File
@@ -13,6 +13,9 @@ import (
"github.com/mostlygeek/llama-swap/internal/shared"
)
// apiUnloadTimeout is used by the API endpoints to stop processes
const apiUnloadTimeout = 10 * time.Second
// modelRecord is one entry in the OpenAI-compatible /v1/models listing.
type modelRecord struct {
ID string `json:"id"`
@@ -94,7 +97,7 @@ type runningModel struct {
// handleUnload stops every running local process. Peer models are remote and
// unaffected.
func (s *Server) handleUnload(w http.ResponseWriter, r *http.Request) {
s.local.Unload(0)
s.local.Unload(apiUnloadTimeout)
w.WriteHeader(http.StatusOK)
w.Write([]byte("OK"))
}
+2 -2
View File
@@ -66,7 +66,7 @@ func (s *Server) modelStatus() []apiModel {
// handleAPIUnloadAll stops every running local process.
func (s *Server) handleAPIUnloadAll(w http.ResponseWriter, r *http.Request) {
s.local.Unload(0)
s.local.Unload(apiUnloadTimeout)
w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(map[string]string{"msg": "ok"})
}
@@ -83,7 +83,7 @@ func (s *Server) handleAPIUnloadModel(w http.ResponseWriter, r *http.Request) {
router.SendResponse(w, r, http.StatusNotFound, "no local server found for requested model")
return
}
s.local.Unload(0, realName)
s.local.Unload(apiUnloadTimeout, realName)
w.WriteHeader(http.StatusOK)
w.Write([]byte("OK"))
}