internal/process,server: fix unload regression (#828)
In v221 the shutdown behaviour was refactored to make stopping a process group more reliable. This exposed an existing bug: the unload API was passing a timeout of 0, which had snuck in during the big refactor.

- set a default timeout of 10 seconds for unloads called via the API
- add logging around the shutdown routine

updates: #807, #808
fixes: #827
This commit is contained in:
@@ -585,7 +585,12 @@ func (p *ProcessCommand) killProcess(cmd *exec.Cmd, cancel context.CancelFunc, c
|
|||||||
// cannot block the run() goroutine; the gracefulTimeout + Process.Kill
|
// cannot block the run() goroutine; the gracefulTimeout + Process.Kill
|
||||||
// path below still guarantees teardown.
|
// path below still guarantees teardown.
|
||||||
if cmd != nil {
|
if cmd != nil {
|
||||||
go func() { _ = p.sendStopSignal(cmd) }()
|
go func() {
|
||||||
|
p.proxyLogger.Debugf("[%s] sending stop signal with timeout %v", p.id, gracefulTimeout)
|
||||||
|
if err := p.sendStopSignal(cmd); err != nil {
|
||||||
|
p.proxyLogger.Warnf("[%s] stop signal failed: %v", p.id, err)
|
||||||
|
}
|
||||||
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
timer := time.NewTimer(gracefulTimeout)
|
timer := time.NewTimer(gracefulTimeout)
|
||||||
|
|||||||
@@ -13,6 +13,9 @@ import (
|
|||||||
"github.com/mostlygeek/llama-swap/internal/shared"
|
"github.com/mostlygeek/llama-swap/internal/shared"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// apiUnloadTimeout is used by the API endpoints to stop processes
|
||||||
|
const apiUnloadTimeout = 10 * time.Second
|
||||||
|
|
||||||
// modelRecord is one entry in the OpenAI-compatible /v1/models listing.
|
// modelRecord is one entry in the OpenAI-compatible /v1/models listing.
|
||||||
type modelRecord struct {
|
type modelRecord struct {
|
||||||
ID string `json:"id"`
|
ID string `json:"id"`
|
||||||
@@ -94,7 +97,7 @@ type runningModel struct {
|
|||||||
// handleUnload stops every running local process. Peer models are remote and
|
// handleUnload stops every running local process. Peer models are remote and
|
||||||
// unaffected.
|
// unaffected.
|
||||||
func (s *Server) handleUnload(w http.ResponseWriter, r *http.Request) {
|
func (s *Server) handleUnload(w http.ResponseWriter, r *http.Request) {
|
||||||
s.local.Unload(0)
|
s.local.Unload(apiUnloadTimeout)
|
||||||
w.WriteHeader(http.StatusOK)
|
w.WriteHeader(http.StatusOK)
|
||||||
w.Write([]byte("OK"))
|
w.Write([]byte("OK"))
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -66,7 +66,7 @@ func (s *Server) modelStatus() []apiModel {
|
|||||||
|
|
||||||
// handleAPIUnloadAll stops every running local process.
|
// handleAPIUnloadAll stops every running local process.
|
||||||
func (s *Server) handleAPIUnloadAll(w http.ResponseWriter, r *http.Request) {
|
func (s *Server) handleAPIUnloadAll(w http.ResponseWriter, r *http.Request) {
|
||||||
s.local.Unload(0)
|
s.local.Unload(apiUnloadTimeout)
|
||||||
w.Header().Set("Content-Type", "application/json")
|
w.Header().Set("Content-Type", "application/json")
|
||||||
json.NewEncoder(w).Encode(map[string]string{"msg": "ok"})
|
json.NewEncoder(w).Encode(map[string]string{"msg": "ok"})
|
||||||
}
|
}
|
||||||
@@ -83,7 +83,7 @@ func (s *Server) handleAPIUnloadModel(w http.ResponseWriter, r *http.Request) {
|
|||||||
router.SendResponse(w, r, http.StatusNotFound, "no local server found for requested model")
|
router.SendResponse(w, r, http.StatusNotFound, "no local server found for requested model")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
s.local.Unload(0, realName)
|
s.local.Unload(apiUnloadTimeout, realName)
|
||||||
w.WriteHeader(http.StatusOK)
|
w.WriteHeader(http.StatusOK)
|
||||||
w.Write([]byte("OK"))
|
w.Write([]byte("OK"))
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user