ui: improve manual model load and cancel (#847)

- When a model is manually loaded, show a cancel button and a queued
status
- Implement cancellation in scheduler.Scheduler interface and FIFO
scheduler
- Add cache bust query parameter to bypass browser cache

Fixes #844
This commit is contained in:
Benson Wong
2026-06-14 13:38:10 -07:00
committed by GitHub
parent 92b90447e8
commit ed77385d08
7 changed files with 193 additions and 6 deletions
+14
View File
@@ -54,6 +54,7 @@ type baseRouter struct {
procCancel context.CancelFunc
handlerCh chan scheduler.HandlerReq
cancelCh chan scheduler.HandlerReq
shutdownCh chan shutdownReq
unloadCh chan unloadReq
swapDoneCh chan scheduler.SwapDone
@@ -88,6 +89,7 @@ func newBaseRouter(
procCtx: procCtx,
procCancel: procCancel,
handlerCh: make(chan scheduler.HandlerReq),
cancelCh: make(chan scheduler.HandlerReq),
shutdownCh: make(chan shutdownReq),
unloadCh: make(chan unloadReq),
swapDoneCh: make(chan scheduler.SwapDone),
@@ -117,6 +119,10 @@ func (b *baseRouter) run() {
b.schedule.OnRequest(req)
b.notifyProcessed()
case req := <-b.cancelCh:
b.schedule.OnCancel(req)
b.notifyProcessed()
case req := <-b.unloadCh:
b.schedule.OnUnload(req.targets, req.timeout)
close(req.respond)
@@ -473,6 +479,14 @@ func (b *baseRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
finishLoading()
case <-req.Context().Done():
finishLoading()
// Notify the scheduler so it can prune this request from its queue
// and swap waiters. Without this, a queued request whose client left
// would sit in the scheduler until drainQueue eventually starts a
// wasted model load for it.
select {
case b.cancelCh <- hr:
case <-b.shutdownCtx.Done():
}
return
case <-b.shutdownCtx.Done():
finishLoading()