ui: improve manual model load and cancel (#847)

- When a model is manually loaded show a cancel buttton and a queued status - Implement cancellation in scheduler.Scheduler interface and FIFO scheduler - Add cache bust query parameter to bypass browser cache Fixes #844
2026-06-14 13:38:10 -07:00
parent 92b90447e8
commit ed77385d08
7 changed files with 193 additions and 6 deletions
@@ -54,6 +54,7 @@ type baseRouter struct {
 	procCancel context.CancelFunc

 	handlerCh   chan scheduler.HandlerReq
+	cancelCh    chan scheduler.HandlerReq
 	shutdownCh  chan shutdownReq
 	unloadCh    chan unloadReq
 	swapDoneCh  chan scheduler.SwapDone
@@ -88,6 +89,7 @@ func newBaseRouter(
 		procCtx:     procCtx,
 		procCancel:  procCancel,
 		handlerCh:   make(chan scheduler.HandlerReq),
+		cancelCh:    make(chan scheduler.HandlerReq),
 		shutdownCh:  make(chan shutdownReq),
 		unloadCh:    make(chan unloadReq),
 		swapDoneCh:  make(chan scheduler.SwapDone),
@@ -117,6 +119,10 @@ func (b *baseRouter) run() {
 			b.schedule.OnRequest(req)
 			b.notifyProcessed()

+		case req := <-b.cancelCh:
+			b.schedule.OnCancel(req)
+			b.notifyProcessed()
+
 		case req := <-b.unloadCh:
 			b.schedule.OnUnload(req.targets, req.timeout)
 			close(req.respond)
@@ -473,6 +479,14 @@ func (b *baseRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
 		finishLoading()
 	case <-req.Context().Done():
 		finishLoading()
+		// Notify the scheduler so it can prune this request from its queue
+		// and swap waiters. Without this, a queued request whose client left
+		// would sit in the scheduler until drainQueue eventually starts a
+		// wasted model load for it.
+		select {
+		case b.cancelCh <- hr:
+		case <-b.shutdownCtx.Done():
+		}
 		return
 	case <-b.shutdownCtx.Done():
 		finishLoading()
@@ -116,6 +116,46 @@ func (s *FIFO) OnRequest(req HandlerReq) {
 	s.startSwap(req, evict, running)
 }

+// OnCancel removes a request whose client has disconnected from the queue and
+// from every in-flight swap's waiters. If the request was the sole waiter of an
+// active swap, the swap goroutine is left to complete on its own — OnSwapDone
+// will find no waiters and simply clean up. This prevents drainQueue from ever
+// starting a model load for a caller that is no longer there.
+func (s *FIFO) OnCancel(req HandlerReq) {
+	removed := false
+
+	// Prune from the queue.
+	if len(s.queued) > 0 {
+		kept := s.queued[:0]
+		for _, q := range s.queued {
+			if q.Respond == req.Respond {
+				removed = true
+				continue
+			}
+			kept = append(kept, q)
+		}
+		s.queued = kept
+	}
+
+	// Prune from any active swap's waiters.
+	for _, sw := range s.active {
+		filtered := sw.waiters[:0]
+		for _, w := range sw.waiters {
+			if w.Respond == req.Respond {
+				removed = true
+				continue
+			}
+			filtered = append(filtered, w)
+		}
+		sw.waiters = filtered
+	}
+
+	if removed {
+		s.logger.Debugf("%s: cancelled request for model %s pruned from scheduler", s.name, req.Model)
+		broadcastQueuePositions(s.queued)
+	}
+}
+
 // OnSwapDone fans the result out to every waiter that joined this swap, removes
 // the swap from the active map, then walks the queue once, promoting any items
 // that no longer collide with the remaining active set. FIFO order is preserved:
@@ -143,6 +143,15 @@ func newFIFO(planner Swapper, eff Effects) *FIFO {

 func req(model string) HandlerReq { return HandlerReq{Model: model} }

+// reqCh creates a HandlerReq with a unique Respond channel so OnCancel can
+// identify it among queued requests and swap waiters.
+func reqCh(model string) HandlerReq {
+	return HandlerReq{
+		Model:   model,
+		Respond: make(chan HandlerResp, 1),
+	}
+}
+
 func TestFIFO_FastPath(t *testing.T) {
 	eff := newFakeEffects()
 	eff.states["a"] = process.StateReady
@@ -535,3 +544,90 @@ func TestFIFO_PriorityQueueOrder(t *testing.T) {
 		}
 	}
 }
+
+// TestFIFO_OnCancel_QueuedRequest verifies that cancelling a queued request
+// prevents drainQueue from ever starting a model load for it. Without OnCancel
+// the dead request would sit in the queue until a drain triggers a wasted swap.
+func TestFIFO_OnCancel_QueuedRequest(t *testing.T) {
+	eff := newFakeEffects()
+	eff.states["a"] = process.StateStopped
+	eff.states["b"] = process.StateStopped
+	// b evicts a, so a request for b queues while a is loading.
+	s := newFIFO(&stubPlanner{evict: map[string][]string{"b": {"a"}}}, eff)
+
+	s.OnRequest(req("a")) // StartSwap(a)
+
+	cancelledReq := reqCh("b")
+	s.OnRequest(cancelledReq) // queued (collides with a's in-flight swap)
+	if len(s.queued) != 1 {
+		t.Fatalf("queue len=%d want 1 before cancel", len(s.queued))
+	}
+
+	// Client disconnects.
+	s.OnCancel(cancelledReq)
+
+	if len(s.queued) != 0 {
+		t.Fatalf("queue len=%d want 0 after cancel", len(s.queued))
+	}
+
+	// a's swap finishes; drainQueue runs but b is gone — no swap for b.
+	eff.states["a"] = process.StateReady
+	s.OnSwapDone(SwapDone{ModelID: "a"})
+
+	if got := eff.startsFor("b"); got != 0 {
+		t.Errorf("StartSwap(b)=%d want 0 (cancelled request should not trigger a load)", got)
+	}
+}
+
+// TestFIFO_OnCancel_SwapWaiter verifies that cancelling a request that joined an
+// in-flight swap removes it from the waiter list. When the swap completes, the
+// cancelled waiter receives no grant and does not bump the in-flight count.
+func TestFIFO_OnCancel_SwapWaiter(t *testing.T) {
+	eff := newFakeEffects()
+	eff.states["a"] = process.StateStopped
+	s := newFIFO(&stubPlanner{}, eff)
+
+	liveReq := reqCh("a")
+	cancelledReq := reqCh("a")
+	s.OnRequest(liveReq)      // starts swap
+	s.OnRequest(cancelledReq) // joins
+
+	if sw := s.active["a"]; len(sw.waiters) != 2 {
+		t.Fatalf("waiters=%d want 2", len(sw.waiters))
+	}
+
+	s.OnCancel(cancelledReq)
+
+	if sw := s.active["a"]; len(sw.waiters) != 1 {
+		t.Fatalf("waiters=%d want 1 after cancel", len(sw.waiters))
+	}
+
+	// Swap finishes: only the live waiter is granted.
+	eff.states["a"] = process.StateReady
+	s.OnSwapDone(SwapDone{ModelID: "a"})
+
+	if got := eff.served("a"); got != 1 {
+		t.Errorf("served(a)=%d want 1 (only the non-cancelled waiter)", got)
+	}
+}
+
+// TestFIFO_OnCancel_NotPresent is a no-op: cancelling a request that was already
+// granted (and is no longer queued or waiting) must not affect anything.
+func TestFIFO_OnCancel_NotPresent(t *testing.T) {
+	eff := newFakeEffects()
+	eff.states["a"] = process.StateReady
+	s := newFIFO(&stubPlanner{}, eff)
+
+	r := reqCh("a")
+	s.OnRequest(r) // fast-path served immediately
+
+	// Cancel after grant — should be a harmless no-op.
+	s.OnCancel(r)
+
+	if got := eff.served("a"); got != 1 {
+		t.Errorf("served(a)=%d want 1 (cancel of granted request is a no-op)", got)
+	}
+	if len(s.queued) != 0 {
+		t.Errorf("queue should be empty, len=%d", len(s.queued))
+	}
+}
@@ -47,6 +47,11 @@ type Swapper interface {
 type Scheduler interface {
 	// OnRequest handles one incoming ServeHTTP request.
 	OnRequest(req HandlerReq)
+	// OnCancel handles a request whose client has disconnected before it was
+	// granted. The scheduler must remove the request from its queue and from
+	// any in-flight swap's waiters so it never triggers a model load or grant
+	// for a caller that is no longer there.
+	OnCancel(req HandlerReq)
 	// OnSwapDone handles a swap goroutine reporting completion.
 	OnSwapDone(ev SwapDone)
 	// OnServeDone handles a tracked ServeHTTP finishing (in-flight decrement).
@@ -6,6 +6,8 @@

  let isUnloading = $state(false);
  let menuOpen = $state(false);
+  let pendingLoads = $state<Record<string, boolean>>({});
+  const loadControllers = new Map<string, AbortController>();

  const showUnlistedStore = persistentStore<boolean>("showUnlisted", true);
  const showIdorNameStore = persistentStore<"id" | "name">("showIdorName", "id");
@@ -42,6 +44,25 @@
    }
  }

+  async function handleLoadModel(modelId: string): Promise<void> {
+    if (pendingLoads[modelId]) return;
+    const controller = new AbortController();
+    loadControllers.set(modelId, controller);
+    pendingLoads[modelId] = true;
+    try {
+      await loadModel(modelId, controller.signal);
+    } catch (e) {
+      console.error(e);
+    } finally {
+      loadControllers.delete(modelId);
+      delete pendingLoads[modelId];
+    }
+  }
+
+  function cancelLoad(modelId: string): void {
+    loadControllers.get(modelId)?.abort();
+  }
+
  function toggleIdorName(): void {
    showIdorNameStore.update((prev) => (prev === "name" ? "id" : "name"));
  }
@@ -170,14 +191,20 @@
              {/if}
            </td>
            <td class="w-12">
-              {#if model.state === "stopped"}
-                <button class="btn btn--sm" onclick={() => loadModel(model.id)}>Load</button>
+              {#if model.state === "stopped" && pendingLoads[model.id]}
+                <button class="btn btn--sm" onclick={() => cancelLoad(model.id)}>Cancel</button>
+              {:else if model.state === "stopped"}
+                <button class="btn btn--sm" onclick={() => handleLoadModel(model.id)}>Load</button>
              {:else}
                <button class="btn btn--sm" onclick={() => unloadSingleModel(model.id)} disabled={model.state !== "ready"}>Unload</button>
              {/if}
            </td>
            <td class="w-20">
-              <span class="w-16 text-center status status--{model.state}">{model.state}</span>
+              {#if model.state === "stopped" && pendingLoads[model.id]}
+                <span class="w-16 text-center status status--queued">queued</span>
+              {:else}
+                <span class="w-16 text-center status status--{model.state}">{model.state}</span>
+              {/if}
            </td>
          </tr>
        {/each}
@@ -139,7 +139,8 @@
  }

  .status--starting,
-  .status--stopping {
+  .status--stopping,
+  .status--queued {
    @apply bg-warning/10 text-warning;
  }

@@ -176,15 +176,19 @@ export async function unloadSingleModel(model: string): Promise<void> {
  }
 }

-export async function loadModel(model: string): Promise<void> {
+export async function loadModel(model: string, signal?: AbortSignal): Promise<void> {
  try {
-    const response = await fetch(`/upstream/${model}/`, {
+    const response = await fetch(`/upstream/${model}/?_=${Date.now()}`, {
      method: "GET",
+      signal,
    });
    if (!response.ok) {
      throw new Error(`Failed to load model: ${response.status}`);
    }
  } catch (error) {
+    if (error instanceof DOMException && error.name === "AbortError") {
+      return;
+    }
    console.error("Failed to load model:", error);
    throw error;
  }