ui: improve manual model load and cancel (#847)
- When a model is manually loaded, show a cancel button and a queued status - Implement cancellation in the scheduler.Scheduler interface and FIFO scheduler - Add a cache-bust query parameter to bypass the browser cache Fixes #844
This commit is contained in:
@@ -54,6 +54,7 @@ type baseRouter struct {
|
|||||||
procCancel context.CancelFunc
|
procCancel context.CancelFunc
|
||||||
|
|
||||||
handlerCh chan scheduler.HandlerReq
|
handlerCh chan scheduler.HandlerReq
|
||||||
|
cancelCh chan scheduler.HandlerReq
|
||||||
shutdownCh chan shutdownReq
|
shutdownCh chan shutdownReq
|
||||||
unloadCh chan unloadReq
|
unloadCh chan unloadReq
|
||||||
swapDoneCh chan scheduler.SwapDone
|
swapDoneCh chan scheduler.SwapDone
|
||||||
@@ -88,6 +89,7 @@ func newBaseRouter(
|
|||||||
procCtx: procCtx,
|
procCtx: procCtx,
|
||||||
procCancel: procCancel,
|
procCancel: procCancel,
|
||||||
handlerCh: make(chan scheduler.HandlerReq),
|
handlerCh: make(chan scheduler.HandlerReq),
|
||||||
|
cancelCh: make(chan scheduler.HandlerReq),
|
||||||
shutdownCh: make(chan shutdownReq),
|
shutdownCh: make(chan shutdownReq),
|
||||||
unloadCh: make(chan unloadReq),
|
unloadCh: make(chan unloadReq),
|
||||||
swapDoneCh: make(chan scheduler.SwapDone),
|
swapDoneCh: make(chan scheduler.SwapDone),
|
||||||
@@ -117,6 +119,10 @@ func (b *baseRouter) run() {
|
|||||||
b.schedule.OnRequest(req)
|
b.schedule.OnRequest(req)
|
||||||
b.notifyProcessed()
|
b.notifyProcessed()
|
||||||
|
|
||||||
|
case req := <-b.cancelCh:
|
||||||
|
b.schedule.OnCancel(req)
|
||||||
|
b.notifyProcessed()
|
||||||
|
|
||||||
case req := <-b.unloadCh:
|
case req := <-b.unloadCh:
|
||||||
b.schedule.OnUnload(req.targets, req.timeout)
|
b.schedule.OnUnload(req.targets, req.timeout)
|
||||||
close(req.respond)
|
close(req.respond)
|
||||||
@@ -473,6 +479,14 @@ func (b *baseRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
|
|||||||
finishLoading()
|
finishLoading()
|
||||||
case <-req.Context().Done():
|
case <-req.Context().Done():
|
||||||
finishLoading()
|
finishLoading()
|
||||||
|
// Notify the scheduler so it can prune this request from its queue
|
||||||
|
// and swap waiters. Without this, a queued request whose client left
|
||||||
|
// would sit in the scheduler until drainQueue eventually starts a
|
||||||
|
// wasted model load for it.
|
||||||
|
select {
|
||||||
|
case b.cancelCh <- hr:
|
||||||
|
case <-b.shutdownCtx.Done():
|
||||||
|
}
|
||||||
return
|
return
|
||||||
case <-b.shutdownCtx.Done():
|
case <-b.shutdownCtx.Done():
|
||||||
finishLoading()
|
finishLoading()
|
||||||
|
|||||||
@@ -116,6 +116,46 @@ func (s *FIFO) OnRequest(req HandlerReq) {
|
|||||||
s.startSwap(req, evict, running)
|
s.startSwap(req, evict, running)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// OnCancel removes a request whose client has disconnected from the queue and
|
||||||
|
// from every in-flight swap's waiters. If the request was the sole waiter of an
|
||||||
|
// active swap, the swap goroutine is left to complete on its own — OnSwapDone
|
||||||
|
// will find no waiters and simply clean up. This prevents drainQueue from ever
|
||||||
|
// starting a model load for a caller that is no longer there.
|
||||||
|
func (s *FIFO) OnCancel(req HandlerReq) {
|
||||||
|
removed := false
|
||||||
|
|
||||||
|
// Prune from the queue.
|
||||||
|
if len(s.queued) > 0 {
|
||||||
|
kept := s.queued[:0]
|
||||||
|
for _, q := range s.queued {
|
||||||
|
if q.Respond == req.Respond {
|
||||||
|
removed = true
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
kept = append(kept, q)
|
||||||
|
}
|
||||||
|
s.queued = kept
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prune from any active swap's waiters.
|
||||||
|
for _, sw := range s.active {
|
||||||
|
filtered := sw.waiters[:0]
|
||||||
|
for _, w := range sw.waiters {
|
||||||
|
if w.Respond == req.Respond {
|
||||||
|
removed = true
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
filtered = append(filtered, w)
|
||||||
|
}
|
||||||
|
sw.waiters = filtered
|
||||||
|
}
|
||||||
|
|
||||||
|
if removed {
|
||||||
|
s.logger.Debugf("%s: cancelled request for model %s pruned from scheduler", s.name, req.Model)
|
||||||
|
broadcastQueuePositions(s.queued)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// OnSwapDone fans the result out to every waiter that joined this swap, removes
|
// OnSwapDone fans the result out to every waiter that joined this swap, removes
|
||||||
// the swap from the active map, then walks the queue once, promoting any items
|
// the swap from the active map, then walks the queue once, promoting any items
|
||||||
// that no longer collide with the remaining active set. FIFO order is preserved:
|
// that no longer collide with the remaining active set. FIFO order is preserved:
|
||||||
|
|||||||
@@ -143,6 +143,15 @@ func newFIFO(planner Swapper, eff Effects) *FIFO {
|
|||||||
|
|
||||||
func req(model string) HandlerReq { return HandlerReq{Model: model} }
|
func req(model string) HandlerReq { return HandlerReq{Model: model} }
|
||||||
|
|
||||||
|
// reqCh creates a HandlerReq with a unique Respond channel so OnCancel can
|
||||||
|
// identify it among queued requests and swap waiters.
|
||||||
|
func reqCh(model string) HandlerReq {
|
||||||
|
return HandlerReq{
|
||||||
|
Model: model,
|
||||||
|
Respond: make(chan HandlerResp, 1),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestFIFO_FastPath(t *testing.T) {
|
func TestFIFO_FastPath(t *testing.T) {
|
||||||
eff := newFakeEffects()
|
eff := newFakeEffects()
|
||||||
eff.states["a"] = process.StateReady
|
eff.states["a"] = process.StateReady
|
||||||
@@ -535,3 +544,90 @@ func TestFIFO_PriorityQueueOrder(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestFIFO_OnCancel_QueuedRequest verifies that cancelling a queued request
|
||||||
|
// prevents drainQueue from ever starting a model load for it. Without OnCancel
|
||||||
|
// the dead request would sit in the queue until a drain triggers a wasted swap.
|
||||||
|
func TestFIFO_OnCancel_QueuedRequest(t *testing.T) {
|
||||||
|
eff := newFakeEffects()
|
||||||
|
eff.states["a"] = process.StateStopped
|
||||||
|
eff.states["b"] = process.StateStopped
|
||||||
|
// b evicts a, so a request for b queues while a is loading.
|
||||||
|
s := newFIFO(&stubPlanner{evict: map[string][]string{"b": {"a"}}}, eff)
|
||||||
|
|
||||||
|
s.OnRequest(req("a")) // StartSwap(a)
|
||||||
|
|
||||||
|
cancelledReq := reqCh("b")
|
||||||
|
s.OnRequest(cancelledReq) // queued (collides with a's in-flight swap)
|
||||||
|
if len(s.queued) != 1 {
|
||||||
|
t.Fatalf("queue len=%d want 1 before cancel", len(s.queued))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Client disconnects.
|
||||||
|
s.OnCancel(cancelledReq)
|
||||||
|
|
||||||
|
if len(s.queued) != 0 {
|
||||||
|
t.Fatalf("queue len=%d want 0 after cancel", len(s.queued))
|
||||||
|
}
|
||||||
|
|
||||||
|
// a's swap finishes; drainQueue runs but b is gone — no swap for b.
|
||||||
|
eff.states["a"] = process.StateReady
|
||||||
|
s.OnSwapDone(SwapDone{ModelID: "a"})
|
||||||
|
|
||||||
|
if got := eff.startsFor("b"); got != 0 {
|
||||||
|
t.Errorf("StartSwap(b)=%d want 0 (cancelled request should not trigger a load)", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestFIFO_OnCancel_SwapWaiter verifies that cancelling a request that joined an
|
||||||
|
// in-flight swap removes it from the waiter list. When the swap completes, the
|
||||||
|
// cancelled waiter receives no grant and does not bump the in-flight count.
|
||||||
|
func TestFIFO_OnCancel_SwapWaiter(t *testing.T) {
|
||||||
|
eff := newFakeEffects()
|
||||||
|
eff.states["a"] = process.StateStopped
|
||||||
|
s := newFIFO(&stubPlanner{}, eff)
|
||||||
|
|
||||||
|
liveReq := reqCh("a")
|
||||||
|
cancelledReq := reqCh("a")
|
||||||
|
s.OnRequest(liveReq) // starts swap
|
||||||
|
s.OnRequest(cancelledReq) // joins
|
||||||
|
|
||||||
|
if sw := s.active["a"]; len(sw.waiters) != 2 {
|
||||||
|
t.Fatalf("waiters=%d want 2", len(sw.waiters))
|
||||||
|
}
|
||||||
|
|
||||||
|
s.OnCancel(cancelledReq)
|
||||||
|
|
||||||
|
if sw := s.active["a"]; len(sw.waiters) != 1 {
|
||||||
|
t.Fatalf("waiters=%d want 1 after cancel", len(sw.waiters))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Swap finishes: only the live waiter is granted.
|
||||||
|
eff.states["a"] = process.StateReady
|
||||||
|
s.OnSwapDone(SwapDone{ModelID: "a"})
|
||||||
|
|
||||||
|
if got := eff.served("a"); got != 1 {
|
||||||
|
t.Errorf("served(a)=%d want 1 (only the non-cancelled waiter)", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestFIFO_OnCancel_NotPresent is a no-op: cancelling a request that was already
|
||||||
|
// granted (and is no longer queued or waiting) must not affect anything.
|
||||||
|
func TestFIFO_OnCancel_NotPresent(t *testing.T) {
|
||||||
|
eff := newFakeEffects()
|
||||||
|
eff.states["a"] = process.StateReady
|
||||||
|
s := newFIFO(&stubPlanner{}, eff)
|
||||||
|
|
||||||
|
r := reqCh("a")
|
||||||
|
s.OnRequest(r) // fast-path served immediately
|
||||||
|
|
||||||
|
// Cancel after grant — should be a harmless no-op.
|
||||||
|
s.OnCancel(r)
|
||||||
|
|
||||||
|
if got := eff.served("a"); got != 1 {
|
||||||
|
t.Errorf("served(a)=%d want 1 (cancel of granted request is a no-op)", got)
|
||||||
|
}
|
||||||
|
if len(s.queued) != 0 {
|
||||||
|
t.Errorf("queue should be empty, len=%d", len(s.queued))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -47,6 +47,11 @@ type Swapper interface {
|
|||||||
type Scheduler interface {
|
type Scheduler interface {
|
||||||
// OnRequest handles one incoming ServeHTTP request.
|
// OnRequest handles one incoming ServeHTTP request.
|
||||||
OnRequest(req HandlerReq)
|
OnRequest(req HandlerReq)
|
||||||
|
// OnCancel handles a request whose client has disconnected before it was
|
||||||
|
// granted. The scheduler must remove the request from its queue and from
|
||||||
|
// any in-flight swap's waiters so it never triggers a model load or grant
|
||||||
|
// for a caller that is no longer there.
|
||||||
|
OnCancel(req HandlerReq)
|
||||||
// OnSwapDone handles a swap goroutine reporting completion.
|
// OnSwapDone handles a swap goroutine reporting completion.
|
||||||
OnSwapDone(ev SwapDone)
|
OnSwapDone(ev SwapDone)
|
||||||
// OnServeDone handles a tracked ServeHTTP finishing (in-flight decrement).
|
// OnServeDone handles a tracked ServeHTTP finishing (in-flight decrement).
|
||||||
|
|||||||
@@ -6,6 +6,8 @@
|
|||||||
|
|
||||||
let isUnloading = $state(false);
|
let isUnloading = $state(false);
|
||||||
let menuOpen = $state(false);
|
let menuOpen = $state(false);
|
||||||
|
let pendingLoads = $state<Record<string, boolean>>({});
|
||||||
|
const loadControllers = new Map<string, AbortController>();
|
||||||
|
|
||||||
const showUnlistedStore = persistentStore<boolean>("showUnlisted", true);
|
const showUnlistedStore = persistentStore<boolean>("showUnlisted", true);
|
||||||
const showIdorNameStore = persistentStore<"id" | "name">("showIdorName", "id");
|
const showIdorNameStore = persistentStore<"id" | "name">("showIdorName", "id");
|
||||||
@@ -42,6 +44,25 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Starts a manual load for `modelId` and tracks it so the UI can offer a
 * cancel action while the request is outstanding.
 *
 * A second call for a model that already has a pending load is ignored. The
 * pending flag and the stored AbortController are always cleared when the
 * request settles, whether it succeeded, failed, or was aborted.
 */
async function handleLoadModel(modelId: string): Promise<void> {
  if (pendingLoads[modelId]) {
    return;
  }

  const abortController = new AbortController();
  pendingLoads[modelId] = true;
  loadControllers.set(modelId, abortController);

  try {
    await loadModel(modelId, abortController.signal);
  } catch (err) {
    console.error(err);
  } finally {
    delete pendingLoads[modelId];
    loadControllers.delete(modelId);
  }
}
|
||||||
|
|
||||||
|
/**
 * Aborts the in-flight manual load for `modelId`, if one is being tracked.
 * Does nothing when no controller is registered for that model.
 */
function cancelLoad(modelId: string): void {
  const abortController = loadControllers.get(modelId);
  if (abortController) {
    abortController.abort();
  }
}
|
||||||
|
|
||||||
function toggleIdorName(): void {
|
function toggleIdorName(): void {
|
||||||
showIdorNameStore.update((prev) => (prev === "name" ? "id" : "name"));
|
showIdorNameStore.update((prev) => (prev === "name" ? "id" : "name"));
|
||||||
}
|
}
|
||||||
@@ -170,14 +191,20 @@
|
|||||||
{/if}
|
{/if}
|
||||||
</td>
|
</td>
|
||||||
<td class="w-12">
|
<td class="w-12">
|
||||||
{#if model.state === "stopped"}
|
{#if model.state === "stopped" && pendingLoads[model.id]}
|
||||||
<button class="btn btn--sm" onclick={() => loadModel(model.id)}>Load</button>
|
<button class="btn btn--sm" onclick={() => cancelLoad(model.id)}>Cancel</button>
|
||||||
|
{:else if model.state === "stopped"}
|
||||||
|
<button class="btn btn--sm" onclick={() => handleLoadModel(model.id)}>Load</button>
|
||||||
{:else}
|
{:else}
|
||||||
<button class="btn btn--sm" onclick={() => unloadSingleModel(model.id)} disabled={model.state !== "ready"}>Unload</button>
|
<button class="btn btn--sm" onclick={() => unloadSingleModel(model.id)} disabled={model.state !== "ready"}>Unload</button>
|
||||||
{/if}
|
{/if}
|
||||||
</td>
|
</td>
|
||||||
<td class="w-20">
|
<td class="w-20">
|
||||||
<span class="w-16 text-center status status--{model.state}">{model.state}</span>
|
{#if model.state === "stopped" && pendingLoads[model.id]}
|
||||||
|
<span class="w-16 text-center status status--queued">queued</span>
|
||||||
|
{:else}
|
||||||
|
<span class="w-16 text-center status status--{model.state}">{model.state}</span>
|
||||||
|
{/if}
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
{/each}
|
{/each}
|
||||||
|
|||||||
@@ -139,7 +139,8 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
.status--starting,
|
.status--starting,
|
||||||
.status--stopping {
|
.status--stopping,
|
||||||
|
.status--queued {
|
||||||
@apply bg-warning/10 text-warning;
|
@apply bg-warning/10 text-warning;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -176,15 +176,19 @@ export async function unloadSingleModel(model: string): Promise<void> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function loadModel(model: string): Promise<void> {
|
export async function loadModel(model: string, signal?: AbortSignal): Promise<void> {
|
||||||
try {
|
try {
|
||||||
const response = await fetch(`/upstream/${model}/`, {
|
const response = await fetch(`/upstream/${model}/?_=${Date.now()}`, {
|
||||||
method: "GET",
|
method: "GET",
|
||||||
|
signal,
|
||||||
});
|
});
|
||||||
if (!response.ok) {
|
if (!response.ok) {
|
||||||
throw new Error(`Failed to load model: ${response.status}`);
|
throw new Error(`Failed to load model: ${response.status}`);
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
if (error instanceof DOMException && error.name === "AbortError") {
|
||||||
|
return;
|
||||||
|
}
|
||||||
console.error("Failed to load model:", error);
|
console.error("Failed to load model:", error);
|
||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user