internal/router,server,shared: refactor auth, libs (#839)

- refactor shared http functionality into internal/shared/http.go
- remove stripping of the Authorization and x-api-key headers
- add Request Context middleware to internal/server
- add /ui and /metrics behind auth middleware

Fixes: #717
Updates: #834
2026-06-13 10:19:04 -07:00
parent 8c660dcb90
commit 62aea0e83d
18 changed files with 497 additions and 377 deletions
@@ -12,6 +12,7 @@ import (
 	"github.com/mostlygeek/llama-swap/internal/logmon"
 	"github.com/mostlygeek/llama-swap/internal/process"
 	"github.com/mostlygeek/llama-swap/internal/router/scheduler"
+	"github.com/mostlygeek/llama-swap/internal/shared"
 )

 type shutdownReq struct {
@@ -399,13 +400,13 @@ func (b *baseRouter) Shutdown(timeout time.Duration) error {

 func (b *baseRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
 	if b.shuttingDown.Load() {
-		SendError(w, req, fmt.Errorf("%s is shutting down", b.name))
+		shared.SendError(w, req, fmt.Errorf("%s is shutting down", b.name))
 		return
 	}

-	data, err := FetchContext(req, b.config)
+	data, err := shared.FetchContext(req, b.config)
 	if err != nil {
-		SendError(w, req, err)
+		shared.SendError(w, req, err)
 		return
 	}

@@ -424,7 +425,7 @@ func (b *baseRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
 	case <-req.Context().Done():
 		return
 	case <-b.shutdownCtx.Done():
-		SendError(w, req, fmt.Errorf("%s is shutting down", b.name))
+		shared.SendError(w, req, fmt.Errorf("%s is shutting down", b.name))
 		return
 	}

@@ -475,12 +476,12 @@ func (b *baseRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
 		return
 	case <-b.shutdownCtx.Done():
 		finishLoading()
-		SendError(w, req, fmt.Errorf("%s is shutting down", b.name))
+		shared.SendError(w, req, fmt.Errorf("%s is shutting down", b.name))
 		return
 	}

 	if resp.Err != nil {
-		SendError(w, req, resp.Err)
+		shared.SendError(w, req, resp.Err)
 		return
 	}
 	resp.HandleFunc(w, req)
@@ -226,69 +226,6 @@ func TestIsLoadingPath(t *testing.T) {
 	}
 }

-func TestExtractContext_Streaming_GET(t *testing.T) {
-	tests := []struct {
-		name          string
-		query         string
-		wantStreaming bool
-	}{
-		{"streaming true", "model=llama3&stream=true", true},
-		{"streaming false", "model=llama3&stream=false", false},
-		{"no stream param", "model=llama3", false},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			r, _ := http.NewRequest(http.MethodGet, "/?"+tt.query, nil)
-			got, err := ExtractContext(r)
-			if err != nil {
-				t.Fatalf("ExtractContext: %v", err)
-			}
-			if got.Streaming != tt.wantStreaming {
-				t.Errorf("Streaming: want %v, got %v", tt.wantStreaming, got.Streaming)
-			}
-		})
-	}
-}
-
-func TestExtractContext_Streaming_JSON(t *testing.T) {
-	tests := []struct {
-		name          string
-		body          string
-		wantStreaming bool
-	}{
-		{"streaming true", `{"model":"llama3","stream":true}`, true},
-		{"streaming false", `{"model":"llama3","stream":false}`, false},
-		{"no stream param", `{"model":"llama3"}`, false},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			r, _ := http.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(tt.body))
-			r.Header.Set("Content-Type", "application/json")
-			got, err := ExtractContext(r)
-			if err != nil {
-				t.Fatalf("ExtractContext: %v", err)
-			}
-			if got.Streaming != tt.wantStreaming {
-				t.Errorf("Streaming: want %v, got %v", tt.wantStreaming, got.Streaming)
-			}
-		})
-	}
-}
-
-func TestExtractContext_Streaming_URLEncodedForm(t *testing.T) {
-	r, _ := http.NewRequest(http.MethodPost, "/v1/audio/transcriptions", strings.NewReader("model=whisper-1&stream=true"))
-	r.Header.Set("Content-Type", "application/x-www-form-urlencoded")
-	got, err := ExtractContext(r)
-	if err != nil {
-		t.Fatalf("ExtractContext: %v", err)
-	}
-	if !got.Streaming {
-		t.Error("Streaming should be true")
-	}
-}
-
 func countSSEMessages(s string) int {
 	scanner := bufio.NewScanner(strings.NewReader(s))
 	count := 0
@@ -15,6 +15,7 @@ import (

 	"github.com/mostlygeek/llama-swap/internal/config"
 	"github.com/mostlygeek/llama-swap/internal/logmon"
+	"github.com/mostlygeek/llama-swap/internal/shared"
 )

 type peerMember struct {
@@ -146,22 +147,22 @@ func (r *Peer) Shutdown(timeout time.Duration) error {

 func (r *Peer) ServeHTTP(w http.ResponseWriter, req *http.Request) {
 	if r.shuttingDown.Load() {
-		SendError(w, req, fmt.Errorf("peer proxy is shutting down"))
+		shared.SendError(w, req, fmt.Errorf("peer proxy is shutting down"))
 		return
 	}
 	r.inflight.Add(1)
 	defer r.inflight.Done()

-	data, err := FetchContext(req, r.cfg)
+	data, err := shared.FetchContext(req, r.cfg)
 	if err != nil {
-		SendError(w, req, err)
+		shared.SendError(w, req, err)
 		return
 	}

 	pp, found := r.peers[data.ModelID]
 	if !found {
 		r.logger.Warnf("peer model not found: %s", data.ModelID)
-		SendError(w, req, ErrNoPeerModelFound)
+		shared.SendError(w, req, ErrNoPeerModelFound)
 		return
 	}

@@ -12,6 +12,7 @@ import (

 	"github.com/mostlygeek/llama-swap/internal/config"
 	"github.com/mostlygeek/llama-swap/internal/logmon"
+	"github.com/mostlygeek/llama-swap/internal/shared"
 )

 var testLogger = logmon.NewWriter(os.Stdout)
@@ -142,7 +143,7 @@ func TestPeer_ServeHTTP_Success(t *testing.T) {
 	}

 	req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
-	*req = *req.WithContext(SetContext(req.Context(), ReqContextData{Model: "test-model", ModelID: "test-model"}))
+	*req = *req.WithContext(shared.SetContext(req.Context(), shared.ReqContextData{Model: "test-model", ModelID: "test-model"}))
 	w := httptest.NewRecorder()

 	pr.ServeHTTP(w, req)
@@ -178,7 +179,7 @@ func TestPeer_ServeHTTP_PeerModelNotFound(t *testing.T) {
 	}

 	req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
-	*req = *req.WithContext(SetContext(req.Context(), ReqContextData{Model: "nonexistent-model", ModelID: "nonexistent-model"}))
+	*req = *req.WithContext(shared.SetContext(req.Context(), shared.ReqContextData{Model: "nonexistent-model", ModelID: "nonexistent-model"}))
 	w := httptest.NewRecorder()

 	pr.ServeHTTP(w, req)
@@ -212,7 +213,7 @@ func TestPeer_ServeHTTP_ApiKeyInjection(t *testing.T) {
 	}

 	req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
-	*req = *req.WithContext(SetContext(req.Context(), ReqContextData{Model: "test-model", ModelID: "test-model"}))
+	*req = *req.WithContext(shared.SetContext(req.Context(), shared.ReqContextData{Model: "test-model", ModelID: "test-model"}))
 	w := httptest.NewRecorder()

 	pr.ServeHTTP(w, req)
@@ -246,7 +247,7 @@ func TestPeer_ServeHTTP_NoApiKey(t *testing.T) {
 	}

 	req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
-	*req = *req.WithContext(SetContext(req.Context(), ReqContextData{Model: "test-model", ModelID: "test-model"}))
+	*req = *req.WithContext(shared.SetContext(req.Context(), shared.ReqContextData{Model: "test-model", ModelID: "test-model"}))
 	w := httptest.NewRecorder()

 	pr.ServeHTTP(w, req)
@@ -279,7 +280,7 @@ func TestPeer_ServeHTTP_HostHeaderSet(t *testing.T) {
 	}

 	req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
-	*req = *req.WithContext(SetContext(req.Context(), ReqContextData{Model: "test-model", ModelID: "test-model"}))
+	*req = *req.WithContext(shared.SetContext(req.Context(), shared.ReqContextData{Model: "test-model", ModelID: "test-model"}))
 	w := httptest.NewRecorder()

 	pr.ServeHTTP(w, req)
@@ -311,7 +312,7 @@ func TestPeer_ServeHTTP_SSEHeaderModification(t *testing.T) {
 	}

 	req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
-	*req = *req.WithContext(SetContext(req.Context(), ReqContextData{Model: "test-model", ModelID: "test-model"}))
+	*req = *req.WithContext(shared.SetContext(req.Context(), shared.ReqContextData{Model: "test-model", ModelID: "test-model"}))
 	w := httptest.NewRecorder()

 	pr.ServeHTTP(w, req)
@@ -347,7 +348,7 @@ func TestPeer_ServeHTTP_ShutdownRejectsNewRequests(t *testing.T) {
 	}

 	req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
-	*req = *req.WithContext(SetContext(req.Context(), ReqContextData{Model: "test-model", ModelID: "test-model"}))
+	*req = *req.WithContext(shared.SetContext(req.Context(), shared.ReqContextData{Model: "test-model", ModelID: "test-model"}))
 	w := httptest.NewRecorder()

 	pr.ServeHTTP(w, req)
@@ -385,7 +386,7 @@ func TestPeer_ServeHTTP_WaitsForInflightDuringShutdown(t *testing.T) {
 	}

 	req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
-	*req = *req.WithContext(SetContext(req.Context(), ReqContextData{Model: "test-model", ModelID: "test-model"}))
+	*req = *req.WithContext(shared.SetContext(req.Context(), shared.ReqContextData{Model: "test-model", ModelID: "test-model"}))

 	var wg sync.WaitGroup
 	wg.Add(1)
@@ -448,7 +449,7 @@ func TestPeer_ServeHTTP_ShutdownTimeoutCancelsInflight(t *testing.T) {
 	}

 	req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
-	*req = *req.WithContext(SetContext(req.Context(), ReqContextData{Model: "test-model", ModelID: "test-model"}))
+	*req = *req.WithContext(shared.SetContext(req.Context(), shared.ReqContextData{Model: "test-model", ModelID: "test-model"}))

 	var wg sync.WaitGroup
 	wg.Add(1)
@@ -551,7 +552,7 @@ func TestPeer_ServeHTTP_ContextOverridesBodyModel(t *testing.T) {
 	body := strings.NewReader(`{"model":"body-model","prompt":"hello"}`)
 	req := httptest.NewRequest("POST", "/v1/chat/completions", body)
 	req.Header.Set("Content-Type", "application/json")
-	*req = *req.WithContext(SetContext(req.Context(), ReqContextData{Model: "context-model", ModelID: "context-model"}))
+	*req = *req.WithContext(shared.SetContext(req.Context(), shared.ReqContextData{Model: "context-model", ModelID: "context-model"}))
 	w := httptest.NewRecorder()

 	pr.ServeHTTP(w, req)
@@ -1,40 +1,18 @@
 package router

 import (
-	"bytes"
-	"context"
-	"errors"
-	"fmt"
-	"io"
 	"net/http"
-	"strings"
 	"time"

-	"github.com/mostlygeek/llama-swap/internal/config"
 	"github.com/mostlygeek/llama-swap/internal/logmon"
 	"github.com/mostlygeek/llama-swap/internal/process"
-	"github.com/mostlygeek/llama-swap/internal/router/scheduler"
-	"github.com/tidwall/gjson"
+	"github.com/mostlygeek/llama-swap/internal/shared"
 )

-type contextkey struct {
-	name string
-}
-
-type ReqContextData struct {
-	Model            string
-	ModelID          string
-	Streaming        bool
-	SendLoadingState bool
-}
-
 var (
-	ErrNoModelInContext  = fmt.Errorf("no model in request context")
-	ErrNoRouterFound     = fmt.Errorf("no router found for model")
-	ErrNoPeerModelFound  = fmt.Errorf("peer model not found")
-	ErrNoLocalModelFound = scheduler.ErrModelNotFound
-
-	ContextKey = &contextkey{"context"}
+	ErrNoRouterFound     = shared.ErrNoRouterFound
+	ErrNoPeerModelFound  = shared.ErrNoPeerModelFound
+	ErrNoLocalModelFound = shared.ErrNoLocalModelFound
 )

 type Router interface {
@@ -72,129 +50,3 @@ type LocalRouter interface {
 	// model is not known to this router.
 	ProcessLogger(modelID string) (*logmon.Monitor, bool)
 }
-
-// FetchContext will attempt to get the model id from the context then
-// from the model body. If it extracts the model from the body it will
-// store the model in the context for downstream handlers. An error
-// will be returned when the model cannot be fetched from either location.
-func FetchContext(r *http.Request, cfg config.Config) (ReqContextData, error) {
-	data, ok := ReadContext(r.Context())
-	if ok {
-		return data, nil
-	}
-
-	if data, err := ExtractContext(r); err == nil {
-		realName, _ := cfg.RealModelName(data.Model)
-		if realName == "" {
-			realName = data.Model
-		}
-		data.ModelID = realName
-		if mc, ok := cfg.Models[realName]; ok {
-			data.SendLoadingState = mc.SendLoadingState != nil && *mc.SendLoadingState
-		}
-		*r = *r.WithContext(SetContext(r.Context(), data))
-		return data, nil
-	}
-
-	return ReqContextData{}, ErrNoModelInContext
-}
-
-func SetContext(ctx context.Context, data ReqContextData) context.Context {
-	return context.WithValue(ctx, ContextKey, data)
-}
-
-func ReadContext(ctx context.Context) (ReqContextData, bool) {
-	data, ok := ctx.Value(ContextKey).(ReqContextData)
-	return data, ok
-}
-
-// ExtractContext pulls the model name from an HTTP request without consuming the
-// body. For GET requests it reads the "model" query parameter. For POST
-// requests it inspects Content-Type and parses JSON, multipart/form-data, or
-// application/x-www-form-urlencoded bodies. The request body is always restored
-// before returning so downstream handlers — including reverse proxies that
-// forward raw bytes upstream — can still read it.
-func ExtractContext(r *http.Request) (ReqContextData, error) {
-	if r.Method == http.MethodGet {
-		if model := r.URL.Query().Get("model"); model != "" {
-			return ReqContextData{Model: model, Streaming: r.URL.Query().Get("stream") == "true"}, nil
-		}
-		return ReqContextData{}, fmt.Errorf("missing 'model' query parameter")
-	}
-
-	bodyBytes, err := io.ReadAll(r.Body)
-	if err != nil {
-		return ReqContextData{}, fmt.Errorf("error reading request body: %w", err)
-	}
-	defer func() {
-		r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
-	}()
-
-	contentType := r.Header.Get("Content-Type")
-
-	if strings.Contains(contentType, "application/json") {
-		model := gjson.GetBytes(bodyBytes, "model").String()
-		if model == "" {
-			return ReqContextData{}, fmt.Errorf("missing or empty 'model' in JSON body")
-		}
-		return ReqContextData{Model: model, Streaming: gjson.GetBytes(bodyBytes, "stream").Bool()}, nil
-	}
-
-	// Form parsers read from r.Body, so feed them a fresh reader over the
-	// buffered bytes. The deferred restore above will reset r.Body again
-	// after parsing.
-	r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
-	if strings.Contains(contentType, "multipart/form-data") {
-		if err := r.ParseMultipartForm(32 << 20); err != nil {
-			return ReqContextData{}, fmt.Errorf("error parsing multipart form: %w", err)
-		}
-	} else {
-		if err := r.ParseForm(); err != nil {
-			return ReqContextData{}, fmt.Errorf("error parsing form: %w", err)
-		}
-	}
-
-	if model := r.FormValue("model"); model != "" {
-		return ReqContextData{Model: model, Streaming: r.FormValue("stream") == "true"}, nil
-	}
-
-	return ReqContextData{}, fmt.Errorf("missing 'model' parameter")
-}
-
-func SendError(w http.ResponseWriter, r *http.Request, err error) {
-	switch {
-	case errors.Is(err, ErrNoModelInContext):
-		SendResponse(w, r, http.StatusNotFound, "no model id could be identified")
-	case errors.Is(err, ErrNoPeerModelFound):
-		SendResponse(w, r, http.StatusNotFound, "no peer found for requested model")
-	case errors.Is(err, ErrNoLocalModelFound):
-		SendResponse(w, r, http.StatusNotFound, "no local server found for requested model")
-	case errors.Is(err, ErrNoRouterFound):
-		SendResponse(w, r, http.StatusNotFound, "no router for requested model")
-	default:
-		SendResponse(w, r, http.StatusInternalServerError, fmt.Sprintf("unspecific error: %v", err))
-	}
-}
-
-// SendResponse detects what content type the client prefers and returns an error response in that format.
-func SendResponse(w http.ResponseWriter, r *http.Request, status int, message string) {
-	// Check Accept header for preferred response format
-	acceptHeader := r.Header.Get("Accept")
-	if strings.Contains(acceptHeader, "text/plain") {
-		w.Header().Set("Content-Type", "text/plain")
-		w.WriteHeader(status)
-		w.Write([]byte(fmt.Sprintf("llama-swap: %s", message)))
-		return
-	}
-
-	if strings.Contains(acceptHeader, "text/html") {
-		w.Header().Set("Content-Type", "text/html")
-		w.WriteHeader(status)
-		w.Write([]byte(fmt.Sprintf(`<html><body><h1>llama-swap</h1><p>%s</p></body></html>`, message)))
-		return
-	}
-
-	w.Header().Set("Content-Type", "application/json")
-	w.WriteHeader(status)
-	w.Write([]byte(fmt.Sprintf(`{"src":"llama-swap", "error": "%s"}`, message)))
-}
@@ -1,275 +0,0 @@
-package router
-
-import (
-	"bytes"
-	"context"
-	"io"
-	"mime/multipart"
-	"net/http"
-	"net/url"
-	"strings"
-	"testing"
-)
-
-func TestExtractContext_GET(t *testing.T) {
-	tests := []struct {
-		name      string
-		query     string
-		wantModel string
-		wantErr   bool
-	}{
-		{"model present", "model=llama3", "llama3", false},
-		{"model with slashes", "model=author/model-7b", "author/model-7b", false},
-		{"model missing", "", "", true},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			r, _ := http.NewRequest(http.MethodGet, "/?"+tt.query, nil)
-			got, err := ExtractContext(r)
-			if (err != nil) != tt.wantErr {
-				t.Fatalf("wantErr=%v got err=%v", tt.wantErr, err)
-			}
-			if got.Model != tt.wantModel {
-				t.Errorf("want %q got %q", tt.wantModel, got.Model)
-			}
-		})
-	}
-}
-
-func TestExtractContext_JSON(t *testing.T) {
-	tests := []struct {
-		name      string
-		body      string
-		wantModel string
-		wantErr   bool
-	}{
-		{"model present", `{"model":"llama3","stream":true}`, "llama3", false},
-		{"model with slashes", `{"model":"author/model-7b"}`, "author/model-7b", false},
-		{"model empty string", `{"model":""}`, "", true},
-		{"model key missing", `{"stream":true}`, "", true},
-		{"invalid json", `not-json`, "", true},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			r, _ := http.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(tt.body))
-			r.Header.Set("Content-Type", "application/json")
-			got, err := ExtractContext(r)
-			if (err != nil) != tt.wantErr {
-				t.Fatalf("wantErr=%v got err=%v", tt.wantErr, err)
-			}
-			if got.Model != tt.wantModel {
-				t.Errorf("want %q got %q", tt.wantModel, got.Model)
-			}
-		})
-	}
-}
-
-func TestExtractContext_URLEncodedForm(t *testing.T) {
-	tests := []struct {
-		name      string
-		formModel string
-		wantModel string
-		wantErr   bool
-	}{
-		{"model present", "whisper-1", "whisper-1", false},
-		{"model missing", "", "", true},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			form := url.Values{}
-			if tt.formModel != "" {
-				form.Set("model", tt.formModel)
-			}
-			r, _ := http.NewRequest(http.MethodPost, "/v1/audio/transcriptions", strings.NewReader(form.Encode()))
-			r.Header.Set("Content-Type", "application/x-www-form-urlencoded")
-			got, err := ExtractContext(r)
-			if (err != nil) != tt.wantErr {
-				t.Fatalf("wantErr=%v got err=%v", tt.wantErr, err)
-			}
-			if got.Model != tt.wantModel {
-				t.Errorf("want %q got %q", tt.wantModel, got.Model)
-			}
-		})
-	}
-}
-
-func TestExtractContext_MultipartForm(t *testing.T) {
-	tests := []struct {
-		name      string
-		formModel string
-		wantModel string
-		wantErr   bool
-	}{
-		{"model present", "whisper-1", "whisper-1", false},
-		{"model missing", "", "", true},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			var buf bytes.Buffer
-			mw := multipart.NewWriter(&buf)
-			if tt.formModel != "" {
-				fw, _ := mw.CreateFormField("model")
-				fw.Write([]byte(tt.formModel))
-			}
-			mw.Close()
-
-			r, _ := http.NewRequest(http.MethodPost, "/v1/audio/transcriptions", &buf)
-			r.Header.Set("Content-Type", mw.FormDataContentType())
-			got, err := ExtractContext(r)
-			if (err != nil) != tt.wantErr {
-				t.Fatalf("wantErr=%v got err=%v", tt.wantErr, err)
-			}
-			if got.Model != tt.wantModel {
-				t.Errorf("want %q got %q", tt.wantModel, got.Model)
-			}
-		})
-	}
-}
-
-func TestExtractContext_JSONBodyRestored(t *testing.T) {
-	body := `{"model":"llama3","stream":true}`
-	r, _ := http.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(body))
-	r.Header.Set("Content-Type", "application/json")
-
-	if _, err := ExtractContext(r); err != nil {
-		t.Fatalf("ExtractContext: %v", err)
-	}
-
-	remaining, err := io.ReadAll(r.Body)
-	if err != nil {
-		t.Fatalf("reading body after ExtractContext: %v", err)
-	}
-	if string(remaining) != body {
-		t.Errorf("body not restored: want %q got %q", body, string(remaining))
-	}
-}
-
-func TestExtractContext_MultipartBodyRestored(t *testing.T) {
-	var buf bytes.Buffer
-	mw := multipart.NewWriter(&buf)
-	fw, _ := mw.CreateFormField("model")
-	fw.Write([]byte("whisper-1"))
-	ff, _ := mw.CreateFormFile("file", "audio.wav")
-	ff.Write([]byte("fake-audio-bytes"))
-	mw.Close()
-
-	original := buf.Bytes()
-
-	r, _ := http.NewRequest(http.MethodPost, "/v1/audio/transcriptions", bytes.NewReader(original))
-	r.Header.Set("Content-Type", mw.FormDataContentType())
-
-	if _, err := ExtractContext(r); err != nil {
-		t.Fatalf("ExtractContext: %v", err)
-	}
-
-	remaining, err := io.ReadAll(r.Body)
-	if err != nil {
-		t.Fatalf("reading body after ExtractContext: %v", err)
-	}
-	if !bytes.Equal(remaining, original) {
-		t.Errorf("multipart body not restored: want %d bytes got %d bytes", len(original), len(remaining))
-	}
-}
-
-func TestExtractContext_URLEncodedBodyRestored(t *testing.T) {
-	body := "model=whisper-1&extra=value"
-	r, _ := http.NewRequest(http.MethodPost, "/v1/audio/transcriptions", strings.NewReader(body))
-	r.Header.Set("Content-Type", "application/x-www-form-urlencoded")
-
-	if _, err := ExtractContext(r); err != nil {
-		t.Fatalf("ExtractContext: %v", err)
-	}
-
-	remaining, err := io.ReadAll(r.Body)
-	if err != nil {
-		t.Fatalf("reading body after ExtractContext: %v", err)
-	}
-	if string(remaining) != body {
-		t.Errorf("url-encoded body not restored: want %q got %q", body, string(remaining))
-	}
-}
-
-func TestSetContext(t *testing.T) {
-	ctx := SetContext(context.Background(), ReqContextData{Model: "llama3", ModelID: "llama3"})
-	data, ok := ctx.Value(ContextKey).(ReqContextData)
-	if !ok {
-		t.Fatalf("ContextKey not set or wrong type")
-	}
-	if data.Model != "llama3" {
-		t.Errorf("want %q got %q", "llama3", data.Model)
-	}
-	if data.ModelID != "llama3" {
-		t.Errorf("want %q got %q", "llama3", data.ModelID)
-	}
-}
-
-func TestSetContext_WithAlias(t *testing.T) {
-	ctx := SetContext(context.Background(), ReqContextData{Model: "llama", ModelID: "llama3"})
-	data, _ := ctx.Value(ContextKey).(ReqContextData)
-	if data.Model != "llama" {
-		t.Errorf("want requested %q got %q", "llama", data.Model)
-	}
-	if data.ModelID != "llama3" {
-		t.Errorf("want real %q got %q", "llama3", data.ModelID)
-	}
-}
-
-func TestSetContext_DoesNotMutateParent(t *testing.T) {
-	parent := context.Background()
-	_ = SetContext(parent, ReqContextData{Model: "llama3", ModelID: "llama3"})
-	if v := parent.Value(ContextKey); v != nil {
-		t.Errorf("parent context was mutated: %v", v)
-	}
-}
-
-func TestReadContext(t *testing.T) {
-	tests := []struct {
-		name     string
-		ctx      context.Context
-		wantReq  string
-		wantReal string
-		wantBool bool
-	}{
-		{
-			name:     "model present, same name",
-			ctx:      SetContext(context.Background(), ReqContextData{Model: "llama3", ModelID: "llama3"}),
-			wantReq:  "llama3",
-			wantReal: "llama3",
-			wantBool: true,
-		},
-		{
-			name:     "model present, aliased",
-			ctx:      SetContext(context.Background(), ReqContextData{Model: "llama", ModelID: "llama3"}),
-			wantReq:  "llama",
-			wantReal: "llama3",
-			wantBool: true,
-		},
-		{
-			name:     "model absent",
-			ctx:      context.Background(),
-			wantReq:  "",
-			wantReal: "",
-			wantBool: false,
-		},
-		{
-			name:     "model is empty string",
-			ctx:      SetContext(context.Background(), ReqContextData{Model: "", ModelID: ""}),
-			wantReq:  "",
-			wantReal: "",
-			wantBool: true,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			gotData, ok := ReadContext(tt.ctx)
-			if gotData.Model != tt.wantReq || gotData.ModelID != tt.wantReal || ok != tt.wantBool {
-				t.Errorf("want (%q, %q, %v) got (%q, %q, %v)", tt.wantReq, tt.wantReal, tt.wantBool, gotData.Model, gotData.ModelID, ok)
-			}
-		})
-	}
-}
@@ -11,17 +11,17 @@ package scheduler

 import (
 	"context"
-	"fmt"
 	"net/http"
 	"time"

 	"github.com/mostlygeek/llama-swap/internal/logmon"
 	"github.com/mostlygeek/llama-swap/internal/process"
+	"github.com/mostlygeek/llama-swap/internal/shared"
 )

 // ErrModelNotFound is returned to callers whose model is not handled by this
-// router. The router package aliases it so SendError can match it.
-var ErrModelNotFound = fmt.Errorf("local model not found")
+// router. It is an alias for shared.ErrNoLocalModelFound.
+var ErrModelNotFound = shared.ErrNoLocalModelFound

 // Swapper is the eviction policy: it decides which running models must be
 // stopped before a target can serve. It is orthogonal to the scheduling