internal/router,server,shared: refactor auth, libs (#839)

- refactor shared http functionality into internal/shared/http.go
- remove stripping of Authorization and x-api-key
- add Request Context middleware to internal/server
- add /ui and /metrics behind auth middleware, fixes #717

Fix #717
Updates: #834
This commit is contained in:
Benson Wong
2026-06-13 10:19:04 -07:00
committed by GitHub
parent 8c660dcb90
commit 62aea0e83d
18 changed files with 497 additions and 377 deletions
+4 -5
View File
@@ -9,7 +9,6 @@ import (
"github.com/mostlygeek/llama-swap/internal/config"
"github.com/mostlygeek/llama-swap/internal/event"
"github.com/mostlygeek/llama-swap/internal/router"
"github.com/mostlygeek/llama-swap/internal/shared"
)
@@ -163,7 +162,7 @@ func (s *Server) startPreload() {
if err != nil {
continue
}
req = req.WithContext(router.SetContext(req.Context(), router.ReqContextData{Model: modelID, ModelID: modelID}))
req = req.WithContext(shared.SetContext(req.Context(), shared.ReqContextData{Model: modelID, ModelID: modelID}))
dw := &discardResponseWriter{status: http.StatusOK}
s.local.ServeHTTP(dw, req)
@@ -208,7 +207,7 @@ func (s *Server) handleUpstream(w http.ResponseWriter, r *http.Request) {
searchName, modelID, remainingPath, found := findModelInPath(s.cfg, "/"+upstreamPath)
if !found {
router.SendResponse(w, r, http.StatusNotFound, "model not found")
shared.SendResponse(w, r, http.StatusNotFound, "model not found")
return
}
@@ -230,7 +229,7 @@ func (s *Server) handleUpstream(w http.ResponseWriter, r *http.Request) {
// Strip the /upstream/<model> prefix before forwarding.
r.URL.Path = remainingPath
// Pin the resolved model so the router skips body/query extraction.
*r = *r.WithContext(router.SetContext(r.Context(), router.ReqContextData{Model: searchName, ModelID: modelID}))
*r = *r.WithContext(shared.SetContext(r.Context(), shared.ReqContextData{Model: searchName, ModelID: modelID}))
switch {
case s.local.Handles(modelID):
@@ -238,7 +237,7 @@ func (s *Server) handleUpstream(w http.ResponseWriter, r *http.Request) {
case s.peer.Handles(modelID):
s.peer.ServeHTTP(w, r)
default:
router.SendResponse(w, r, http.StatusNotFound, "no router for model "+modelID)
shared.SendResponse(w, r, http.StatusNotFound, "no router for model "+modelID)
}
}
+9 -10
View File
@@ -12,7 +12,6 @@ import (
"github.com/mostlygeek/llama-swap/internal/event"
"github.com/mostlygeek/llama-swap/internal/perf"
"github.com/mostlygeek/llama-swap/internal/router"
"github.com/mostlygeek/llama-swap/internal/shared"
)
@@ -76,11 +75,11 @@ func (s *Server) handleAPIUnloadModel(w http.ResponseWriter, r *http.Request) {
requested := strings.TrimPrefix(r.PathValue("model"), "/")
realName, found := s.cfg.RealModelName(requested)
if !found {
router.SendResponse(w, r, http.StatusNotFound, "model not found")
shared.SendResponse(w, r, http.StatusNotFound, "model not found")
return
}
if !s.local.Handles(realName) {
router.SendResponse(w, r, http.StatusNotFound, "no local server found for requested model")
shared.SendResponse(w, r, http.StatusNotFound, "no local server found for requested model")
return
}
s.local.Unload(apiUnloadTimeout, realName)
@@ -92,7 +91,7 @@ func (s *Server) handleAPIUnloadModel(w http.ResponseWriter, r *http.Request) {
func (s *Server) handleAPIMetrics(w http.ResponseWriter, r *http.Request) {
data, err := s.metrics.getMetricsJSON()
if err != nil {
router.SendResponse(w, r, http.StatusInternalServerError, "failed to get metrics")
shared.SendResponse(w, r, http.StatusInternalServerError, "failed to get metrics")
return
}
w.Header().Set("Content-Type", "application/json")
@@ -103,7 +102,7 @@ func (s *Server) handleAPIMetrics(w http.ResponseWriter, r *http.Request) {
// filtered to samples after the ?after=<RFC3339> timestamp.
func (s *Server) handleAPIPerformance(w http.ResponseWriter, r *http.Request) {
if s.perf == nil {
router.SendResponse(w, r, http.StatusServiceUnavailable, "performance monitor not available")
shared.SendResponse(w, r, http.StatusServiceUnavailable, "performance monitor not available")
return
}
@@ -112,7 +111,7 @@ func (s *Server) handleAPIPerformance(w http.ResponseWriter, r *http.Request) {
if afterStr := r.URL.Query().Get("after"); afterStr != "" {
after, err := time.Parse(time.RFC3339, afterStr)
if err != nil {
router.SendResponse(w, r, http.StatusBadRequest, "invalid 'after' timestamp, use RFC3339 format")
shared.SendResponse(w, r, http.StatusBadRequest, "invalid 'after' timestamp, use RFC3339 format")
return
}
filteredSys := make([]perf.SysStat, 0, len(sysStats))
@@ -153,19 +152,19 @@ func (s *Server) handleAPIVersion(w http.ResponseWriter, r *http.Request) {
func (s *Server) handleAPICapture(w http.ResponseWriter, r *http.Request) {
id, err := strconv.Atoi(r.PathValue("id"))
if err != nil {
router.SendResponse(w, r, http.StatusBadRequest, "invalid capture ID")
shared.SendResponse(w, r, http.StatusBadRequest, "invalid capture ID")
return
}
capture := s.metrics.getCaptureByID(id)
if capture == nil {
router.SendResponse(w, r, http.StatusNotFound, "capture not found")
shared.SendResponse(w, r, http.StatusNotFound, "capture not found")
return
}
jsonBytes, err := json.Marshal(capture)
if err != nil {
router.SendResponse(w, r, http.StatusInternalServerError, "failed to marshal capture")
shared.SendResponse(w, r, http.StatusInternalServerError, "failed to marshal capture")
return
}
w.Header().Set("Content-Type", "application/json")
@@ -198,7 +197,7 @@ func (s *Server) handleAPIEvents(w http.ResponseWriter, r *http.Request) {
flusher, ok := w.(http.Flusher)
if !ok {
router.SendResponse(w, r, http.StatusInternalServerError, "streaming unsupported")
shared.SendResponse(w, r, http.StatusInternalServerError, "streaming unsupported")
return
}
+17 -31
View File
@@ -1,19 +1,17 @@
package server
import (
"encoding/base64"
"net/http"
"strings"
"github.com/mostlygeek/llama-swap/internal/chain"
"github.com/mostlygeek/llama-swap/internal/config"
"github.com/mostlygeek/llama-swap/internal/router"
"github.com/mostlygeek/llama-swap/internal/shared"
)
// CreateAuthMiddleware returns middleware that validates API keys when the
// config declares any. It accepts the key via Authorization: Bearer,
// Authorization: Basic (password field), or x-api-key. On success the auth
// headers are stripped so they never leak to upstream. When no keys are
// Authorization: Basic (password field), or x-api-key. When no keys are
// configured the middleware is a pass-through.
func CreateAuthMiddleware(cfg config.Config) chain.Middleware {
keys := cfg.RequiredAPIKeys
@@ -22,7 +20,7 @@ func CreateAuthMiddleware(cfg config.Config) chain.Middleware {
return next
}
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
provided := extractAPIKey(r)
provided := shared.ExtractAPIKey(r)
valid := false
for _, key := range keys {
@@ -33,41 +31,29 @@ func CreateAuthMiddleware(cfg config.Config) chain.Middleware {
}
if !valid {
w.Header().Set("WWW-Authenticate", `Basic realm="llama-swap"`)
router.SendResponse(w, r, http.StatusUnauthorized, "unauthorized: invalid or missing API key")
shared.SendResponse(w, r, http.StatusUnauthorized, "unauthorized: invalid or missing API key")
return
}
r.Header.Del("Authorization")
r.Header.Del("x-api-key")
next.ServeHTTP(w, r)
})
}
}
// extractAPIKey pulls a candidate API key from the request, preferring Basic,
// then Bearer, then x-api-key.
func extractAPIKey(r *http.Request) string {
var bearerKey, basicKey string
if auth := r.Header.Get("Authorization"); auth != "" {
if strings.HasPrefix(auth, "Bearer ") {
bearerKey = strings.TrimPrefix(auth, "Bearer ")
} else if strings.HasPrefix(auth, "Basic ") {
encoded := strings.TrimPrefix(auth, "Basic ")
if decoded, err := base64.StdEncoding.DecodeString(encoded); err == nil {
if parts := strings.SplitN(string(decoded), ":", 2); len(parts) == 2 {
basicKey = parts[1] // password field is the API key
}
// CreateRequestContextMiddleware returns middleware that extracts model and
// auth info from the request into the context. Requests where no model can be
// identified are rejected with a 404.
func CreateRequestContextMiddleware(cfg config.Config) chain.Middleware {
return func(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
data, err := shared.FetchContext(r, cfg)
if err != nil {
shared.SendError(w, r, shared.ErrNoModelInContext)
return
}
}
}
switch {
case basicKey != "":
return basicKey
case bearerKey != "":
return bearerKey
default:
return r.Header.Get("x-api-key")
_ = data
next.ServeHTTP(w, r)
})
}
}
+35 -38
View File
@@ -1,48 +1,14 @@
package server
import (
"encoding/base64"
"net/http"
"net/http/httptest"
"strings"
"testing"
"github.com/mostlygeek/llama-swap/internal/config"
)
func TestServer_ExtractAPIKey(t *testing.T) {
basicHeader := func(user, pass string) string {
return "Basic " + base64.StdEncoding.EncodeToString([]byte(user+":"+pass))
}
cases := []struct {
name string
auth string
xapi string
want string
}{
{"none", "", "", ""},
{"bearer", "Bearer tok123", "", "tok123"},
{"basic", basicHeader("user", "pw-key"), "", "pw-key"},
{"x-api-key", "", "xkey", "xkey"},
{"basic beats bearer", basicHeader("u", "bk"), "", "bk"},
{"bearer beats x-api-key", "Bearer btok", "xkey", "btok"},
{"malformed basic falls back to x-api-key", "Basic !!!notbase64", "xkey", "xkey"},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
r := httptest.NewRequest(http.MethodGet, "/", nil)
if c.auth != "" {
r.Header.Set("Authorization", c.auth)
}
if c.xapi != "" {
r.Header.Set("x-api-key", c.xapi)
}
if got := extractAPIKey(r); got != c.want {
t.Errorf("extractAPIKey() = %q, want %q", got, c.want)
}
})
}
}
func TestServer_SanitizeAccessControlRequestHeaders(t *testing.T) {
cases := []struct {
in string
@@ -74,11 +40,42 @@ func TestServer_IsTokenChar(t *testing.T) {
}
}
func TestServer_RequestContextMiddleware(t *testing.T) {
cfg := config.Config{
Models: map[string]config.ModelConfig{
"llama3": {},
},
}
final := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
})
mw := CreateRequestContextMiddleware(cfg)
t.Run("known model passes through", func(t *testing.T) {
r := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(`{"model":"llama3"}`))
r.Header.Set("Content-Type", "application/json")
w := httptest.NewRecorder()
mw(final).ServeHTTP(w, r)
if w.Code != http.StatusOK {
t.Errorf("status = %d, want 200", w.Code)
}
})
t.Run("missing model returns 404", func(t *testing.T) {
r := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(`{}`))
r.Header.Set("Content-Type", "application/json")
w := httptest.NewRecorder()
mw(final).ServeHTTP(w, r)
if w.Code != http.StatusNotFound {
t.Errorf("status = %d, want 404", w.Code)
}
})
}
func TestServer_AuthMiddleware(t *testing.T) {
final := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Header.Get("Authorization") != "" || r.Header.Get("x-api-key") != "" {
t.Error("auth headers leaked to upstream")
}
w.WriteHeader(http.StatusOK)
})
+3 -3
View File
@@ -7,7 +7,7 @@ import (
"github.com/mostlygeek/llama-swap/internal/chain"
"github.com/mostlygeek/llama-swap/internal/config"
"github.com/mostlygeek/llama-swap/internal/router"
"github.com/mostlygeek/llama-swap/internal/shared"
)
// defaultConcurrencyLimit caps simultaneous in-flight requests per model when
@@ -32,9 +32,9 @@ func CreateConcurrencyMiddleware(cfg config.Config) chain.Middleware {
return func(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
data, err := router.FetchContext(r, cfg)
data, err := shared.FetchContext(r, cfg)
if err != nil {
router.SendError(w, r, router.ErrNoModelInContext)
shared.SendError(w, r, shared.ErrNoModelInContext)
return
}
+2 -2
View File
@@ -7,12 +7,12 @@ import (
"testing"
"github.com/mostlygeek/llama-swap/internal/config"
"github.com/mostlygeek/llama-swap/internal/router"
"github.com/mostlygeek/llama-swap/internal/shared"
)
func concurrencyTestReq(model string) *http.Request {
r := httptest.NewRequest("GET", "/v1/chat/completions", nil)
return r.WithContext(router.SetContext(r.Context(), router.ReqContextData{Model: model, ModelID: model}))
return r.WithContext(shared.SetContext(r.Context(), shared.ReqContextData{Model: model, ModelID: model}))
}
func TestServer_ConcurrencyMiddleware_RejectsOverLimit(t *testing.T) {
+9 -9
View File
@@ -11,7 +11,7 @@ import (
"github.com/mostlygeek/llama-swap/internal/chain"
"github.com/mostlygeek/llama-swap/internal/config"
"github.com/mostlygeek/llama-swap/internal/router"
"github.com/mostlygeek/llama-swap/internal/shared"
"github.com/tidwall/sjson"
)
@@ -34,9 +34,9 @@ func CreateFilterMiddleware(cfg config.Config) chain.Middleware {
return
}
data, err := router.FetchContext(r, cfg)
data, err := shared.FetchContext(r, cfg)
if err != nil {
router.SendError(w, r, router.ErrNoModelInContext)
shared.SendError(w, r, shared.ErrNoModelInContext)
return
}
@@ -48,13 +48,13 @@ func CreateFilterMiddleware(cfg config.Config) chain.Middleware {
body, err := io.ReadAll(r.Body)
if err != nil {
router.SendResponse(w, r, http.StatusBadRequest, "could not read request body")
shared.SendResponse(w, r, http.StatusBadRequest, "could not read request body")
return
}
body, err = applyFilters(body, data.Model, useModelName, filters)
if err != nil {
router.SendResponse(w, r, http.StatusInternalServerError, err.Error())
shared.SendResponse(w, r, http.StatusInternalServerError, err.Error())
return
}
@@ -84,9 +84,9 @@ func CreateFormFilterMiddleware(cfg config.Config) chain.Middleware {
return
}
data, err := router.FetchContext(r, cfg)
data, err := shared.FetchContext(r, cfg)
if err != nil {
router.SendError(w, r, router.ErrNoModelInContext)
shared.SendError(w, r, shared.ErrNoModelInContext)
return
}
@@ -97,13 +97,13 @@ func CreateFormFilterMiddleware(cfg config.Config) chain.Middleware {
}
if err := r.ParseMultipartForm(32 << 20); err != nil {
router.SendResponse(w, r, http.StatusBadRequest, fmt.Sprintf("error parsing multipart form: %s", err.Error()))
shared.SendResponse(w, r, http.StatusBadRequest, fmt.Sprintf("error parsing multipart form: %s", err.Error()))
return
}
body, contentType, err := rewriteMultipartModel(r.MultipartForm, useModelName)
if err != nil {
router.SendResponse(w, r, http.StatusInternalServerError, err.Error())
shared.SendResponse(w, r, http.StatusInternalServerError, err.Error())
return
}
+3 -3
View File
@@ -14,7 +14,7 @@ import (
"github.com/mostlygeek/llama-swap/internal/chain"
"github.com/mostlygeek/llama-swap/internal/config"
"github.com/mostlygeek/llama-swap/internal/logmon"
"github.com/mostlygeek/llama-swap/internal/router"
"github.com/mostlygeek/llama-swap/internal/shared"
)
// NewLoggers builds the proxy, upstream, and combined (mux) log monitors,
@@ -102,13 +102,13 @@ func (s *Server) handleLogStream(w http.ResponseWriter, r *http.Request) {
logger, err := s.getLogger(logMonitorID)
if err != nil {
router.SendResponse(w, r, http.StatusBadRequest, err.Error())
shared.SendResponse(w, r, http.StatusBadRequest, err.Error())
return
}
flusher, ok := w.(http.Flusher)
if !ok {
router.SendResponse(w, r, http.StatusInternalServerError, "streaming unsupported")
shared.SendResponse(w, r, http.StatusInternalServerError, "streaming unsupported")
return
}
+3 -3
View File
@@ -7,7 +7,7 @@ import (
"github.com/mostlygeek/llama-swap/internal/chain"
"github.com/mostlygeek/llama-swap/internal/config"
"github.com/mostlygeek/llama-swap/internal/router"
"github.com/mostlygeek/llama-swap/internal/shared"
)
// CreateMetricsMiddleware returns middleware that records token metrics for
@@ -23,9 +23,9 @@ func CreateMetricsMiddleware(mm *metricsMonitor, cfg config.Config) chain.Middle
// Resolve the model now so downstream dispatch hits the context
// fast path; FetchContext restores the request body.
data, err := router.FetchContext(r, cfg)
data, err := shared.FetchContext(r, cfg)
if err != nil {
router.SendError(w, r, router.ErrNoModelInContext)
shared.SendError(w, r, shared.ErrNoModelInContext)
return
}
+12 -18
View File
@@ -15,6 +15,7 @@ import (
"github.com/mostlygeek/llama-swap/internal/logmon"
"github.com/mostlygeek/llama-swap/internal/perf"
"github.com/mostlygeek/llama-swap/internal/router"
"github.com/mostlygeek/llama-swap/internal/shared"
)
// Server owns the HTTP mux, cross-cutting middleware, and the local/peer model
@@ -138,13 +139,13 @@ func New(cfg config.Config, muxlog *logmon.Monitor, proxylog *logmon.Monitor, up
}
// localPeerHandler dispatches a model-routed request to the local or peer
// router. The model is resolved once via router.FetchContext.
// router. The model is resolved once via shared.FetchContext.
func (s *Server) localPeerHandler(w http.ResponseWriter, r *http.Request) {
stripVersionPrefix(r)
data, err := router.FetchContext(r, s.cfg)
data, err := shared.FetchContext(r, s.cfg)
if err != nil {
router.SendError(w, r, router.ErrNoModelInContext)
shared.SendError(w, r, shared.ErrNoModelInContext)
return
}
@@ -156,7 +157,7 @@ func (s *Server) localPeerHandler(w http.ResponseWriter, r *http.Request) {
s.proxylog.Debugf("dispatch: using peer for model: %s", data.ModelID)
s.peer.ServeHTTP(w, r)
default:
router.SendError(w, r, router.ErrNoRouterFound)
shared.SendError(w, r, router.ErrNoRouterFound)
}
}
@@ -171,21 +172,14 @@ func stripVersionPrefix(r *http.Request) {
// routes builds the mux, registers every route, and wraps the mux with the
// global CORS middleware.
func (s *Server) routes() {
authMW := CreateAuthMiddleware(s.cfg)
filterMW := CreateFilterMiddleware(s.cfg)
formFilterMW := CreateFormFilterMiddleware(s.cfg)
// Model-dispatched routes get auth + per-model concurrency limiting + body
// filters + in-flight tracking + token metrics. concurrencyMW rejects with
// 429 before the body filters do any rewrite work. filterMW rewrites JSON
// bodies and formFilterMW rewrites multipart bodies; each is a no-op for the
// other's Content-Type. Both run before the metrics middleware so it buffers
// the rewritten body.
authMW := CreateAuthMiddleware(s.cfg)
modelChain := chain.New(
authMW,
CreateRequestContextMiddleware(s.cfg),
CreateConcurrencyMiddleware(s.cfg),
filterMW,
formFilterMW,
CreateFilterMiddleware(s.cfg),
CreateFormFilterMiddleware(s.cfg),
CreateInflightMiddleware(s.inflight),
CreateMetricsMiddleware(s.metrics, s.cfg),
)
@@ -216,11 +210,11 @@ func (s *Server) routes() {
mux.HandleFunc("GET /{$}", handleRootRedirect)
// Embedded UI.
mux.HandleFunc("GET /ui/", s.handleUI)
mux.Handle("GET /ui/", chain.New(authMW).ThenFunc(s.handleUI))
mux.HandleFunc("GET /favicon.ico", s.handleFavicon)
// Prometheus metrics (no auth, matches the legacy endpoint).
mux.HandleFunc("GET /metrics", s.handleMetrics)
// Prometheus metrics (wrapped by apiChain, matches the legacy endpoint).
mux.Handle("GET /metrics", apiChain.ThenFunc(s.handleMetrics))
// Operations endpoints.
mux.Handle("GET /unload", apiChain.ThenFunc(s.handleUnload))