internal/router,server,shared: refactor auth, libs (#839)

- refactor shared http functionality into internal/shared/http.go
- remove stripping of Authorization and x-api-key
- add Request Context middleware to internal/server
- add /ui and /metrics behind auth middleware, fixes #717

Fix #717
Updates: #834
This commit is contained in:
Benson Wong
2026-06-13 10:19:04 -07:00
committed by GitHub
parent 8c660dcb90
commit 62aea0e83d
18 changed files with 497 additions and 377 deletions
+7 -6
View File
@@ -12,6 +12,7 @@ import (
"github.com/mostlygeek/llama-swap/internal/logmon"
"github.com/mostlygeek/llama-swap/internal/process"
"github.com/mostlygeek/llama-swap/internal/router/scheduler"
"github.com/mostlygeek/llama-swap/internal/shared"
)
type shutdownReq struct {
@@ -399,13 +400,13 @@ func (b *baseRouter) Shutdown(timeout time.Duration) error {
func (b *baseRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
if b.shuttingDown.Load() {
SendError(w, req, fmt.Errorf("%s is shutting down", b.name))
shared.SendError(w, req, fmt.Errorf("%s is shutting down", b.name))
return
}
data, err := FetchContext(req, b.config)
data, err := shared.FetchContext(req, b.config)
if err != nil {
SendError(w, req, err)
shared.SendError(w, req, err)
return
}
@@ -424,7 +425,7 @@ func (b *baseRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
case <-req.Context().Done():
return
case <-b.shutdownCtx.Done():
SendError(w, req, fmt.Errorf("%s is shutting down", b.name))
shared.SendError(w, req, fmt.Errorf("%s is shutting down", b.name))
return
}
@@ -475,12 +476,12 @@ func (b *baseRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) {
return
case <-b.shutdownCtx.Done():
finishLoading()
SendError(w, req, fmt.Errorf("%s is shutting down", b.name))
shared.SendError(w, req, fmt.Errorf("%s is shutting down", b.name))
return
}
if resp.Err != nil {
SendError(w, req, resp.Err)
shared.SendError(w, req, resp.Err)
return
}
resp.HandleFunc(w, req)
-63
View File
@@ -226,69 +226,6 @@ func TestIsLoadingPath(t *testing.T) {
}
}
// TestExtractContext_Streaming_GET verifies that, for GET requests, the
// Streaming flag is true only when the "stream" query parameter equals "true".
func TestExtractContext_Streaming_GET(t *testing.T) {
	cases := []struct {
		name          string
		query         string
		wantStreaming bool
	}{
		{name: "streaming true", query: "model=llama3&stream=true", wantStreaming: true},
		{name: "streaming false", query: "model=llama3&stream=false", wantStreaming: false},
		{name: "no stream param", query: "model=llama3", wantStreaming: false},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			req, _ := http.NewRequest(http.MethodGet, "/?"+tc.query, nil)

			data, err := ExtractContext(req)
			if err != nil {
				t.Fatalf("ExtractContext: %v", err)
			}

			if data.Streaming != tc.wantStreaming {
				t.Errorf("Streaming: want %v, got %v", tc.wantStreaming, data.Streaming)
			}
		})
	}
}
// TestExtractContext_Streaming_JSON verifies that the Streaming flag follows
// the boolean "stream" field of a JSON request body.
func TestExtractContext_Streaming_JSON(t *testing.T) {
	cases := []struct {
		name          string
		body          string
		wantStreaming bool
	}{
		{name: "streaming true", body: `{"model":"llama3","stream":true}`, wantStreaming: true},
		{name: "streaming false", body: `{"model":"llama3","stream":false}`, wantStreaming: false},
		{name: "no stream param", body: `{"model":"llama3"}`, wantStreaming: false},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			req, _ := http.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(tc.body))
			req.Header.Set("Content-Type", "application/json")

			data, err := ExtractContext(req)
			if err != nil {
				t.Fatalf("ExtractContext: %v", err)
			}

			if data.Streaming != tc.wantStreaming {
				t.Errorf("Streaming: want %v, got %v", tc.wantStreaming, data.Streaming)
			}
		})
	}
}
// TestExtractContext_Streaming_URLEncodedForm verifies that stream=true in a
// URL-encoded form body turns the Streaming flag on.
func TestExtractContext_Streaming_URLEncodedForm(t *testing.T) {
	form := strings.NewReader("model=whisper-1&stream=true")
	req, _ := http.NewRequest(http.MethodPost, "/v1/audio/transcriptions", form)
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")

	data, err := ExtractContext(req)
	if err != nil {
		t.Fatalf("ExtractContext: %v", err)
	}

	if !data.Streaming {
		t.Error("Streaming should be true")
	}
}
func countSSEMessages(s string) int {
scanner := bufio.NewScanner(strings.NewReader(s))
count := 0
+5 -4
View File
@@ -15,6 +15,7 @@ import (
"github.com/mostlygeek/llama-swap/internal/config"
"github.com/mostlygeek/llama-swap/internal/logmon"
"github.com/mostlygeek/llama-swap/internal/shared"
)
type peerMember struct {
@@ -146,22 +147,22 @@ func (r *Peer) Shutdown(timeout time.Duration) error {
func (r *Peer) ServeHTTP(w http.ResponseWriter, req *http.Request) {
if r.shuttingDown.Load() {
SendError(w, req, fmt.Errorf("peer proxy is shutting down"))
shared.SendError(w, req, fmt.Errorf("peer proxy is shutting down"))
return
}
r.inflight.Add(1)
defer r.inflight.Done()
data, err := FetchContext(req, r.cfg)
data, err := shared.FetchContext(req, r.cfg)
if err != nil {
SendError(w, req, err)
shared.SendError(w, req, err)
return
}
pp, found := r.peers[data.ModelID]
if !found {
r.logger.Warnf("peer model not found: %s", data.ModelID)
SendError(w, req, ErrNoPeerModelFound)
shared.SendError(w, req, ErrNoPeerModelFound)
return
}
+11 -10
View File
@@ -12,6 +12,7 @@ import (
"github.com/mostlygeek/llama-swap/internal/config"
"github.com/mostlygeek/llama-swap/internal/logmon"
"github.com/mostlygeek/llama-swap/internal/shared"
)
var testLogger = logmon.NewWriter(os.Stdout)
@@ -142,7 +143,7 @@ func TestPeer_ServeHTTP_Success(t *testing.T) {
}
req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
*req = *req.WithContext(SetContext(req.Context(), ReqContextData{Model: "test-model", ModelID: "test-model"}))
*req = *req.WithContext(shared.SetContext(req.Context(), shared.ReqContextData{Model: "test-model", ModelID: "test-model"}))
w := httptest.NewRecorder()
pr.ServeHTTP(w, req)
@@ -178,7 +179,7 @@ func TestPeer_ServeHTTP_PeerModelNotFound(t *testing.T) {
}
req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
*req = *req.WithContext(SetContext(req.Context(), ReqContextData{Model: "nonexistent-model", ModelID: "nonexistent-model"}))
*req = *req.WithContext(shared.SetContext(req.Context(), shared.ReqContextData{Model: "nonexistent-model", ModelID: "nonexistent-model"}))
w := httptest.NewRecorder()
pr.ServeHTTP(w, req)
@@ -212,7 +213,7 @@ func TestPeer_ServeHTTP_ApiKeyInjection(t *testing.T) {
}
req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
*req = *req.WithContext(SetContext(req.Context(), ReqContextData{Model: "test-model", ModelID: "test-model"}))
*req = *req.WithContext(shared.SetContext(req.Context(), shared.ReqContextData{Model: "test-model", ModelID: "test-model"}))
w := httptest.NewRecorder()
pr.ServeHTTP(w, req)
@@ -246,7 +247,7 @@ func TestPeer_ServeHTTP_NoApiKey(t *testing.T) {
}
req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
*req = *req.WithContext(SetContext(req.Context(), ReqContextData{Model: "test-model", ModelID: "test-model"}))
*req = *req.WithContext(shared.SetContext(req.Context(), shared.ReqContextData{Model: "test-model", ModelID: "test-model"}))
w := httptest.NewRecorder()
pr.ServeHTTP(w, req)
@@ -279,7 +280,7 @@ func TestPeer_ServeHTTP_HostHeaderSet(t *testing.T) {
}
req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
*req = *req.WithContext(SetContext(req.Context(), ReqContextData{Model: "test-model", ModelID: "test-model"}))
*req = *req.WithContext(shared.SetContext(req.Context(), shared.ReqContextData{Model: "test-model", ModelID: "test-model"}))
w := httptest.NewRecorder()
pr.ServeHTTP(w, req)
@@ -311,7 +312,7 @@ func TestPeer_ServeHTTP_SSEHeaderModification(t *testing.T) {
}
req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
*req = *req.WithContext(SetContext(req.Context(), ReqContextData{Model: "test-model", ModelID: "test-model"}))
*req = *req.WithContext(shared.SetContext(req.Context(), shared.ReqContextData{Model: "test-model", ModelID: "test-model"}))
w := httptest.NewRecorder()
pr.ServeHTTP(w, req)
@@ -347,7 +348,7 @@ func TestPeer_ServeHTTP_ShutdownRejectsNewRequests(t *testing.T) {
}
req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
*req = *req.WithContext(SetContext(req.Context(), ReqContextData{Model: "test-model", ModelID: "test-model"}))
*req = *req.WithContext(shared.SetContext(req.Context(), shared.ReqContextData{Model: "test-model", ModelID: "test-model"}))
w := httptest.NewRecorder()
pr.ServeHTTP(w, req)
@@ -385,7 +386,7 @@ func TestPeer_ServeHTTP_WaitsForInflightDuringShutdown(t *testing.T) {
}
req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
*req = *req.WithContext(SetContext(req.Context(), ReqContextData{Model: "test-model", ModelID: "test-model"}))
*req = *req.WithContext(shared.SetContext(req.Context(), shared.ReqContextData{Model: "test-model", ModelID: "test-model"}))
var wg sync.WaitGroup
wg.Add(1)
@@ -448,7 +449,7 @@ func TestPeer_ServeHTTP_ShutdownTimeoutCancelsInflight(t *testing.T) {
}
req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
*req = *req.WithContext(SetContext(req.Context(), ReqContextData{Model: "test-model", ModelID: "test-model"}))
*req = *req.WithContext(shared.SetContext(req.Context(), shared.ReqContextData{Model: "test-model", ModelID: "test-model"}))
var wg sync.WaitGroup
wg.Add(1)
@@ -551,7 +552,7 @@ func TestPeer_ServeHTTP_ContextOverridesBodyModel(t *testing.T) {
body := strings.NewReader(`{"model":"body-model","prompt":"hello"}`)
req := httptest.NewRequest("POST", "/v1/chat/completions", body)
req.Header.Set("Content-Type", "application/json")
*req = *req.WithContext(SetContext(req.Context(), ReqContextData{Model: "context-model", ModelID: "context-model"}))
*req = *req.WithContext(shared.SetContext(req.Context(), shared.ReqContextData{Model: "context-model", ModelID: "context-model"}))
w := httptest.NewRecorder()
pr.ServeHTTP(w, req)
+4 -152
View File
@@ -1,40 +1,18 @@
package router
import (
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"html"
	"io"
	"net/http"
	"strings"
	"time"

	"github.com/mostlygeek/llama-swap/internal/config"
	"github.com/mostlygeek/llama-swap/internal/logmon"
	"github.com/mostlygeek/llama-swap/internal/process"
	"github.com/mostlygeek/llama-swap/internal/router/scheduler"
	"github.com/mostlygeek/llama-swap/internal/shared"
	"github.com/tidwall/gjson"
)
type contextkey struct {
name string
}
type ReqContextData struct {
Model string
ModelID string
Streaming bool
SendLoadingState bool
}
var (
ErrNoModelInContext = fmt.Errorf("no model in request context")
ErrNoRouterFound = fmt.Errorf("no router found for model")
ErrNoPeerModelFound = fmt.Errorf("peer model not found")
ErrNoLocalModelFound = scheduler.ErrModelNotFound
ContextKey = &contextkey{"context"}
ErrNoRouterFound = shared.ErrNoRouterFound
ErrNoPeerModelFound = shared.ErrNoPeerModelFound
ErrNoLocalModelFound = shared.ErrNoLocalModelFound
)
type Router interface {
@@ -72,129 +50,3 @@ type LocalRouter interface {
// model is not known to this router.
ProcessLogger(modelID string) (*logmon.Monitor, bool)
}
// FetchContext returns the request metadata for r. If the request context
// already carries a ReqContextData it is returned unchanged. Otherwise the
// metadata is extracted from the request itself, the model name is resolved
// through cfg (falling back to the requested name when cfg yields an empty
// one), and the result is stored in the request context for downstream
// handlers. ErrNoModelInContext is returned when the model cannot be fetched
// from either location.
func FetchContext(r *http.Request, cfg config.Config) (ReqContextData, error) {
	if cached, found := ReadContext(r.Context()); found {
		return cached, nil
	}

	extracted, err := ExtractContext(r)
	if err != nil {
		return ReqContextData{}, ErrNoModelInContext
	}

	modelID, _ := cfg.RealModelName(extracted.Model)
	if modelID == "" {
		modelID = extracted.Model
	}
	extracted.ModelID = modelID

	if modelCfg, known := cfg.Models[modelID]; known {
		extracted.SendLoadingState = modelCfg.SendLoadingState != nil && *modelCfg.SendLoadingState
	}

	// Replace the request in place so callers holding r see the new context.
	*r = *r.WithContext(SetContext(r.Context(), extracted))
	return extracted, nil
}
// SetContext returns a context derived from ctx that carries data under
// ContextKey.
func SetContext(ctx context.Context, data ReqContextData) context.Context {
	derived := context.WithValue(ctx, ContextKey, data)
	return derived
}
// ReadContext looks up the ReqContextData stored under ContextKey. The boolean
// is false, and the returned data is the zero value, when ctx holds no value
// of that type.
func ReadContext(ctx context.Context) (ReqContextData, bool) {
	if stored, ok := ctx.Value(ContextKey).(ReqContextData); ok {
		return stored, true
	}
	return ReqContextData{}, false
}
// ExtractContext pulls the model name and streaming flag from an HTTP request
// without consuming the body.
//
// For GET requests it reads the "model" and "stream" query parameters. For all
// other methods it buffers the body, inspects Content-Type, and parses JSON,
// multipart/form-data, or (as the fallback) URL-encoded form data. Once the
// body has been buffered it is always restored before returning so downstream
// handlers — including reverse proxies that forward raw bytes upstream — can
// still read it.
//
// A nil r.Body (possible on requests constructed without a body) is treated as
// an empty body instead of being passed to io.ReadAll.
//
// An error is returned when the body cannot be read or parsed, or when no
// non-empty model name is present.
func ExtractContext(r *http.Request) (ReqContextData, error) {
	if r.Method == http.MethodGet {
		query := r.URL.Query()
		if model := query.Get("model"); model != "" {
			return ReqContextData{Model: model, Streaming: query.Get("stream") == "true"}, nil
		}
		return ReqContextData{}, fmt.Errorf("missing 'model' query parameter")
	}

	// Guard against a nil body: io.ReadAll would dereference it and panic.
	var bodyBytes []byte
	if r.Body != nil {
		var err error
		bodyBytes, err = io.ReadAll(r.Body)
		if err != nil {
			return ReqContextData{}, fmt.Errorf("error reading request body: %w", err)
		}
	}
	defer func() {
		r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
	}()

	contentType := r.Header.Get("Content-Type")

	if strings.Contains(contentType, "application/json") {
		model := gjson.GetBytes(bodyBytes, "model").String()
		if model == "" {
			return ReqContextData{}, fmt.Errorf("missing or empty 'model' in JSON body")
		}
		return ReqContextData{Model: model, Streaming: gjson.GetBytes(bodyBytes, "stream").Bool()}, nil
	}

	// Form parsers read from r.Body, so feed them a fresh reader over the
	// buffered bytes. The deferred restore above will reset r.Body again
	// after parsing.
	r.Body = io.NopCloser(bytes.NewReader(bodyBytes))

	if strings.Contains(contentType, "multipart/form-data") {
		if err := r.ParseMultipartForm(32 << 20); err != nil {
			return ReqContextData{}, fmt.Errorf("error parsing multipart form: %w", err)
		}
	} else {
		if err := r.ParseForm(); err != nil {
			return ReqContextData{}, fmt.Errorf("error parsing form: %w", err)
		}
	}

	if model := r.FormValue("model"); model != "" {
		return ReqContextData{Model: model, Streaming: r.FormValue("stream") == "true"}, nil
	}
	return ReqContextData{}, fmt.Errorf("missing 'model' parameter")
}
// SendError translates err into an HTTP error response. The known routing
// sentinels are matched with errors.Is, in the order listed, and answered with
// 404 and a fixed message; any other error becomes a 500 that includes the
// error text.
func SendError(w http.ResponseWriter, r *http.Request, err error) {
	notFound := []struct {
		target  error
		message string
	}{
		{ErrNoModelInContext, "no model id could be identified"},
		{ErrNoPeerModelFound, "no peer found for requested model"},
		{ErrNoLocalModelFound, "no local server found for requested model"},
		{ErrNoRouterFound, "no router for requested model"},
	}

	for _, entry := range notFound {
		if errors.Is(err, entry.target) {
			SendResponse(w, r, http.StatusNotFound, entry.message)
			return
		}
	}

	SendResponse(w, r, http.StatusInternalServerError, fmt.Sprintf("unspecific error: %v", err))
}
// SendResponse writes message to w with the given status code, choosing the
// body format from the request's Accept header:
//
//   - contains "text/plain": plain text, prefixed with "llama-swap: "
//   - contains "text/html":  a minimal HTML page with the message HTML-escaped
//   - otherwise:             a JSON object {"src":"llama-swap", "error": "..."}
//
// The message is escaped for the HTML and JSON formats so that error text
// containing quotes, backslashes, or markup cannot produce invalid JSON or
// inject markup into the page. Plain-text output is written verbatim.
func SendResponse(w http.ResponseWriter, r *http.Request, status int, message string) {
	// Check Accept header for preferred response format
	acceptHeader := r.Header.Get("Accept")

	if strings.Contains(acceptHeader, "text/plain") {
		w.Header().Set("Content-Type", "text/plain")
		w.WriteHeader(status)
		fmt.Fprintf(w, "llama-swap: %s", message)
		return
	}

	if strings.Contains(acceptHeader, "text/html") {
		w.Header().Set("Content-Type", "text/html")
		w.WriteHeader(status)
		fmt.Fprintf(w, `<html><body><h1>llama-swap</h1><p>%s</p></body></html>`, html.EscapeString(message))
		return
	}

	// Marshaling a plain string cannot fail, so the error is ignored. The
	// result is a quoted, fully escaped JSON string literal.
	quoted, _ := json.Marshal(message)

	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(status)
	fmt.Fprintf(w, `{"src":"llama-swap", "error": %s}`, quoted)
}
-275
View File
@@ -1,275 +0,0 @@
package router
import (
"bytes"
"context"
"io"
"mime/multipart"
"net/http"
"net/url"
"strings"
"testing"
)
// TestExtractContext_GET verifies model extraction from the "model" query
// parameter of GET requests, including the missing-parameter error.
func TestExtractContext_GET(t *testing.T) {
	cases := []struct {
		name      string
		query     string
		wantModel string
		wantErr   bool
	}{
		{name: "model present", query: "model=llama3", wantModel: "llama3", wantErr: false},
		{name: "model with slashes", query: "model=author/model-7b", wantModel: "author/model-7b", wantErr: false},
		{name: "model missing", query: "", wantModel: "", wantErr: true},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			req, _ := http.NewRequest(http.MethodGet, "/?"+tc.query, nil)

			data, err := ExtractContext(req)
			gotErr := err != nil
			if gotErr != tc.wantErr {
				t.Fatalf("wantErr=%v got err=%v", tc.wantErr, err)
			}

			if data.Model != tc.wantModel {
				t.Errorf("want %q got %q", tc.wantModel, data.Model)
			}
		})
	}
}
// TestExtractContext_JSON verifies model extraction from JSON request bodies,
// covering empty, missing, and unparseable inputs.
func TestExtractContext_JSON(t *testing.T) {
	cases := []struct {
		name      string
		body      string
		wantModel string
		wantErr   bool
	}{
		{name: "model present", body: `{"model":"llama3","stream":true}`, wantModel: "llama3", wantErr: false},
		{name: "model with slashes", body: `{"model":"author/model-7b"}`, wantModel: "author/model-7b", wantErr: false},
		{name: "model empty string", body: `{"model":""}`, wantModel: "", wantErr: true},
		{name: "model key missing", body: `{"stream":true}`, wantModel: "", wantErr: true},
		{name: "invalid json", body: `not-json`, wantModel: "", wantErr: true},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			req, _ := http.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(tc.body))
			req.Header.Set("Content-Type", "application/json")

			data, err := ExtractContext(req)
			gotErr := err != nil
			if gotErr != tc.wantErr {
				t.Fatalf("wantErr=%v got err=%v", tc.wantErr, err)
			}

			if data.Model != tc.wantModel {
				t.Errorf("want %q got %q", tc.wantModel, data.Model)
			}
		})
	}
}
// TestExtractContext_URLEncodedForm verifies model extraction from
// application/x-www-form-urlencoded request bodies.
func TestExtractContext_URLEncodedForm(t *testing.T) {
	cases := []struct {
		name      string
		formModel string
		wantModel string
		wantErr   bool
	}{
		{name: "model present", formModel: "whisper-1", wantModel: "whisper-1", wantErr: false},
		{name: "model missing", formModel: "", wantModel: "", wantErr: true},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			values := url.Values{}
			if tc.formModel != "" {
				values.Set("model", tc.formModel)
			}

			req, _ := http.NewRequest(http.MethodPost, "/v1/audio/transcriptions", strings.NewReader(values.Encode()))
			req.Header.Set("Content-Type", "application/x-www-form-urlencoded")

			data, err := ExtractContext(req)
			gotErr := err != nil
			if gotErr != tc.wantErr {
				t.Fatalf("wantErr=%v got err=%v", tc.wantErr, err)
			}

			if data.Model != tc.wantModel {
				t.Errorf("want %q got %q", tc.wantModel, data.Model)
			}
		})
	}
}
// TestExtractContext_MultipartForm verifies model extraction from
// multipart/form-data request bodies.
func TestExtractContext_MultipartForm(t *testing.T) {
	cases := []struct {
		name      string
		formModel string
		wantModel string
		wantErr   bool
	}{
		{name: "model present", formModel: "whisper-1", wantModel: "whisper-1", wantErr: false},
		{name: "model missing", formModel: "", wantModel: "", wantErr: true},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			var payload bytes.Buffer
			writer := multipart.NewWriter(&payload)
			if tc.formModel != "" {
				field, _ := writer.CreateFormField("model")
				field.Write([]byte(tc.formModel))
			}
			writer.Close()

			req, _ := http.NewRequest(http.MethodPost, "/v1/audio/transcriptions", &payload)
			req.Header.Set("Content-Type", writer.FormDataContentType())

			data, err := ExtractContext(req)
			gotErr := err != nil
			if gotErr != tc.wantErr {
				t.Fatalf("wantErr=%v got err=%v", tc.wantErr, err)
			}

			if data.Model != tc.wantModel {
				t.Errorf("want %q got %q", tc.wantModel, data.Model)
			}
		})
	}
}
// TestExtractContext_JSONBodyRestored verifies that a JSON body can still be
// read in full after ExtractContext has inspected it.
func TestExtractContext_JSONBodyRestored(t *testing.T) {
	const body = `{"model":"llama3","stream":true}`

	req, _ := http.NewRequest(http.MethodPost, "/v1/chat/completions", strings.NewReader(body))
	req.Header.Set("Content-Type", "application/json")

	_, err := ExtractContext(req)
	if err != nil {
		t.Fatalf("ExtractContext: %v", err)
	}

	leftover, err := io.ReadAll(req.Body)
	if err != nil {
		t.Fatalf("reading body after ExtractContext: %v", err)
	}

	if got := string(leftover); got != body {
		t.Errorf("body not restored: want %q got %q", body, got)
	}
}
// TestExtractContext_MultipartBodyRestored verifies that a multipart body,
// including a file part, is byte-identical after ExtractContext has parsed it.
func TestExtractContext_MultipartBodyRestored(t *testing.T) {
	var payload bytes.Buffer
	writer := multipart.NewWriter(&payload)

	modelField, _ := writer.CreateFormField("model")
	modelField.Write([]byte("whisper-1"))

	filePart, _ := writer.CreateFormFile("file", "audio.wav")
	filePart.Write([]byte("fake-audio-bytes"))

	writer.Close()
	original := payload.Bytes()

	req, _ := http.NewRequest(http.MethodPost, "/v1/audio/transcriptions", bytes.NewReader(original))
	req.Header.Set("Content-Type", writer.FormDataContentType())

	_, err := ExtractContext(req)
	if err != nil {
		t.Fatalf("ExtractContext: %v", err)
	}

	leftover, err := io.ReadAll(req.Body)
	if err != nil {
		t.Fatalf("reading body after ExtractContext: %v", err)
	}

	if !bytes.Equal(leftover, original) {
		t.Errorf("multipart body not restored: want %d bytes got %d bytes", len(original), len(leftover))
	}
}
// TestExtractContext_URLEncodedBodyRestored verifies that a URL-encoded form
// body can still be read in full after ExtractContext has parsed it.
func TestExtractContext_URLEncodedBodyRestored(t *testing.T) {
	const body = "model=whisper-1&extra=value"

	req, _ := http.NewRequest(http.MethodPost, "/v1/audio/transcriptions", strings.NewReader(body))
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")

	_, err := ExtractContext(req)
	if err != nil {
		t.Fatalf("ExtractContext: %v", err)
	}

	leftover, err := io.ReadAll(req.Body)
	if err != nil {
		t.Fatalf("reading body after ExtractContext: %v", err)
	}

	if got := string(leftover); got != body {
		t.Errorf("url-encoded body not restored: want %q got %q", body, got)
	}
}
func TestSetContext(t *testing.T) {
ctx := SetContext(context.Background(), ReqContextData{Model: "llama3", ModelID: "llama3"})
data, ok := ctx.Value(ContextKey).(ReqContextData)
if !ok {
t.Fatalf("ContextKey not set or wrong type")
}
if data.Model != "llama3" {
t.Errorf("want %q got %q", "llama3", data.Model)
}
if data.ModelID != "llama3" {
t.Errorf("want %q got %q", "llama3", data.ModelID)
}
}
func TestSetContext_WithAlias(t *testing.T) {
ctx := SetContext(context.Background(), ReqContextData{Model: "llama", ModelID: "llama3"})
data, _ := ctx.Value(ContextKey).(ReqContextData)
if data.Model != "llama" {
t.Errorf("want requested %q got %q", "llama", data.Model)
}
if data.ModelID != "llama3" {
t.Errorf("want real %q got %q", "llama3", data.ModelID)
}
}
// TestSetContext_DoesNotMutateParent verifies that the value set by SetContext
// is visible only on the returned context, not on the parent.
func TestSetContext_DoesNotMutateParent(t *testing.T) {
	parent := context.Background()
	_ = SetContext(parent, ReqContextData{Model: "llama3", ModelID: "llama3"})

	leaked := parent.Value(ContextKey)
	if leaked != nil {
		t.Errorf("parent context was mutated: %v", leaked)
	}
}
// TestReadContext verifies ReadContext for contexts that carry request data
// (same name, aliased, and empty strings) and for a context without any.
func TestReadContext(t *testing.T) {
	withData := func(model, modelID string) context.Context {
		return SetContext(context.Background(), ReqContextData{Model: model, ModelID: modelID})
	}

	cases := []struct {
		name     string
		ctx      context.Context
		wantReq  string
		wantReal string
		wantBool bool
	}{
		{name: "model present, same name", ctx: withData("llama3", "llama3"), wantReq: "llama3", wantReal: "llama3", wantBool: true},
		{name: "model present, aliased", ctx: withData("llama", "llama3"), wantReq: "llama", wantReal: "llama3", wantBool: true},
		{name: "model absent", ctx: context.Background(), wantReq: "", wantReal: "", wantBool: false},
		{name: "model is empty string", ctx: withData("", ""), wantReq: "", wantReal: "", wantBool: true},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got, found := ReadContext(tc.ctx)

			matches := got.Model == tc.wantReq && got.ModelID == tc.wantReal && found == tc.wantBool
			if !matches {
				t.Errorf("want (%q, %q, %v) got (%q, %q, %v)", tc.wantReq, tc.wantReal, tc.wantBool, got.Model, got.ModelID, found)
			}
		})
	}
}
+3 -3
View File
@@ -11,17 +11,17 @@ package scheduler
import (
"context"
"fmt"
"net/http"
"time"
"github.com/mostlygeek/llama-swap/internal/logmon"
"github.com/mostlygeek/llama-swap/internal/process"
"github.com/mostlygeek/llama-swap/internal/shared"
)
// ErrModelNotFound is granted to callers whose model is not handled by this
// router. The router package aliases it so SendError can match it.
var ErrModelNotFound = fmt.Errorf("local model not found")
// router. It is an alias for shared.ErrNoLocalModelFound.
var ErrModelNotFound = shared.ErrNoLocalModelFound
// Swapper is the eviction policy: it decides which running models must be
// stopped before a target can serve. It is orthogonal to the scheduling