62aea0e83d
- refactor shared http functionality into internal/shared/http.go - remove stripping of Authorization and x-api-key - add Request Context middleware to internal/server - add /ui and /metrics behind auth middleware, fixes #717 Fix #717 Updates: #834
76 lines
2.0 KiB
Go
76 lines
2.0 KiB
Go
package server
|
|
|
|
import (
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"sync"
|
|
"testing"
|
|
|
|
"github.com/mostlygeek/llama-swap/internal/config"
|
|
"github.com/mostlygeek/llama-swap/internal/shared"
|
|
)
|
|
|
|
func concurrencyTestReq(model string) *http.Request {
|
|
r := httptest.NewRequest("GET", "/v1/chat/completions", nil)
|
|
return r.WithContext(shared.SetContext(r.Context(), shared.ReqContextData{Model: model, ModelID: model}))
|
|
}
|
|
|
|
func TestServer_ConcurrencyMiddleware_RejectsOverLimit(t *testing.T) {
|
|
cfg := config.Config{
|
|
Models: map[string]config.ModelConfig{
|
|
"m1": {ConcurrencyLimit: 1},
|
|
},
|
|
}
|
|
|
|
entered := make(chan struct{})
|
|
release := make(chan struct{})
|
|
var once sync.Once
|
|
final := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
once.Do(func() { close(entered) })
|
|
<-release
|
|
w.WriteHeader(http.StatusOK)
|
|
})
|
|
h := CreateConcurrencyMiddleware(cfg)(final)
|
|
|
|
// First request occupies the only slot.
|
|
done := make(chan struct{})
|
|
go func() {
|
|
defer close(done)
|
|
h.ServeHTTP(httptest.NewRecorder(), concurrencyTestReq("m1"))
|
|
}()
|
|
<-entered
|
|
|
|
// Second concurrent request is rejected with 429.
|
|
w := httptest.NewRecorder()
|
|
h.ServeHTTP(w, concurrencyTestReq("m1"))
|
|
if w.Code != http.StatusTooManyRequests {
|
|
t.Fatalf("over-limit status = %d, want 429", w.Code)
|
|
}
|
|
|
|
// Once the slot frees, a new request succeeds.
|
|
close(release)
|
|
<-done
|
|
w = httptest.NewRecorder()
|
|
h.ServeHTTP(w, concurrencyTestReq("m1"))
|
|
if w.Code != http.StatusOK {
|
|
t.Fatalf("post-release status = %d, want 200", w.Code)
|
|
}
|
|
}
|
|
|
|
func TestServer_ConcurrencyMiddleware_UnconfiguredModelPassesThrough(t *testing.T) {
|
|
cfg := config.Config{Models: map[string]config.ModelConfig{}}
|
|
|
|
called := 0
|
|
final := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
called++
|
|
w.WriteHeader(http.StatusOK)
|
|
})
|
|
h := CreateConcurrencyMiddleware(cfg)(final)
|
|
|
|
w := httptest.NewRecorder()
|
|
h.ServeHTTP(w, concurrencyTestReq("peer-model"))
|
|
if w.Code != http.StatusOK || called != 1 {
|
|
t.Fatalf("unconfigured model: status=%d called=%d, want 200/1", w.Code, called)
|
|
}
|
|
}
|