proxy: add API key support (#436)

Add configuration support for api keys that are enforced by llama-swap. Keys are stripped before sending them to upstream servers. 

Updates: #433, #50 and #251
This commit is contained in:
Benson Wong
2025-12-23 23:39:33 -08:00
committed by GitHub
parent 565c44766d
commit 53b32f3601
8 changed files with 258 additions and 20 deletions
+73 -19
View File
@@ -256,37 +256,38 @@ func (pm *ProxyManager) setupGinEngine() {
})
// Set up routes using the Gin engine
pm.ginEngine.POST("/v1/chat/completions", pm.proxyInferenceHandler)
// Protected routes use pm.apiKeyAuth() middleware
pm.ginEngine.POST("/v1/chat/completions", pm.apiKeyAuth(), pm.proxyInferenceHandler)
// Support legacy /v1/completions api, see issue #12
pm.ginEngine.POST("/v1/completions", pm.proxyInferenceHandler)
pm.ginEngine.POST("/v1/completions", pm.apiKeyAuth(), pm.proxyInferenceHandler)
// Support anthropic /v1/messages (added https://github.com/ggml-org/llama.cpp/pull/17570)
pm.ginEngine.POST("/v1/messages", pm.proxyInferenceHandler)
pm.ginEngine.POST("/v1/messages", pm.apiKeyAuth(), pm.proxyInferenceHandler)
// Support embeddings and reranking
pm.ginEngine.POST("/v1/embeddings", pm.proxyInferenceHandler)
pm.ginEngine.POST("/v1/embeddings", pm.apiKeyAuth(), pm.proxyInferenceHandler)
// llama-server's /reranking endpoint + aliases
pm.ginEngine.POST("/reranking", pm.proxyInferenceHandler)
pm.ginEngine.POST("/rerank", pm.proxyInferenceHandler)
pm.ginEngine.POST("/v1/rerank", pm.proxyInferenceHandler)
pm.ginEngine.POST("/v1/reranking", pm.proxyInferenceHandler)
pm.ginEngine.POST("/reranking", pm.apiKeyAuth(), pm.proxyInferenceHandler)
pm.ginEngine.POST("/rerank", pm.apiKeyAuth(), pm.proxyInferenceHandler)
pm.ginEngine.POST("/v1/rerank", pm.apiKeyAuth(), pm.proxyInferenceHandler)
pm.ginEngine.POST("/v1/reranking", pm.apiKeyAuth(), pm.proxyInferenceHandler)
// llama-server's /infill endpoint for code infilling
pm.ginEngine.POST("/infill", pm.proxyInferenceHandler)
pm.ginEngine.POST("/infill", pm.apiKeyAuth(), pm.proxyInferenceHandler)
// llama-server's /completion endpoint
pm.ginEngine.POST("/completion", pm.proxyInferenceHandler)
pm.ginEngine.POST("/completion", pm.apiKeyAuth(), pm.proxyInferenceHandler)
// Support audio/speech endpoint
pm.ginEngine.POST("/v1/audio/speech", pm.proxyInferenceHandler)
pm.ginEngine.POST("/v1/audio/transcriptions", pm.proxyOAIPostFormHandler)
pm.ginEngine.POST("/v1/audio/speech", pm.apiKeyAuth(), pm.proxyInferenceHandler)
pm.ginEngine.POST("/v1/audio/transcriptions", pm.apiKeyAuth(), pm.proxyOAIPostFormHandler)
pm.ginEngine.GET("/v1/models", pm.listModelsHandler)
pm.ginEngine.GET("/v1/models", pm.apiKeyAuth(), pm.listModelsHandler)
// in proxymanager_loghandlers.go
pm.ginEngine.GET("/logs", pm.sendLogsHandlers)
pm.ginEngine.GET("/logs/stream", pm.streamLogsHandler)
pm.ginEngine.GET("/logs/stream/*logMonitorID", pm.streamLogsHandler)
pm.ginEngine.GET("/logs", pm.apiKeyAuth(), pm.sendLogsHandlers)
pm.ginEngine.GET("/logs/stream", pm.apiKeyAuth(), pm.streamLogsHandler)
pm.ginEngine.GET("/logs/stream/*logMonitorID", pm.apiKeyAuth(), pm.streamLogsHandler)
/**
* User Interface Endpoints
@@ -298,9 +299,9 @@ func (pm *ProxyManager) setupGinEngine() {
pm.ginEngine.GET("/upstream", func(c *gin.Context) {
c.Redirect(http.StatusFound, "/ui/models")
})
pm.ginEngine.Any("/upstream/*upstreamPath", pm.proxyToUpstream)
pm.ginEngine.GET("/unload", pm.unloadAllModelsHandler)
pm.ginEngine.GET("/running", pm.listRunningProcessesHandler)
pm.ginEngine.Any("/upstream/*upstreamPath", pm.apiKeyAuth(), pm.proxyToUpstream)
pm.ginEngine.GET("/unload", pm.apiKeyAuth(), pm.unloadAllModelsHandler)
pm.ginEngine.GET("/running", pm.apiKeyAuth(), pm.listRunningProcessesHandler)
pm.ginEngine.GET("/health", func(c *gin.Context) {
c.String(http.StatusOK, "OK")
})
@@ -765,6 +766,59 @@ func (pm *ProxyManager) sendErrorResponse(c *gin.Context, statusCode int, messag
}
}
// apiKeyAuth returns a middleware that validates API keys if configured.
// Returns a pass-through handler if no API keys are configured.
func (pm *ProxyManager) apiKeyAuth() gin.HandlerFunc {
if len(pm.config.RequiredAPIKeys) == 0 {
return func(c *gin.Context) { c.Next() }
}
return func(c *gin.Context) {
xApiKey := c.GetHeader("x-api-key")
var bearerKey string
if auth := c.GetHeader("Authorization"); auth != "" {
if strings.HasPrefix(auth, "Bearer ") {
bearerKey = strings.TrimPrefix(auth, "Bearer ")
}
}
// If both headers present, they must match
if xApiKey != "" && bearerKey != "" && xApiKey != bearerKey {
pm.sendErrorResponse(c, http.StatusBadRequest, "x-api-key and Authorization header values do not match")
c.Abort()
return
}
// Use x-api-key first, then Authorization
providedKey := xApiKey
if providedKey == "" {
providedKey = bearerKey
}
// Validate key
valid := false
for _, key := range pm.config.RequiredAPIKeys {
if providedKey == key {
valid = true
break
}
}
if !valid {
pm.sendErrorResponse(c, http.StatusUnauthorized, "unauthorized: invalid or missing API key")
c.Abort()
return
}
// Strip auth headers to prevent leakage to upstream
c.Request.Header.Del("Authorization")
c.Request.Header.Del("x-api-key")
c.Next()
}
}
func (pm *ProxyManager) unloadAllModelsHandler(c *gin.Context) {
pm.StopProcesses(StopImmediately)
c.String(http.StatusOK, "OK")