Add count_tokens api proxying (#476)

This commit is contained in:
Ryan Voots
2026-01-20 12:34:42 -05:00
committed by GitHub
parent 205efd40a1
commit 7493618fdc
2 changed files with 3 additions and 0 deletions
+1
View File
@@ -27,6 +27,7 @@ Built in Go for performance and simplicity, llama-swap has zero dependencies and
- `v1/images/edits`
- ✅ Anthropic API supported endpoints:
- `v1/messages`
- `v1/messages/count_tokens`
- ✅ llama-server (llama.cpp) supported endpoints
- `v1/rerank`, `v1/reranking`, `/rerank`
- `/infill` - for code infilling
+2
View File
@@ -282,6 +282,8 @@ func (pm *ProxyManager) setupGinEngine() {
pm.ginEngine.POST("/v1/completions", pm.apiKeyAuth(), pm.proxyInferenceHandler)
// Support anthropic /v1/messages (added https://github.com/ggml-org/llama.cpp/pull/17570)
pm.ginEngine.POST("/v1/messages", pm.apiKeyAuth(), pm.proxyInferenceHandler)
// Support anthropic count_tokens API (Also added in the above PR)
pm.ginEngine.POST("/v1/messages/count_tokens", pm.apiKeyAuth(), pm.proxyInferenceHandler)
// Support embeddings and reranking
pm.ginEngine.POST("/v1/embeddings", pm.apiKeyAuth(), pm.proxyInferenceHandler)