From 7493618fdca22cec2d28bc1b33e63cf37785ad6b Mon Sep 17 00:00:00 2001
From: Ryan Voots
Date: Tue, 20 Jan 2026 12:34:42 -0500
Subject: [PATCH] Add count_tokens api proxying (#476)

---
 README.md             | 1 +
 proxy/proxymanager.go | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/README.md b/README.md
index 22add2bd..c2696235 100644
--- a/README.md
+++ b/README.md
@@ -27,6 +27,7 @@ Built in Go for performance and simplicity, llama-swap has zero dependencies and
   - `v1/images/edits`
 - ✅ Anthropic API supported endpoints:
   - `v1/messages`
+  - `v1/messages/count_tokens`
 - ✅ llama-server (llama.cpp) supported endpoints
   - `v1/rerank`, `v1/reranking`, `/rerank`
   - `/infill` - for code infilling
diff --git a/proxy/proxymanager.go b/proxy/proxymanager.go
index 9115d810..5a016bc5 100644
--- a/proxy/proxymanager.go
+++ b/proxy/proxymanager.go
@@ -282,6 +282,8 @@ func (pm *ProxyManager) setupGinEngine() {
 	pm.ginEngine.POST("/v1/completions", pm.apiKeyAuth(), pm.proxyInferenceHandler)
 	// Support anthropic /v1/messages (added https://github.com/ggml-org/llama.cpp/pull/17570)
 	pm.ginEngine.POST("/v1/messages", pm.apiKeyAuth(), pm.proxyInferenceHandler)
+	// Support anthropic count_tokens API (Also added in the above PR)
+	pm.ginEngine.POST("/v1/messages/count_tokens", pm.apiKeyAuth(), pm.proxyInferenceHandler)
 
 	// Support embeddings and reranking
 	pm.ginEngine.POST("/v1/embeddings", pm.apiKeyAuth(), pm.proxyInferenceHandler)