proxy: add support for anthropic v1/messages api (#417)
* proxy: add support for anthropic v1/messages api
* proxy: restrict loading message to /v1/chat/completions
This commit is contained in:
+4
-1
@@ -507,7 +507,10 @@ func (p *Process) ProxyRequest(w http.ResponseWriter, r *http.Request) {
|
|||||||
// add a sync so the streaming client only runs when the goroutine has exited
|
// add a sync so the streaming client only runs when the goroutine has exited
|
||||||
|
|
||||||
isStreaming, _ := r.Context().Value(proxyCtxKey("streaming")).(bool)
|
isStreaming, _ := r.Context().Value(proxyCtxKey("streaming")).(bool)
|
||||||
if p.config.SendLoadingState != nil && *p.config.SendLoadingState && isStreaming {
|
|
||||||
|
// PR #417 (no support for anthropic v1/messages yet)
|
||||||
|
isChatCompletions := strings.HasPrefix(r.URL.Path, "/v1/chat/completions")
|
||||||
|
if p.config.SendLoadingState != nil && *p.config.SendLoadingState && isStreaming && isChatCompletions {
|
||||||
srw = newStatusResponseWriter(p, w)
|
srw = newStatusResponseWriter(p, w)
|
||||||
go srw.statusUpdates(swapCtx)
|
go srw.statusUpdates(swapCtx)
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
+13
-11
@@ -236,27 +236,29 @@ func (pm *ProxyManager) setupGinEngine() {
|
|||||||
})
|
})
|
||||||
|
|
||||||
// Set up routes using the Gin engine
|
// Set up routes using the Gin engine
|
||||||
pm.ginEngine.POST("/v1/chat/completions", pm.proxyOAIHandler)
|
pm.ginEngine.POST("/v1/chat/completions", pm.proxyInferenceHandler)
|
||||||
// Support legacy /v1/completions api, see issue #12
|
// Support legacy /v1/completions api, see issue #12
|
||||||
pm.ginEngine.POST("/v1/completions", pm.proxyOAIHandler)
|
pm.ginEngine.POST("/v1/completions", pm.proxyInferenceHandler)
|
||||||
|
// Support anthropic /v1/messages (added https://github.com/ggml-org/llama.cpp/pull/17570)
|
||||||
|
pm.ginEngine.POST("/v1/messages", pm.proxyInferenceHandler)
|
||||||
|
|
||||||
// Support embeddings and reranking
|
// Support embeddings and reranking
|
||||||
pm.ginEngine.POST("/v1/embeddings", pm.proxyOAIHandler)
|
pm.ginEngine.POST("/v1/embeddings", pm.proxyInferenceHandler)
|
||||||
|
|
||||||
// llama-server's /reranking endpoint + aliases
|
// llama-server's /reranking endpoint + aliases
|
||||||
pm.ginEngine.POST("/reranking", pm.proxyOAIHandler)
|
pm.ginEngine.POST("/reranking", pm.proxyInferenceHandler)
|
||||||
pm.ginEngine.POST("/rerank", pm.proxyOAIHandler)
|
pm.ginEngine.POST("/rerank", pm.proxyInferenceHandler)
|
||||||
pm.ginEngine.POST("/v1/rerank", pm.proxyOAIHandler)
|
pm.ginEngine.POST("/v1/rerank", pm.proxyInferenceHandler)
|
||||||
pm.ginEngine.POST("/v1/reranking", pm.proxyOAIHandler)
|
pm.ginEngine.POST("/v1/reranking", pm.proxyInferenceHandler)
|
||||||
|
|
||||||
// llama-server's /infill endpoint for code infilling
|
// llama-server's /infill endpoint for code infilling
|
||||||
pm.ginEngine.POST("/infill", pm.proxyOAIHandler)
|
pm.ginEngine.POST("/infill", pm.proxyInferenceHandler)
|
||||||
|
|
||||||
// llama-server's /completion endpoint
|
// llama-server's /completion endpoint
|
||||||
pm.ginEngine.POST("/completion", pm.proxyOAIHandler)
|
pm.ginEngine.POST("/completion", pm.proxyInferenceHandler)
|
||||||
|
|
||||||
// Support audio/speech endpoint
|
// Support audio/speech endpoint
|
||||||
pm.ginEngine.POST("/v1/audio/speech", pm.proxyOAIHandler)
|
pm.ginEngine.POST("/v1/audio/speech", pm.proxyInferenceHandler)
|
||||||
pm.ginEngine.POST("/v1/audio/transcriptions", pm.proxyOAIPostFormHandler)
|
pm.ginEngine.POST("/v1/audio/transcriptions", pm.proxyOAIPostFormHandler)
|
||||||
|
|
||||||
pm.ginEngine.GET("/v1/models", pm.listModelsHandler)
|
pm.ginEngine.GET("/v1/models", pm.listModelsHandler)
|
||||||
@@ -545,7 +547,7 @@ func (pm *ProxyManager) proxyToUpstream(c *gin.Context) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (pm *ProxyManager) proxyOAIHandler(c *gin.Context) {
|
func (pm *ProxyManager) proxyInferenceHandler(c *gin.Context) {
|
||||||
bodyBytes, err := io.ReadAll(c.Request.Body)
|
bodyBytes, err := io.ReadAll(c.Request.Body)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
pm.sendErrorResponse(c, http.StatusBadRequest, "could not ready request body")
|
pm.sendErrorResponse(c, http.StatusBadRequest, "could not ready request body")
|
||||||
|
|||||||
Reference in New Issue
Block a user