Compare commits
6 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 5672cb03fd | |||
| 0f583163f7 | |||
| 7905fa9ea3 | |||
| bbaf172956 | |||
| fd50932dbc | |||
| 8c693e7fcf |
@@ -7,6 +7,10 @@ on:
|
|||||||
|
|
||||||
# Allows manual triggering of the workflow
|
# Allows manual triggering of the workflow
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
inputs:
|
||||||
|
tag:
|
||||||
|
description: 'Tag version to release (e.g. v144)'
|
||||||
|
required: true
|
||||||
|
|
||||||
permissions:
|
permissions:
|
||||||
contents: write
|
contents: write
|
||||||
@@ -20,15 +24,15 @@ jobs:
|
|||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
|
ref: ${{ github.event.inputs.tag || github.ref }}
|
||||||
-
|
-
|
||||||
name: Set up Go
|
name: Set up Go
|
||||||
uses: actions/setup-go@v5
|
uses: actions/setup-go@v5
|
||||||
|
|
||||||
-
|
-
|
||||||
name: Set up Node.js
|
name: Set up Node.js
|
||||||
uses: actions/setup-node@v4
|
uses: actions/setup-node@v4
|
||||||
with:
|
with:
|
||||||
node-version: '23' # or your preferred version
|
node-version: '23'
|
||||||
-
|
-
|
||||||
name: Install dependencies and build UI
|
name: Install dependencies and build UI
|
||||||
run: |
|
run: |
|
||||||
@@ -47,3 +51,29 @@ jobs:
|
|||||||
args: release --clean
|
args: release --clean
|
||||||
env:
|
env:
|
||||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
|
trigger-tap-update:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
needs: goreleaser
|
||||||
|
steps:
|
||||||
|
- name: "Resolve tag to dispatch"
|
||||||
|
id: tag
|
||||||
|
run: |
|
||||||
|
if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
|
||||||
|
echo "tag=${{ github.event.inputs.tag }}" >> "$GITHUB_OUTPUT"
|
||||||
|
else
|
||||||
|
echo "tag=${{ github.ref_name }}" >> "$GITHUB_OUTPUT"
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: "Trigger tap repository update"
|
||||||
|
uses: peter-evans/repository-dispatch@v2
|
||||||
|
with:
|
||||||
|
token: ${{ secrets.TAP_REPO_PAT }}
|
||||||
|
repository: mostlygeek/homebrew-llama-swap
|
||||||
|
event-type: new-release
|
||||||
|
client-payload: |
|
||||||
|
{
|
||||||
|
"release": {
|
||||||
|
"tag_name": "${{ steps.tag.outputs.tag }}"
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -18,7 +18,7 @@ Written in golang, it is very easy to install (single binary with no dependencie
|
|||||||
- `v1/completions`
|
- `v1/completions`
|
||||||
- `v1/chat/completions`
|
- `v1/chat/completions`
|
||||||
- `v1/embeddings`
|
- `v1/embeddings`
|
||||||
- `v1/rerank`
|
- `v1/rerank`, `v1/reranking`, `rerank`
|
||||||
- `v1/audio/speech` ([#36](https://github.com/mostlygeek/llama-swap/issues/36))
|
- `v1/audio/speech` ([#36](https://github.com/mostlygeek/llama-swap/issues/36))
|
||||||
- `v1/audio/transcriptions` ([docs](https://github.com/mostlygeek/llama-swap/issues/41#issuecomment-2722637867))
|
- `v1/audio/transcriptions` ([docs](https://github.com/mostlygeek/llama-swap/issues/41#issuecomment-2722637867))
|
||||||
- ✅ llama-swap custom API endpoints
|
- ✅ llama-swap custom API endpoints
|
||||||
@@ -122,6 +122,20 @@ $ docker run -it --rm --runtime nvidia -p 9292:8080 \
|
|||||||
|
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
|
## Homebrew Install (macOS/Linux)
|
||||||
|
|
||||||
|
For macOS & Linux users, `llama-swap` can be installed via [Homebrew](https://brew.sh):
|
||||||
|
|
||||||
|
```shell
|
||||||
|
# Set up tap and install formula
|
||||||
|
brew tap mostlygeek/llama-swap
|
||||||
|
brew install llama-swap
|
||||||
|
# Run llama-swap
|
||||||
|
llama-swap --config path/to/config.yaml --listen localhost:8080
|
||||||
|
```
|
||||||
|
|
||||||
|
This will install the `llama-swap` binary and make it available in your path. See the [configuration documentation](https://github.com/mostlygeek/llama-swap/wiki/Configuration)
|
||||||
|
|
||||||
## Bare metal Install ([download](https://github.com/mostlygeek/llama-swap/releases))
|
## Bare metal Install ([download](https://github.com/mostlygeek/llama-swap/releases))
|
||||||
|
|
||||||
Pre-built binaries are available for Linux, Mac, Windows and FreeBSD. These are automatically published and are likely a few hours ahead of the docker releases. The baremetal install works with any OpenAI compatible server, not just llama-server.
|
Pre-built binaries are available for Linux, Mac, Windows and FreeBSD. These are automatically published and are likely a few hours ahead of the docker releases. The baremetal install works with any OpenAI compatible server, not just llama-server.
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ func MetricsMiddleware(pm *ProxyManager) gin.HandlerFunc {
|
|||||||
bodyBytes, err := io.ReadAll(c.Request.Body)
|
bodyBytes, err := io.ReadAll(c.Request.Body)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
pm.sendErrorResponse(c, http.StatusBadRequest, "could not ready request body")
|
pm.sendErrorResponse(c, http.StatusBadRequest, "could not ready request body")
|
||||||
|
c.Abort()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
c.Request.Body = io.NopCloser(bytes.NewBuffer(bodyBytes))
|
c.Request.Body = io.NopCloser(bytes.NewBuffer(bodyBytes))
|
||||||
@@ -24,15 +25,16 @@ func MetricsMiddleware(pm *ProxyManager) gin.HandlerFunc {
|
|||||||
requestedModel := gjson.GetBytes(bodyBytes, "model").String()
|
requestedModel := gjson.GetBytes(bodyBytes, "model").String()
|
||||||
if requestedModel == "" {
|
if requestedModel == "" {
|
||||||
pm.sendErrorResponse(c, http.StatusBadRequest, "missing or invalid 'model' key")
|
pm.sendErrorResponse(c, http.StatusBadRequest, "missing or invalid 'model' key")
|
||||||
|
c.Abort()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
realModelName, found := pm.config.RealModelName(requestedModel)
|
realModelName, found := pm.config.RealModelName(requestedModel)
|
||||||
if !found {
|
if !found {
|
||||||
pm.sendErrorResponse(c, http.StatusBadRequest, fmt.Sprintf("could not find real modelID for %s", requestedModel))
|
pm.sendErrorResponse(c, http.StatusBadRequest, fmt.Sprintf("could not find real modelID for %s", requestedModel))
|
||||||
|
c.Abort()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
c.Set("ls-real-model-name", realModelName)
|
|
||||||
|
|
||||||
writer := &MetricsResponseWriter{
|
writer := &MetricsResponseWriter{
|
||||||
ResponseWriter: c.Writer,
|
ResponseWriter: c.Writer,
|
||||||
|
|||||||
+17
-5
@@ -14,6 +14,7 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/gin-gonic/gin"
|
"github.com/gin-gonic/gin"
|
||||||
|
"github.com/tidwall/gjson"
|
||||||
"github.com/tidwall/sjson"
|
"github.com/tidwall/sjson"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -160,8 +161,10 @@ func (pm *ProxyManager) setupGinEngine() {
|
|||||||
pm.ginEngine.POST("/v1/completions", mm, pm.proxyOAIHandler)
|
pm.ginEngine.POST("/v1/completions", mm, pm.proxyOAIHandler)
|
||||||
|
|
||||||
// Support embeddings
|
// Support embeddings
|
||||||
pm.ginEngine.POST("/v1/embeddings", pm.proxyOAIHandler)
|
pm.ginEngine.POST("/v1/embeddings", mm, pm.proxyOAIHandler)
|
||||||
pm.ginEngine.POST("/v1/rerank", pm.proxyOAIHandler)
|
pm.ginEngine.POST("/v1/rerank", mm, pm.proxyOAIHandler)
|
||||||
|
pm.ginEngine.POST("/v1/reranking", mm, pm.proxyOAIHandler)
|
||||||
|
pm.ginEngine.POST("/rerank", mm, pm.proxyOAIHandler)
|
||||||
|
|
||||||
// Support audio/speech endpoint
|
// Support audio/speech endpoint
|
||||||
pm.ginEngine.POST("/v1/audio/speech", pm.proxyOAIHandler)
|
pm.ginEngine.POST("/v1/audio/speech", pm.proxyOAIHandler)
|
||||||
@@ -188,6 +191,9 @@ func (pm *ProxyManager) setupGinEngine() {
|
|||||||
|
|
||||||
pm.ginEngine.GET("/unload", pm.unloadAllModelsHandler)
|
pm.ginEngine.GET("/unload", pm.unloadAllModelsHandler)
|
||||||
pm.ginEngine.GET("/running", pm.listRunningProcessesHandler)
|
pm.ginEngine.GET("/running", pm.listRunningProcessesHandler)
|
||||||
|
pm.ginEngine.GET("/health", func(c *gin.Context) {
|
||||||
|
c.String(http.StatusOK, "OK")
|
||||||
|
})
|
||||||
|
|
||||||
pm.ginEngine.GET("/favicon.ico", func(c *gin.Context) {
|
pm.ginEngine.GET("/favicon.ico", func(c *gin.Context) {
|
||||||
if data, err := reactStaticFS.ReadFile("ui_dist/favicon.ico"); err == nil {
|
if data, err := reactStaticFS.ReadFile("ui_dist/favicon.ico"); err == nil {
|
||||||
@@ -365,9 +371,15 @@ func (pm *ProxyManager) proxyOAIHandler(c *gin.Context) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
realModelName := c.GetString("ls-real-model-name") // Should be set in MetricsMiddleware
|
requestedModel := gjson.GetBytes(bodyBytes, "model").String()
|
||||||
if realModelName == "" {
|
if requestedModel == "" {
|
||||||
pm.sendErrorResponse(c, http.StatusInternalServerError, "ls-real-model-name not set")
|
pm.sendErrorResponse(c, http.StatusBadRequest, "missing or invalid 'model' key")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
realModelName, found := pm.config.RealModelName(requestedModel)
|
||||||
|
if !found {
|
||||||
|
pm.sendErrorResponse(c, http.StatusBadRequest, fmt.Sprintf("could not find real modelID for %s", requestedModel))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -755,3 +755,21 @@ func TestProxyManager_MiddlewareWritesMetrics_Streaming(t *testing.T) {
|
|||||||
assert.Greater(t, lastMetric.TokensPerSecond, 0.0, "tokens per second should be greater than 0")
|
assert.Greater(t, lastMetric.TokensPerSecond, 0.0, "tokens per second should be greater than 0")
|
||||||
assert.Greater(t, lastMetric.DurationMs, 0, "duration should be greater than 0")
|
assert.Greater(t, lastMetric.DurationMs, 0, "duration should be greater than 0")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestProxyManager_HealthEndpoint(t *testing.T) {
|
||||||
|
config := AddDefaultGroupToConfig(Config{
|
||||||
|
HealthCheckTimeout: 15,
|
||||||
|
Models: map[string]ModelConfig{
|
||||||
|
"model1": getTestSimpleResponderConfig("model1"),
|
||||||
|
},
|
||||||
|
LogLevel: "error",
|
||||||
|
})
|
||||||
|
|
||||||
|
proxy := New(config)
|
||||||
|
defer proxy.StopProcesses(StopWaitForInflightRequest)
|
||||||
|
req := httptest.NewRequest("GET", "/health", nil)
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
proxy.ServeHTTP(rec, req)
|
||||||
|
assert.Equal(t, http.StatusOK, rec.Code)
|
||||||
|
assert.Equal(t, "OK", rec.Body.String())
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user