Update github actions for notifying homebrew build (#212)

Combine homebrew-llama-swap event with the release action
add /health (#211)
2025-07-30 11:29:03 -07:00 · 2025-07-30 10:37:10 -07:00 · 2025-07-30 10:13:49 -07:00 · 2025-07-30 10:12:21 -07:00 · 2025-07-27 10:36:06 -07:00 · 2025-07-24 08:32:47 -07:00
6 changed files with 93 additions and 14 deletions
@@ -7,6 +7,10 @@ on:

  # Allows manual triggering of the workflow
  workflow_dispatch:
+    inputs:
+      tag:
+        description: 'Tag version to release (e.g. v144)'
+        required: true

 permissions:
  contents: write
@@ -20,15 +24,15 @@ jobs:
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
+          ref: ${{ github.event.inputs.tag || github.ref }}
      -
        name: Set up Go
        uses: actions/setup-go@v5
-
      -
        name: Set up Node.js
        uses: actions/setup-node@v4
        with:
-          node-version: '23'  # or your preferred version
+          node-version: '23'
      -
        name: Install dependencies and build UI
        run: |
@@ -46,4 +50,30 @@ jobs:
          version: '~> v2'
          args: release --clean
        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+  trigger-tap-update:
+    runs-on: ubuntu-latest
+    needs: goreleaser
+    steps:
+      - name: "Resolve tag to dispatch"
+        id: tag
+        run: |
+          if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
+            echo "tag=${{ github.event.inputs.tag }}" >> "$GITHUB_OUTPUT"
+          else
+            echo "tag=${{ github.ref_name }}" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: "Trigger tap repository update"
+        uses: peter-evans/repository-dispatch@v2
+        with:
+          token: ${{ secrets.TAP_REPO_PAT }}
+          repository: mostlygeek/homebrew-llama-swap
+          event-type: new-release
+          client-payload: |
+            {
+              "release": {
+                "tag_name": "${{ steps.tag.outputs.tag }}"
+              }
+            }
@@ -18,7 +18,7 @@ Written in golang, it is very easy to install (single binary with no dependencie
  - `v1/completions`
  - `v1/chat/completions`
  - `v1/embeddings`
-  - `v1/rerank`
+  - `v1/rerank`, `v1/reranking`, `rerank`
  - `v1/audio/speech` ([#36](https://github.com/mostlygeek/llama-swap/issues/36))
  - `v1/audio/transcriptions` ([docs](https://github.com/mostlygeek/llama-swap/issues/41#issuecomment-2722637867))
 - ✅ llama-swap custom API endpoints
@@ -122,6 +122,20 @@ $ docker run -it --rm --runtime nvidia -p 9292:8080 \

 </details>

+## Homebrew Install (macOS/Linux)
+
+For macOS & Linux users, `llama-swap` can be installed via [Homebrew](https://brew.sh):
+
+```shell
+# Set up tap and install formula 
+brew tap mostlygeek/llama-swap
+brew install llama-swap
+# Run llama-swap
+llama-swap --config path/to/config.yaml --listen localhost:8080
+```
+
+This will install the `llama-swap` binary and make it available on your `PATH`. See the [configuration documentation](https://github.com/mostlygeek/llama-swap/wiki/Configuration) for details on writing the config file.
+
 ## Bare metal Install ([download](https://github.com/mostlygeek/llama-swap/releases))

 Pre-built binaries are available for Linux, Mac, Windows and FreeBSD. These are automatically published and are likely a few hours ahead of the docker releases. The baremetal install works with any OpenAI compatible server, not just llama-server.
@@ -17,6 +17,7 @@ func MetricsMiddleware(pm *ProxyManager) gin.HandlerFunc {
 		bodyBytes, err := io.ReadAll(c.Request.Body)
 		if err != nil {
 			pm.sendErrorResponse(c, http.StatusBadRequest, "could not ready request body")
+			c.Abort()
 			return
 		}
 		c.Request.Body = io.NopCloser(bytes.NewBuffer(bodyBytes))
@@ -24,15 +25,16 @@ func MetricsMiddleware(pm *ProxyManager) gin.HandlerFunc {
 		requestedModel := gjson.GetBytes(bodyBytes, "model").String()
 		if requestedModel == "" {
 			pm.sendErrorResponse(c, http.StatusBadRequest, "missing or invalid 'model' key")
+			c.Abort()
 			return
 		}

 		realModelName, found := pm.config.RealModelName(requestedModel)
 		if !found {
 			pm.sendErrorResponse(c, http.StatusBadRequest, fmt.Sprintf("could not find real modelID for %s", requestedModel))
+			c.Abort()
 			return
 		}
-		c.Set("ls-real-model-name", realModelName)

 		writer := &MetricsResponseWriter{
 			ResponseWriter: c.Writer,
@@ -14,6 +14,7 @@ import (
 	"time"

 	"github.com/gin-gonic/gin"
+	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )

@@ -160,8 +161,10 @@ func (pm *ProxyManager) setupGinEngine() {
 	pm.ginEngine.POST("/v1/completions", mm, pm.proxyOAIHandler)

 	// Support embeddings
-	pm.ginEngine.POST("/v1/embeddings", pm.proxyOAIHandler)
-	pm.ginEngine.POST("/v1/rerank", pm.proxyOAIHandler)
+	pm.ginEngine.POST("/v1/embeddings", mm, pm.proxyOAIHandler)
+	pm.ginEngine.POST("/v1/rerank", mm, pm.proxyOAIHandler)
+	pm.ginEngine.POST("/v1/reranking", mm, pm.proxyOAIHandler)
+	pm.ginEngine.POST("/rerank", mm, pm.proxyOAIHandler)

 	// Support audio/speech endpoint
 	pm.ginEngine.POST("/v1/audio/speech", pm.proxyOAIHandler)
@@ -188,6 +191,9 @@ func (pm *ProxyManager) setupGinEngine() {

 	pm.ginEngine.GET("/unload", pm.unloadAllModelsHandler)
 	pm.ginEngine.GET("/running", pm.listRunningProcessesHandler)
+	pm.ginEngine.GET("/health", func(c *gin.Context) {
+		c.String(http.StatusOK, "OK")
+	})

 	pm.ginEngine.GET("/favicon.ico", func(c *gin.Context) {
 		if data, err := reactStaticFS.ReadFile("ui_dist/favicon.ico"); err == nil {
@@ -365,9 +371,15 @@ func (pm *ProxyManager) proxyOAIHandler(c *gin.Context) {
 		return
 	}

-	realModelName := c.GetString("ls-real-model-name") // Should be set in MetricsMiddleware
-	if realModelName == "" {
-		pm.sendErrorResponse(c, http.StatusInternalServerError, "ls-real-model-name not set")
+	requestedModel := gjson.GetBytes(bodyBytes, "model").String()
+	if requestedModel == "" {
+		pm.sendErrorResponse(c, http.StatusBadRequest, "missing or invalid 'model' key")
+		return
+	}
+
+	realModelName, found := pm.config.RealModelName(requestedModel)
+	if !found {
+		pm.sendErrorResponse(c, http.StatusBadRequest, fmt.Sprintf("could not find real modelID for %s", requestedModel))
 		return
 	}

@@ -755,3 +755,21 @@ func TestProxyManager_MiddlewareWritesMetrics_Streaming(t *testing.T) {
 	assert.Greater(t, lastMetric.TokensPerSecond, 0.0, "tokens per second should be greater than 0")
 	assert.Greater(t, lastMetric.DurationMs, 0, "duration should be greater than 0")
 }
+
+func TestProxyManager_HealthEndpoint(t *testing.T) {
+	config := AddDefaultGroupToConfig(Config{
+		HealthCheckTimeout: 15,
+		Models: map[string]ModelConfig{
+			"model1": getTestSimpleResponderConfig("model1"),
+		},
+		LogLevel: "error",
+	})
+
+	proxy := New(config)
+	defer proxy.StopProcesses(StopWaitForInflightRequest)
+	req := httptest.NewRequest("GET", "/health", nil)
+	rec := httptest.NewRecorder()
+	proxy.ServeHTTP(rec, req)
+	assert.Equal(t, http.StatusOK, rec.Code)
+	assert.Equal(t, "OK", rec.Body.String())
+}
@@ -27,10 +27,13 @@ export default function ModelsPage() {
  }, []);

  const [totalRequests, totalTokens, avgTokensPerSecond] = useMemo(() => {
-    const totalTokens = metrics.reduce((sum, m) => sum + m.input_tokens + m.output_tokens, 0);
-    const totalSeconds = metrics.reduce((sum, m) => sum + m.duration_ms / 1000, 0);
-    const avgTokensPerSecond = totalSeconds > 0 ? totalTokens / totalSeconds : 0;
-    return [metrics.length, totalTokens, avgTokensPerSecond.toFixed(2)];
+    const totalRequests = metrics.length;
+    if (totalRequests === 0) {
+      return [0, 0, 0];
+    }
+    const totalTokens = metrics.reduce((sum, m) => sum + m.output_tokens, 0);
+    const avgTokensPerSecond = (metrics.reduce((sum, m) => sum + m.tokens_per_second, 0) / totalRequests).toFixed(2);
+    return [totalRequests, totalTokens, avgTokensPerSecond];
  }, [metrics]);

  return (
Author	SHA1	Message	Date
Benson Wong	5672cb03fd	Update github actions for notifying homebrew build (#212 ) Combine homebrew-llama-swap event with the release action	2025-07-30 11:29:03 -07:00
Benson Wong	0f583163f7	add /health (#211 )	2025-07-30 10:37:10 -07:00
Benson Wong	7905fa9ea3	Update trigger-homebrew-update.yml [skip ci]	2025-07-30 10:13:49 -07:00
Ian Sebastian Mathew	bbaf172956	add trigger to rebuild homebrew formula (#210 )	2025-07-30 10:12:21 -07:00
Benson Wong	fd50932dbc	Decouple MetricsMiddleware from downstream handlers (#206 ) * Decouple MetricsMiddleware from downstream handlers Remove ls-real-model-name optimization. Within proxyOAIHandler the request body's bytes are required for various rewriting features anyways. This negated any benefits from trying not to parse it twice.	2025-07-27 10:36:06 -07:00
Gaël James	8c693e7fcf	Add endpoint aliases for reranking models (#201 ) * Add endpoint aliases for reranking models * Add MetricsMiddleware to the previous reranking endpoint * Fix the embeddings endpoint not having model set	2025-07-24 08:32:47 -07:00
Benson Wong	8f2af26a41	fix stats on model page	2025-07-23 13:57:33 -07:00