create tag to release

Change versioning to use git commits counts instead of semver
- less work for me - more frequent releases
2024-12-14 10:07:20 -08:00 · 2024-12-14 09:53:13 -08:00 · 2024-12-09 19:14:49 -08:00 · 2024-12-09 19:08:03 -08:00
6 changed files with 64 additions and 20 deletions
@@ -2,8 +2,8 @@ name: goreleaser

 on:
  push:
-    tags:
-      - '*'
+    branches:
+      - main

 permissions:
  contents: write
@@ -20,14 +20,23 @@ jobs:
      -
        name: Set up Go
        uses: actions/setup-go@v5
+
+      - name: Get commit count
+        id: get_commit_count
+        run: echo "COMMIT_COUNT=$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
+
+      - name: Create release tag
+        run: |
+          git config user.name github-actions
+          git config user.email github-actions@github.com
+          git tag -a v${{ steps.get_commit_count.outputs.COMMIT_COUNT }} -m "Release v${{ steps.get_commit_count.outputs.COMMIT_COUNT }}"
+          git push origin v${{ steps.get_commit_count.outputs.COMMIT_COUNT }}
      -
        name: Run GoReleaser
        uses: goreleaser/goreleaser-action@v6
        with:
-          # either 'goreleaser' (default) or 'goreleaser-pro'
          distribution: goreleaser
-          # 'latest', 'nightly', or a semver
-          version: '~> v2'
-          args: release --clean
+          version: latest
+          args: release --clean --snapshot
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -2,6 +2,16 @@
 APP_NAME = llama-swap
 BUILD_DIR = build

+# Get the current Git hash
+GIT_HASH := $(shell git rev-parse --short HEAD)
+ifneq ($(shell git status --porcelain),)
+    # There are uncommitted changes (modified, staged, or untracked files)
+    GIT_HASH := $(GIT_HASH)+
+endif
+
+# Get the build number from the count of commits reachable from HEAD
+COMMIT_COUNT := $(shell git rev-list --count HEAD)
+
 # Default target: Builds binaries for both OSX and Linux
 all: mac linux simple-responder

@@ -18,12 +28,12 @@ test-all:
 # Build OSX binary
 mac:
 	@echo "Building Mac binary..."
-	GOOS=darwin GOARCH=arm64 go build -o $(BUILD_DIR)/$(APP_NAME)-darwin-arm64
+	GOOS=darwin GOARCH=arm64 go build -ldflags="-X main.GIT_HASH=${GIT_HASH} -X main.COMMIT_COUNT=${COMMIT_COUNT}" -o $(BUILD_DIR)/$(APP_NAME)-darwin-arm64

 # Build Linux binary
 linux:
 	@echo "Building Linux binary..."
-	GOOS=linux GOARCH=amd64 go build -o $(BUILD_DIR)/$(APP_NAME)-linux-amd64
+	GOOS=linux GOARCH=amd64 go build -ldflags="-X main.GIT_HASH=${GIT_HASH} -X main.COMMIT_COUNT=${COMMIT_COUNT}" -o $(BUILD_DIR)/$(APP_NAME)-linux-amd64

 # for testing proxy.Process
 simple-responder:
@@ -16,6 +16,7 @@ Features:
 - ✅ Run multiple models at once with `profiles`
 - ✅ Remote log monitoring at `/log`
 - ✅ Automatic unloading of models from GPUs after timeout
+- ✅ Use any local server that provides an OpenAI compatible API (llama.cpp, vllm, tabbyAPI, etc)

 ## Releases

@@ -9,13 +9,23 @@ import (
 	"github.com/mostlygeek/llama-swap/proxy"
 )

+// see Makefile which injects new values at build time
+var GIT_HASH string = "abcd1234"
+var COMMIT_COUNT string = "0-dev"
+
 func main() {
 	// Define a command-line flag for the port
 	configPath := flag.String("config", "config.yaml", "config file name")
 	listenStr := flag.String("listen", ":8080", "listen ip/port")
+	showVersion := flag.Bool("version", false, "show version of build")

 	flag.Parse() // Parse the command-line flags

+	if *showVersion {
+		fmt.Printf("version: v%s (%s)\n", COMMIT_COUNT, GIT_HASH)
+		os.Exit(0)
+	}
+
 	config, err := proxy.LoadConfig(*configPath)
 	if err != nil {
 		fmt.Printf("Error loading config: %v\n", err)
@@ -122,16 +122,15 @@ func (p *Process) start() error {
 		// start a goroutine to check every second if
 		// the process should be stopped
 		go func() {
-			ticker := time.NewTicker(time.Second)
-			defer ticker.Stop()
 			maxDuration := time.Duration(p.config.UnloadAfter) * time.Second

-			for {
-				<-ticker.C
+			for range time.Tick(time.Second) {
+				// on each tick, wait for all in-flight requests to complete before checking the TTL
+				p.inFlightRequests.Wait()
+
 				if time.Since(p.lastRequestHandled) > maxDuration {
 					fmt.Fprintf(p.logMonitor, "!!! Unloading model %s, TTL of %d reached.\n", p.ID, p.config.UnloadAfter)
 					p.Stop()
-					return
 				}
 			}
 		}()
@@ -275,7 +274,11 @@ func (p *Process) checkHealthEndpoint(ctxFromStart context.Context) error {
 func (p *Process) ProxyRequest(w http.ResponseWriter, r *http.Request) {

 	p.inFlightRequests.Add(1)
-	defer p.inFlightRequests.Done()
+
+	defer func() {
+		p.lastRequestHandled = time.Now()
+		p.inFlightRequests.Done()
+	}()

 	if p.CurrentState() != StateReady {
 		if err := p.start(); err != nil {
@@ -285,8 +288,6 @@ func (p *Process) ProxyRequest(w http.ResponseWriter, r *http.Request) {
 		}
 	}

-	p.lastRequestHandled = time.Now()
-
 	proxyTo := p.config.Proxy
 	client := &http.Client{}
 	req, err := http.NewRequest(r.Method, proxyTo+r.URL.String(), r.Body)
@@ -82,18 +82,31 @@ func TestProcess_UnloadAfterTTL(t *testing.T) {
 	process := NewProcess("ttl", 2, config, NewLogMonitorWriter(io.Discard))
 	defer process.Stop()

-	req := httptest.NewRequest("GET", "/test", nil)
+	// this should take 4 seconds
+	req1 := httptest.NewRequest("GET", "/slow-respond?echo=1234&delay=1000ms", nil)
+	req2 := httptest.NewRequest("GET", "/test", nil)
+
 	w := httptest.NewRecorder()

-	// Proxy the request (auto start)
-	process.ProxyRequest(w, req)
+	// Proxy the request (auto start) with a slow response that takes longer than config.UnloadAfter
+	process.ProxyRequest(w, req1)

+	t.Log("sending slow first request (4 seconds)")
+	assert.Equal(t, http.StatusOK, w.Code, "Expected status code %d, got %d", http.StatusOK, w.Code)
+	assert.Contains(t, w.Body.String(), "1234")
+	assert.Equal(t, StateReady, process.CurrentState())
+
+	// ensure the TTL timeout does not race slow requests (see issue #25)
+	t.Log("sending second request (1 second)")
+	time.Sleep(time.Second)
+	w = httptest.NewRecorder()
+	process.ProxyRequest(w, req2)
 	assert.Equal(t, http.StatusOK, w.Code, "Expected status code %d, got %d", http.StatusOK, w.Code)
 	assert.Contains(t, w.Body.String(), expectedMessage)
-
 	assert.Equal(t, StateReady, process.CurrentState())

 	// wait 5 seconds
+	t.Log("sleep 5 seconds and check if unloaded")
 	time.Sleep(5 * time.Second)
 	assert.Equal(t, StateStopped, process.CurrentState())
 }
Author	SHA1	Message	Date
Benson Wong	a955a4a5c0	create tag to release	2024-12-14 10:07:20 -08:00
Benson Wong	22d3f1a4f9	Change versioning to use git commits counts instead of semver - less work for me - more frequent releases	2024-12-14 09:53:13 -08:00
Benson Wong	e2443251ad	update readme	2024-12-09 19:14:49 -08:00
Benson Wong	5fbd53c616	delay TTL check until after all requests are complete (#25 ) - fixes #25 where requests that last longer than the TTL will cause the process to be unloaded before the next request. - new behavior, TTL waits until all requests are complete before checking timeout	2024-12-09 19:08:03 -08:00