Compare commits

...

4 Commits

Author SHA1 Message Date
Benson Wong a955a4a5c0 create tag to release 2024-12-14 10:07:20 -08:00
Benson Wong 22d3f1a4f9 Change versioning to use git commits counts instead of semver
- less work for me
- more frequent releases
2024-12-14 09:53:13 -08:00
Benson Wong e2443251ad update readme 2024-12-09 19:14:49 -08:00
Benson Wong 5fbd53c616 delay TTL check until after all requests are complete (#25)
- fixes #25 where requests that last longer than the TTL will cause the
  process to be unloaded before the next request.
- new behavior, TTL waits until all requests are complete before
  checking timeout
2024-12-09 19:08:03 -08:00
6 changed files with 64 additions and 20 deletions
+15 -6
View File
@@ -2,8 +2,8 @@ name: goreleaser
on:
push:
tags:
- '*'
branches:
- main
permissions:
contents: write
@@ -20,14 +20,23 @@ jobs:
-
name: Set up Go
uses: actions/setup-go@v5
- name: Get commit count
id: get_commit_count
run: echo "COMMIT_COUNT=$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
- name: Create release tag
run: |
git config user.name github-actions
git config user.email github-actions@github.com
git tag -a v${{ steps.get_commit_count.outputs.COMMIT_COUNT }} -m "Release v${{ steps.get_commit_count.outputs.COMMIT_COUNT }}"
git push origin v${{ steps.get_commit_count.outputs.COMMIT_COUNT }}
-
name: Run GoReleaser
uses: goreleaser/goreleaser-action@v6
with:
# either 'goreleaser' (default) or 'goreleaser-pro'
distribution: goreleaser
# 'latest', 'nightly', or a semver
version: '~> v2'
args: release --clean
version: latest
args: release --clean --snapshot
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+12 -2
View File
@@ -2,6 +2,16 @@
APP_NAME = llama-swap
BUILD_DIR = build
# Get the current Git hash
GIT_HASH := $(shell git rev-parse --short HEAD)
ifneq ($(shell git status --porcelain),)
# There are untracked changes
GIT_HASH := $(GIT_HASH)+
endif
# Get the build number from the commit count on the main branch
COMMIT_COUNT := $(shell git rev-list --count HEAD)
# Default target: Builds binaries for both OSX and Linux
all: mac linux simple-responder
@@ -18,12 +28,12 @@ test-all:
# Build OSX binary
mac:
@echo "Building Mac binary..."
GOOS=darwin GOARCH=arm64 go build -o $(BUILD_DIR)/$(APP_NAME)-darwin-arm64
GOOS=darwin GOARCH=arm64 go build -ldflags="-X main.GIT_HASH=${GIT_HASH} -X main.COMMIT_COUNT=${COMMIT_COUNT}" -o $(BUILD_DIR)/$(APP_NAME)-darwin-arm64
# Build Linux binary
linux:
@echo "Building Linux binary..."
GOOS=linux GOARCH=amd64 go build -o $(BUILD_DIR)/$(APP_NAME)-linux-amd64
GOOS=linux GOARCH=amd64 go build -ldflags="-X main.GIT_HASH=${GIT_HASH} -X main.COMMIT_COUNT=${COMMIT_COUNT}" -o $(BUILD_DIR)/$(APP_NAME)-linux-amd64
# for testing proxy.Process
simple-responder:
+1
View File
@@ -16,6 +16,7 @@ Features:
- ✅ Run multiple models at once with `profiles`
- ✅ Remote log monitoring at `/log`
- ✅ Automatic unloading of models from GPUs after timeout
- ✅ Use any local server that provides an OpenAI compatible API (llama.cpp, vllm, tabblyAPI, etc)
## Releases
+10
View File
@@ -9,13 +9,23 @@ import (
"github.com/mostlygeek/llama-swap/proxy"
)
// see Makefile which injects new values at build time
var GIT_HASH string = "abcd1234"
var COMMIT_COUNT string = "0-dev"
func main() {
// Define a command-line flag for the port
configPath := flag.String("config", "config.yaml", "config file name")
listenStr := flag.String("listen", ":8080", "listen ip/port")
showVersion := flag.Bool("version", false, "show version of build")
flag.Parse() // Parse the command-line flags
if *showVersion {
fmt.Printf("version: v%s (%s)\n", COMMIT_COUNT, GIT_HASH)
os.Exit(0)
}
config, err := proxy.LoadConfig(*configPath)
if err != nil {
fmt.Printf("Error loading config: %v\n", err)
+9 -8
View File
@@ -122,16 +122,15 @@ func (p *Process) start() error {
// start a goroutine to check every second if
// the process should be stopped
go func() {
ticker := time.NewTicker(time.Second)
defer ticker.Stop()
maxDuration := time.Duration(p.config.UnloadAfter) * time.Second
for {
<-ticker.C
for range time.Tick(time.Second) {
// wait for all inflight requests to complete and ticker
p.inFlightRequests.Wait()
if time.Since(p.lastRequestHandled) > maxDuration {
fmt.Fprintf(p.logMonitor, "!!! Unloading model %s, TTL of %d reached.\n", p.ID, p.config.UnloadAfter)
p.Stop()
return
}
}
}()
@@ -275,7 +274,11 @@ func (p *Process) checkHealthEndpoint(ctxFromStart context.Context) error {
func (p *Process) ProxyRequest(w http.ResponseWriter, r *http.Request) {
p.inFlightRequests.Add(1)
defer p.inFlightRequests.Done()
defer func() {
p.lastRequestHandled = time.Now()
p.inFlightRequests.Done()
}()
if p.CurrentState() != StateReady {
if err := p.start(); err != nil {
@@ -285,8 +288,6 @@ func (p *Process) ProxyRequest(w http.ResponseWriter, r *http.Request) {
}
}
p.lastRequestHandled = time.Now()
proxyTo := p.config.Proxy
client := &http.Client{}
req, err := http.NewRequest(r.Method, proxyTo+r.URL.String(), r.Body)
+17 -4
View File
@@ -82,18 +82,31 @@ func TestProcess_UnloadAfterTTL(t *testing.T) {
process := NewProcess("ttl", 2, config, NewLogMonitorWriter(io.Discard))
defer process.Stop()
req := httptest.NewRequest("GET", "/test", nil)
// this should take 4 seconds
req1 := httptest.NewRequest("GET", "/slow-respond?echo=1234&delay=1000ms", nil)
req2 := httptest.NewRequest("GET", "/test", nil)
w := httptest.NewRecorder()
// Proxy the request (auto start)
process.ProxyRequest(w, req)
// Proxy the request (auto start) with a slow response that takes longer than config.UnloadAfter
process.ProxyRequest(w, req1)
t.Log("sending slow first request (4 seconds)")
assert.Equal(t, http.StatusOK, w.Code, "Expected status code %d, got %d", http.StatusOK, w.Code)
assert.Contains(t, w.Body.String(), "1234")
assert.Equal(t, StateReady, process.CurrentState())
// ensure the TTL timeout does not race slow requests (see issue #25)
t.Log("sending second request (1 second)")
time.Sleep(time.Second)
w = httptest.NewRecorder()
process.ProxyRequest(w, req2)
assert.Equal(t, http.StatusOK, w.Code, "Expected status code %d, got %d", http.StatusOK, w.Code)
assert.Contains(t, w.Body.String(), expectedMessage)
assert.Equal(t, StateReady, process.CurrentState())
// wait 5 seconds
t.Log("sleep 5 seconds and check if unloaded")
time.Sleep(5 * time.Second)
assert.Equal(t, StateStopped, process.CurrentState())
}