Compare commits

..

7 Commits

Author SHA1 Message Date
Benson Wong 27302c0c02 change llama-swap to use goreleaser default ldflag values 2024-12-14 10:30:06 -08:00
Benson Wong d4e22cceaa Fix security vulnerability with golang.org/x/crypto
- does not affect the project as llama-swap does not use the crypto
  libraries
- good practice to keep security deps updated!
2024-12-14 10:20:22 -08:00
Benson Wong 4c94927658 Move release to Makefile out of goreleaser
- less complexity
- easier
- goreleaser, github, pipelines: 1...  mostlygeek: 0
2024-12-14 10:16:46 -08:00
Benson Wong a955a4a5c0 create tag to release 2024-12-14 10:07:20 -08:00
Benson Wong 22d3f1a4f9 Change versioning to use git commits counts instead of semver
- less work for me
- more frequent releases
2024-12-14 09:53:13 -08:00
Benson Wong e2443251ad update readme 2024-12-09 19:14:49 -08:00
Benson Wong 5fbd53c616 delay TTL check until after all requests are complete (#25)
- fixes #25 where requests that last longer than the TTL will cause the
  process to be unloaded before the next request.
- new behavior, TTL waits until all requests are complete before
  checking timeout
2024-12-09 19:08:03 -08:00
8 changed files with 73 additions and 18 deletions
+1 -1
View File
@@ -30,4 +30,4 @@ jobs:
version: '~> v2'
args: release --clean
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+26 -2
View File
@@ -2,6 +2,19 @@
APP_NAME = llama-swap
BUILD_DIR = build
# Get the current Git hash
GIT_HASH := $(shell git rev-parse --short HEAD)
ifneq ($(shell git status --porcelain),)
# There are untracked changes
GIT_HASH := $(GIT_HASH)+
endif
# Get the build number from the commit count on the main branch
COMMIT_COUNT := $(shell git rev-list --count HEAD)
# Capture the current build date in RFC3339 format
BUILD_DATE := $(shell date -u +"%Y-%m-%dT%H:%M:%SZ")
# Default target: Builds binaries for both OSX and Linux
all: mac linux simple-responder
@@ -18,12 +31,12 @@ test-all:
# Build OSX binary
mac:
@echo "Building Mac binary..."
GOOS=darwin GOARCH=arm64 go build -o $(BUILD_DIR)/$(APP_NAME)-darwin-arm64
GOOS=darwin GOARCH=arm64 go build -ldflags="-X main.commit=${GIT_HASH} -X main.version=${COMMIT_COUNT} -X main.date=${BUILD_DATE}" -o $(BUILD_DIR)/$(APP_NAME)-darwin-arm64
# Build Linux binary
linux:
@echo "Building Linux binary..."
GOOS=linux GOARCH=amd64 go build -o $(BUILD_DIR)/$(APP_NAME)-linux-amd64
GOOS=linux GOARCH=amd64 go build -ldflags="-X main.commit=${GIT_HASH} -X main.version=${COMMIT_COUNT} -X main.date=${BUILD_DATE}" -o $(BUILD_DIR)/$(APP_NAME)-linux-amd64
# for testing proxy.Process
simple-responder:
@@ -35,5 +48,16 @@ simple-responder:
$(BUILD_DIR):
mkdir -p $(BUILD_DIR)
# Create a new release tag
release:
@echo "Checking for unstaged changes..."
@if [ -n "$(shell git status --porcelain)" ]; then \
echo "Error: There are unstaged changes. Please commit or stash your changes before creating a release tag." >&2; \
exit 1; \
fi
@echo "Creating release tag v$(COMMIT_COUNT)..."
git tag v$(COMMIT_COUNT)
git push origin v$(COMMIT_COUNT)
# Phony targets
.PHONY: all clean osx linux
+1
View File
@@ -16,6 +16,7 @@ Features:
- ✅ Run multiple models at once with `profiles`
- ✅ Remote log monitoring at `/log`
- ✅ Automatic unloading of models from GPUs after timeout
- ✅ Use any local server that provides an OpenAI compatible API (llama.cpp, vllm, tabblyAPI, etc)
## Releases
+3 -3
View File
@@ -32,9 +32,9 @@ require (
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/ugorji/go/codec v1.2.12 // indirect
golang.org/x/arch v0.8.0 // indirect
golang.org/x/crypto v0.23.0 // indirect
golang.org/x/crypto v0.31.0 // indirect
golang.org/x/net v0.25.0 // indirect
golang.org/x/sys v0.20.0 // indirect
golang.org/x/text v0.15.0 // indirect
golang.org/x/sys v0.28.0 // indirect
golang.org/x/text v0.21.0 // indirect
google.golang.org/protobuf v1.34.1 // indirect
)
+6
View File
@@ -66,14 +66,20 @@ golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc=
golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI=
golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U=
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac=
golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA=
golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk=
golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg=
google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
+10
View File
@@ -9,13 +9,23 @@ import (
"github.com/mostlygeek/llama-swap/proxy"
)
var version string = "0"
var commit string = "abcd1234"
var date = "unknown"
func main() {
// Define a command-line flag for the port
configPath := flag.String("config", "config.yaml", "config file name")
listenStr := flag.String("listen", ":8080", "listen ip/port")
showVersion := flag.Bool("version", false, "show version of build")
flag.Parse() // Parse the command-line flags
if *showVersion {
fmt.Printf("version: v%s (%s), built at %s\n", version, commit, date)
os.Exit(0)
}
config, err := proxy.LoadConfig(*configPath)
if err != nil {
fmt.Printf("Error loading config: %v\n", err)
+9 -8
View File
@@ -122,16 +122,15 @@ func (p *Process) start() error {
// start a goroutine to check every second if
// the process should be stopped
go func() {
ticker := time.NewTicker(time.Second)
defer ticker.Stop()
maxDuration := time.Duration(p.config.UnloadAfter) * time.Second
for {
<-ticker.C
for range time.Tick(time.Second) {
// wait for all inflight requests to complete and ticker
p.inFlightRequests.Wait()
if time.Since(p.lastRequestHandled) > maxDuration {
fmt.Fprintf(p.logMonitor, "!!! Unloading model %s, TTL of %d reached.\n", p.ID, p.config.UnloadAfter)
p.Stop()
return
}
}
}()
@@ -275,7 +274,11 @@ func (p *Process) checkHealthEndpoint(ctxFromStart context.Context) error {
func (p *Process) ProxyRequest(w http.ResponseWriter, r *http.Request) {
p.inFlightRequests.Add(1)
defer p.inFlightRequests.Done()
defer func() {
p.lastRequestHandled = time.Now()
p.inFlightRequests.Done()
}()
if p.CurrentState() != StateReady {
if err := p.start(); err != nil {
@@ -285,8 +288,6 @@ func (p *Process) ProxyRequest(w http.ResponseWriter, r *http.Request) {
}
}
p.lastRequestHandled = time.Now()
proxyTo := p.config.Proxy
client := &http.Client{}
req, err := http.NewRequest(r.Method, proxyTo+r.URL.String(), r.Body)
+17 -4
View File
@@ -82,18 +82,31 @@ func TestProcess_UnloadAfterTTL(t *testing.T) {
process := NewProcess("ttl", 2, config, NewLogMonitorWriter(io.Discard))
defer process.Stop()
req := httptest.NewRequest("GET", "/test", nil)
// this should take 4 seconds
req1 := httptest.NewRequest("GET", "/slow-respond?echo=1234&delay=1000ms", nil)
req2 := httptest.NewRequest("GET", "/test", nil)
w := httptest.NewRecorder()
// Proxy the request (auto start)
process.ProxyRequest(w, req)
// Proxy the request (auto start) with a slow response that takes longer than config.UnloadAfter
process.ProxyRequest(w, req1)
t.Log("sending slow first request (4 seconds)")
assert.Equal(t, http.StatusOK, w.Code, "Expected status code %d, got %d", http.StatusOK, w.Code)
assert.Contains(t, w.Body.String(), "1234")
assert.Equal(t, StateReady, process.CurrentState())
// ensure the TTL timeout does not race slow requests (see issue #25)
t.Log("sending second request (1 second)")
time.Sleep(time.Second)
w = httptest.NewRecorder()
process.ProxyRequest(w, req2)
assert.Equal(t, http.StatusOK, w.Code, "Expected status code %d, got %d", http.StatusOK, w.Code)
assert.Contains(t, w.Body.String(), expectedMessage)
assert.Equal(t, StateReady, process.CurrentState())
// wait 5 seconds
t.Log("sleep 5 seconds and check if unloaded")
time.Sleep(5 * time.Second)
assert.Equal(t, StateStopped, process.CurrentState())
}