Compare commits
7 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 27302c0c02 | |||
| d4e22cceaa | |||
| 4c94927658 | |||
| a955a4a5c0 | |||
| 22d3f1a4f9 | |||
| e2443251ad | |||
| 5fbd53c616 |
@@ -30,4 +30,4 @@ jobs:
|
|||||||
version: '~> v2'
|
version: '~> v2'
|
||||||
args: release --clean
|
args: release --clean
|
||||||
env:
|
env:
|
||||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
@@ -2,6 +2,19 @@
|
|||||||
APP_NAME = llama-swap
|
APP_NAME = llama-swap
|
||||||
BUILD_DIR = build
|
BUILD_DIR = build
|
||||||
|
|
||||||
|
# Get the current Git hash
|
||||||
|
GIT_HASH := $(shell git rev-parse --short HEAD)
|
||||||
|
ifneq ($(shell git status --porcelain),)
|
||||||
|
# There are uncommitted changes (modified, staged, or untracked files)
|
||||||
|
GIT_HASH := $(GIT_HASH)+
|
||||||
|
endif
|
||||||
|
|
||||||
|
# Get the build number from the count of commits reachable from the current HEAD
|
||||||
|
COMMIT_COUNT := $(shell git rev-list --count HEAD)
|
||||||
|
|
||||||
|
# Capture the current build date in RFC3339 format
|
||||||
|
BUILD_DATE := $(shell date -u +"%Y-%m-%dT%H:%M:%SZ")
|
||||||
|
|
||||||
# Default target: Builds binaries for both OSX and Linux
|
# Default target: Builds binaries for both OSX and Linux
|
||||||
all: mac linux simple-responder
|
all: mac linux simple-responder
|
||||||
|
|
||||||
@@ -18,12 +31,12 @@ test-all:
|
|||||||
# Build OSX binary
|
# Build OSX binary
|
||||||
mac:
|
mac:
|
||||||
@echo "Building Mac binary..."
|
@echo "Building Mac binary..."
|
||||||
GOOS=darwin GOARCH=arm64 go build -o $(BUILD_DIR)/$(APP_NAME)-darwin-arm64
|
GOOS=darwin GOARCH=arm64 go build -ldflags="-X main.commit=${GIT_HASH} -X main.version=${COMMIT_COUNT} -X main.date=${BUILD_DATE}" -o $(BUILD_DIR)/$(APP_NAME)-darwin-arm64
|
||||||
|
|
||||||
# Build Linux binary
|
# Build Linux binary
|
||||||
linux:
|
linux:
|
||||||
@echo "Building Linux binary..."
|
@echo "Building Linux binary..."
|
||||||
GOOS=linux GOARCH=amd64 go build -o $(BUILD_DIR)/$(APP_NAME)-linux-amd64
|
GOOS=linux GOARCH=amd64 go build -ldflags="-X main.commit=${GIT_HASH} -X main.version=${COMMIT_COUNT} -X main.date=${BUILD_DATE}" -o $(BUILD_DIR)/$(APP_NAME)-linux-amd64
|
||||||
|
|
||||||
# for testing proxy.Process
|
# for testing proxy.Process
|
||||||
simple-responder:
|
simple-responder:
|
||||||
@@ -35,5 +48,16 @@ simple-responder:
|
|||||||
$(BUILD_DIR):
|
$(BUILD_DIR):
|
||||||
mkdir -p $(BUILD_DIR)
|
mkdir -p $(BUILD_DIR)
|
||||||
|
|
||||||
|
# Create a new release tag
|
||||||
|
release:
|
||||||
|
@echo "Checking for unstaged changes..."
|
||||||
|
@if [ -n "$(shell git status --porcelain)" ]; then \
|
||||||
|
echo "Error: There are unstaged changes. Please commit or stash your changes before creating a release tag." >&2; \
|
||||||
|
exit 1; \
|
||||||
|
fi
|
||||||
|
@echo "Creating release tag v$(COMMIT_COUNT)..."
|
||||||
|
git tag v$(COMMIT_COUNT)
|
||||||
|
git push origin v$(COMMIT_COUNT)
|
||||||
|
|
||||||
# Phony targets
|
# Phony targets
|
||||||
.PHONY: all clean osx linux
|
.PHONY: all clean osx linux
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ Features:
|
|||||||
- ✅ Run multiple models at once with `profiles`
|
- ✅ Run multiple models at once with `profiles`
|
||||||
- ✅ Remote log monitoring at `/log`
|
- ✅ Remote log monitoring at `/log`
|
||||||
- ✅ Automatic unloading of models from GPUs after timeout
|
- ✅ Automatic unloading of models from GPUs after timeout
|
||||||
|
- ✅ Use any local server that provides an OpenAI compatible API (llama.cpp, vllm, tabbyAPI, etc)
|
||||||
|
|
||||||
## Releases
|
## Releases
|
||||||
|
|
||||||
|
|||||||
@@ -32,9 +32,9 @@ require (
|
|||||||
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
|
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
|
||||||
github.com/ugorji/go/codec v1.2.12 // indirect
|
github.com/ugorji/go/codec v1.2.12 // indirect
|
||||||
golang.org/x/arch v0.8.0 // indirect
|
golang.org/x/arch v0.8.0 // indirect
|
||||||
golang.org/x/crypto v0.23.0 // indirect
|
golang.org/x/crypto v0.31.0 // indirect
|
||||||
golang.org/x/net v0.25.0 // indirect
|
golang.org/x/net v0.25.0 // indirect
|
||||||
golang.org/x/sys v0.20.0 // indirect
|
golang.org/x/sys v0.28.0 // indirect
|
||||||
golang.org/x/text v0.15.0 // indirect
|
golang.org/x/text v0.21.0 // indirect
|
||||||
google.golang.org/protobuf v1.34.1 // indirect
|
google.golang.org/protobuf v1.34.1 // indirect
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -66,14 +66,20 @@ golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc=
|
|||||||
golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
|
golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
|
||||||
golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI=
|
golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI=
|
||||||
golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
|
golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
|
||||||
|
golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U=
|
||||||
|
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
|
||||||
golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac=
|
golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac=
|
||||||
golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
|
golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
|
||||||
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
|
golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
|
||||||
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||||
|
golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA=
|
||||||
|
golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||||
golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk=
|
golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk=
|
||||||
golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
|
golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
|
||||||
|
golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
|
||||||
|
golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
|
||||||
google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg=
|
google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg=
|
||||||
google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
|
google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
|
||||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||||
|
|||||||
@@ -9,13 +9,23 @@ import (
|
|||||||
"github.com/mostlygeek/llama-swap/proxy"
|
"github.com/mostlygeek/llama-swap/proxy"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var version string = "0"
|
||||||
|
var commit string = "abcd1234"
|
||||||
|
var date = "unknown"
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
// Define a command-line flag for the port
|
// Define a command-line flag for the port
|
||||||
configPath := flag.String("config", "config.yaml", "config file name")
|
configPath := flag.String("config", "config.yaml", "config file name")
|
||||||
listenStr := flag.String("listen", ":8080", "listen ip/port")
|
listenStr := flag.String("listen", ":8080", "listen ip/port")
|
||||||
|
showVersion := flag.Bool("version", false, "show version of build")
|
||||||
|
|
||||||
flag.Parse() // Parse the command-line flags
|
flag.Parse() // Parse the command-line flags
|
||||||
|
|
||||||
|
if *showVersion {
|
||||||
|
fmt.Printf("version: v%s (%s), built at %s\n", version, commit, date)
|
||||||
|
os.Exit(0)
|
||||||
|
}
|
||||||
|
|
||||||
config, err := proxy.LoadConfig(*configPath)
|
config, err := proxy.LoadConfig(*configPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Printf("Error loading config: %v\n", err)
|
fmt.Printf("Error loading config: %v\n", err)
|
||||||
|
|||||||
+9
-8
@@ -122,16 +122,15 @@ func (p *Process) start() error {
|
|||||||
// start a goroutine to check every second if
|
// start a goroutine to check every second if
|
||||||
// the process should be stopped
|
// the process should be stopped
|
||||||
go func() {
|
go func() {
|
||||||
ticker := time.NewTicker(time.Second)
|
|
||||||
defer ticker.Stop()
|
|
||||||
maxDuration := time.Duration(p.config.UnloadAfter) * time.Second
|
maxDuration := time.Duration(p.config.UnloadAfter) * time.Second
|
||||||
|
|
||||||
for {
|
for range time.Tick(time.Second) {
|
||||||
<-ticker.C
|
// on every tick, wait for all in-flight requests to complete
|
||||||
|
p.inFlightRequests.Wait()
|
||||||
|
|
||||||
if time.Since(p.lastRequestHandled) > maxDuration {
|
if time.Since(p.lastRequestHandled) > maxDuration {
|
||||||
fmt.Fprintf(p.logMonitor, "!!! Unloading model %s, TTL of %d reached.\n", p.ID, p.config.UnloadAfter)
|
fmt.Fprintf(p.logMonitor, "!!! Unloading model %s, TTL of %d reached.\n", p.ID, p.config.UnloadAfter)
|
||||||
p.Stop()
|
p.Stop()
|
||||||
return
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
@@ -275,7 +274,11 @@ func (p *Process) checkHealthEndpoint(ctxFromStart context.Context) error {
|
|||||||
func (p *Process) ProxyRequest(w http.ResponseWriter, r *http.Request) {
|
func (p *Process) ProxyRequest(w http.ResponseWriter, r *http.Request) {
|
||||||
|
|
||||||
p.inFlightRequests.Add(1)
|
p.inFlightRequests.Add(1)
|
||||||
defer p.inFlightRequests.Done()
|
|
||||||
|
defer func() {
|
||||||
|
p.lastRequestHandled = time.Now()
|
||||||
|
p.inFlightRequests.Done()
|
||||||
|
}()
|
||||||
|
|
||||||
if p.CurrentState() != StateReady {
|
if p.CurrentState() != StateReady {
|
||||||
if err := p.start(); err != nil {
|
if err := p.start(); err != nil {
|
||||||
@@ -285,8 +288,6 @@ func (p *Process) ProxyRequest(w http.ResponseWriter, r *http.Request) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
p.lastRequestHandled = time.Now()
|
|
||||||
|
|
||||||
proxyTo := p.config.Proxy
|
proxyTo := p.config.Proxy
|
||||||
client := &http.Client{}
|
client := &http.Client{}
|
||||||
req, err := http.NewRequest(r.Method, proxyTo+r.URL.String(), r.Body)
|
req, err := http.NewRequest(r.Method, proxyTo+r.URL.String(), r.Body)
|
||||||
|
|||||||
+17
-4
@@ -82,18 +82,31 @@ func TestProcess_UnloadAfterTTL(t *testing.T) {
|
|||||||
process := NewProcess("ttl", 2, config, NewLogMonitorWriter(io.Discard))
|
process := NewProcess("ttl", 2, config, NewLogMonitorWriter(io.Discard))
|
||||||
defer process.Stop()
|
defer process.Stop()
|
||||||
|
|
||||||
req := httptest.NewRequest("GET", "/test", nil)
|
// this should take 4 seconds
|
||||||
|
req1 := httptest.NewRequest("GET", "/slow-respond?echo=1234&delay=1000ms", nil)
|
||||||
|
req2 := httptest.NewRequest("GET", "/test", nil)
|
||||||
|
|
||||||
w := httptest.NewRecorder()
|
w := httptest.NewRecorder()
|
||||||
|
|
||||||
// Proxy the request (auto start)
|
// Proxy the request (auto start) with a slow response that takes longer than config.UnloadAfter
|
||||||
process.ProxyRequest(w, req)
|
process.ProxyRequest(w, req1)
|
||||||
|
|
||||||
|
t.Log("sending slow first request (4 seconds)")
|
||||||
|
assert.Equal(t, http.StatusOK, w.Code, "Expected status code %d, got %d", http.StatusOK, w.Code)
|
||||||
|
assert.Contains(t, w.Body.String(), "1234")
|
||||||
|
assert.Equal(t, StateReady, process.CurrentState())
|
||||||
|
|
||||||
|
// ensure the TTL timeout does not race slow requests (see issue #25)
|
||||||
|
t.Log("sending second request (1 second)")
|
||||||
|
time.Sleep(time.Second)
|
||||||
|
w = httptest.NewRecorder()
|
||||||
|
process.ProxyRequest(w, req2)
|
||||||
assert.Equal(t, http.StatusOK, w.Code, "Expected status code %d, got %d", http.StatusOK, w.Code)
|
assert.Equal(t, http.StatusOK, w.Code, "Expected status code %d, got %d", http.StatusOK, w.Code)
|
||||||
assert.Contains(t, w.Body.String(), expectedMessage)
|
assert.Contains(t, w.Body.String(), expectedMessage)
|
||||||
|
|
||||||
assert.Equal(t, StateReady, process.CurrentState())
|
assert.Equal(t, StateReady, process.CurrentState())
|
||||||
|
|
||||||
// wait 5 seconds
|
// wait 5 seconds
|
||||||
|
t.Log("sleep 5 seconds and check if unloaded")
|
||||||
time.Sleep(5 * time.Second)
|
time.Sleep(5 * time.Second)
|
||||||
assert.Equal(t, StateStopped, process.CurrentState())
|
assert.Equal(t, StateStopped, process.CurrentState())
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user