Compare commits

..

2 Commits

Author SHA1 Message Date
Benson Wong e7af671d8e remove noisy debug print message 2025-05-19 15:36:15 -07:00
Benson Wong 8e62098eef add guard to avoid unnecessary logic in Process.Shutdown 2025-05-19 15:34:30 -07:00
7 changed files with 29 additions and 60 deletions
+1 -2
View File
@@ -15,8 +15,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
#platform: [intel, cuda, vulkan, cpu, musa]
platform: [cuda, vulkan, cpu, musa]
platform: [intel, cuda, vulkan, cpu, musa]
fail-fast: false
steps:
- name: Checkout code
+4 -5
View File
@@ -7,7 +7,7 @@
llama-swap is a light weight, transparent proxy server that provides automatic model swapping to llama.cpp's server.
Written in golang, it is very easy to install (single binary with no dependencies) and configure (single yaml file). To get started, download a pre-built binary or use the provided docker images.
Written in golang, it is very easy to install (single binary with no dependancies) and configure (single yaml file). To get started, download a pre-built binary or use the provided docker images.
## Features:
@@ -63,10 +63,9 @@ models:
<summary>But also very powerful ...</summary>
```yaml
# Seconds to wait for upstream to load and be ready to serve requests
# minimum is 15 seconds
# default is 120 seconds
healthCheckTimeout: 500
# Seconds to wait for llama.cpp to load and be ready to serve requests
# Default (and minimum) is 15 seconds
healthCheckTimeout: 60
# Valid log levels: debug, info (default), warn, error
logLevel: info
+2 -2
View File
@@ -223,13 +223,13 @@ runloop:
if countSigInt > 1 {
break runloop
} else {
log.Println("Received SIGINT, send another SIGINT to shutdown")
log.Println("Recieved SIGINT, send another SIGINT to shutdown")
}
case syscall.SIGTERM:
if *ignoreSigTerm {
log.Println("Ignoring SIGTERM")
} else {
log.Println("Received SIGTERM, shutting down")
log.Println("Recieved SIGTERM, shutting down")
break runloop
}
default:
+1 -6
View File
@@ -113,12 +113,7 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
return Config{}, err
}
if config.HealthCheckTimeout == 0 {
// this high default timeout helps avoid failing health checks
// for configurations that wait for docker or have slower startup
config.HealthCheckTimeout = 120
} else if config.HealthCheckTimeout < 15 {
// set a minimum of 15 seconds
if config.HealthCheckTimeout < 15 {
config.HealthCheckTimeout = 15
}
+1 -1
View File
@@ -100,7 +100,7 @@ func NewProcess(ID string, healthCheckTimeout int, config ModelConfig, processLo
concurrencyLimitSemaphore: make(chan struct{}, concurrentLimit),
// stop timeout
gracefulStopTimeout: 10 * time.Second,
gracefulStopTimeout: 5 * time.Second,
upstreamWasStoppedWithKill: false,
}
}
+1 -1
View File
@@ -319,7 +319,7 @@ func (pm *ProxyManager) proxyToUpstream(c *gin.Context) {
func (pm *ProxyManager) upstreamIndex(c *gin.Context) {
var html strings.Builder
html.WriteString("<!doctype HTML>\n<html><body><h1>Available Models</h1><a href=\"/unload\">Unload all models</a><ul>")
html.WriteString("<!doctype HTML>\n<html><body><h1>Available Models</h1><ul>")
// Extract keys and sort them
var modelIDs []string
+19 -43
View File
@@ -4,8 +4,6 @@
set -eu
LLAMA_SWAP_DEFAULT_ADDRESS=${LLAMA_SWAP_DEFAULT_ADDRESS:-"127.0.0.1:8080"}
red="$( (/usr/bin/tput bold || :; /usr/bin/tput setaf 1 || :) 2>&-)"
plain="$( (/usr/bin/tput sgr0 || :) 2>&-)"
@@ -13,16 +11,16 @@ status() { echo ">>> $*" >&2; }
error() { echo "${red}ERROR:${plain} $*"; exit 1; }
warning() { echo "${red}WARNING:${plain} $*"; }
available() { command -v "$1" >/dev/null; }
available() { command -v $1 >/dev/null; }
require() {
_MISSING=''
for TOOL in "$@"; do
if ! available "$TOOL"; then
_MISSING="$_MISSING $TOOL"
local MISSING=''
for TOOL in $*; do
if ! available $TOOL; then
MISSING="$MISSING $TOOL"
fi
done
echo "$_MISSING"
echo $MISSING
}
SUDO=
@@ -34,7 +32,7 @@ if [ "$(id -u)" -ne 0 ]; then
SUDO="sudo"
fi
NEEDS=$(require tee tar python3 mktemp)
NEEDS=$(require curl tee jq tar)
if [ -n "$NEEDS" ]; then
status "ERROR: The following tools are required but missing:"
for NEED in $NEEDS; do
@@ -64,40 +62,18 @@ esac
download_binary() {
ASSET_NAME="linux_$ARCH"
TMPDIR=$(mktemp -d)
trap 'rm -rf "${TMPDIR}"' EXIT INT TERM HUP
PYTHON_SCRIPT=$(cat <<EOF
import os
import json
import sys
import urllib.request
# Fetch the latest release info and extract the matching asset URL
DL_URL=$(curl -s "https://api.github.com/repos/mostlygeek/llama-swap/releases/latest" | \
jq -r --arg name "$ASSET_NAME" \
'.assets[] | select(.name | contains($name)) | .browser_download_url')
ASSET_NAME = "${ASSET_NAME}"
with urllib.request.urlopen("https://api.github.com/repos/mostlygeek/llama-swap/releases/latest") as resp:
data = json.load(resp)
for asset in data.get("assets", []):
if ASSET_NAME in asset.get("name", ""):
url = asset["browser_download_url"]
break
else:
print("ERROR: Matching asset not found.", file=sys.stderr)
exit(1)
print("Downloading:", url, file=sys.stderr)
output_path = os.path.join("${TMPDIR}", "llama-swap.tar.gz")
urllib.request.urlretrieve(url, output_path)
print(output_path)
EOF
)
TARFILE=$(python3 -c "$PYTHON_SCRIPT")
if [ ! -f "$TARFILE" ]; then
error "Failed to download binary."
# Check if a URL was successfully extracted
if [ -z "$DL_URL" ]; then
error "No matching asset found with name containing '$ASSET_NAME'."
fi
status "Extracting to /usr/local/bin"
$SUDO tar -xzf "$TARFILE" -C /usr/local/bin llama-swap
status "Downloading Linux $ARCH binary"
curl -s -L "$DL_URL" | $SUDO tar -xzf - -C /usr/local/bin llama-swap
}
download_binary
@@ -120,7 +96,7 @@ configure_systemd() {
fi
status "Adding current user to llama-swap group..."
$SUDO usermod -a -G llama-swap "$(whoami)"
$SUDO usermod -a -G llama-swap $(whoami)
if [ ! -f "/usr/share/llama-swap/config.yaml" ]; then
status "Creating default config.yaml..."
@@ -162,7 +138,7 @@ User=llama-swap
Group=llama-swap
# set this to match your environment
ExecStart=/usr/local/bin/llama-swap --config /usr/share/llama-swap/config.yaml --watch-config -listen ${LLAMA_SWAP_DEFAULT_ADDRESS}
ExecStart=/usr/local/bin/llama-swap --config /usr/share/llama-swap/config.yaml --watch-config
Restart=on-failure
RestartSec=3
@@ -196,7 +172,7 @@ if available systemctl; then
fi
install_success() {
status "The llama-swap API is now available at http://${LLAMA_SWAP_DEFAULT_ADDRESS}"
status 'The llama-swap API is now available at 127.0.0.1:8080.'
status 'Customize the config file at /usr/share/llama-swap/config.yaml.'
status 'Install complete.'
}