increase default healthCheckTimeout to 120s

Disable building of intel container until it's fixed upstream
Improve install script (#144)
2025-05-26 09:57:53 -07:00 · 2025-05-23 22:54:43 -07:00 · 2025-05-23 09:39:55 -07:00 · 2025-05-23 08:40:15 -07:00
6 changed files with 59 additions and 28 deletions
@@ -15,7 +15,8 @@ jobs:
    runs-on: ubuntu-latest
    strategy:
      matrix:
-        platform: [intel, cuda, vulkan, cpu, musa]
+        #platform: [intel, cuda, vulkan, cpu, musa]
        platform: [cuda, vulkan, cpu, musa]
      fail-fast: false
    steps:
      - name: Checkout code
@@ -7,7 +7,7 @@
 llama-swap is a lightweight, transparent proxy server that provides automatic model swapping to llama.cpp's server.
-Written in golang, it is very easy to install (single binary with no dependancies) and configure (single yaml file). To get started, download a pre-built binary or use the provided docker images.
+Written in golang, it is very easy to install (single binary with no dependencies) and configure (single yaml file). To get started, download a pre-built binary or use the provided docker images.
 ## Features:
@@ -63,9 +63,10 @@ models:
 <summary>But also very powerful ...</summary>
 ```yaml
-# Seconds to wait for llama.cpp to load and be ready to serve requests
+# Seconds to wait for upstream to load and be ready to serve requests
-# Default (and minimum) is 15 seconds
+# minimum is 15 seconds
-healthCheckTimeout: 60
+# default is 120 seconds
 healthCheckTimeout: 500
 # Valid log levels: debug, info (default), warn, error
 logLevel: info
@@ -223,13 +223,13 @@ runloop:
 			if countSigInt > 1 {
 				break runloop
 			} else {
-				log.Println("Recieved SIGINT, send another SIGINT to shutdown")
+				log.Println("Received SIGINT, send another SIGINT to shutdown")
 			}
 		case syscall.SIGTERM:
 			if *ignoreSigTerm {
 				log.Println("Ignoring SIGTERM")
 			} else {
-				log.Println("Recieved SIGTERM, shutting down")
+				log.Println("Received SIGTERM, shutting down")
 				break runloop
 			}
 		default:
@@ -113,7 +113,12 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
 		return Config{}, err
 	}
-	if config.HealthCheckTimeout < 15 {
+	if config.HealthCheckTimeout == 0 {
 		// this high default timeout helps avoid failing health checks
 		// for configurations that wait for docker or have slower startup
 		config.HealthCheckTimeout = 120
 	} else if config.HealthCheckTimeout < 15 {
 		// set a minimum of 15 seconds
 		config.HealthCheckTimeout = 15
 	}
@@ -100,7 +100,7 @@ func NewProcess(ID string, healthCheckTimeout int, config ModelConfig, processLo
 		concurrencyLimitSemaphore: make(chan struct{}, concurrentLimit),
 		// stop timeout
-		gracefulStopTimeout:        5 * time.Second,
+		gracefulStopTimeout:        10 * time.Second,
 		upstreamWasStoppedWithKill: false,
 	}
 }
@@ -4,6 +4,8 @@
 set -eu
 LLAMA_SWAP_DEFAULT_ADDRESS=${LLAMA_SWAP_DEFAULT_ADDRESS:-"127.0.0.1:8080"}
 red="$( (/usr/bin/tput bold || :; /usr/bin/tput setaf 1 || :) 2>&-)"
 plain="$( (/usr/bin/tput sgr0 || :) 2>&-)"
@@ -11,16 +13,16 @@ status() { echo ">>> $*" >&2; }
 error() { echo "${red}ERROR:${plain} $*"; exit 1; }
 warning() { echo "${red}WARNING:${plain} $*"; }
-available() { command -v $1 >/dev/null; }
+available() { command -v "$1" >/dev/null; }
 require() {
-    local MISSING=''
+    _MISSING=''
-    for TOOL in $*; do
+    for TOOL in "$@"; do
-        if ! available $TOOL; then
+        if ! available "$TOOL"; then
-            MISSING="$MISSING $TOOL"
+            _MISSING="$_MISSING $TOOL"
        fi
    done
-    echo $MISSING
+    echo "$_MISSING"
 }
 SUDO=
@@ -32,7 +34,7 @@ if [ "$(id -u)" -ne 0 ]; then
    SUDO="sudo"
 fi
-NEEDS=$(require curl tee jq tar)
+NEEDS=$(require tee tar python3 mktemp)
 if [ -n "$NEEDS" ]; then
    status "ERROR: The following tools are required but missing:"
    for NEED in $NEEDS; do
@@ -62,18 +64,40 @@ esac
 download_binary() {
    ASSET_NAME="linux_$ARCH"
-    # Fetch the latest release info and extract the matching asset URL
+    TMPDIR=$(mktemp -d)
-    DL_URL=$(curl -s "https://api.github.com/repos/mostlygeek/llama-swap/releases/latest" | \
+    trap 'rm -rf "${TMPDIR}"' EXIT INT TERM HUP
-        jq -r --arg name "$ASSET_NAME" \
+    PYTHON_SCRIPT=$(cat <<EOF
-        '.assets[] | select(.name | contains($name)) | .browser_download_url')
+import os
 import json
 import sys
 import urllib.request
-    # Check if a URL was successfully extracted
+ASSET_NAME = "${ASSET_NAME}"
-    if [ -z "$DL_URL" ]; then
+
-        error "No matching asset found with name containing '$ASSET_NAME'."
+with urllib.request.urlopen("https://api.github.com/repos/mostlygeek/llama-swap/releases/latest") as resp:
    data = json.load(resp)
    for asset in data.get("assets", []):
        if ASSET_NAME in asset.get("name", ""):
            url = asset["browser_download_url"]
            break
    else:
        print("ERROR: Matching asset not found.", file=sys.stderr)
        exit(1)
 print("Downloading:", url, file=sys.stderr)
 output_path = os.path.join("${TMPDIR}", "llama-swap.tar.gz")
 urllib.request.urlretrieve(url, output_path)
 print(output_path)
 EOF
 )
    TARFILE=$(python3 -c "$PYTHON_SCRIPT")
    if [ ! -f "$TARFILE" ]; then
        error "Failed to download binary."
    fi
-    status "Downloading Linux $ARCH binary"
+    status "Extracting to /usr/local/bin"
-    curl -s -L "$DL_URL" | $SUDO tar -xzf - -C /usr/local/bin llama-swap
+    $SUDO tar -xzf "$TARFILE" -C /usr/local/bin llama-swap
 }
 download_binary
@@ -96,7 +120,7 @@ configure_systemd() {
    fi
    status "Adding current user to llama-swap group..."
-    $SUDO usermod -a -G llama-swap $(whoami)
+    $SUDO usermod -a -G llama-swap "$(whoami)"
    if [ ! -f "/usr/share/llama-swap/config.yaml" ]; then
        status "Creating default config.yaml..."
@@ -138,7 +162,7 @@ User=llama-swap
 Group=llama-swap
 # set this to match your environment
-ExecStart=/usr/local/bin/llama-swap --config /usr/share/llama-swap/config.yaml --watch-config
+ExecStart=/usr/local/bin/llama-swap --config /usr/share/llama-swap/config.yaml --watch-config -listen ${LLAMA_SWAP_DEFAULT_ADDRESS}
 Restart=on-failure
 RestartSec=3
@@ -172,7 +196,7 @@ if available systemctl; then
 fi
 install_success() {
-    status 'The llama-swap API is now available at 127.0.0.1:8080.'
+    status "The llama-swap API is now available at http://${LLAMA_SWAP_DEFAULT_ADDRESS}"
    status 'Customize the config file at /usr/share/llama-swap/config.yaml.'
    status 'Install complete.'
 }
Author	SHA1	Message	Date
Benson Wong	02ee29d881	increase default healthCheckTimeout to 120s	2025-05-26 09:57:53 -07:00
Benson Wong	b2a891f8f4	Disable building of intel container until it's fixed upstream	2025-05-23 22:54:43 -07:00
Yuta Hayashibe	8d2b568897	Improve install script (#144) * Use `python3` instead of `curl` and `jq` * Use quotes to prevent word splitting * Remove undefined `local` in POSIX sh * Added `LLAMA_SWAP_DEFAULT_ADDRESS` to customize the server address * Added `mktemp` to `NEEDS`	2025-05-23 09:39:55 -07:00
Yuta Hayashibe	fb44cf4e08	Fix typos (#143)	2025-05-23 08:40:15 -07:00