remove noisy debug print message

add guard to avoid unnecessary logic in Process.Shutdown
2025-05-19 15:36:15 -07:00 · 2025-05-19 15:34:30 -07:00
7 changed files with 29 additions and 60 deletions
@@ -15,8 +15,7 @@ jobs:
    runs-on: ubuntu-latest
    strategy:
      matrix:
-        #platform: [intel, cuda, vulkan, cpu, musa]
-        platform: [cuda, vulkan, cpu, musa]
+        platform: [intel, cuda, vulkan, cpu, musa]
      fail-fast: false
    steps:
      - name: Checkout code
@@ -7,7 +7,7 @@

 llama-swap is a lightweight, transparent proxy server that provides automatic model swapping to llama.cpp's server.

-Written in golang, it is very easy to install (single binary with no dependencies) and configure (single yaml file). To get started, download a pre-built binary or use the provided docker images.
+Written in golang, it is very easy to install (single binary with no dependencies) and configure (single yaml file). To get started, download a pre-built binary or use the provided docker images.

 ## Features:

@@ -63,10 +63,9 @@ models:
 <summary>But also very powerful ...</summary>

 ```yaml
-# Seconds to wait for upstream to load and be ready to serve requests
-# minimum is 15 seconds
-# default is 120 seconds
-healthCheckTimeout: 500
+# Seconds to wait for llama.cpp to load and be ready to serve requests
+# Default (and minimum) is 15 seconds
+healthCheckTimeout: 60

 # Valid log levels: debug, info (default), warn, error
 logLevel: info
@@ -223,13 +223,13 @@ runloop:
 			if countSigInt > 1 {
 				break runloop
 			} else {
-				log.Println("Received SIGINT, send another SIGINT to shutdown")
+				log.Println("Recieved SIGINT, send another SIGINT to shutdown")
 			}
 		case syscall.SIGTERM:
 			if *ignoreSigTerm {
 				log.Println("Ignoring SIGTERM")
 			} else {
-				log.Println("Received SIGTERM, shutting down")
+				log.Println("Recieved SIGTERM, shutting down")
 				break runloop
 			}
 		default:
@@ -113,12 +113,7 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
 		return Config{}, err
 	}

-	if config.HealthCheckTimeout == 0 {
-		// this high default timeout helps avoid failing health checks
-		// for configurations that wait for docker or have slower startup
-		config.HealthCheckTimeout = 120
-	} else if config.HealthCheckTimeout < 15 {
-		// set a minimum of 15 seconds
+	if config.HealthCheckTimeout < 15 {
 		config.HealthCheckTimeout = 15
 	}

@@ -100,7 +100,7 @@ func NewProcess(ID string, healthCheckTimeout int, config ModelConfig, processLo
 		concurrencyLimitSemaphore: make(chan struct{}, concurrentLimit),

 		// stop timeout
-		gracefulStopTimeout:        10 * time.Second,
+		gracefulStopTimeout:        5 * time.Second,
 		upstreamWasStoppedWithKill: false,
 	}
 }
@@ -319,7 +319,7 @@ func (pm *ProxyManager) proxyToUpstream(c *gin.Context) {
 func (pm *ProxyManager) upstreamIndex(c *gin.Context) {
 	var html strings.Builder

-	html.WriteString("<!doctype HTML>\n<html><body><h1>Available Models</h1><a href=\"/unload\">Unload all models</a><ul>")
+	html.WriteString("<!doctype HTML>\n<html><body><h1>Available Models</h1><ul>")

 	// Extract keys and sort them
 	var modelIDs []string
@@ -4,8 +4,6 @@

 set -eu

-LLAMA_SWAP_DEFAULT_ADDRESS=${LLAMA_SWAP_DEFAULT_ADDRESS:-"127.0.0.1:8080"}
-
 red="$( (/usr/bin/tput bold || :; /usr/bin/tput setaf 1 || :) 2>&-)"
 plain="$( (/usr/bin/tput sgr0 || :) 2>&-)"

@@ -13,16 +11,16 @@ status() { echo ">>> $*" >&2; }
 error() { echo "${red}ERROR:${plain} $*"; exit 1; }
 warning() { echo "${red}WARNING:${plain} $*"; }

-available() { command -v "$1" >/dev/null; }
+available() { command -v $1 >/dev/null; }
 require() {
-    _MISSING=''
-    for TOOL in "$@"; do
-        if ! available "$TOOL"; then
-            _MISSING="$_MISSING $TOOL"
+    local MISSING=''
+    for TOOL in $*; do
+        if ! available $TOOL; then
+            MISSING="$MISSING $TOOL"
        fi
    done

-    echo "$_MISSING"
+    echo $MISSING
 }

 SUDO=
@@ -34,7 +32,7 @@ if [ "$(id -u)" -ne 0 ]; then
    SUDO="sudo"
 fi

-NEEDS=$(require tee tar python3 mktemp)
+NEEDS=$(require curl tee jq tar)
 if [ -n "$NEEDS" ]; then
    status "ERROR: The following tools are required but missing:"
    for NEED in $NEEDS; do
@@ -64,40 +62,18 @@ esac
 download_binary() {
    ASSET_NAME="linux_$ARCH"

-    TMPDIR=$(mktemp -d)
-    trap 'rm -rf "${TMPDIR}"' EXIT INT TERM HUP
-    PYTHON_SCRIPT=$(cat <<EOF
-import os
-import json
-import sys
-import urllib.request
+    # Fetch the latest release info and extract the matching asset URL
+    DL_URL=$(curl -s "https://api.github.com/repos/mostlygeek/llama-swap/releases/latest" | \
+        jq -r --arg name "$ASSET_NAME" \
+        '.assets[] | select(.name | contains($name)) | .browser_download_url')

-ASSET_NAME = "${ASSET_NAME}"
-
-with urllib.request.urlopen("https://api.github.com/repos/mostlygeek/llama-swap/releases/latest") as resp:
-    data = json.load(resp)
-    for asset in data.get("assets", []):
-        if ASSET_NAME in asset.get("name", ""):
-            url = asset["browser_download_url"]
-            break
-    else:
-        print("ERROR: Matching asset not found.", file=sys.stderr)
-        exit(1)
-
-print("Downloading:", url, file=sys.stderr)
-output_path = os.path.join("${TMPDIR}", "llama-swap.tar.gz")
-urllib.request.urlretrieve(url, output_path)
-print(output_path)
-EOF
-)
-
-    TARFILE=$(python3 -c "$PYTHON_SCRIPT")
-    if [ ! -f "$TARFILE" ]; then
-        error "Failed to download binary."
+    # Check if a URL was successfully extracted
+    if [ -z "$DL_URL" ]; then
+        error "No matching asset found with name containing '$ASSET_NAME'."
    fi

-    status "Extracting to /usr/local/bin"
-    $SUDO tar -xzf "$TARFILE" -C /usr/local/bin llama-swap
+    status "Downloading Linux $ARCH binary"
+    curl -s -L "$DL_URL" | $SUDO tar -xzf - -C /usr/local/bin llama-swap
 }
 download_binary

@@ -120,7 +96,7 @@ configure_systemd() {
    fi

    status "Adding current user to llama-swap group..."
-    $SUDO usermod -a -G llama-swap "$(whoami)"
+    $SUDO usermod -a -G llama-swap $(whoami)

    if [ ! -f "/usr/share/llama-swap/config.yaml" ]; then
        status "Creating default config.yaml..."
@@ -162,7 +138,7 @@ User=llama-swap
 Group=llama-swap

 # set this to match your environment
-ExecStart=/usr/local/bin/llama-swap --config /usr/share/llama-swap/config.yaml --watch-config -listen ${LLAMA_SWAP_DEFAULT_ADDRESS}
+ExecStart=/usr/local/bin/llama-swap --config /usr/share/llama-swap/config.yaml --watch-config

 Restart=on-failure
 RestartSec=3
@@ -196,7 +172,7 @@ if available systemctl; then
 fi

 install_success() {
-    status "The llama-swap API is now available at http://${LLAMA_SWAP_DEFAULT_ADDRESS}"
+    status 'The llama-swap API is now available at 127.0.0.1:8080.'
    status 'Customize the config file at /usr/share/llama-swap/config.yaml.'
    status 'Install complete.'
 }
Author	SHA1	Message	Date
Benson Wong	e7af671d8e	remove noisy debug print message	2025-05-19 15:36:15 -07:00
Benson Wong	8e62098eef	add guard to avoid unnecessary logic in Process.Shutdown	2025-05-19 15:34:30 -07:00