Compare commits

..

6 Commits

Author SHA1 Message Date
Benson Wong 8fabc75634 docker/unified: vulkan build fixes (#600)
multiple fixes to vulkan build: 

- use ubuntu 26.04 to be compatible with AMD 395+ (Strix halo) hardware
- add home directory in container 
- fix stable-diffusion install to actually enable vulkan

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-25 23:26:13 +09:00
Benson Wong e5e7391b6d .github,docker/unified: include vulkan build (#599)
Update docker/unified scripts to support building both cuda and vulkan unified images.
2026-03-25 06:58:28 +09:00
Benson Wong 2c282dccad .github,docker/unified: improve caching and fix bugs (#598)
- set up a GHA scheduled job to build the container nightly 
- enabling pushing a llama-swap:unified and a llama-swap:unified-Y-M-D
image to ghcr.io
- tidy up Dockerfile to use a non-root user and llama-swap as an entry
point
2026-03-23 22:24:40 +09:00
Benson Wong 916d13f5bd .github/workflows,docker/unified: add cuda based unified container (#597)
Add Docker build scripts for a unified cuda docker container with llama-server, stable-diffusion.cpp, whisper.cpp.
2026-03-22 21:11:54 +09:00
Benson Wong a3725e7d09 Update go.mod to 1.26.1 (#593) 2026-03-20 16:09:58 +09:00
Benson Wong 15bd55d3a9 proxy, ui-svelte: add /sdapi/v1 endpoint support (#587)
Add proxy routes for stable-diffusion.cpp's /sdapi/v1/txt2img,
/sdapi/v1/img2img, and /sdapi/v1/loras endpoints. POST endpoints
use proxyInferenceHandler (model in JSON body), GET /loras uses
proxyGETModelHandler (model in query param).

Update the image playground with a dual-mode UI supporting both
OpenAI and SDAPI backends. In SDAPI mode, loras are fetched first
to prime the server-side cache, and all txt2img parameters are
exposed (negative prompt, steps, cfg_scale, seed, batch_size,
clip_skip, sampler, scheduler, lora selection with multipliers).

- Add 3 sdapi route registrations in proxymanager.go
- Add sdApi.ts client with generateSdImage and fetchSdLoras
- Add SDAPI types (SdApiTxt2ImgRequest, SdApiResponse, etc.)
- Add /sdapi to vite dev proxy config
- Add backend tests for sdapi routing
- Support batch image display in gallery grid

https://claude.ai/code/session_0186MGX6NXdHVBTv2KH45fqn

---------

Co-authored-by: Claude <noreply@anthropic.com>
2026-03-19 22:08:31 +09:00
20 changed files with 1637 additions and 33 deletions
+3 -3
View File
@@ -36,7 +36,7 @@ jobs:
- name: Set up Go
uses: actions/setup-go@v4
with:
go-version: '1.23'
go-version-file: go.mod
# Only run in this linux based runner
- name: Check Formatting
@@ -51,7 +51,7 @@ jobs:
uses: actions/cache/restore@v4
with:
path: ./build
key: ${{ runner.os }}-simple-responder-${{ hashFiles('misc/simple-responder/simple-responder.go') }}
key: ${{ runner.os }}-simple-responder-${{ hashFiles('cmd/simple-responder/simple-responder.go') }}
# necessary for testing proxy/Process swapping
- name: Create simple-responder
@@ -67,4 +67,4 @@ jobs:
key: ${{ runner.os }}-simple-responder-${{ hashFiles('misc/simple-responder/simple-responder.go') }}
- name: Test all
run: make test-all
run: make test-all
+104
View File
@@ -0,0 +1,104 @@
name: Build Unified Docker Image
on:
schedule:
- cron: "37 5 * * *"
workflow_dispatch:
inputs:
llama_cpp_ref:
description: "llama.cpp commit hash, tag, or branch"
required: false
default: "master"
whisper_ref:
description: "whisper.cpp commit hash, tag, or branch"
required: false
default: "master"
sd_ref:
description: "stable-diffusion.cpp commit hash, tag, or branch"
required: false
default: "master"
llama_swap_version:
description: "llama-swap version (e.g. v198, latest, main)"
required: false
default: "main"
build_cuda:
description: "Build CUDA image"
type: boolean
required: false
default: true
build_vulkan:
description: "Build Vulkan image"
type: boolean
required: false
default: true
permissions:
contents: read
packages: write
jobs:
build:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
backend:
- cuda
- vulkan
exclude:
- backend: ${{ inputs.build_cuda == false && 'cuda' || 'none' }}
- backend: ${{ inputs.build_vulkan == false && 'vulkan' || 'none' }}
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Free up disk space
run: |
echo "Before cleanup:"
df -h
sudo rm -rf /usr/share/dotnet
sudo rm -rf /usr/local/lib/android
sudo rm -rf /opt/ghc
sudo rm -rf /opt/hostedtoolcache/CodeQL
sudo docker system prune -af
echo "After cleanup:"
df -h
# On GitHub Actions runners, create a fresh builder.
# When running locally under act, skip this and reuse the existing
# llama-swap-builder (which has ccache warm) to avoid exhausting disk.
- name: Set up Docker Buildx
if: ${{ !env.ACT }}
uses: docker/setup-buildx-action@v3
- name: Log in to GitHub Container Registry
if: ${{ !env.ACT }}
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build unified Docker image (${{ matrix.backend }})
env:
LLAMA_REF: ${{ inputs.llama_cpp_ref || 'master' }}
WHISPER_REF: ${{ inputs.whisper_ref || 'master' }}
SD_REF: ${{ inputs.sd_ref || 'master' }}
LS_VERSION: ${{ inputs.llama_swap_version || 'main' }}
DOCKER_IMAGE_TAG: ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }}
# When running under act, use the local builder that has warm ccache.
# On GitHub Actions, BUILDX_BUILDER is unset so docker uses the builder
# created by setup-buildx-action above.
BUILDX_BUILDER: ${{ env.ACT == 'true' && 'llama-swap-builder' || '' }}
run: |
chmod +x docker/unified/build-image.sh
docker/unified/build-image.sh --${{ matrix.backend }}
- name: Push to GitHub Container Registry
if: ${{ !env.ACT }}
run: |
docker push ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }}
DATE_TAG=$(date -u +%Y-%m-%d)
docker tag ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }} ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }}-${DATE_TAG}
docker push ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }}-${DATE_TAG}
+1 -1
View File
@@ -51,7 +51,7 @@ mac: ui
linux: ui
@echo "Building Linux binary..."
GOOS=linux GOARCH=amd64 go build -ldflags="-X main.commit=${GIT_HASH} -X main.version=local_${GIT_HASH} -X main.date=${BUILD_DATE}" -o $(BUILD_DIR)/$(APP_NAME)-linux-amd64
GOOS=linux GOARCH=arm64 go build -ldflags="-X main.commit=${GIT_HASH} -X main.version=local_${GIT_HASH} -X main.date=${BUILD_DATE}" -o $(BUILD_DIR)/$(APP_NAME)-linux-arm64
#GOOS=linux GOARCH=arm64 go build -ldflags="-X main.commit=${GIT_HASH} -X main.version=local_${GIT_HASH} -X main.date=${BUILD_DATE}" -o $(BUILD_DIR)/$(APP_NAME)-linux-arm64
# Build Windows binary
windows: ui
+37
View File
@@ -274,6 +274,43 @@ func main() {
c.String(200, fmt.Sprintf("%s %s", c.Request.Method, c.Request.URL.Path))
})
// SD API endpoints
r.POST("/sdapi/v1/txt2img", func(c *gin.Context) {
body, err := io.ReadAll(c.Request.Body)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to read request body"})
return
}
defer c.Request.Body.Close()
modelName := gjson.GetBytes(body, "model").String()
c.JSON(http.StatusOK, gin.H{
"model": modelName,
"images": []string{},
})
})
r.POST("/sdapi/v1/img2img", func(c *gin.Context) {
body, err := io.ReadAll(c.Request.Body)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to read request body"})
return
}
defer c.Request.Body.Close()
modelName := gjson.GetBytes(body, "model").String()
c.JSON(http.StatusOK, gin.H{
"model": modelName,
"images": []string{},
})
})
r.GET("/sdapi/v1/loras", func(c *gin.Context) {
c.JSON(http.StatusOK, gin.H{
"loras": []string{},
})
})
address := "127.0.0.1:" + *port // Address with the specified port
srv := &http.Server{
+305
View File
@@ -0,0 +1,305 @@
#!/bin/bash
#
# Build script for llama-swap-docker with commit hash pinning
#
# Usage:
# ./build-image.sh --cuda # Build CUDA image
# ./build-image.sh --vulkan # Build Vulkan image
# ./build-image.sh --cuda --no-cache # Build CUDA image without cache
# LLAMA_COMMIT_HASH=abc123 ./build-image.sh --cuda # Override llama.cpp commit
# LLAMA_COMMIT_HASH=b8429 ./build-image.sh --vulkan # Override llama.cpp release tag (vulkan uses prebuilt binaries)
# WHISPER_COMMIT_HASH=def456 ./build-image.sh --vulkan # Override whisper.cpp commit
# SD_COMMIT_HASH=ghi789 ./build-image.sh --cuda # Override stable-diffusion.cpp commit
#
# Features:
# - Auto-detects latest commit hashes from git repos
# - Builds llama-swap from local source code
# - Allows environment variable overrides for reproducible builds
# - Cache-friendly: changing commit hash busts cache appropriately
# - Supports both CUDA and Vulkan backends (requires explicit flag)
#
set -euo pipefail
# Parse command line arguments
BACKEND=""
NO_CACHE=false
if [[ $# -eq 0 ]]; then
echo "Error: No backend specified. Please use --cuda or --vulkan."
echo ""
echo "Usage: ./build-image.sh --cuda|--vulkan [--no-cache]"
echo ""
echo "Options:"
echo " --cuda Build CUDA image (NVIDIA GPUs)"
echo " --vulkan Build Vulkan image (AMD GPUs and compatible hardware)"
echo " --no-cache Force rebuild without using Docker cache"
echo " --help, -h Show this help message"
echo ""
echo "Environment variables:"
echo " DOCKER_IMAGE_TAG Set custom image tag (default: llama-swap:cuda or llama-swap:vulkan)"
echo " LLAMA_COMMIT_HASH Override llama.cpp commit hash"
echo " WHISPER_COMMIT_HASH Override whisper.cpp commit hash"
echo " SD_COMMIT_HASH Override stable-diffusion.cpp commit hash"
exit 1
fi
for arg in "$@"; do
case $arg in
--cuda)
BACKEND="cuda"
;;
--vulkan)
BACKEND="vulkan"
;;
--no-cache)
NO_CACHE=true
;;
--help|-h)
echo "Usage: ./build-image.sh --cuda|--vulkan [--no-cache]"
echo ""
echo "Options:"
echo " --cuda Build CUDA image (NVIDIA GPUs)"
echo " --vulkan Build Vulkan image (AMD GPUs and compatible hardware)"
echo " --no-cache Force rebuild without using Docker cache"
echo " --help, -h Show this help message"
echo ""
echo "Environment variables:"
echo " DOCKER_IMAGE_TAG Set custom image tag (default: llama-swap:cuda or llama-swap:vulkan)"
echo " LLAMA_COMMIT_HASH Override llama.cpp commit hash"
echo " WHISPER_COMMIT_HASH Override whisper.cpp commit hash"
echo " SD_COMMIT_HASH Override stable-diffusion.cpp commit hash"
exit 0
;;
esac
done
# Validate backend selection
if [[ -z "$BACKEND" ]]; then
echo "Error: No backend specified. Please use --cuda or --vulkan."
exit 1
fi
# Configuration
if [[ -n "${DOCKER_IMAGE_TAG:-}" ]]; then
# User provided a custom tag, use it as-is
:
elif [[ "$BACKEND" == "vulkan" ]]; then
DOCKER_IMAGE_TAG="llama-swap:vulkan"
else
DOCKER_IMAGE_TAG="llama-swap:cuda"
fi
DOCKER_BUILDKIT="${DOCKER_BUILDKIT:-1}"
# Single unified Dockerfile, backend selected via build arg
DOCKERFILE="Dockerfile"
if [[ "$BACKEND" == "vulkan" ]]; then
echo "Building for: Vulkan (AMD GPUs and compatible hardware)"
else
echo "Building for: CUDA (NVIDIA GPUs)"
fi
# Git repository URLs
LLAMA_REPO="https://github.com/ggml-org/llama.cpp.git"
WHISPER_REPO="https://github.com/ggml-org/whisper.cpp.git"
SD_REPO="https://github.com/leejet/stable-diffusion.cpp.git"
# Function to get the latest commit hash from a git repo's default branch
get_latest_commit() {
local repo_url="$1"
local branch="${2:-master}"
# Try to get the latest commit hash for the specified branch
git ls-remote --heads "${repo_url}" "${branch}" 2>/dev/null | head -1 | cut -f1
}
# Function to get the default branch name (master or main)
get_default_branch() {
local repo_url="$1"
# Check for master first
if git ls-remote --heads "${repo_url}" master &>/dev/null; then
echo "master"
elif git ls-remote --heads "${repo_url}" main &>/dev/null; then
echo "main"
else
echo "master" # fallback
fi
}
# Function to get the latest release tag from a GitHub repo
get_latest_release_tag() {
local owner_repo="$1"
curl -fsSL "https://api.github.com/repos/${owner_repo}/releases/latest" \
| grep '"tag_name"' | head -1 | cut -d'"' -f4
}
echo "=========================================="
echo "llama-swap-docker Build Script"
echo "=========================================="
echo ""
# Determine commit hashes / release tags - use env vars or auto-detect
# For vulkan builds, llama and sd use GitHub release tags (prebuilt binaries).
# For cuda builds (or whisper on any backend), use git commit hashes.
if [[ -n "${LLAMA_COMMIT_HASH:-}" ]]; then
LLAMA_HASH="${LLAMA_COMMIT_HASH}"
echo "llama.cpp: Using provided version: ${LLAMA_HASH}"
elif [[ "$BACKEND" == "vulkan" ]]; then
LLAMA_HASH=$(get_latest_release_tag "ggml-org/llama.cpp")
if [[ -z "${LLAMA_HASH}" ]]; then
echo "ERROR: Could not determine latest release tag for llama.cpp" >&2
exit 1
fi
echo "llama.cpp: Auto-detected latest release tag: ${LLAMA_HASH}"
else
LLAMA_BRANCH=$(get_default_branch "${LLAMA_REPO}")
LLAMA_HASH=$(get_latest_commit "${LLAMA_REPO}" "${LLAMA_BRANCH}")
if [[ -z "${LLAMA_HASH}" ]]; then
echo "ERROR: Could not determine latest commit for llama.cpp" >&2
exit 1
fi
echo "llama.cpp: Auto-detected latest commit (${LLAMA_BRANCH}): ${LLAMA_HASH}"
fi
if [[ -n "${WHISPER_COMMIT_HASH:-}" ]]; then
WHISPER_HASH="${WHISPER_COMMIT_HASH}"
echo "whisper.cpp: Using provided commit hash: ${WHISPER_HASH}"
else
WHISPER_BRANCH=$(get_default_branch "${WHISPER_REPO}")
WHISPER_HASH=$(get_latest_commit "${WHISPER_REPO}" "${WHISPER_BRANCH}")
if [[ -z "${WHISPER_HASH}" ]]; then
echo "ERROR: Could not determine latest commit for whisper.cpp" >&2
exit 1
fi
echo "whisper.cpp: Auto-detected latest commit (${WHISPER_BRANCH}): ${WHISPER_HASH}"
fi
if [[ -n "${SD_COMMIT_HASH:-}" ]]; then
SD_HASH="${SD_COMMIT_HASH}"
echo "stable-diffusion.cpp: Using provided version: ${SD_HASH}"
elif [[ "$BACKEND" == "vulkan" ]]; then
SD_HASH=$(get_latest_release_tag "leejet/stable-diffusion.cpp")
if [[ -z "${SD_HASH}" ]]; then
echo "ERROR: Could not determine latest release tag for stable-diffusion.cpp" >&2
exit 1
fi
echo "stable-diffusion.cpp: Auto-detected latest release tag: ${SD_HASH}"
else
SD_BRANCH=$(get_default_branch "${SD_REPO}")
SD_HASH=$(get_latest_commit "${SD_REPO}" "${SD_BRANCH}")
if [[ -z "${SD_HASH}" ]]; then
echo "ERROR: Could not determine latest commit for stable-diffusion.cpp" >&2
exit 1
fi
echo "stable-diffusion.cpp: Auto-detected latest commit (${SD_BRANCH}): ${SD_HASH}"
fi
echo ""
echo "=========================================="
echo "Starting Docker build..."
echo "=========================================="
echo ""
# Build the Docker image with commit hashes as build args
# Build context is the repository root (..) so the Dockerfile can access Go source
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
BUILD_ARGS=(
--build-arg "BACKEND=${BACKEND}"
--build-arg "LLAMA_COMMIT_HASH=${LLAMA_HASH}"
--build-arg "WHISPER_COMMIT_HASH=${WHISPER_HASH}"
--build-arg "SD_COMMIT_HASH=${SD_HASH}"
-t "${DOCKER_IMAGE_TAG}"
-f "${SCRIPT_DIR}/${DOCKERFILE}"
)
if [[ "$NO_CACHE" == true ]]; then
BUILD_ARGS+=(--no-cache)
echo "Note: Building without cache"
fi
# Use docker buildx with a custom builder for parallelism control
# The legacy DOCKER_BUILDKIT=1 docker build doesn't respect BUILDKIT_MAX_PARALLELISM env var
# We need to use a custom builder with a buildkitd.toml config file
BUILDER_NAME="llama-swap-builder"
# Check if our custom builder exists with the right config, create/update if needed
if ! docker buildx inspect "$BUILDER_NAME" >/dev/null 2>&1; then
echo "Creating custom buildx builder with max-parallelism=1..."
# Create buildkitd.toml config file
cat > buildkitd.toml << 'BUILDKIT_EOF'
[worker.oci]
max-parallelism = 1
BUILDKIT_EOF
# Create the builder with the config
docker buildx create --name "$BUILDER_NAME" \
--driver docker-container \
--buildkitd-config buildkitd.toml \
--use
else
# Switch to our builder
docker buildx use "$BUILDER_NAME"
fi
echo "Building with sequential stages (one at a time), each using all CPU cores..."
echo "Using builder: $BUILDER_NAME"
# Use docker buildx build with --load to load the image into Docker
# The --builder flag ensures we use our custom builder with max-parallelism=1
# Build context is the repository root so we can access Go source files
docker buildx build --builder "$BUILDER_NAME" --load "${BUILD_ARGS[@]}" "${REPO_ROOT}"
echo ""
echo "=========================================="
echo "Verifying build artifacts..."
echo "=========================================="
echo ""
# Verify all expected binaries exist in the image
MISSING_BINARIES=()
for binary in llama-server llama-cli whisper-server whisper-cli sd-server sd-cli llama-swap; do
if ! docker run --rm "${DOCKER_IMAGE_TAG}" which "${binary}" >/dev/null 2>&1; then
MISSING_BINARIES+=("${binary}")
fi
done
if [[ ${#MISSING_BINARIES[@]} -gt 0 ]]; then
echo "ERROR: Build succeeded but the following binaries are missing from the image:"
for binary in "${MISSING_BINARIES[@]}"; do
echo " - ${binary}"
done
echo ""
echo "This usually indicates a build stage failure. Try running with --no-cache flag:"
echo " ./build-image.sh --vulkan --no-cache"
exit 1
fi
echo "All expected binaries verified: llama-server, llama-cli, whisper-server, whisper-cli, sd-server, sd-cli, llama-swap"
echo ""
echo "=========================================="
echo "Build complete!"
echo "=========================================="
echo ""
echo "Image tag: ${DOCKER_IMAGE_TAG}"
echo ""
echo "Built with:"
echo " llama.cpp: ${LLAMA_HASH}"
echo " whisper.cpp: ${WHISPER_HASH}"
echo " stable-diffusion.cpp: ${SD_HASH}"
echo " llama-swap: $(docker run --rm "${DOCKER_IMAGE_TAG}" cat /versions.txt | grep llama-swap | cut -d' ' -f2-)"
echo ""
if [[ "$BACKEND" == "vulkan" ]]; then
echo "Run with:"
echo " docker run -it --rm --device /dev/dri:/dev/dri ${DOCKER_IMAGE_TAG}"
echo ""
echo "Note: For AMD GPUs, you may also need to mount render devices:"
echo " docker run -it --rm --device /dev/dri:/dev/dri --group-add video ${DOCKER_IMAGE_TAG}"
else
echo "Run with:"
echo " docker run -it --rm --gpus all ${DOCKER_IMAGE_TAG}"
fi
+168
View File
@@ -0,0 +1,168 @@
# Unified multi-stage Dockerfile for AI inference tools
# Supports CUDA and Vulkan backends via BACKEND build arg
#
# Usage:
# docker buildx build --build-arg BACKEND=cuda -t llama-swap:unified-cuda .
# docker buildx build --build-arg BACKEND=vulkan -t llama-swap:unified-vulkan .
#
# Each project has its own install script that handles cloning, building,
# and installing binaries. Build stages are independent for cache efficiency.
ARG BACKEND=cuda
# ── Builder bases ──────────────────────────────────────────────────────
FROM nvidia/cuda:12.4.0-devel-ubuntu22.04 AS builder-base-cuda
ENV DEBIAN_FRONTEND=noninteractive
ENV CMAKE_CUDA_ARCHITECTURES="60;61;75;86;89"
ENV CCACHE_DIR=/ccache
ENV CCACHE_MAXSIZE=2G
ENV PATH="/usr/lib/ccache:${PATH}"
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential cmake git python3 python3-pip libssl-dev \
curl ca-certificates ccache make wget \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /build
# ──
FROM ubuntu:26.04 AS builder-base-vulkan
ENV DEBIAN_FRONTEND=noninteractive
ENV CCACHE_DIR=/ccache
ENV CCACHE_MAXSIZE=2G
ENV PATH="/usr/lib/ccache:${PATH}"
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential cmake git python3 python3-pip libssl-dev \
curl ca-certificates ccache make wget software-properties-common \
libvulkan-dev glslang-tools spirv-tools vulkan-validationlayers glslc \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /build
# ── Select builder base by BACKEND ────────────────────────────────────
FROM builder-base-${BACKEND} AS builder-base
# ── Build whisper.cpp (fastest build, run first) ──────────────────────
FROM builder-base AS whisper-build
ARG BACKEND=cuda
ARG WHISPER_COMMIT_HASH=master
COPY install-whisper.sh /build/
RUN --mount=type=cache,id=ccache-${BACKEND},target=/ccache \
--mount=type=cache,id=whisper-${BACKEND},target=/src/whisper.cpp/build \
BACKEND=${BACKEND} bash /build/install-whisper.sh "${WHISPER_COMMIT_HASH}"
# ── Build stable-diffusion.cpp ────────────────────────────────────────
FROM builder-base AS sd-build
ARG BACKEND=cuda
ARG SD_COMMIT_HASH=master
COPY install-sd.sh /build/
RUN --mount=type=cache,id=ccache-${BACKEND},target=/ccache \
--mount=type=cache,id=sd-${BACKEND},target=/src/stable-diffusion.cpp/build \
BACKEND=${BACKEND} bash /build/install-sd.sh "${SD_COMMIT_HASH}"
# ── Build llama.cpp (slowest build, run last) ─────────────────────────
FROM builder-base AS llama-build
ARG BACKEND=cuda
ARG LLAMA_COMMIT_HASH=master
COPY install-llama.sh /build/
RUN --mount=type=cache,id=ccache-${BACKEND},target=/ccache \
--mount=type=cache,id=llama-${BACKEND},target=/src/llama.cpp/build \
BACKEND=${BACKEND} bash /build/install-llama.sh "${LLAMA_COMMIT_HASH}"
# ── Download llama-swap release binary ────────────────────────────────
FROM builder-base AS llama-swap-download
ARG LS_VERSION=latest
COPY install-llama-swap.sh /build/
RUN bash /build/install-llama-swap.sh "${LS_VERSION}"
# ── Runtime bases ─────────────────────────────────────────────────────
FROM nvidia/cuda:12.4.0-runtime-ubuntu22.04 AS runtime-cuda
ENV DEBIAN_FRONTEND=noninteractive
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"
ENV PATH="/usr/local/bin:${PATH}"
RUN apt-get update && apt-get install -y --no-install-recommends \
libgomp1 python3 python3-pip curl ca-certificates git \
&& rm -rf /var/lib/apt/lists/*
# CUDA stub drivers for container compatibility
COPY --from=builder-base-cuda /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so
COPY --from=builder-base-cuda /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1
# ──
FROM ubuntu:26.04 AS runtime-vulkan
ENV DEBIAN_FRONTEND=noninteractive
ENV PATH="/usr/local/bin:${PATH}"
RUN apt-get update && apt-get install -y --no-install-recommends \
libgomp1 libvulkan1 mesa-vulkan-drivers \
python3 python3-pip curl ca-certificates git \
&& rm -rf /var/lib/apt/lists/*
# ── Select runtime base by BACKEND ────────────────────────────────────
FROM runtime-${BACKEND} AS runtime
ARG BACKEND=cuda
ARG LLAMA_COMMIT_HASH=unknown
ARG WHISPER_COMMIT_HASH=unknown
ARG SD_COMMIT_HASH=unknown
RUN pip3 install --no-cache-dir --break-system-packages numpy sentencepiece
# Create llama-swap user and config directory
RUN useradd --system --create-home --shell /sbin/nologin llama-swap && \
mkdir -p /etc/llama-swap/config && \
chown -R llama-swap:llama-swap /etc/llama-swap
WORKDIR /app
# Copy whisper.cpp binaries and libraries
COPY --from=whisper-build /install/bin/whisper-server /usr/local/bin/
COPY --from=whisper-build /install/bin/whisper-cli /usr/local/bin/
COPY --from=whisper-build /install/lib/ /usr/local/lib/
# Copy stable-diffusion.cpp binaries and libraries
COPY --from=sd-build /install/bin/sd-server /usr/local/bin/
COPY --from=sd-build /install/bin/sd-cli /usr/local/bin/
COPY --from=sd-build /install/lib/ /usr/local/lib/
# Copy llama.cpp binaries and libraries
COPY --from=llama-build /install/bin/llama-server /usr/local/bin/
COPY --from=llama-build /install/bin/llama-cli /usr/local/bin/
COPY --from=llama-build /install/lib/ /usr/local/lib/
# Copy llama-swap binary
COPY --from=llama-swap-download /install/bin/llama-swap /usr/local/bin/
COPY --from=llama-swap-download /install/llama-swap-version /tmp/
RUN ldconfig
COPY config.example.yaml /etc/llama-swap/config/config.yaml
# Version tracking
RUN echo "llama.cpp: ${LLAMA_COMMIT_HASH}" > /versions.txt && \
echo "whisper.cpp: ${WHISPER_COMMIT_HASH}" >> /versions.txt && \
echo "stable-diffusion.cpp: ${SD_COMMIT_HASH}" >> /versions.txt && \
echo "llama-swap: $(cat /tmp/llama-swap-version)" >> /versions.txt && \
echo "backend: ${BACKEND}" >> /versions.txt && \
echo "build_timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" >> /versions.txt
WORKDIR /models
USER llama-swap
ENTRYPOINT ["llama-swap"]
CMD ["-config", "/etc/llama-swap/config/config.yaml", "-listen", "0.0.0.0:8080"]
+8
View File
@@ -0,0 +1,8 @@
# Unified Docker Container
These scripts create a custom llama-swap container that contains:
- llama-server for LLMs, rerank and embedding model support
- sd-server (stable-diffusion.cpp) for image generation
- whisper.cpp for ASR
+248
View File
@@ -0,0 +1,248 @@
#!/bin/bash
#
# Build script for unified container with version pinning
#
# Usage:
# ./build-image.sh --cuda # Build CUDA image
# ./build-image.sh --vulkan # Build Vulkan image
# ./build-image.sh --cuda --no-cache # Build without cache
# LLAMA_REF=b1234 ./build-image.sh --vulkan # Pin llama.cpp to a commit hash
# LLAMA_REF=v1.2.3 ./build-image.sh --cuda # Pin llama.cpp to a tag
# WHISPER_REF=v1.0.0 ./build-image.sh --vulkan # Pin whisper.cpp to a tag
# SD_REF=master ./build-image.sh --cuda # Pin stable-diffusion.cpp to a branch
# LS_VERSION=170 ./build-image.sh --cuda # Override llama-swap version
#
set -euo pipefail
BACKEND=""
NO_CACHE=false
for arg in "$@"; do
case $arg in
--cuda)
BACKEND="cuda"
;;
--vulkan)
BACKEND="vulkan"
;;
--no-cache)
NO_CACHE=true
;;
--help|-h)
echo "Usage: ./build-image.sh --cuda|--vulkan [--no-cache]"
echo ""
echo "Options:"
echo " --cuda Build CUDA image (NVIDIA GPUs)"
echo " --vulkan Build Vulkan image (AMD GPUs and compatible hardware)"
echo " --no-cache Force rebuild without using Docker cache"
echo " --help, -h Show this help message"
echo ""
echo "Environment variables:"
echo " DOCKER_IMAGE_TAG Set custom image tag (default: llama-swap:unified-cuda or llama-swap:unified-vulkan)"
echo " LLAMA_REF Pin llama.cpp to a commit, tag, or branch"
echo " WHISPER_REF Pin whisper.cpp to a commit, tag, or branch"
echo " SD_REF Pin stable-diffusion.cpp to a commit, tag, or branch"
echo " LS_VERSION Override llama-swap version (e.g., '170' or 'latest')"
exit 0
;;
esac
done
if [[ -z "$BACKEND" ]]; then
echo "Error: No backend specified. Please use --cuda or --vulkan."
echo ""
echo "Usage: ./build-image.sh --cuda|--vulkan [--no-cache]"
exit 1
fi
DOCKER_IMAGE_TAG="${DOCKER_IMAGE_TAG:-llama-swap:unified-${BACKEND}}"
# Git repository URLs
LLAMA_REPO="https://github.com/ggml-org/llama.cpp.git"
WHISPER_REPO="https://github.com/ggml-org/whisper.cpp.git"
SD_REPO="https://github.com/leejet/stable-diffusion.cpp.git"
LLAMA_SWAP_REPO="https://github.com/mostlygeek/llama-swap.git"
# Resolve a git ref (commit hash, tag, or branch) to a full commit hash.
# Requires only: git, network access to the remote.
resolve_ref() {
local repo_url="$1"
local ref="$2"
# Full 40-char SHA — use as-is
if [[ "${ref}" =~ ^[0-9a-f]{40}$ ]]; then
echo "${ref}"
return
fi
# Try tag then branch (exact match)
local hash
hash=$(git ls-remote "${repo_url}" "refs/tags/${ref}" "refs/heads/${ref}" 2>/dev/null | head -1 | cut -f1)
if [[ -n "${hash}" ]]; then
echo "${hash}"
return
fi
# Short hash (7+ chars): scan all refs for a SHA with this prefix
if [[ "${ref}" =~ ^[0-9a-f]{7,}$ ]]; then
hash=$(git ls-remote "${repo_url}" 2>/dev/null | grep "^${ref}" | head -1 | cut -f1)
if [[ -n "${hash}" ]]; then
echo "${hash}"
return
fi
fi
echo "ERROR: Could not resolve ref '${ref}' for ${repo_url}" >&2
if [[ "${ref}" =~ ^[0-9a-f]+$ && ${#ref} -lt 7 ]]; then
echo " Short hashes must be at least 7 characters (got ${#ref})." >&2
else
echo " Tried: tag, branch, git ls-remote prefix match" >&2
fi
echo " Use a full 40-char SHA, a tag name, a branch name, or a 7+ char short hash." >&2
return 1
}
# Resolve HEAD of a repo without needing to know the default branch name.
get_latest_hash() {
git ls-remote "${1}" HEAD 2>/dev/null | head -1 | cut -f1
}
echo "=========================================="
echo "llama-swap Unified Build (${BACKEND})"
echo "=========================================="
echo ""
# Resolve llama.cpp ref
if [[ -n "${LLAMA_REF:-}" ]]; then
LLAMA_HASH=$(resolve_ref "${LLAMA_REPO}" "${LLAMA_REF}") || exit 1
echo "llama.cpp: ${LLAMA_REF} -> ${LLAMA_HASH}"
else
LLAMA_HASH=$(get_latest_hash "${LLAMA_REPO}")
if [[ -z "${LLAMA_HASH}" ]]; then
echo "ERROR: Could not determine latest commit for llama.cpp" >&2
exit 1
fi
echo "llama.cpp: latest HEAD: ${LLAMA_HASH}"
fi
# Resolve whisper.cpp ref
if [[ -n "${WHISPER_REF:-}" ]]; then
WHISPER_HASH=$(resolve_ref "${WHISPER_REPO}" "${WHISPER_REF}") || exit 1
echo "whisper.cpp: ${WHISPER_REF} -> ${WHISPER_HASH}"
else
WHISPER_HASH=$(get_latest_hash "${WHISPER_REPO}")
if [[ -z "${WHISPER_HASH}" ]]; then
echo "ERROR: Could not determine latest commit for whisper.cpp" >&2
exit 1
fi
echo "whisper.cpp: latest HEAD: ${WHISPER_HASH}"
fi
# Resolve stable-diffusion.cpp ref
if [[ -n "${SD_REF:-}" ]]; then
SD_HASH=$(resolve_ref "${SD_REPO}" "${SD_REF}") || exit 1
echo "stable-diffusion.cpp: ${SD_REF} -> ${SD_HASH}"
else
SD_HASH=$(get_latest_hash "${SD_REPO}")
if [[ -z "${SD_HASH}" ]]; then
echo "ERROR: Could not determine latest commit for stable-diffusion.cpp" >&2
exit 1
fi
echo "stable-diffusion.cpp: latest HEAD: ${SD_HASH}"
fi
# Resolve llama-swap ref
if [[ -n "${LS_VERSION:-}" ]]; then
LS_HASH=$(resolve_ref "${LLAMA_SWAP_REPO}" "${LS_VERSION}") || exit 1
echo "llama-swap: ${LS_VERSION} -> ${LS_HASH}"
else
LS_HASH=$(get_latest_hash "${LLAMA_SWAP_REPO}")
if [[ -z "${LS_HASH}" ]]; then
echo "ERROR: Could not determine latest commit for llama-swap" >&2
exit 1
fi
echo "llama-swap: latest HEAD: ${LS_HASH}"
fi
echo ""
echo "=========================================="
echo "Starting Docker build..."
echo "=========================================="
echo ""
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
BUILD_ARGS=(
--build-arg "BACKEND=${BACKEND}"
--build-arg "LLAMA_COMMIT_HASH=${LLAMA_HASH}"
--build-arg "WHISPER_COMMIT_HASH=${WHISPER_HASH}"
--build-arg "SD_COMMIT_HASH=${SD_HASH}"
--build-arg "LS_VERSION=${LS_HASH}"
-t "${DOCKER_IMAGE_TAG}"
-f "${SCRIPT_DIR}/Dockerfile"
)
if [[ "$NO_CACHE" == true ]]; then
BUILD_ARGS+=(--no-cache)
echo "Note: Building without cache"
elif [[ "${GITHUB_ACTIONS:-}" == "true" && "${ACT:-}" != "true" ]]; then
CACHE_REF="ghcr.io/mostlygeek/llama-swap:unified-${BACKEND}-cache"
BUILD_ARGS+=(
--cache-from "type=registry,ref=${CACHE_REF}"
--cache-to "type=registry,ref=${CACHE_REF},mode=max"
)
echo "Note: Using registry cache (${CACHE_REF})"
fi
DOCKER_BUILDKIT=1 docker buildx build --load "${BUILD_ARGS[@]}" "${SCRIPT_DIR}"
echo ""
echo "=========================================="
echo "Verifying build artifacts..."
echo "=========================================="
echo ""
MISSING_BINARIES=()
for binary in llama-server llama-cli whisper-server whisper-cli sd-server sd-cli llama-swap; do
if ! docker run --rm --entrypoint which "${DOCKER_IMAGE_TAG}" "${binary}" >/dev/null 2>&1; then
MISSING_BINARIES+=("${binary}")
fi
done
if [[ ${#MISSING_BINARIES[@]} -gt 0 ]]; then
echo "ERROR: Build succeeded but the following binaries are missing:"
for binary in "${MISSING_BINARIES[@]}"; do
echo " - ${binary}"
done
echo ""
echo "Try running with --no-cache flag:"
echo " ./build-image.sh --${BACKEND} --no-cache"
exit 1
fi
echo "All expected binaries verified: llama-server, llama-cli, whisper-server, whisper-cli, sd-server, sd-cli, llama-swap"
echo ""
echo "=========================================="
echo "Build complete!"
echo "=========================================="
echo ""
echo "Image tag: ${DOCKER_IMAGE_TAG}"
echo ""
echo "Built with:"
echo " llama.cpp: ${LLAMA_HASH}"
echo " whisper.cpp: ${WHISPER_HASH}"
echo " stable-diffusion.cpp: ${SD_HASH}"
echo " llama-swap: $(docker run --rm --entrypoint cat "${DOCKER_IMAGE_TAG}" /versions.txt | grep llama-swap | cut -d' ' -f2-)"
echo ""
if [[ "$BACKEND" == "vulkan" ]]; then
echo "Run with:"
echo " docker run -it --rm --device /dev/dri:/dev/dri ${DOCKER_IMAGE_TAG}"
echo ""
echo "Note: For AMD GPUs, you may also need:"
echo " docker run -it --rm --device /dev/dri:/dev/dri --group-add video ${DOCKER_IMAGE_TAG}"
else
echo "Run with:"
echo " docker run -it --rm --gpus all ${DOCKER_IMAGE_TAG}"
fi
+33
View File
@@ -0,0 +1,33 @@
# placeholder example configuration
healthCheckTimeout: 300
logRequests: true
models:
"llama":
cmd: >
llama-server
-hf bartowski/Qwen2.5-0.5B-Instruct-GGUF:Q4_K_M
--port ${PORT}
"whisper":
checkEndpoint: /v1/audio/transcriptions/
cmd: >
whisper-server
--port ${PORT}
--m /models/whisper.bin
--flash-attn
--request-path /v1/audio/transcriptions --inference-path ""
"image":
checkEndpoint: /
cmd: |
/app/sd-server
--listen-port 9999
--diffusion-fa
--diffusion-model /models/z_image_turbo-Q8_0.gguf
--vae /models/ae.safetensors
--llm /models/qwen3-4b-instruct-2507-q8_0.gguf
--offload-to-cpu
--cfg-scale 1.0
--height 512 --width 512
--steps 8
+59
View File
@@ -0,0 +1,59 @@
#!/bin/bash
# Install llama-swap - download latest release binary from GitHub
# Usage: ./install-llama-swap.sh [version]
# version: release version number (e.g., "170") or "latest" (default)
set -e
VERSION="${1:-latest}"
REPO="mostlygeek/llama-swap"
mkdir -p /install/bin
# If a full commit hash is given, find the release tag that points to it
if echo "${VERSION}" | grep -qE '^[0-9a-f]{40}$'; then
echo "=== Resolving commit ${VERSION:0:7} to release tag ==="
TAG=$(git ls-remote --tags "https://github.com/${REPO}.git" 2>/dev/null \
| grep "^${VERSION}" | sed 's|.*refs/tags/||' | grep -v '\^{}' | head -1)
if [ -n "${TAG}" ]; then
echo "Resolved to tag: ${TAG}"
VERSION="${TAG#v}"
else
echo "No release tag found for commit ${VERSION:0:7}, using latest"
VERSION="latest"
fi
fi
# Strip leading 'v' prefix so both "198" and "v198" work
VERSION="${VERSION#v}"
# Resolve "latest" to actual version number
if [ "$VERSION" = "latest" ]; then
echo "=== Resolving latest llama-swap release ==="
VERSION=$(curl -fsSL "https://api.github.com/repos/${REPO}/releases/latest" \
| grep '"tag_name"' | head -1 | cut -d'"' -f4 | sed 's/^v//')
if [ -z "$VERSION" ]; then
echo "FATAL: Could not determine latest release version" >&2
exit 1
fi
echo "Latest version: ${VERSION}"
fi
# Download and extract
URL="https://github.com/${REPO}/releases/download/v${VERSION}/llama-swap_${VERSION}_linux_amd64.tar.gz"
echo "=== Downloading llama-swap v${VERSION} ==="
echo "URL: $URL"
curl -fSL -o /tmp/llama-swap.tar.gz "$URL"
tar -xzf /tmp/llama-swap.tar.gz -C /install/bin/
rm /tmp/llama-swap.tar.gz
# Validate
if [ ! -x "/install/bin/llama-swap" ]; then
echo "FATAL: llama-swap binary not found or not executable" >&2
ls -la /install/bin/ >&2
exit 1
fi
echo "$VERSION" > /install/llama-swap-version
echo "=== llama-swap v${VERSION} installed ==="
ls -la /install/bin/llama-swap
+65
View File
@@ -0,0 +1,65 @@
#!/bin/bash
# Install llama.cpp - clone, build, and install binaries
# Usage: BACKEND=cuda|vulkan ./install-llama.sh <commit_hash>
set -e
COMMIT_HASH="${1:-master}"
BACKEND="${BACKEND:-cuda}"
mkdir -p /install/bin /install/lib
# Clone and checkout (init-based so cache-mounted /src/llama.cpp/build dir doesn't break clone)
echo "=== Cloning llama.cpp at ${COMMIT_HASH} ==="
mkdir -p /src/llama.cpp
cd /src/llama.cpp
if [ ! -d .git ]; then
git init
git remote add origin https://github.com/ggml-org/llama.cpp.git
fi
git fetch --depth=1 origin "${COMMIT_HASH}"
git checkout FETCH_HEAD
# Common cmake flags
CMAKE_FLAGS=(
-DGGML_NATIVE=OFF
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_C_COMPILER_LAUNCHER=ccache
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache
-DLLAMA_BUILD_TESTS=OFF
)
if [ "$BACKEND" = "cuda" ]; then
CMAKE_FLAGS+=(
-DGGML_CUDA=ON
-DGGML_VULKAN=OFF
"-DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES:-60;61;75;86;89}"
"-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler"
"-DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda"
"-DCMAKE_SHARED_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda"
)
elif [ "$BACKEND" = "vulkan" ]; then
CMAKE_FLAGS+=(
-DGGML_CUDA=OFF
-DGGML_VULKAN=ON
)
fi
TARGETS=(llama-cli llama-server)
rm -rf build/CMakeCache.txt build/CMakeFiles 2>/dev/null || true
echo "=== Building llama.cpp for ${BACKEND} ==="
cmake -B build "${CMAKE_FLAGS[@]}"
cmake --build build --config Release -j"$(nproc)" --target "${TARGETS[@]}"
for bin in "${TARGETS[@]}"; do
if [ ! -f "build/bin/$bin" ]; then
echo "FATAL: $bin not found in build/bin/" >&2
exit 1
fi
cp "build/bin/$bin" "/install/bin/"
done
find build -name "*.so*" -type f -exec cp {} /install/lib/ \;
echo "=== llama.cpp build complete ==="
ls -la /install/bin/
+68
View File
@@ -0,0 +1,68 @@
#!/bin/bash
# Install stable-diffusion.cpp - clone, build, and install binaries and library
# Usage: BACKEND=cuda|vulkan ./install-sd.sh <commit_hash>
set -e
COMMIT_HASH="${1:-master}"
BACKEND="${BACKEND:-cuda}"
mkdir -p /install/bin /install/lib
# Clone and checkout (init-based so cache-mounted /src/stable-diffusion.cpp/build dir doesn't break clone)
echo "=== Cloning stable-diffusion.cpp at ${COMMIT_HASH} ==="
mkdir -p /src/stable-diffusion.cpp
cd /src/stable-diffusion.cpp
if [ ! -d .git ]; then
git init
git remote add origin https://github.com/leejet/stable-diffusion.cpp.git
fi
git fetch --depth=1 origin "${COMMIT_HASH}"
git checkout FETCH_HEAD
git submodule update --init --recursive --depth=1
# Common cmake flags
CMAKE_FLAGS=(
-DGGML_NATIVE=OFF
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_C_COMPILER_LAUNCHER=ccache
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache
-DSD_BUILD_EXAMPLES=ON
)
if [ "$BACKEND" = "cuda" ]; then
CMAKE_FLAGS+=(
-DGGML_CUDA=ON
-DGGML_VULKAN=OFF
"-DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES:-60;61;75;86;89}"
"-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler"
"-DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda"
"-DCMAKE_SHARED_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda"
-DSD_CUDA=ON
)
elif [ "$BACKEND" = "vulkan" ]; then
CMAKE_FLAGS+=(
-DGGML_CUDA=OFF
-DGGML_VULKAN=ON
-DSD_VULKAN=ON
)
fi
TARGETS=(stable-diffusion sd-cli sd-server)
rm -rf build/CMakeCache.txt build/CMakeFiles 2>/dev/null || true
echo "=== Building stable-diffusion.cpp for ${BACKEND} ==="
cmake -B build "${CMAKE_FLAGS[@]}"
cmake --build build --config Release -j"$(nproc)" --target "${TARGETS[@]}"
for bin in sd-cli sd-server; do
if [ ! -f "build/bin/$bin" ]; then
echo "FATAL: $bin not found in build/bin/" >&2
exit 1
fi
cp "build/bin/$bin" "/install/bin/"
done
find build -name "*.so*" -type f -exec cp {} /install/lib/ \;
echo "=== stable-diffusion.cpp build complete ==="
ls -la /install/bin/ /install/lib/
+64
View File
@@ -0,0 +1,64 @@
#!/bin/bash
# Install whisper.cpp - clone, build, and install binaries
# Usage: BACKEND=cuda|vulkan ./install-whisper.sh <commit_hash>
set -e
COMMIT_HASH="${1:-master}"
BACKEND="${BACKEND:-cuda}"
mkdir -p /install/bin /install/lib
# Clone and checkout (init-based so cache-mounted /src/whisper.cpp/build dir doesn't break clone)
echo "=== Cloning whisper.cpp at ${COMMIT_HASH} ==="
mkdir -p /src/whisper.cpp
cd /src/whisper.cpp
if [ ! -d .git ]; then
git init
git remote add origin https://github.com/ggml-org/whisper.cpp.git
fi
git fetch --depth=1 origin "${COMMIT_HASH}"
git checkout FETCH_HEAD
# Common cmake flags
CMAKE_FLAGS=(
-DGGML_NATIVE=OFF
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_C_COMPILER_LAUNCHER=ccache
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache
)
if [ "$BACKEND" = "cuda" ]; then
CMAKE_FLAGS+=(
-DGGML_CUDA=ON
-DGGML_VULKAN=OFF
"-DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES:-60;61;75;86;89}"
"-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler"
"-DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda"
"-DCMAKE_SHARED_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda"
)
elif [ "$BACKEND" = "vulkan" ]; then
CMAKE_FLAGS+=(
-DGGML_CUDA=OFF
-DGGML_VULKAN=ON
)
fi
TARGETS=(whisper-cli whisper-server)
rm -rf build/CMakeCache.txt build/CMakeFiles 2>/dev/null || true
echo "=== Building whisper.cpp for ${BACKEND} ==="
cmake -B build "${CMAKE_FLAGS[@]}"
cmake --build build --config Release -j"$(nproc)" --target "${TARGETS[@]}"
for bin in "${TARGETS[@]}"; do
if [ ! -f "build/bin/$bin" ]; then
echo "FATAL: $bin not found in build/bin/" >&2
exit 1
fi
cp "build/bin/$bin" "/install/bin/"
done
find build -name "*.so*" -type f -exec cp {} /install/lib/ \;
echo "=== whisper.cpp build complete ==="
ls -la /install/bin/
+1 -1
View File
@@ -1,6 +1,6 @@
module github.com/mostlygeek/llama-swap
go 1.25.4
go 1.26.1
require (
github.com/billziss-gh/golib v0.2.0
+5
View File
@@ -346,6 +346,11 @@ func (pm *ProxyManager) setupGinEngine() {
pm.ginEngine.POST("/v1/images/generations", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
pm.ginEngine.POST("/v1/images/edits", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyOAIPostFormHandler)
// sd.cpp /sdapi/v1 endpoints
pm.ginEngine.POST("/sdapi/v1/txt2img", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
pm.ginEngine.POST("/sdapi/v1/img2img", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
pm.ginEngine.GET("/sdapi/v1/loras", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyGETModelHandler)
pm.ginEngine.GET("/v1/models", pm.apiKeyAuth(), pm.listModelsHandler)
// in proxymanager_loghandlers.go
+79
View File
@@ -1659,3 +1659,82 @@ models:
assert.Equal(t, "no", w.Header().Get("X-Accel-Buffering"))
})
}
func TestProxyManager_SdApiTxt2ImgRouting(t *testing.T) {
conf := config.AddDefaultGroupToConfig(config.Config{
HealthCheckTimeout: 15,
Models: map[string]config.ModelConfig{
"sd-model": getTestSimpleResponderConfig("sd-model"),
},
LogLevel: "error",
})
proxy := New(conf)
defer proxy.StopProcesses(StopWaitForInflightRequest)
t.Run("successful txt2img with model", func(t *testing.T) {
reqBody := `{"model":"sd-model","prompt":"a cat"}`
req := httptest.NewRequest("POST", "/sdapi/v1/txt2img", bytes.NewBufferString(reqBody))
w := CreateTestResponseRecorder()
proxy.ServeHTTP(w, req)
assert.Equal(t, http.StatusOK, w.Code)
assert.Contains(t, w.Body.String(), "sd-model")
})
t.Run("successful img2img with model", func(t *testing.T) {
reqBody := `{"model":"sd-model","prompt":"a cat","init_images":[]}`
req := httptest.NewRequest("POST", "/sdapi/v1/img2img", bytes.NewBufferString(reqBody))
w := CreateTestResponseRecorder()
proxy.ServeHTTP(w, req)
assert.Equal(t, http.StatusOK, w.Code)
assert.Contains(t, w.Body.String(), "sd-model")
})
t.Run("missing model returns 400", func(t *testing.T) {
reqBody := `{"prompt":"a cat"}`
req := httptest.NewRequest("POST", "/sdapi/v1/txt2img", bytes.NewBufferString(reqBody))
w := CreateTestResponseRecorder()
proxy.ServeHTTP(w, req)
assert.Equal(t, http.StatusBadRequest, w.Code)
assert.Contains(t, w.Body.String(), "missing or invalid 'model' key")
})
}
func TestProxyManager_SdApiGetLoras(t *testing.T) {
conf := config.AddDefaultGroupToConfig(config.Config{
HealthCheckTimeout: 15,
Models: map[string]config.ModelConfig{
"sd-model": getTestSimpleResponderConfig("sd-model"),
},
LogLevel: "error",
})
proxy := New(conf)
defer proxy.StopProcesses(StopWaitForInflightRequest)
t.Run("successful GET loras with model query param", func(t *testing.T) {
req := httptest.NewRequest("GET", "/sdapi/v1/loras?model=sd-model", nil)
w := CreateTestResponseRecorder()
proxy.ServeHTTP(w, req)
assert.Equal(t, http.StatusOK, w.Code)
})
t.Run("missing model query param returns 400", func(t *testing.T) {
req := httptest.NewRequest("GET", "/sdapi/v1/loras", nil)
w := CreateTestResponseRecorder()
proxy.ServeHTTP(w, req)
assert.Equal(t, http.StatusBadRequest, w.Code)
assert.Contains(t, w.Body.String(), "missing required 'model' query parameter")
})
t.Run("unknown model returns 400", func(t *testing.T) {
req := httptest.NewRequest("GET", "/sdapi/v1/loras?model=nonexistent", nil)
w := CreateTestResponseRecorder()
proxy.ServeHTTP(w, req)
assert.Equal(t, http.StatusBadRequest, w.Code)
assert.Contains(t, w.Body.String(), "could not find suitable handler")
})
}
@@ -2,26 +2,91 @@
import { models } from "../../stores/api";
import { persistentStore } from "../../stores/persistent";
import { generateImage } from "../../lib/imageApi";
import { generateSdImage, fetchSdLoras } from "../../lib/sdApi";
import { playgroundStores } from "../../stores/playgroundActivity";
import ModelSelector from "./ModelSelector.svelte";
import ExpandableTextarea from "./ExpandableTextarea.svelte";
import type { ImageApiMode, SdApiLora, SdApiLoraRef } from "../../lib/types";
const selectedModelStore = persistentStore<string>("playground-image-model", "");
const selectedSizeStore = persistentStore<string>("playground-image-size", "1024x1024");
const apiModeStore = persistentStore<ImageApiMode>("playground-image-api-mode", "openai");
// SDAPI persistent settings
const sdNegativePromptStore = persistentStore<string>("playground-sdapi-negative-prompt", "");
const sdStepsStore = persistentStore<number>("playground-sdapi-steps", 20);
const sdCfgScaleStore = persistentStore<number>("playground-sdapi-cfg-scale", 7);
const sdSeedStore = persistentStore<number>("playground-sdapi-seed", -1);
const sdSamplerStore = persistentStore<string>("playground-sdapi-sampler", "");
const sdSchedulerStore = persistentStore<string>("playground-sdapi-scheduler", "");
const sdBatchSizeStore = persistentStore<number>("playground-sdapi-batch-size", 1);
let prompt = $state("");
let isGenerating = $state(false);
let generatedImage = $state<string | null>(null);
let generatedImages = $state<string[]>([]);
let error = $state<string | null>(null);
let abortController = $state<AbortController | null>(null);
let showFullscreen = $state(false);
let fullscreenIndex = $state(0);
let showSettings = $state(false);
// SDAPI lora state
let availableLoras = $state<SdApiLora[]>([]);
let selectedLoras = $state<SdApiLoraRef[]>([]);
let isLoadingLoras = $state(false);
let lorasLoaded = $state(false);
let loraError = $state<string | null>(null);
let hasModels = $derived($models.some((m) => !m.unlisted));
let isSdapi = $derived($apiModeStore === "sdapi");
$effect(() => {
playgroundStores.imageGenerating.set(isGenerating);
});
async function loadLoras() {
if (!$selectedModelStore || isLoadingLoras) return;
isLoadingLoras = true;
loraError = null;
try {
const loras = await fetchSdLoras($selectedModelStore);
availableLoras = loras;
lorasLoaded = true;
} catch (err) {
availableLoras = [];
loraError = err instanceof Error ? err.message : "Failed to load LoRAs";
lorasLoaded = false;
} finally {
isLoadingLoras = false;
}
}
function addLora(event: Event) {
const select = event.target as HTMLSelectElement;
const path = select.value;
if (!path) return;
const lora = availableLoras.find((l) => l.path === path);
if (lora && !selectedLoras.some((l) => l.path === path)) {
selectedLoras = [...selectedLoras, { path: lora.path, multiplier: 1.0 }];
}
select.value = "";
}
function removeLora(path: string) {
selectedLoras = selectedLoras.filter((l) => l.path !== path);
}
function updateLoraMultiplier(path: string, multiplier: number) {
selectedLoras = selectedLoras.map((l) =>
l.path === path ? { ...l, multiplier } : l
);
}
function getLoraName(path: string): string {
return availableLoras.find((l) => l.path === path)?.name ?? path;
}
async function generate() {
const trimmedPrompt = prompt.trim();
if (!trimmedPrompt || !$selectedModelStore || isGenerating) return;
@@ -31,19 +96,44 @@
abortController = new AbortController();
try {
const response = await generateImage(
$selectedModelStore,
trimmedPrompt,
$selectedSizeStore,
abortController.signal
);
if (isSdapi) {
const [w, h] = $selectedSizeStore.split("x").map(Number);
const request = {
model: $selectedModelStore,
prompt: trimmedPrompt,
negative_prompt: $sdNegativePromptStore || undefined,
width: w,
height: h,
steps: $sdStepsStore,
cfg_scale: $sdCfgScaleStore,
seed: $sdSeedStore,
batch_size: $sdBatchSizeStore,
sampler_name: $sdSamplerStore || undefined,
scheduler: $sdSchedulerStore || undefined,
lora: selectedLoras.length > 0 ? selectedLoras : undefined,
};
if (response.data && response.data.length > 0) {
const imageData = response.data[0];
if (imageData.b64_json) {
generatedImage = `data:image/png;base64,${imageData.b64_json}`;
} else if (imageData.url) {
generatedImage = imageData.url;
const response = await generateSdImage(request, abortController.signal);
if (response.images && response.images.length > 0) {
generatedImages = response.images.map(
(img) => `data:image/png;base64,${img}`
);
}
} else {
const response = await generateImage(
$selectedModelStore,
trimmedPrompt,
$selectedSizeStore,
abortController.signal
);
if (response.data && response.data.length > 0) {
const imageData = response.data[0];
if (imageData.b64_json) {
generatedImages = [`data:image/png;base64,${imageData.b64_json}`];
} else if (imageData.url) {
generatedImages = [imageData.url];
}
}
}
} catch (err) {
@@ -63,28 +153,29 @@
}
function clearImage() {
generatedImage = null;
generatedImages = [];
error = null;
prompt = "";
}
function downloadImage() {
if (!generatedImage) return;
function downloadImage(index: number = 0) {
const img = generatedImages[index];
if (!img) return;
const link = document.createElement("a");
link.href = generatedImage;
link.download = `generated-image-${Date.now()}.png`;
link.href = img;
link.download = `generated-image-${Date.now()}-${index}.png`;
document.body.appendChild(link);
link.click();
document.body.removeChild(link);
}
function openFullscreen() {
function openFullscreen(index: number = 0) {
fullscreenIndex = index;
showFullscreen = true;
}
function closeFullscreen(event?: MouseEvent) {
// Only close if clicking the background, not the image
if (event && event.target !== event.currentTarget) {
return;
}
@@ -100,9 +191,19 @@
</script>
<div class="flex flex-col h-full">
<!-- Model selector -->
<!-- Model selector and mode toggle -->
<div class="shrink-0 flex flex-wrap gap-2 mb-4">
<ModelSelector bind:value={$selectedModelStore} placeholder="Select an image model..." disabled={isGenerating} />
<select
class="px-3 py-2 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
bind:value={$apiModeStore}
disabled={isGenerating}
>
<option value="openai">OpenAI</option>
<option value="sdapi">SDAPI</option>
</select>
<select
class="px-3 py-2 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
bind:value={$selectedSizeStore}
@@ -123,8 +224,166 @@
<option value="1024x1792">1024x1792 (SDXL)</option>
</optgroup>
</select>
{#if isSdapi}
<button
class="px-3 py-2 rounded border border-gray-200 dark:border-white/10 bg-surface hover:bg-secondary-hover transition-colors"
onclick={() => showSettings = !showSettings}
>
{showSettings ? "Hide Settings" : "Settings"}
</button>
{/if}
</div>
<!-- SDAPI Settings Panel -->
{#if isSdapi && showSettings}
<div class="shrink-0 mb-4 p-4 rounded border border-gray-200 dark:border-white/10 bg-surface">
<div class="grid grid-cols-2 md:grid-cols-4 gap-3 mb-3">
<label class="flex flex-col gap-1">
<span class="text-xs text-txtsecondary">Steps</span>
<input
type="number"
class="px-2 py-1 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
bind:value={$sdStepsStore}
min="1"
max="150"
/>
</label>
<label class="flex flex-col gap-1">
<span class="text-xs text-txtsecondary">CFG Scale</span>
<input
type="number"
class="px-2 py-1 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
bind:value={$sdCfgScaleStore}
min="1"
max="30"
step="0.5"
/>
</label>
<label class="flex flex-col gap-1">
<span class="text-xs text-txtsecondary">Seed (-1 = random)</span>
<input
type="number"
class="px-2 py-1 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
bind:value={$sdSeedStore}
min="-1"
/>
</label>
<label class="flex flex-col gap-1">
<span class="text-xs text-txtsecondary">Batch Size</span>
<input
type="number"
class="px-2 py-1 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
bind:value={$sdBatchSizeStore}
min="1"
max="8"
/>
</label>
<label class="flex flex-col gap-1">
<span class="text-xs text-txtsecondary">Sampler</span>
<select
class="px-2 py-1 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
bind:value={$sdSamplerStore}
>
<option value="">Default</option>
<option value="euler_a">euler_a</option>
<option value="euler">euler</option>
<option value="heun">heun</option>
<option value="dpm2">dpm2</option>
<option value="dpmpp2s_a">dpmpp2s_a</option>
<option value="dpmpp2m">dpmpp2m</option>
<option value="dpmpp2mv2">dpmpp2mv2</option>
<option value="ipndm">ipndm</option>
<option value="ipndm_v">ipndm_v</option>
<option value="lcm">lcm</option>
<option value="ddim_trailing">ddim_trailing</option>
<option value="tcd">tcd</option>
</select>
</label>
<label class="flex flex-col gap-1">
<span class="text-xs text-txtsecondary">Scheduler</span>
<select
class="px-2 py-1 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
bind:value={$sdSchedulerStore}
>
<option value="">Auto for model</option>
<option value="discrete">discrete</option>
<option value="karras">karras</option>
<option value="exponential">exponential</option>
<option value="ays">ays</option>
<option value="gits">gits</option>
</select>
</label>
</div>
<label class="flex flex-col gap-1 mb-3">
<span class="text-xs text-txtsecondary">Negative Prompt</span>
<textarea
class="px-2 py-1 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary resize-y text-sm"
bind:value={$sdNegativePromptStore}
rows="2"
placeholder="Elements to avoid..."
></textarea>
</label>
<!-- LoRA Selection -->
<div>
<span class="text-xs text-txtsecondary block mb-1">LoRAs</span>
<div class="flex items-center gap-2 mb-2">
<button
class="px-3 py-1.5 text-sm rounded border border-gray-200 dark:border-white/10 bg-surface hover:bg-secondary-hover transition-colors disabled:opacity-50"
onclick={loadLoras}
disabled={!$selectedModelStore || isLoadingLoras}
>
{isLoadingLoras ? "Loading..." : lorasLoaded ? "Reload LoRAs" : "Load LoRAs"}
</button>
{#if lorasLoaded && availableLoras.length > 0}
<select
class="flex-1 px-2 py-1.5 text-sm rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
onchange={addLora}
>
<option value="">Add a LoRA...</option>
{#each availableLoras.filter((l) => !selectedLoras.some((s) => s.path === l.path)) as lora}
<option value={lora.path}>{lora.name}</option>
{/each}
</select>
{/if}
</div>
{#if loraError}
<p class="text-xs text-red-500 mb-1">{loraError}</p>
{/if}
{#if lorasLoaded && availableLoras.length === 0}
<p class="text-xs text-txtsecondary">No LoRAs available</p>
{/if}
{#if selectedLoras.length > 0}
<div class="flex flex-col gap-1.5">
{#each selectedLoras as lora}
<div class="flex items-center gap-2 text-sm">
<span class="flex-1 truncate">{getLoraName(lora.path)}</span>
<input
type="number"
class="w-20 px-1.5 py-1 text-xs rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-1 focus:ring-primary"
value={lora.multiplier}
oninput={(e) => updateLoraMultiplier(lora.path, parseFloat((e.target as HTMLInputElement).value) || 1)}
min="0"
max="2"
step="0.1"
/>
<button
class="px-1.5 py-0.5 text-xs rounded border border-gray-200 dark:border-white/10 hover:bg-red-500 hover:text-white hover:border-red-500 transition-colors"
onclick={() => removeLora(lora.path)}
aria-label="Remove LoRA"
>
x
</button>
</div>
{/each}
</div>
{/if}
</div>
</div>
{/if}
<!-- Empty state for no models configured -->
{#if !hasModels}
<div class="flex-1 flex items-center justify-center text-txtsecondary">
@@ -143,22 +402,50 @@
<p class="font-medium">Error</p>
<p class="text-sm mt-1">{error}</p>
</div>
{:else if generatedImage}
{:else if generatedImages.length > 1}
<!-- Grid for multiple images (batch) -->
<div class="grid grid-cols-2 gap-2 p-2 w-full h-full overflow-auto">
{#each generatedImages as img, i}
<div class="relative flex items-center justify-center">
<button
class="p-0 border-0 bg-transparent cursor-pointer"
onclick={() => openFullscreen(i)}
aria-label="View fullscreen"
>
<img
src={img}
alt="AI generated content {i + 1}"
class="max-w-full max-h-full object-contain hover:opacity-90 transition-opacity"
/>
</button>
<button
class="absolute bottom-2 right-2 p-1.5 bg-black/60 hover:bg-black/80 text-white rounded-full transition-colors"
onclick={(e) => { e.stopPropagation(); downloadImage(i); }}
aria-label="Download image"
>
<svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M4 16v1a3 3 0 003 3h10a3 3 0 003-3v-1m-4-4l-4 4m0 0l-4-4m4 4V4"></path>
</svg>
</button>
</div>
{/each}
</div>
{:else if generatedImages.length === 1}
<div class="relative max-w-full max-h-full flex items-center justify-center">
<button
class="p-0 border-0 bg-transparent cursor-pointer"
onclick={openFullscreen}
onclick={() => openFullscreen(0)}
aria-label="View fullscreen"
>
<img
src={generatedImage}
src={generatedImages[0]}
alt="AI generated content"
class="max-w-full max-h-full object-contain hover:opacity-90 transition-opacity"
/>
</button>
<button
class="absolute bottom-2 right-2 p-2 bg-black/60 hover:bg-black/80 text-white rounded-full transition-colors"
onclick={(e) => { e.stopPropagation(); downloadImage(); }}
onclick={(e) => { e.stopPropagation(); downloadImage(0); }}
aria-label="Download image"
>
<svg class="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
@@ -198,7 +485,7 @@
<button
class="btn flex-1 md:flex-none"
onclick={clearImage}
disabled={!generatedImage && !error && !prompt.trim()}
disabled={generatedImages.length === 0 && !error && !prompt.trim()}
>
Clear
</button>
@@ -209,7 +496,7 @@
</div>
<!-- Fullscreen dialog -->
{#if showFullscreen && generatedImage}
{#if showFullscreen && generatedImages[fullscreenIndex]}
<div
class="fixed inset-0 bg-black/90 z-50 flex items-center justify-center p-4"
onclick={(e) => closeFullscreen(e)}
@@ -226,7 +513,7 @@
×
</button>
<img
src={generatedImage}
src={generatedImages[fullscreenIndex]}
alt="AI generated content"
class="max-w-full max-h-full object-contain pointer-events-none"
/>
+39
View File
@@ -0,0 +1,39 @@
import type { SdApiTxt2ImgRequest, SdApiResponse, SdApiLora } from "./types";
export async function generateSdImage(
request: SdApiTxt2ImgRequest,
signal?: AbortSignal
): Promise<SdApiResponse> {
const response = await fetch("/sdapi/v1/txt2img", {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify(request),
signal,
});
if (!response.ok) {
const errorText = await response.text();
throw new Error(`SDAPI error: ${response.status} - ${errorText}`);
}
return response.json();
}
export async function fetchSdLoras(
model: string,
signal?: AbortSignal
): Promise<SdApiLora[]> {
const response = await fetch(
`/sdapi/v1/loras?model=${encodeURIComponent(model)}`,
{ signal }
);
if (!response.ok) {
const errorText = await response.text();
throw new Error(`SDAPI loras error: ${response.status} - ${errorText}`);
}
return response.json();
}
+34
View File
@@ -115,6 +115,40 @@ export interface ImageGenerationResponse {
}>;
}
// SDAPI types (stable-diffusion.cpp)
export type ImageApiMode = "openai" | "sdapi";
export interface SdApiLora {
name: string;
path: string;
}
export interface SdApiLoraRef {
path: string;
multiplier: number;
}
export interface SdApiTxt2ImgRequest {
model?: string;
prompt: string;
negative_prompt?: string;
width?: number;
height?: number;
steps?: number;
cfg_scale?: number;
seed?: number;
batch_size?: number;
sampler_name?: string;
scheduler?: string;
lora?: SdApiLoraRef[];
}
export interface SdApiResponse {
images: string[];
parameters: Record<string, unknown>;
info: string;
}
export interface AudioTranscriptionRequest {
file: File;
model: string;
+1
View File
@@ -32,6 +32,7 @@ export default defineConfig({
"/upstream": "http://localhost:8080",
"/unload": "http://localhost:8080",
"/v1": "http://localhost:8080",
"/sdapi": "http://localhost:8080",
},
},
});