diff --git a/.github/workflows/containers.yml b/.github/workflows/containers.yml
index a67c5100..53b79b34 100644
--- a/.github/workflows/containers.yml
+++ b/.github/workflows/containers.yml
@@ -33,7 +33,7 @@ jobs:
       fail-fast: false
     steps:
       - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
 
       - name: Free up disk space
         if: matrix.platform == 'rocm'
@@ -48,8 +48,18 @@ jobs:
           echo "After cleanup:"
           df -h
 
+      # QEMU emulation enables arm64 cross-builds on the amd64 GitHub runner.
+      # Currently only the cpu backend goes multi-arch; amd64-only builds
+      # never invoke the emulator, so running this step for every matrix
+      # entry is harmless and keeps the workflow simple.
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v4
+
       - name: Log in to GitHub Container Registry
-        uses: docker/login-action@v2
+        uses: docker/login-action@v4
         with:
           registry: ghcr.io
           username: ${{ github.actor }}
@@ -64,6 +74,9 @@ jobs:
   # see: https://github.com/actions/delete-package-versions/issues/74
   delete-untagged-containers:
     needs: build-and-push
+    # Skip on forks: deleting package versions requires package-admin rights
+    # on the upstream account, so this job would fail on every fork CI run.
+    if: github.repository == 'mostlygeek/llama-swap'
     runs-on: ubuntu-latest
     steps:
       - uses: actions/delete-package-versions@v5
diff --git a/docker/build-container.sh b/docker/build-container.sh
index d3296d3a..242f69d2 100755
--- a/docker/build-container.sh
+++ b/docker/build-container.sh
@@ -46,13 +46,31 @@ fi
 BASE_IMAGE=${BASE_LLAMACPP_IMAGE:-ghcr.io/ggml-org/llama.cpp}
 SD_IMAGE=${BASE_SDCPP_IMAGE:-ghcr.io/leejet/stable-diffusion.cpp}
 
-# Set llama-swap repository, automatically uses GITHUB_REPOSITORY variable
-# to enable easy container builds on forked repos
+# LS_REPO is the destination of the built container image — defaults to the
+# current GitHub repository so forked CI builds publish to the fork's own
+# ghcr.io namespace without code changes.
 LS_REPO=${GITHUB_REPOSITORY:-mostlygeek/llama-swap}
 
+# LS_BINARY_REPO is where the llama-swap release tarball is downloaded
+# from. Decoupled from LS_REPO so forks (which usually have no releases of
+# their own) can still build a container by pulling the canonical binary
+# from upstream. Override via the LS_BINARY_REPO env var when you maintain
+# fork-side releases.
+LS_BINARY_REPO=${LS_BINARY_REPO:-mostlygeek/llama-swap}
+
 # the most recent llama-swap tag
-# have to strip out the 'v' due to .tar.gz file naming
-LS_VER=$(curl -s https://api.github.com/repos/${LS_REPO}/releases/latest | jq -r .tag_name | sed 's/v//')
+# have to strip out the leading 'v' due to .tar.gz file naming.
+# Authenticate when GITHUB_TOKEN is set — the anonymous API allows 60 req/hr
+# per IP and GHA runners share IPs, so it often returns rate-limit JSON whose
+# `.tag_name` is "null". Without a token, fall back to an anonymous request.
+LS_VER=$(curl -s ${GITHUB_TOKEN:+-H "Authorization: Bearer ${GITHUB_TOKEN}"} \
+    "https://api.github.com/repos/${LS_BINARY_REPO}/releases/latest" \
+    | jq -r .tag_name | sed 's/^v//')
+
+if [[ -z "$LS_VER" || "$LS_VER" == "null" ]]; then
+    log_info "Error: could not resolve latest llama-swap release tag from ${LS_BINARY_REPO}"
+    exit 1
+fi
 
 # Fetches the most recent llama.cpp tag matching the given prefix
 # Handles pagination to search beyond the first 100 results
@@ -126,6 +144,25 @@ if [[ ! -z "$DEBUG_ABORT_BUILD" ]]; then
     exit 0
 fi
 
+# cpu is the only backend with a multi-arch upstream base
+# (ghcr.io/ggml-org/llama.cpp:server-bXXXX ships amd64+arm64); GPU backends
+# are amd64-only and stay on the original `docker build` path so the
+# sd-server layer can still FROM the just-built image via the local
+# dockerd image store (buildx's container driver has a separate store
+# that doesn't share with dockerd, which breaks the sd build).
+if [ "$ARCH" == "cpu" ]; then
+    # Every cpu build targets both platforms. Without --push or --load,
+    # buildx on the docker-container driver keeps the result only in its
+    # build cache, so a non-push run is a smoke build: each arch is fully
+    # built (a regression in either fails CI) but no image is
+    # materialized. --load is not an option here because it cannot
+    # import a multi-arch result.
+    BUILDX_FLAGS="--platform linux/amd64,linux/arm64"
+    if [ "$PUSH_IMAGES" == "true" ]; then
+        BUILDX_FLAGS="--push $BUILDX_FLAGS"
+    fi
+fi
+
 for CONTAINER_TYPE in non-root root; do
   CONTAINER_TAG="ghcr.io/${LS_REPO}:v${LS_VER}-${ARCH}-${LCPP_TAG}"
   CONTAINER_LATEST="ghcr.io/${LS_REPO}:${ARCH}"
@@ -142,11 +179,23 @@ for CONTAINER_TYPE in non-root root; do
   fi
 
   log_info "Building $CONTAINER_TYPE $CONTAINER_TAG $LS_VER"
-  docker build --provenance=false -f llama-swap.Containerfile --build-arg BASE_TAG=${BASE_TAG} --build-arg LS_VER=${LS_VER} --build-arg UID=${USER_UID} \
-    --build-arg LS_REPO=${LS_REPO} --build-arg GID=${USER_GID} --build-arg USER_HOME=${USER_HOME} -t ${CONTAINER_TAG} -t ${CONTAINER_LATEST} \
-    --build-arg BASE_IMAGE=${BASE_IMAGE} .
+  # cpu builds go through buildx (multi-arch, pushes inline when requested);
+  # every other backend keeps plain `docker build` so the result lands in the
+  # local dockerd image store. Both paths share the same build arguments.
+  if [ "$ARCH" == "cpu" ]; then
+    BUILD_CMD="docker buildx build $BUILDX_FLAGS"
+  else
+    BUILD_CMD="docker build"
+  fi
+  $BUILD_CMD --provenance=false -f llama-swap.Containerfile \
+    --build-arg BASE_IMAGE=${BASE_IMAGE} --build-arg BASE_TAG=${BASE_TAG} \
+    --build-arg LS_REPO=${LS_BINARY_REPO} --build-arg LS_VER=${LS_VER} \
+    --build-arg UID=${USER_UID} --build-arg GID=${USER_GID} \
+    --build-arg USER_HOME=${USER_HOME} \
+    -t ${CONTAINER_TAG} -t ${CONTAINER_LATEST} .
 
-  # For architectures with stable-diffusion.cpp support, layer sd-server on top
+  # For architectures with stable-diffusion.cpp support, layer sd-server on top.
+  # Stays on `docker build` so the base resolves from local dockerd.
   case "$ARCH" in
     "musa" | "vulkan")
       log_info "Adding sd-server to $CONTAINER_TAG"
@@ -157,7 +206,8 @@ for CONTAINER_TYPE in non-root root; do
         -t ${CONTAINER_TAG} -t ${CONTAINER_LATEST} . ;;
   esac
 
-  if [ "$PUSH_IMAGES" == "true" ]; then
+  # cpu builds push inline via buildx --push; all other archs push here.
+  if [ "$ARCH" != "cpu" ] && [ "$PUSH_IMAGES" == "true" ]; then
     docker push ${CONTAINER_TAG}
     docker push ${CONTAINER_LATEST}
   fi
diff --git a/docker/llama-swap.Containerfile b/docker/llama-swap.Containerfile
index 41a53594..7d47e02b 100644
--- a/docker/llama-swap.Containerfile
+++ b/docker/llama-swap.Containerfile
@@ -3,6 +3,9 @@ ARG BASE_TAG=server-cuda
 FROM ${BASE_IMAGE}:${BASE_TAG}
 
 # has to be after the FROM
+# TARGETARCH is auto-set by BuildKit per --platform; declare it WITHOUT a default
+# (a default would shadow it). RUN below falls back to amd64 when it is unset.
+ARG TARGETARCH
 ARG LS_VER=170
 ARG LS_REPO=mostlygeek/llama-swap
 
@@ -34,9 +37,9 @@ WORKDIR /app
 ENV PATH="/app:${PATH}"
 
 RUN \
-    curl -LO "https://github.com/${LS_REPO}/releases/download/v${LS_VER}/llama-swap_${LS_VER}_linux_amd64.tar.gz" && \
-    tar -zxf "llama-swap_${LS_VER}_linux_amd64.tar.gz" && \
-    rm "llama-swap_${LS_VER}_linux_amd64.tar.gz"
+    curl -LO "https://github.com/${LS_REPO}/releases/download/v${LS_VER}/llama-swap_${LS_VER}_linux_${TARGETARCH:-amd64}.tar.gz" && \
+    tar -zxf "llama-swap_${LS_VER}_linux_${TARGETARCH:-amd64}.tar.gz" && \
+    rm "llama-swap_${LS_VER}_linux_${TARGETARCH:-amd64}.tar.gz"
 
 COPY --chown=$UID:$GID config.example.yaml /app/config.yaml