diff --git a/.github/workflows/containers.yml b/.github/workflows/containers.yml index a67c5100..53b79b34 100644 --- a/.github/workflows/containers.yml +++ b/.github/workflows/containers.yml @@ -33,7 +33,7 @@ jobs: fail-fast: false steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Free up disk space if: matrix.platform == 'rocm' @@ -48,8 +48,18 @@ jobs: echo "After cleanup:" df -h + # QEMU enables arm64 cross-builds on the amd64 GitHub runner. + # Currently only the cpu backend goes multi-arch; the action is a + # no-op for amd64-only builds, so leaving it on for every matrix + # entry keeps the workflow simple. + - name: Set up QEMU + uses: docker/setup-qemu-action@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v4 + - name: Log in to GitHub Container Registry - uses: docker/login-action@v2 + uses: docker/login-action@v4 with: registry: ghcr.io username: ${{ github.actor }} @@ -64,6 +74,9 @@ jobs: # see: https://github.com/actions/delete-package-versions/issues/74 delete-untagged-containers: needs: build-and-push + # Skip on forks — the delete API requires package-admin on the + # upstream account and would otherwise fail every fork CI run. + if: github.repository == 'mostlygeek/llama-swap' runs-on: ubuntu-latest steps: - uses: actions/delete-package-versions@v5 diff --git a/docker/build-container.sh b/docker/build-container.sh index d3296d3a..242f69d2 100755 --- a/docker/build-container.sh +++ b/docker/build-container.sh @@ -46,13 +46,31 @@ fi BASE_IMAGE=${BASE_LLAMACPP_IMAGE:-ghcr.io/ggml-org/llama.cpp} SD_IMAGE=${BASE_SDCPP_IMAGE:-ghcr.io/leejet/stable-diffusion.cpp} -# Set llama-swap repository, automatically uses GITHUB_REPOSITORY variable -# to enable easy container builds on forked repos +# LS_REPO is the destination of the built container image — defaults to the +# current GitHub repository so forked CI builds publish to the fork's own +# ghcr.io namespace without code changes. 
LS_REPO=${GITHUB_REPOSITORY:-mostlygeek/llama-swap} +# LS_BINARY_REPO is where the llama-swap release tarball is downloaded +# from. Decoupled from LS_REPO so forks (which usually have no releases of +# their own) can still build a container by pulling the canonical binary +# from upstream. Override via the LS_BINARY_REPO env var when you maintain +# fork-side releases. +LS_BINARY_REPO=${LS_BINARY_REPO:-mostlygeek/llama-swap} + # the most recent llama-swap tag -# have to strip out the 'v' due to .tar.gz file naming -LS_VER=$(curl -s https://api.github.com/repos/${LS_REPO}/releases/latest | jq -r .tag_name | sed 's/v//') +# have to strip out the leading 'v' due to .tar.gz file naming. +# Sends GITHUB_TOKEN when it is set — the unauth'd GitHub API is 60/hr per IP and +# GHA runners share IPs, so anonymous calls regularly return rate-limit JSON whose +# `.tag_name` resolves to "null". With no token (e.g. a local build) the header is omitted instead of being sent empty. +LS_VER=$(curl -s ${GITHUB_TOKEN:+-H "Authorization: Bearer $GITHUB_TOKEN"} \ + "https://api.github.com/repos/${LS_BINARY_REPO}/releases/latest" \ + | jq -r .tag_name | sed 's/^v//') + +if [[ -z "$LS_VER" || "$LS_VER" == "null" ]]; then + log_info "Error: could not resolve latest llama-swap release tag from ${LS_BINARY_REPO}" + exit 1 +fi # Fetches the most recent llama.cpp tag matching the given prefix # Handles pagination to search beyond the first 100 results @@ -126,6 +144,25 @@ if [[ ! -z "$DEBUG_ABORT_BUILD" ]]; then exit 0 fi +# cpu is the only backend with a multi-arch upstream base +# (ghcr.io/ggml-org/llama.cpp:server-bXXXX ships amd64+arm64); GPU backends +# are amd64-only and stay on the original `docker build` path so the +# sd-server layer can still FROM the just-built image via the local +# dockerd image store (buildx's container driver has a separate store +# that doesn't share with dockerd, which breaks the sd build). 
+if [ "$ARCH" == "cpu" ]; then + if [ "$PUSH_IMAGES" == "true" ]; then + BUILDX_FLAGS="--push --platform linux/amd64,linux/arm64" + else + # Smoke build: build both platforms but export nothing. With + # neither --push nor --load given, buildx on the docker-container + # driver defaults to cacheonly, so each arch still builds fully + # and a regression in either fails CI — without materializing the + # image or needing to --load (which is multi-arch-incompatible). + BUILDX_FLAGS="--platform linux/amd64,linux/arm64" + fi +fi + for CONTAINER_TYPE in non-root root; do CONTAINER_TAG="ghcr.io/${LS_REPO}:v${LS_VER}-${ARCH}-${LCPP_TAG}" CONTAINER_LATEST="ghcr.io/${LS_REPO}:${ARCH}" @@ -142,11 +179,23 @@ for CONTAINER_TYPE in non-root root; do fi log_info "Building $CONTAINER_TYPE $CONTAINER_TAG $LS_VER" - docker build --provenance=false -f llama-swap.Containerfile --build-arg BASE_TAG=${BASE_TAG} --build-arg LS_VER=${LS_VER} --build-arg UID=${USER_UID} \ - --build-arg LS_REPO=${LS_REPO} --build-arg GID=${USER_GID} --build-arg USER_HOME=${USER_HOME} -t ${CONTAINER_TAG} -t ${CONTAINER_LATEST} \ - --build-arg BASE_IMAGE=${BASE_IMAGE} . + if [ "$ARCH" == "cpu" ]; then + docker buildx build $BUILDX_FLAGS --provenance=false \ + -f llama-swap.Containerfile \ + --build-arg BASE_TAG=${BASE_TAG} --build-arg LS_VER=${LS_VER} --build-arg UID=${USER_UID} \ + --build-arg LS_REPO=${LS_BINARY_REPO} --build-arg GID=${USER_GID} --build-arg USER_HOME=${USER_HOME} \ + --build-arg BASE_IMAGE=${BASE_IMAGE} \ + -t ${CONTAINER_TAG} -t ${CONTAINER_LATEST} . + else + docker build --provenance=false -f llama-swap.Containerfile \ + --build-arg BASE_TAG=${BASE_TAG} --build-arg LS_VER=${LS_VER} --build-arg UID=${USER_UID} \ + --build-arg LS_REPO=${LS_BINARY_REPO} --build-arg GID=${USER_GID} --build-arg USER_HOME=${USER_HOME} \ + -t ${CONTAINER_TAG} -t ${CONTAINER_LATEST} \ + --build-arg BASE_IMAGE=${BASE_IMAGE} . 
+ fi - # For architectures with stable-diffusion.cpp support, layer sd-server on top + # For architectures with stable-diffusion.cpp support, layer sd-server on top. + # Stays on `docker build` so the base resolves from local dockerd. case "$ARCH" in "musa" | "vulkan") log_info "Adding sd-server to $CONTAINER_TAG" @@ -157,7 +206,8 @@ for CONTAINER_TYPE in non-root root; do -t ${CONTAINER_TAG} -t ${CONTAINER_LATEST} . ;; esac - if [ "$PUSH_IMAGES" == "true" ]; then + # cpu builds push inline via buildx --push; all other archs push here. + if [ "$ARCH" != "cpu" ] && [ "$PUSH_IMAGES" == "true" ]; then docker push ${CONTAINER_TAG} docker push ${CONTAINER_LATEST} fi diff --git a/docker/llama-swap.Containerfile b/docker/llama-swap.Containerfile index 41a53594..7d47e02b 100644 --- a/docker/llama-swap.Containerfile +++ b/docker/llama-swap.Containerfile @@ -3,6 +3,9 @@ ARG BASE_TAG=server-cuda FROM ${BASE_IMAGE}:${BASE_TAG} # has to be after the FROM +# TARGETARCH is auto-set by `docker buildx build --platform …` (amd64/arm64). It is declared +# WITHOUT a default on purpose: a default here would shadow the automatic value. The RUN below falls back to amd64 when it is unset. +ARG TARGETARCH ARG LS_VER=170 ARG LS_REPO=mostlygeek/llama-swap @@ -34,9 +37,9 @@ WORKDIR /app ENV PATH="/app:${PATH}" RUN \ - curl -LO "https://github.com/${LS_REPO}/releases/download/v${LS_VER}/llama-swap_${LS_VER}_linux_amd64.tar.gz" && \ - tar -zxf "llama-swap_${LS_VER}_linux_amd64.tar.gz" && \ - rm "llama-swap_${LS_VER}_linux_amd64.tar.gz" + curl -LO "https://github.com/${LS_REPO}/releases/download/v${LS_VER}/llama-swap_${LS_VER}_linux_${TARGETARCH:-amd64}.tar.gz" && \ + tar -zxf "llama-swap_${LS_VER}_linux_${TARGETARCH:-amd64}.tar.gz" && \ + rm "llama-swap_${LS_VER}_linux_${TARGETARCH:-amd64}.tar.gz" COPY --chown=$UID:$GID config.example.yaml /app/config.yaml