Multi arch cpu (#746)
I encountered a problem similar to the one in https://github.com/mostlygeek/llama-swap/issues/709, but in my case I only needed the :cpu version, so I decided to extend the GitHub Action to build arm64 alongside the amd64 version under the same :cpu tag. I have already tested it from this fork (ghcr.io/rhtenhove/llama-swap:cpu), and it works perfectly fine. Adding GPU support is a whole other beast: it needs quite a bit more work and isn't something I can test.
This commit is contained in:
@@ -33,7 +33,7 @@ jobs:
|
||||
fail-fast: false
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: Free up disk space
|
||||
if: matrix.platform == 'rocm'
|
||||
@@ -48,8 +48,18 @@ jobs:
|
||||
echo "After cleanup:"
|
||||
df -h
|
||||
|
||||
# QEMU enables arm64 cross-builds on the amd64 GitHub runner.
|
||||
# Currently only the cpu backend goes multi-arch; the action is a
|
||||
# no-op for amd64-only builds, so leaving it on for every matrix
|
||||
# entry keeps the workflow simple.
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v4
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v4
|
||||
|
||||
- name: Log in to GitHub Container Registry
|
||||
uses: docker/login-action@v2
|
||||
uses: docker/login-action@v4
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
@@ -64,6 +74,9 @@ jobs:
|
||||
# see: https://github.com/actions/delete-package-versions/issues/74
|
||||
delete-untagged-containers:
|
||||
needs: build-and-push
|
||||
# Skip on forks — the delete API requires package-admin on the
|
||||
# upstream account and would otherwise red-x every fork CI run.
|
||||
if: github.repository == 'mostlygeek/llama-swap'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/delete-package-versions@v5
|
||||
|
||||
@@ -46,13 +46,31 @@ fi
|
||||
BASE_IMAGE=${BASE_LLAMACPP_IMAGE:-ghcr.io/ggml-org/llama.cpp}
|
||||
SD_IMAGE=${BASE_SDCPP_IMAGE:-ghcr.io/leejet/stable-diffusion.cpp}
|
||||
|
||||
# Set llama-swap repository, automatically uses GITHUB_REPOSITORY variable
|
||||
# to enable easy container builds on forked repos
|
||||
# LS_REPO is the destination of the built container image — defaults to the
|
||||
# current GitHub repository so forked CI builds publish to the fork's own
|
||||
# ghcr.io namespace without code changes.
|
||||
LS_REPO=${GITHUB_REPOSITORY:-mostlygeek/llama-swap}
|
||||
|
||||
# LS_BINARY_REPO is where the llama-swap release tarball is downloaded
|
||||
# from. Decoupled from LS_REPO so forks (which usually have no releases of
|
||||
# their own) can still build a container by pulling the canonical binary
|
||||
# from upstream. Override via the LS_BINARY_REPO env var when you maintain
|
||||
# fork-side releases.
|
||||
LS_BINARY_REPO=${LS_BINARY_REPO:-mostlygeek/llama-swap}
|
||||
|
||||
# the most recent llama-swap tag
|
||||
# have to strip out the 'v' due to .tar.gz file naming
|
||||
LS_VER=$(curl -s https://api.github.com/repos/${LS_REPO}/releases/latest | jq -r .tag_name | sed 's/v//')
|
||||
# have to strip out the 'v' due to .tar.gz file naming.
|
||||
# Authenticated request — unauth'd github.com API is 60/hr per IP and GHA
|
||||
# runners share IPs, so the call regularly returns rate-limit JSON and
|
||||
# `.tag_name` then resolves to "null", producing a bogus `vnull` URL below.
|
||||
LS_VER=$(curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \
|
||||
"https://api.github.com/repos/${LS_BINARY_REPO}/releases/latest" \
|
||||
| jq -r .tag_name | sed 's/v//')
|
||||
|
||||
if [[ -z "$LS_VER" || "$LS_VER" == "null" ]]; then
|
||||
log_info "Error: could not resolve latest llama-swap release tag from ${LS_BINARY_REPO}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Fetches the most recent llama.cpp tag matching the given prefix
|
||||
# Handles pagination to search beyond the first 100 results
|
||||
@@ -126,6 +144,25 @@ if [[ ! -z "$DEBUG_ABORT_BUILD" ]]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# cpu is the only backend with a multi-arch upstream base
|
||||
# (ghcr.io/ggml-org/llama.cpp:server-bXXXX ships amd64+arm64); GPU backends
|
||||
# are amd64-only and stay on the original `docker build` path so the
|
||||
# sd-server layer can still FROM the just-built image via the local
|
||||
# dockerd image store (buildx's container driver has a separate store
|
||||
# that doesn't share with dockerd, which breaks the sd build).
|
||||
if [ "$ARCH" == "cpu" ]; then
|
||||
if [ "$PUSH_IMAGES" == "true" ]; then
|
||||
BUILDX_FLAGS="--push --platform linux/amd64,linux/arm64"
|
||||
else
|
||||
# Smoke build: validate both platforms but emit no output. buildx
|
||||
# on the docker-container driver defaults to cacheonly when
|
||||
# neither --push nor --load is given, so each arch fully builds
|
||||
# and a regression in either fails CI — without materializing the
|
||||
# image or needing to --load (which is multi-arch-incompatible).
|
||||
BUILDX_FLAGS="--platform linux/amd64,linux/arm64"
|
||||
fi
|
||||
fi
|
||||
|
||||
for CONTAINER_TYPE in non-root root; do
|
||||
CONTAINER_TAG="ghcr.io/${LS_REPO}:v${LS_VER}-${ARCH}-${LCPP_TAG}"
|
||||
CONTAINER_LATEST="ghcr.io/${LS_REPO}:${ARCH}"
|
||||
@@ -142,11 +179,23 @@ for CONTAINER_TYPE in non-root root; do
|
||||
fi
|
||||
|
||||
log_info "Building $CONTAINER_TYPE $CONTAINER_TAG $LS_VER"
|
||||
docker build --provenance=false -f llama-swap.Containerfile --build-arg BASE_TAG=${BASE_TAG} --build-arg LS_VER=${LS_VER} --build-arg UID=${USER_UID} \
|
||||
--build-arg LS_REPO=${LS_REPO} --build-arg GID=${USER_GID} --build-arg USER_HOME=${USER_HOME} -t ${CONTAINER_TAG} -t ${CONTAINER_LATEST} \
|
||||
--build-arg BASE_IMAGE=${BASE_IMAGE} .
|
||||
if [ "$ARCH" == "cpu" ]; then
|
||||
docker buildx build $BUILDX_FLAGS --provenance=false \
|
||||
-f llama-swap.Containerfile \
|
||||
--build-arg BASE_TAG=${BASE_TAG} --build-arg LS_VER=${LS_VER} --build-arg UID=${USER_UID} \
|
||||
--build-arg LS_REPO=${LS_BINARY_REPO} --build-arg GID=${USER_GID} --build-arg USER_HOME=${USER_HOME} \
|
||||
--build-arg BASE_IMAGE=${BASE_IMAGE} \
|
||||
-t ${CONTAINER_TAG} -t ${CONTAINER_LATEST} .
|
||||
else
|
||||
docker build --provenance=false -f llama-swap.Containerfile \
|
||||
--build-arg BASE_TAG=${BASE_TAG} --build-arg LS_VER=${LS_VER} --build-arg UID=${USER_UID} \
|
||||
--build-arg LS_REPO=${LS_BINARY_REPO} --build-arg GID=${USER_GID} --build-arg USER_HOME=${USER_HOME} \
|
||||
-t ${CONTAINER_TAG} -t ${CONTAINER_LATEST} \
|
||||
--build-arg BASE_IMAGE=${BASE_IMAGE} .
|
||||
fi
|
||||
|
||||
# For architectures with stable-diffusion.cpp support, layer sd-server on top
|
||||
# For architectures with stable-diffusion.cpp support, layer sd-server on top.
|
||||
# Stays on `docker build` so the base resolves from local dockerd.
|
||||
case "$ARCH" in
|
||||
"musa" | "vulkan")
|
||||
log_info "Adding sd-server to $CONTAINER_TAG"
|
||||
@@ -157,7 +206,8 @@ for CONTAINER_TYPE in non-root root; do
|
||||
-t ${CONTAINER_TAG} -t ${CONTAINER_LATEST} . ;;
|
||||
esac
|
||||
|
||||
if [ "$PUSH_IMAGES" == "true" ]; then
|
||||
# cpu builds push inline via buildx --push; all other archs push here.
|
||||
if [ "$ARCH" != "cpu" ] && [ "$PUSH_IMAGES" == "true" ]; then
|
||||
docker push ${CONTAINER_TAG}
|
||||
docker push ${CONTAINER_LATEST}
|
||||
fi
|
||||
|
||||
@@ -3,6 +3,9 @@ ARG BASE_TAG=server-cuda
|
||||
FROM ${BASE_IMAGE}:${BASE_TAG}
|
||||
|
||||
# has to be after the FROM
|
||||
# TARGETARCH is auto-set by `docker buildx build --platform …` (amd64/arm64);
|
||||
# falls back to amd64 when an older `docker build` runs without buildx.
|
||||
ARG TARGETARCH=amd64
|
||||
ARG LS_VER=170
|
||||
ARG LS_REPO=mostlygeek/llama-swap
|
||||
|
||||
@@ -34,9 +37,9 @@ WORKDIR /app
|
||||
ENV PATH="/app:${PATH}"
|
||||
|
||||
RUN \
|
||||
curl -LO "https://github.com/${LS_REPO}/releases/download/v${LS_VER}/llama-swap_${LS_VER}_linux_amd64.tar.gz" && \
|
||||
tar -zxf "llama-swap_${LS_VER}_linux_amd64.tar.gz" && \
|
||||
rm "llama-swap_${LS_VER}_linux_amd64.tar.gz"
|
||||
curl -LO "https://github.com/${LS_REPO}/releases/download/v${LS_VER}/llama-swap_${LS_VER}_linux_${TARGETARCH}.tar.gz" && \
|
||||
tar -zxf "llama-swap_${LS_VER}_linux_${TARGETARCH}.tar.gz" && \
|
||||
rm "llama-swap_${LS_VER}_linux_${TARGETARCH}.tar.gz"
|
||||
|
||||
COPY --chown=$UID:$GID config.example.yaml /app/config.yaml
|
||||
|
||||
|
||||
Reference in New Issue
Block a user