From 617c7dc6b920380428697fd915eb48d7d07b35b0 Mon Sep 17 00:00:00 2001
From: Steve Dudenhoeffer
Date: Sun, 28 Jun 2026 12:48:48 -0400
Subject: [PATCH] ci: add Gitea workflow to build fork CUDA image

Add a Gitea Actions workflow and multi-stage Containerfile that build this
fork's llama-swap (serial scheduler + embedded Svelte UI) from source and
layer it on a pinned llama.cpp CUDA server base, then push to the Gitea
container registry as v230-cuda-b9821.

- docker/fork-cuda.Containerfile: node UI -> go build -> cuda runtime,
  runs as root to match the upstream non-suffixed image
- .gitea/workflows/build-cuda-image.yml: workflow_dispatch (version +
  llama.cpp build inputs) and push-on-build-files; logs in with
  REGISTRY_USER/REGISTRY_PASSWORD
- dispatch inputs are validated and handed to shell steps through env
  instead of being expanded into the script text

Co-Authored-By: Claude Opus 4.8 (1M context)
---
 .gitea/workflows/build-cuda-image.yml | 95 +++++++++++++++++++++++++++
 docker/fork-cuda.Containerfile        | 72 ++++++++++++++++++++
 2 files changed, 167 insertions(+)
 create mode 100644 .gitea/workflows/build-cuda-image.yml
 create mode 100644 docker/fork-cuda.Containerfile

diff --git a/.gitea/workflows/build-cuda-image.yml b/.gitea/workflows/build-cuda-image.yml
new file mode 100644
index 00000000..55b72990
--- /dev/null
+++ b/.gitea/workflows/build-cuda-image.yml
@@ -0,0 +1,95 @@
+name: Build CUDA image (fork)
+
+# Builds this fork's llama-swap (serial scheduler + embedded UI) from source and
+# layers it on a pinned llama.cpp CUDA server base, then pushes to the Gitea
+# container registry, e.g. gitea.stevedudenhoeffer.com/steve/llama-swap:v230-cuda-b9821
+#
+# Requires repo secrets: REGISTRY_USER, REGISTRY_PASSWORD (push to the registry).
+
+on:
+  workflow_dispatch:
+    inputs:
+      llama_swap_version:
+        description: "llama-swap version label (image tag prefix)"
+        required: false
+        default: "v230"
+      llamacpp_build:
+        description: "llama.cpp CUDA server build (base image tag suffix)"
+        required: false
+        default: "b9821"
+  # Building the build definition itself kicks off a fresh image.
+  push:
+    branches: [main]
+    paths:
+      - ".gitea/workflows/build-cuda-image.yml"
+      - "docker/fork-cuda.Containerfile"
+
+env:
+  REGISTRY: gitea.stevedudenhoeffer.com
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Compute image metadata
+        id: meta
+        # Inputs reach the script through the environment instead of being
+        # expanded into its text, so their contents are data, never shell code.
+        # The `||` defaults cover runs that carry no dispatch inputs (push).
+        env:
+          LS_VER: ${{ inputs.llama_swap_version || 'v230' }}
+          LCPP: ${{ inputs.llamacpp_build || 'b9821' }}
+          REPOSITORY: ${{ github.repository }}
+        run: |
+          # Both values become image tag components; reject characters a tag
+          # cannot hold before anything is written to the step outputs.
+          for part in "$LS_VER" "$LCPP"; do
+            case "$part" in
+              "" | *[!A-Za-z0-9._-]*)
+                echo "invalid tag component: '${part}'" >&2
+                exit 1
+                ;;
+            esac
+          done
+          # Image names must be lowercase; owner/repo names need not be.
+          IMAGE="$(printf '%s' "${REGISTRY}/${REPOSITORY}" | tr '[:upper:]' '[:lower:]')"
+          {
+            echo "image=${IMAGE}"
+            echo "tag=${LS_VER}-cuda-${LCPP}"
+            echo "base_tag=server-cuda-${LCPP}"
+            echo "ls_version=${LS_VER}"
+            echo "build_date=$(date -u +%Y-%m-%dT%H:%M:%SZ)"
+          } >> "$GITHUB_OUTPUT"
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to Gitea registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ secrets.REGISTRY_USER }}
+          password: ${{ secrets.REGISTRY_PASSWORD }}
+
+      - name: Build and push
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: docker/fork-cuda.Containerfile
+          push: true
+          provenance: false
+          build-args: |
+            BASE_TAG=${{ steps.meta.outputs.base_tag }}
+            LS_VERSION=${{ steps.meta.outputs.ls_version }}
+            GIT_HASH=${{ github.sha }}
+            BUILD_DATE=${{ steps.meta.outputs.build_date }}
+          tags: ${{ steps.meta.outputs.image }}:${{ steps.meta.outputs.tag }}
+
+      - name: Summary
+        env:
+          IMAGE_REF: ${{ steps.meta.outputs.image }}:${{ steps.meta.outputs.tag }}
+        run: |
+          echo "Pushed ${IMAGE_REF}" >> "$GITHUB_STEP_SUMMARY"
diff --git a/docker/fork-cuda.Containerfile b/docker/fork-cuda.Containerfile
new file mode 100644
index 00000000..3bb7d690
--- /dev/null
+++ b/docker/fork-cuda.Containerfile
@@ -0,0 +1,72 @@
+# Build a CUDA llama-swap image FROM THIS FORK's source (includes the serial
+# scheduler) and layer it on a pinned llama.cpp CUDA server base. Produces e.g.:
+#   gitea.stevedudenhoeffer.com/steve/llama-swap:v230-cuda-b9821
+#
+# BASE_TAG selects the llama.cpp CUDA runtime + llama-server build, e.g.
+# "server-cuda-b9821". The llama-swap binary (with the embedded Svelte UI) is
+# compiled from the repo at build time, so no GitHub release is required.
+#
+# Build context is the repo root:
+#   docker build -f docker/fork-cuda.Containerfile \
+#     --build-arg BASE_TAG=server-cuda-b9821 -t llama-swap:v230-cuda-b9821 .
+
+ARG BASE_IMAGE=ghcr.io/ggml-org/llama.cpp
+ARG BASE_TAG=server-cuda-b9821
+
+# ---- Stage 1: build the Svelte UI (embedded into the binary) ----
+FROM node:22-bookworm-slim AS ui
+WORKDIR /src/ui-svelte
+# Install deps first for layer caching.
+COPY ui-svelte/package.json ui-svelte/package-lock.json ./
+RUN npm ci
+COPY ui-svelte/ ./
+# `npm run build` is `vite build --emptyOutDir`; vite.config.ts writes to
+# ../internal/server/ui_dist, which //go:embed picks up in the next stage.
+RUN mkdir -p /src/internal/server && npm run build
+
+# ---- Stage 2: build the llama-swap binary with the embedded UI ----
+FROM golang:1.26-bookworm AS build
+WORKDIR /src
+# Cache modules independently of source churn.
+COPY go.mod go.sum ./
+RUN go mod download
+COPY . .
+# Overlay the freshly built UI so //go:embed ui_dist ships the real assets
+# instead of the committed placeholder.
+COPY --from=ui /src/internal/server/ui_dist/ ./internal/server/ui_dist/
+ARG LS_VERSION=v230
+ARG GIT_HASH=unknown
+ARG BUILD_DATE=unknown
+RUN CGO_ENABLED=0 GOOS=linux go build \
+    -ldflags="-X main.version=${LS_VERSION} -X main.commit=${GIT_HASH} -X main.date=${BUILD_DATE}" \
+    -o /out/llama-swap .
+
+# ---- Stage 3: runtime image on the pinned llama.cpp CUDA base ----
+FROM ${BASE_IMAGE}:${BASE_TAG}
+
+# Run as root by default to match the upstream `vNNN-cuda-bNNNN` (non-suffixed)
+# image that ragnaros pulls today: it needs root to reach the mounted docker
+# socket for container-backed models (sd-server). Override UID/GID at build time
+# for a non-root variant.
+ARG UID=0
+ARG GID=0
+ARG USER_HOME=/root
+ENV HOME=$USER_HOME
+
+RUN set -eux; \
+    if [ "$UID" -ne 0 ]; then \
+      if [ "$GID" -ne 0 ]; then groupadd --system --gid "$GID" app; fi; \
+      useradd --system --uid "$UID" --gid "$GID" --home "$USER_HOME" app; \
+    fi; \
+    mkdir --parents "$HOME" /app; \
+    chown --recursive "$UID:$GID" "$HOME" /app
+
+COPY --from=build --chown=$UID:$GID /out/llama-swap /app/llama-swap
+COPY --chown=$UID:$GID docker/config.example.yaml /app/config.yaml
+
+USER $UID:$GID
+WORKDIR /app
+ENV PATH="/app:${PATH}"
+
+HEALTHCHECK CMD curl -f http://localhost:8080/ || exit 1
+ENTRYPOINT [ "/app/llama-swap", "-config", "/app/config.yaml" ]