ci: add Gitea workflow to build fork CUDA image

Add a Gitea Actions workflow and multi-stage Containerfile that build this fork's llama-swap (serial scheduler + embedded Svelte UI) from source and layer it on a pinned llama.cpp CUDA server base, then push to the Gitea container registry as v230-cuda-b9821.

- docker/fork-cuda.Containerfile: node UI -> go build -> cuda runtime, runs as root to match the upstream non-suffixed image
- .gitea/workflows/build-cuda-image.yml: workflow_dispatch (version + llama.cpp build inputs) and push-on-build-files; logs in with REGISTRY_USER/REGISTRY_PASSWORD

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-28 12:48:48 -04:00
parent 542b79dacf
commit 617c7dc6b9
2 changed files with 148 additions and 0 deletions
@@ -0,0 +1,72 @@
+# Build a CUDA llama-swap image FROM THIS FORK's source (includes the serial
+# scheduler) and layer it on a pinned llama.cpp CUDA server base. Produces e.g.:
+#   gitea.stevedudenhoeffer.com/steve/llama-swap:v230-cuda-b9821
+#
+# BASE_TAG selects the llama.cpp CUDA runtime + llama-server build, e.g.
+# "server-cuda-b9821". The llama-swap binary (with the embedded Svelte UI) is
+# compiled from the repo at build time, so no GitHub release is required.
+#
+# Build context is the repo root:
+#   docker build -f docker/fork-cuda.Containerfile \
+#     --build-arg BASE_TAG=server-cuda-b9821 -t llama-swap:v230-cuda-b9821 .
+
+# Declared before the first FROM, so these two are only visible to FROM lines;
+# the final stage's `FROM ${BASE_IMAGE}:${BASE_TAG}` is what consumes them.
+ARG BASE_IMAGE=ghcr.io/ggml-org/llama.cpp
+ARG BASE_TAG=server-cuda-b9821
+
+# ---- Stage 1: compile the Svelte front end that the Go binary embeds ----
+FROM node:22-bookworm-slim AS ui
+WORKDIR /src/ui-svelte
+# Manifests are copied on their own so the dependency layer stays cached while
+# the UI sources change.
+COPY ui-svelte/package.json ui-svelte/package-lock.json /src/ui-svelte/
+RUN npm ci
+COPY ui-svelte/ /src/ui-svelte/
+# The build script is `vite build --emptyOutDir`, and vite.config.ts points its
+# output at ../internal/server/ui_dist; the Go stage copies that directory so
+# //go:embed can pick it up.
+RUN mkdir -p ../internal/server && npm run build
+
+# ---- Stage 2: build the llama-swap binary with the embedded UI ----
+FROM golang:1.26-bookworm AS build
+WORKDIR /src
+# Cache modules independently of source churn.
+COPY go.mod go.sum ./
+RUN go mod download
+COPY . .
+# COPY merges into an existing directory rather than replacing it, so remove
+# whatever ui_dist arrived with the build context (committed placeholder, stale
+# local build output) first. //go:embed ui_dist then ships only the assets
+# produced by the `ui` stage.
+RUN rm -rf ./internal/server/ui_dist
+COPY --from=ui /src/internal/server/ui_dist/ ./internal/server/ui_dist/
+# Version metadata stamped into package main through the linker's -X flag.
+# Declared after the source copy so that changing them does not invalidate the
+# layers above.
+ARG LS_VERSION=v230
+ARG GIT_HASH=unknown
+ARG BUILD_DATE=unknown
+# CGO is disabled and the target OS pinned to linux; the binary lands in /out
+# for the runtime stage to copy.
+RUN CGO_ENABLED=0 GOOS=linux go build \
+    -ldflags="-X main.version=${LS_VERSION} -X main.commit=${GIT_HASH} -X main.date=${BUILD_DATE}" \
+    -o /out/llama-swap .
+
+# ---- Stage 3: final runtime layer over the pinned llama.cpp CUDA base ----
+FROM ${BASE_IMAGE}:${BASE_TAG}
+
+# Defaults are root (0:0), mirroring the upstream non-suffixed
+# `vNNN-cuda-bNNNN` image that ragnaros pulls today, which needs root to reach
+# the mounted docker socket for container-backed models (sd-server). Pass
+# different UID/GID build args for a non-root variant; USER_HOME becomes that
+# account's home directory and the image's HOME.
+ARG USER_HOME=/root
+ARG UID=0
+ARG GID=0
+ENV HOME="${USER_HOME}"
+
+# Create the optional non-root account (only when UID is non-zero; a dedicated
+# group only when GID is non-zero too), then make sure HOME and /app exist.
+# Ownership is rewritten only when UID or GID is non-zero: a recursive chown to
+# 0:0 still issues a chown on every file under /app, which can copy the base
+# image's files into this layer without changing anything.
+# NOTE(review): assumes the base image's /app and HOME are already root-owned;
+# confirm against the llama.cpp base if that ever matters for a root build.
+RUN set -eux; \
+    if [ "$UID" -ne 0 ]; then \
+      if [ "$GID" -ne 0 ]; then groupadd --system --gid "$GID" app; fi; \
+      useradd --system --uid "$UID" --gid "$GID" --home "$USER_HOME" app; \
+    fi; \
+    mkdir --parents "$HOME" /app; \
+    if [ "$UID" -ne 0 ] || [ "$GID" -ne 0 ]; then \
+      chown --recursive "$UID:$GID" "$HOME" /app; \
+    fi
+
+# Install the compiled binary, and the example config as the default
+# /app/config.yaml, both owned by the configured UID:GID.
+COPY --chown=${UID}:${GID} --from=build /out/llama-swap /app/llama-swap
+COPY --chown=${UID}:${GID} docker/config.example.yaml /app/config.yaml
+
+# Search path, working directory and runtime identity.
+ENV PATH="/app:${PATH}"
+WORKDIR /app
+USER ${UID}:${GID}
+
+# Shell-form probe: healthy when curl fetches / on port 8080 without an HTTP
+# error status.
+HEALTHCHECK CMD curl -f http://localhost:8080/ || exit 1
+ENTRYPOINT ["/app/llama-swap", "-config", "/app/config.yaml"]