# Builds a CUDA llama-swap image from THIS FORK's source tree (which includes
# the serial scheduler) and layers it onto a pinned llama.cpp CUDA server base.
# Example result:
#   gitea.stevedudenhoeffer.com/steve/llama-swap:v230-cuda-b9821
#
# BASE_TAG chooses the llama.cpp CUDA runtime + llama-server build, for example
# "server-cuda-b9821". The llama-swap binary (with the Svelte UI embedded) is
# compiled from the repo while the image builds, so no GitHub release is needed.
#
# The build context is the repo root:
#   docker build -f docker/fork-cuda.Containerfile \
#     --build-arg BASE_TAG=server-cuda-b9821 -t llama-swap:v230-cuda-b9821 .

# Declared ahead of the first FROM so the runtime stage's FROM line can
# substitute them.
ARG BASE_IMAGE=ghcr.io/ggml-org/llama.cpp
ARG BASE_TAG=server-cuda-b9821

# ---- Stage 1: build the Svelte UI (embedded into the binary) ----
FROM node:22-bookworm-slim AS ui
WORKDIR /src/ui-svelte

# Dependency manifests are copied on their own so the install layer stays
# cached when only UI sources change. .npmrc has to be present before npm ci:
# it sets legacy-peer-deps=true, which the project relies on (tailwind/vite
# peer ranges); without it the strict resolver fails with ERESOLVE.
COPY ui-svelte/package.json \
     ui-svelte/package-lock.json \
     ui-svelte/.npmrc \
     ./
RUN npm ci

COPY ui-svelte/ ./

# `npm run build` is `vite build --emptyOutDir`; vite.config.ts writes to
# ../internal/server/ui_dist, which //go:embed picks up in the next stage.
RUN mkdir -p /src/internal/server \
    && npm run build

# ---- Stage 2: build the llama-swap binary with the embedded UI ----
FROM golang:1.26-bookworm AS build
WORKDIR /src

# Module download gets its own layer so source churn does not invalidate it.
COPY go.mod go.sum ./
RUN go mod download

COPY . .

# Lay the freshly built UI over the tree so //go:embed ui_dist ships the real
# assets rather than the committed placeholder.
COPY --from=ui /src/internal/server/ui_dist/ ./internal/server/ui_dist/

# Build metadata stamped into the binary through -ldflags -X below.
ARG LS_VERSION=v230
ARG GIT_HASH=unknown
ARG BUILD_DATE=unknown

RUN CGO_ENABLED=0 GOOS=linux \
    go build \
      -ldflags="-X main.version=${LS_VERSION} -X main.commit=${GIT_HASH} -X main.date=${BUILD_DATE}" \
      -o /out/llama-swap \
      .
# ---- Stage 3: runtime image on the pinned llama.cpp CUDA base ----
FROM ${BASE_IMAGE}:${BASE_TAG}

# Root is the default so this image matches the upstream `vNNN-cuda-bNNNN`
# (non-suffixed) image that ragnaros pulls today: root is needed to reach the
# mounted docker socket for container-backed models (sd-server). Pass UID/GID
# as build args to produce a non-root variant.
ARG UID=0
ARG GID=0
ARG USER_HOME=/root
ENV HOME="${USER_HOME}"

# An "app" account is created only when UID is non-zero, and an "app" group
# only when GID is non-zero as well. The home directory and /app are then
# created if missing and handed to UID:GID in every case.
RUN set -eux; \
    if [ "${UID}" -ne 0 ]; then \
        if [ "${GID}" -ne 0 ]; then \
            groupadd --system --gid "${GID}" app; \
        fi; \
        useradd --system --uid "${UID}" --gid "${GID}" --home "${USER_HOME}" app; \
    fi; \
    mkdir --parents "${HOME}" /app; \
    chown --recursive "${UID}:${GID}" "${HOME}" /app

# The binary comes from the build stage; the example config from the build
# context becomes the default /app/config.yaml.
COPY --from=build --chown=${UID}:${GID} /out/llama-swap /app/llama-swap
COPY --chown=${UID}:${GID} docker/config.example.yaml /app/config.yaml

USER ${UID}:${GID}
WORKDIR /app
ENV PATH="/app:${PATH}"

# Probe the HTTP endpoint on port 8080; a failed request marks the container
# unhealthy.
HEALTHCHECK CMD curl -f http://localhost:8080/ || exit 1

ENTRYPOINT ["/app/llama-swap", "-config", "/app/config.yaml"]