0292c90ca1
Build CUDA image (fork) / build (push) Successful in 12m49s
npm ci ran without .npmrc (legacy-peer-deps=true), failing on the tailwind/vite peer dependency conflict. Copy .npmrc with the manifest. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
75 lines
2.9 KiB
Docker
75 lines
2.9 KiB
Docker
# Build a CUDA llama-swap image FROM THIS FORK's source (includes the serial
# scheduler) and layer it on a pinned llama.cpp CUDA server base. Produces e.g.:
#   gitea.stevedudenhoeffer.com/steve/llama-swap:v230-cuda-b9821
#
# BASE_TAG selects the llama.cpp CUDA runtime + llama-server build, e.g.
# "server-cuda-b9821". The llama-swap binary (with the embedded Svelte UI) is
# compiled from the repo at build time, so no GitHub release is required.
#
# Build context is the repo root:
#   docker build -f docker/fork-cuda.Containerfile \
#     --build-arg BASE_TAG=server-cuda-b9821 -t llama-swap:v230-cuda-b9821 .
|
|
|
|
# Global build arguments. They are declared ahead of the first FROM so that the
# FROM line of the runtime stage can interpolate them.
#   BASE_IMAGE - repository holding the llama.cpp runtime images
#   BASE_TAG   - tag within that repository to build on
ARG BASE_IMAGE=ghcr.io/ggml-org/llama.cpp
ARG BASE_TAG=server-cuda-b9821
|
|
|
|
# ---- Stage 1: compile the Svelte UI that ends up embedded in the binary ----
FROM node:22-bookworm-slim AS ui
WORKDIR /src/ui-svelte

# Dependency manifests are copied ahead of the sources so the install layer is
# reused until one of them changes. .npmrc has to come along: it sets
# legacy-peer-deps=true, and without it `npm ci` falls back to the strict
# resolver and aborts with ERESOLVE on the tailwind/vite peer ranges.
COPY ui-svelte/package.json \
     ui-svelte/package-lock.json \
     ui-svelte/.npmrc \
     ./
RUN npm ci

# Bring in the remaining UI sources.
COPY ui-svelte/ ./

# The build script is `vite build --emptyOutDir`, and vite.config.ts directs
# the output to ../internal/server/ui_dist, the directory the Go stage embeds
# via //go:embed. Create the parent directory before building.
RUN mkdir -p /src/internal/server \
 && npm run build
|
|
|
|
# ---- Stage 2: build the llama-swap binary with the embedded UI ----
FROM golang:1.26-bookworm AS build
WORKDIR /src

# Cache modules independently of source churn: only go.mod/go.sum changes
# invalidate the download layer.
COPY go.mod go.sum ./
RUN go mod download

COPY . .

# Overlay the freshly built UI so //go:embed ui_dist ships the real assets
# instead of the committed placeholder.
COPY --from=ui /src/internal/server/ui_dist/ ./internal/server/ui_dist/

# Version metadata stamped into the binary. Declared after the source copy so
# per-build values (hash, date) only invalidate the final compile layer.
#   LS_VERSION - value for main.version
#   GIT_HASH   - value for main.commit
#   BUILD_DATE - value for main.date
ARG LS_VERSION=v230
ARG GIT_HASH=unknown
ARG BUILD_DATE=unknown

# Each -X element is wrapped in single quotes: the go tool splits the -ldflags
# string on whitespace unless an element is quoted, so an unquoted value that
# contains a space (e.g. BUILD_DATE="$(date)") would be broken into separate
# linker arguments. The outer double quotes still let the shell expand the
# build args.
RUN CGO_ENABLED=0 GOOS=linux go build \
    -ldflags="-X 'main.version=${LS_VERSION}' -X 'main.commit=${GIT_HASH}' -X 'main.date=${BUILD_DATE}'" \
    -o /out/llama-swap .
|
|
|
|
# ---- Stage 3: runtime image layered on the pinned llama.cpp CUDA base ----
FROM ${BASE_IMAGE}:${BASE_TAG}

# The default identity is root. This mirrors the upstream `vNNN-cuda-bNNNN`
# (non-suffixed) image that ragnaros pulls today, which needs root to reach
# the mounted docker socket for container-backed models (sd-server). Pass
# different UID/GID values at build time to produce a non-root variant.
ARG UID=0
ARG GID=0
ARG USER_HOME=/root

ENV HOME="${USER_HOME}"

# For a non-root UID, create the `app` account (and the `app` group when GID is
# non-zero as well). In every case make sure the home directory and /app exist
# and belong to the selected UID:GID.
RUN set -eux; \
    if [ "$UID" -ne 0 ]; then \
        if [ "$GID" -ne 0 ]; then \
            groupadd --system --gid "$GID" app; \
        fi; \
        useradd --system --uid "$UID" --gid "$GID" --home "$USER_HOME" app; \
    fi; \
    mkdir --parents "$HOME" /app; \
    chown --recursive "$UID:$GID" "$HOME" /app

# Install the compiled binary and the example configuration, owned by the
# runtime identity.
COPY --from=build --chown=$UID:$GID /out/llama-swap /app/llama-swap
COPY --chown=$UID:$GID docker/config.example.yaml /app/config.yaml

USER $UID:$GID
WORKDIR /app

# Put /app first on PATH so executables there are found without a full path.
ENV PATH="/app:${PATH}"

# Report unhealthy when the HTTP root on port 8080 does not answer
# successfully; `|| exit 1` maps any curl failure code to the exit status 1.
HEALTHCHECK CMD curl -f http://localhost:8080/ || exit 1

ENTRYPOINT ["/app/llama-swap", "-config", "/app/config.yaml"]
|