ci: add Gitea workflow to build fork CUDA image

Add a Gitea Actions workflow and multi-stage Containerfile that build this fork's llama-swap (serial scheduler + embedded Svelte UI) from source and layer it on a pinned llama.cpp CUDA server base, then push to the Gitea container registry as v230-cuda-b9821. - docker/fork-cuda.Containerfile: node UI -> go build -> cuda runtime, runs as root to match the upstream non-suffixed image - .gitea/workflows/build-cuda-image.yml: workflow_dispatch (version + llama.cpp build inputs) and push-on-build-files; logs in with REGISTRY_USER/REGISTRY_PASSWORD Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-28 12:48:48 -04:00
parent 542b79dacf
commit 617c7dc6b9
2 changed files with 148 additions and 0 deletions
@@ -0,0 +1,76 @@
 name: Build CUDA image (fork)
 # Builds this fork's llama-swap (serial scheduler + embedded UI) from source and
 # layers it on a pinned llama.cpp CUDA server base, then pushes to the Gitea
 # container registry, e.g. gitea.stevedudenhoeffer.com/steve/llama-swap:v230-cuda-b9821
 #
 # Requires repo secrets: REGISTRY_USER, REGISTRY_PASSWORD (push to the registry).
 # Triggers: a manual run whose inputs may override the version labels, or a
 # push to main that touches one of the build definition files under `paths`.
 on:
  workflow_dispatch:
    inputs:
      llama_swap_version:
        default: 'v230'
        required: false
        description: 'llama-swap version label (image tag prefix)'
      llamacpp_build:
        default: 'b9821'
        required: false
        description: 'llama.cpp CUDA server build (base image tag suffix)'
  # Editing the workflow or the Containerfile rebuilds the image.
  push:
    branches:
      - main
    paths:
      - '.gitea/workflows/build-cuda-image.yml'
      - 'docker/fork-cuda.Containerfile'
 # Registry host, used both for the image name and for the login step.
 env:
  REGISTRY: 'gitea.stevedudenhoeffer.com'
 jobs:
  # Single job: derive the image coordinates, build the Containerfile with
  # Buildx, and push the result to the registry named by env.REGISTRY.
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      # Context values reach the script through `env` rather than being
      # spliced into the script text, so a crafted dispatch input stays data
      # and cannot inject shell commands. On push events `inputs` is empty and
      # the `||` fallbacks supply the defaults.
      - name: Compute image metadata
        id: meta
        env:
          LS_VER: ${{ inputs.llama_swap_version || 'v230' }}
          LCPP: ${{ inputs.llamacpp_build || 'b9821' }}
          REPOSITORY: ${{ github.repository }}
        run: |
          # Both values end up inside an image tag: fail fast on an empty
          # value or on any character an image tag may not contain.
          for part in "$LS_VER" "$LCPP"; do
            case "$part" in
              "" | *[!A-Za-z0-9_.-]*)
                echo "Invalid tag component: '${part}'" >&2
                exit 1
                ;;
            esac
          done
          # Image repository names must be lowercase.
          IMAGE="$(printf '%s' "${REGISTRY}/${REPOSITORY}" | tr '[:upper:]' '[:lower:]')"
          {
            echo "image=${IMAGE}"
            echo "tag=${LS_VER}-cuda-${LCPP}"
            echo "base_tag=server-cuda-${LCPP}"
            echo "ls_version=${LS_VER}"
            echo "build_date=$(date -u +%Y-%m-%dT%H:%M:%SZ)"
          } >> "$GITHUB_OUTPUT"
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      # Credentials come from the REGISTRY_USER / REGISTRY_PASSWORD secrets.
      - name: Log in to Gitea registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ secrets.REGISTRY_USER }}
          password: ${{ secrets.REGISTRY_PASSWORD }}
      # Build from the repo root and push a single tag. The build args select
      # the llama.cpp base tag and stamp version metadata into the binary.
      - name: Build and push
        uses: docker/build-push-action@v6
        with:
          context: .
          file: docker/fork-cuda.Containerfile
          push: true
          # No provenance attestation is attached to the pushed image.
          provenance: false
          build-args: |
            BASE_TAG=${{ steps.meta.outputs.base_tag }}
            LS_VERSION=${{ steps.meta.outputs.ls_version }}
            GIT_HASH=${{ github.sha }}
            BUILD_DATE=${{ steps.meta.outputs.build_date }}
          tags: ${{ steps.meta.outputs.image }}:${{ steps.meta.outputs.tag }}
      # The image reference is passed through `env` for the same reason as in
      # the metadata step: it is derived from user-supplied inputs.
      - name: Summary
        env:
          IMAGE_REF: ${{ steps.meta.outputs.image }}:${{ steps.meta.outputs.tag }}
        run: |
          echo "Pushed ${IMAGE_REF}" >> "$GITHUB_STEP_SUMMARY"
@@ -0,0 +1,72 @@
 # Build a CUDA llama-swap image FROM THIS FORK's source (includes the serial
 # scheduler) and layer it on a pinned llama.cpp CUDA server base. Produces e.g.:
 #   gitea.stevedudenhoeffer.com/steve/llama-swap:v230-cuda-b9821
 #
 # BASE_TAG selects the llama.cpp CUDA runtime + llama-server build, e.g.
 # "server-cuda-b9821". The llama-swap binary (with the embedded Svelte UI) is
 # compiled from the repo at build time, so no GitHub release is required.
 #
 # Build context is the repo root:
 #   docker build -f docker/fork-cuda.Containerfile \
 #     --build-arg BASE_TAG=server-cuda-b9821 -t llama-swap:v230-cuda-b9821 .
 # Global build args, declared before the first FROM so that the runtime
 # stage's FROM line can reference them. Together they name the llama.cpp
 # base image (repository and tag).
 ARG BASE_IMAGE=ghcr.io/ggml-org/llama.cpp
 ARG BASE_TAG=server-cuda-b9821
 # ---- Stage 1: compile the Svelte UI that gets embedded into the binary ----
 FROM node:22-bookworm-slim AS ui
 WORKDIR /src/ui-svelte
 # Copy only the npm manifests before installing, so the dependency layer is
 # reused until package.json or package-lock.json changes.
 COPY ["ui-svelte/package.json", "ui-svelte/package-lock.json", "./"]
 RUN npm ci
 COPY ["ui-svelte/", "./"]
 # `npm run build` runs `vite build --emptyOutDir`. vite.config.ts emits into
 # ../internal/server/ui_dist, the directory the Go stage embeds via
 # //go:embed, so the parent directory is created first.
 RUN mkdir -p /src/internal/server \
    && npm run build
 # ---- Stage 2: compile llama-swap with the UI assets embedded ----
 FROM golang:1.26-bookworm AS build
 WORKDIR /src
 # Download modules from go.mod/go.sum alone so this layer survives source
 # edits.
 COPY ["go.mod", "go.sum", "./"]
 RUN go mod download
 COPY . .
 # Lay the assets built in the `ui` stage over the committed placeholder so
 # that //go:embed ui_dist ships the real UI.
 COPY --from=ui ["/src/internal/server/ui_dist/", "./internal/server/ui_dist/"]
 # Version metadata stamped into the binary through the linker's -X flags.
 ARG LS_VERSION=v230
 ARG GIT_HASH=unknown
 ARG BUILD_DATE=unknown
 RUN CGO_ENABLED=0 GOOS=linux \
    go build \
      -o /out/llama-swap \
      -ldflags="-X main.version=${LS_VERSION} -X main.commit=${GIT_HASH} -X main.date=${BUILD_DATE}" \
      .
 # ---- Stage 3: runtime image on the pinned llama.cpp CUDA base ----
 # BASE_IMAGE and BASE_TAG are the global ARGs declared before the first FROM.
 FROM ${BASE_IMAGE}:${BASE_TAG}
 # Run as root by default to match the upstream `vNNN-cuda-bNNNN` (non-suffixed)
 # image that ragnaros pulls today: it needs root to reach the mounted docker
 # socket for container-backed models (sd-server). Override UID/GID at build time
 # for a non-root variant.
 # NOTE(review): USER_HOME still defaults to /root when UID is overridden;
 # presumably it should be overridden together with UID/GID - confirm.
 ARG UID=0
 ARG GID=0
 ARG USER_HOME=/root
 ENV HOME=$USER_HOME
 # For a non-root UID, create an `app` user (plus an `app` group unless GID is
 # 0). In every case, make sure $HOME and /app exist and are owned by UID:GID.
 RUN set -eux; \
    if [ "$UID" -ne 0 ]; then \
      if [ "$GID" -ne 0 ]; then groupadd --system --gid "$GID" app; fi; \
      useradd --system --uid "$UID" --gid "$GID" --home "$USER_HOME" app; \
    fi; \
    mkdir --parents "$HOME" /app; \
    chown --recursive "$UID:$GID" "$HOME" /app
 # Install the binary produced by the `build` stage and ship the example config
 # as the default /app/config.yaml, both owned by the runtime user.
 COPY --from=build --chown=$UID:$GID /out/llama-swap /app/llama-swap
 COPY --chown=$UID:$GID docker/config.example.yaml /app/config.yaml
 USER $UID:$GID
 WORKDIR /app
 # Put /app first on PATH so executables located there resolve by bare name.
 ENV PATH="/app:${PATH}"
 # NOTE(review): assumes curl is present in the base image and that llama-swap
 # serves HTTP on port 8080 with the shipped config - confirm.
 HEALTHCHECK CMD curl -f http://localhost:8080/ || exit 1
 # Start llama-swap with the config copied above.
 ENTRYPOINT [ "/app/llama-swap", "-config", "/app/config.yaml" ]