From 1dd1aadf93ee2489d0ce6857214c91382eede355 Mon Sep 17 00:00:00 2001
From: Benson Wong
Date: Fri, 3 Apr 2026 15:16:30 +0800
Subject: [PATCH] docker/unified: add ik_llama.cpp to CUDA container (#620)

---
Reviewer notes (this section is ignored by `git am`):

* What the patch does: adds an `ik_llama_ref` workflow input and an
  IK_LLAMA_REF / IK_LLAMA_COMMIT_HASH build argument, builds ik_llama.cpp's
  `llama-server` target in a CUDA-only stage, installs it as
  `ik-llama-server`, and records its ref in /versions.txt. Vulkan builds
  select a no-op stage that only creates an empty /install/bin. The CUDA
  base images move to 12.9.1/ubuntu24.04, the vulkan base images move to
  ubuntu:24.04, and `git` is dropped from the runtime package lists.
* The line structure of this patch was restored from a copy in which
  newlines and runs of whitespace appear to have been collapsed. Hunk line
  counts match the `@@` headers, but leading indentation and the position
  of blank context lines are reconstructed. Apply with
  `git apply --ignore-whitespace` and compare against the upstream commit.
* For the same reason the removed/added `echo` pairs for `llama.cpp:` and
  `whisper.cpp:` in build-image.sh look identical here; presumably they
  differ only in column-alignment spaces - TODO confirm against upstream.

 .github/workflows/unified-docker.yml |  5 +++
 docker/unified/Dockerfile            | 38 ++++++++++++++++++----
 docker/unified/build-image.sh        | 44 ++++++++++++++++++++++---
 docker/unified/install-ik-llama.sh   | 48 ++++++++++++++++++++++++++++
 4 files changed, 124 insertions(+), 11 deletions(-)
 create mode 100644 docker/unified/install-ik-llama.sh

diff --git a/.github/workflows/unified-docker.yml b/.github/workflows/unified-docker.yml
index cde3d308..c1021f0d 100644
--- a/.github/workflows/unified-docker.yml
+++ b/.github/workflows/unified-docker.yml
@@ -18,6 +18,10 @@ on:
         description: "stable-diffusion.cpp commit hash, tag, or branch"
         required: false
         default: "master"
+      ik_llama_ref:
+        description: "ik_llama.cpp commit hash, tag, or branch (CUDA only)"
+        required: false
+        default: "main"
       llama_swap_version:
         description: "llama-swap version (e.g. v198, latest, main)"
         required: false
@@ -100,6 +104,7 @@ jobs:
           LLAMA_REF: ${{ inputs.llama_cpp_ref || 'master' }}
           WHISPER_REF: ${{ inputs.whisper_ref || 'master' }}
           SD_REF: ${{ inputs.sd_ref || 'master' }}
+          IK_LLAMA_REF: ${{ inputs.ik_llama_ref || 'main' }}
           LS_VERSION: ${{ inputs.llama_swap_version || 'main' }}
           DOCKER_IMAGE_TAG: ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }}
           # When running under act, use the local builder that has warm ccache.
diff --git a/docker/unified/Dockerfile b/docker/unified/Dockerfile
index 3d2425c6..b38e5e94 100644
--- a/docker/unified/Dockerfile
+++ b/docker/unified/Dockerfile
@@ -12,7 +12,7 @@ ARG BACKEND=cuda

 # ── Builder bases ──────────────────────────────────────────────────────

-FROM nvidia/cuda:12.4.0-devel-ubuntu22.04 AS builder-base-cuda
+FROM nvidia/cuda:12.9.1-devel-ubuntu24.04 AS builder-base-cuda

 ENV DEBIAN_FRONTEND=noninteractive
 ENV CMAKE_CUDA_ARCHITECTURES="60;61;75;86;89"
@@ -29,7 +29,7 @@ WORKDIR /build

 # ──

-FROM ubuntu:26.04 AS builder-base-vulkan
+FROM ubuntu:24.04 AS builder-base-vulkan

 ENV DEBIAN_FRONTEND=noninteractive
 ENV CCACHE_DIR=/ccache
@@ -78,6 +78,27 @@ RUN --mount=type=cache,id=ccache-${BACKEND},target=/ccache \
     --mount=type=cache,id=llama-${BACKEND},target=/src/llama.cpp/build \
     BACKEND=${BACKEND} bash /build/install-llama.sh "${LLAMA_COMMIT_HASH}"

+# ── Build ik_llama.cpp (CUDA only) ────────────────────────────────────
+#
+# Two named stages allow ARG BACKEND to select at build time:
+#   - ik-llama-cuda  : real build (from builder-base-cuda)
+#   - ik-llama-vulkan: no-op (empty /install/bin, skips CUDA pull entirely)
+# BuildKit only evaluates the selected branch, so vulkan builds never
+# pull nvidia/cuda:*-devel or compile ik_llama.cpp.
+
+FROM builder-base-vulkan AS ik-llama-vulkan
+RUN mkdir -p /install/bin
+
+FROM builder-base-cuda AS ik-llama-cuda
+ARG IK_LLAMA_COMMIT_HASH=main
+COPY install-ik-llama.sh /build/
+RUN --mount=type=cache,id=ccache-cuda,target=/ccache \
+    --mount=type=cache,id=ik-llama-cuda,target=/src/ik_llama.cpp/build \
+    bash /build/install-ik-llama.sh "${IK_LLAMA_COMMIT_HASH}"
+
+ARG BACKEND=cuda
+FROM ik-llama-${BACKEND} AS ik-llama-build
+
 # ── Download llama-swap release binary ────────────────────────────────

 FROM builder-base AS llama-swap-download
@@ -87,14 +108,14 @@ RUN bash /build/install-llama-swap.sh "${LS_VERSION}"

 # ── Runtime bases ─────────────────────────────────────────────────────

-FROM nvidia/cuda:12.4.0-runtime-ubuntu22.04 AS runtime-cuda
+FROM nvidia/cuda:12.9.1-runtime-ubuntu24.04 AS runtime-cuda

 ENV DEBIAN_FRONTEND=noninteractive
 ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"
 ENV PATH="/usr/local/bin:${PATH}"

 RUN apt-get update && apt-get install -y --no-install-recommends \
-    libgomp1 python3 curl ca-certificates git \
+    libgomp1 python3 curl ca-certificates \
     && rm -rf /var/lib/apt/lists/*

 # CUDA stub drivers for container compatibility
@@ -103,14 +124,14 @@ COPY --from=builder-base-cuda /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/

 # ──

-FROM ubuntu:26.04 AS runtime-vulkan
+FROM ubuntu:24.04 AS runtime-vulkan

 ENV DEBIAN_FRONTEND=noninteractive
 ENV PATH="/usr/local/bin:${PATH}"

 RUN apt-get update && apt-get install -y --no-install-recommends \
     libgomp1 libvulkan1 mesa-vulkan-drivers \
-    python3 curl ca-certificates git \
+    python3 curl ca-certificates \
     && rm -rf /var/lib/apt/lists/*

 # ── Select runtime base by BACKEND ────────────────────────────────────
@@ -121,6 +142,7 @@ ARG BACKEND=cuda
 ARG LLAMA_COMMIT_HASH=unknown
 ARG WHISPER_COMMIT_HASH=unknown
 ARG SD_COMMIT_HASH=unknown
+ARG IK_LLAMA_COMMIT_HASH=unknown

 RUN apt-get update && apt-get install -y --no-install-recommends \
     python3-numpy python3-sentencepiece \
@@ -147,6 +169,9 @@ COPY --from=sd-build /install/lib/ /usr/local/lib/
 COPY --from=llama-build /install/bin/llama-server /usr/local/bin/
 COPY --from=llama-build /install/bin/llama-cli /usr/local/bin/

+# Copy ik-llama-server (CUDA only; empty copy for vulkan)
+COPY --from=ik-llama-build /install/bin/ /usr/local/bin/
+
 # Copy llama-swap binary
 COPY --from=llama-swap-download /install/bin/llama-swap /usr/local/bin/
 COPY --from=llama-swap-download /install/llama-swap-version /tmp/
@@ -159,6 +184,7 @@ COPY config.example.yaml /etc/llama-swap/config/config.yaml
 RUN echo "llama.cpp: ${LLAMA_COMMIT_HASH}" > /versions.txt && \
     echo "whisper.cpp: ${WHISPER_COMMIT_HASH}" >> /versions.txt && \
     echo "stable-diffusion.cpp: ${SD_COMMIT_HASH}" >> /versions.txt && \
+    echo "ik_llama.cpp: ${IK_LLAMA_COMMIT_HASH}" >> /versions.txt && \
     echo "llama-swap: $(cat /tmp/llama-swap-version)" >> /versions.txt && \
     echo "backend: ${BACKEND}" >> /versions.txt && \
     echo "build_timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" >> /versions.txt
diff --git a/docker/unified/build-image.sh b/docker/unified/build-image.sh
index 7eae9808..22bd5649 100755
--- a/docker/unified/build-image.sh
+++ b/docker/unified/build-image.sh
@@ -11,6 +11,7 @@
 # WHISPER_REF=v1.0.0 ./build-image.sh --vulkan # Pin whisper.cpp to a tag
 # SD_REF=master ./build-image.sh --cuda # Pin stable-diffusion.cpp to a branch
 # LS_VERSION=170 ./build-image.sh --cuda # Override llama-swap version
+# IK_LLAMA_REF=main ./build-image.sh --cuda # Pin ik_llama.cpp to main branch (CUDA only)
 #

 set -euo pipefail
@@ -43,6 +44,7 @@ for arg in "$@"; do
             echo " LLAMA_REF Pin llama.cpp to a commit, tag, or branch"
             echo " WHISPER_REF Pin whisper.cpp to a commit, tag, or branch"
             echo " SD_REF Pin stable-diffusion.cpp to a commit, tag, or branch"
+            echo " IK_LLAMA_REF Pin ik_llama.cpp to a commit, tag, or branch (CUDA only)"
             echo " LS_VERSION Override llama-swap version (e.g., '170' or 'latest')"
             exit 0
             ;;
@@ -63,6 +65,7 @@ LLAMA_REPO="https://github.com/ggml-org/llama.cpp.git"
 WHISPER_REPO="https://github.com/ggml-org/whisper.cpp.git"
 SD_REPO="https://github.com/leejet/stable-diffusion.cpp.git"
 LLAMA_SWAP_REPO="https://github.com/mostlygeek/llama-swap.git"
+IK_LLAMA_REPO="https://github.com/ikawrakow/ik_llama.cpp.git"

 # Resolve a git ref (commit hash, tag, or branch) to a full commit hash.
 # Requires only: git, network access to the remote.
@@ -152,6 +155,24 @@ else
     echo "stable-diffusion.cpp: latest HEAD: ${SD_HASH}"
 fi

+# Resolve ik_llama.cpp ref (CUDA only)
+if [[ "$BACKEND" == "cuda" ]]; then
+    if [[ -n "${IK_LLAMA_REF:-}" ]]; then
+        IK_LLAMA_HASH=$(resolve_ref "${IK_LLAMA_REPO}" "${IK_LLAMA_REF}") || exit 1
+        echo "ik_llama.cpp: ${IK_LLAMA_REF} -> ${IK_LLAMA_HASH}"
+    else
+        IK_LLAMA_HASH=$(get_latest_hash "${IK_LLAMA_REPO}")
+        if [[ -z "${IK_LLAMA_HASH}" ]]; then
+            echo "ERROR: Could not determine latest commit for ik_llama.cpp" >&2
+            exit 1
+        fi
+        echo "ik_llama.cpp: latest HEAD: ${IK_LLAMA_HASH}"
+    fi
+else
+    IK_LLAMA_HASH="n/a"
+    echo "ik_llama.cpp: skipped (vulkan build)"
+fi
+
 # Resolve llama-swap ref
 if [[ -n "${LS_VERSION:-}" ]]; then
     LS_HASH=$(resolve_ref "${LLAMA_SWAP_REPO}" "${LS_VERSION}") || exit 1
@@ -178,6 +199,7 @@ BUILD_ARGS=(
     --build-arg "LLAMA_COMMIT_HASH=${LLAMA_HASH}"
     --build-arg "WHISPER_COMMIT_HASH=${WHISPER_HASH}"
     --build-arg "SD_COMMIT_HASH=${SD_HASH}"
+    --build-arg "IK_LLAMA_COMMIT_HASH=${IK_LLAMA_HASH}"
     --build-arg "LS_VERSION=${LS_HASH}"
     -t "${DOCKER_IMAGE_TAG}"
     -f "${SCRIPT_DIR}/Dockerfile"
@@ -203,8 +225,13 @@ echo "Verifying build artifacts..."
 echo "=========================================="
 echo ""

+EXPECTED_BINARIES=(llama-server llama-cli whisper-server whisper-cli sd-server sd-cli llama-swap)
+if [[ "$BACKEND" == "cuda" ]]; then
+    EXPECTED_BINARIES+=(ik-llama-server)
+fi
+
 MISSING_BINARIES=()
-for binary in llama-server llama-cli whisper-server whisper-cli sd-server sd-cli llama-swap; do
+for binary in "${EXPECTED_BINARIES[@]}"; do
     if ! docker run --rm --entrypoint which "${DOCKER_IMAGE_TAG}" "${binary}" >/dev/null 2>&1; then
         MISSING_BINARIES+=("${binary}")
     fi
@@ -221,7 +248,11 @@ if [[ ${#MISSING_BINARIES[@]} -gt 0 ]]; then
     exit 1
 fi

-echo "All expected binaries verified: llama-server, llama-cli, whisper-server, whisper-cli, sd-server, sd-cli, llama-swap"
+VERIFIED_LIST="llama-server, llama-cli, whisper-server, whisper-cli, sd-server, sd-cli, llama-swap"
+if [[ "$BACKEND" == "cuda" ]]; then
+    VERIFIED_LIST="${VERIFIED_LIST}, ik-llama-server"
+fi
+echo "All expected binaries verified: ${VERIFIED_LIST}"

 echo ""
 echo "=========================================="
@@ -231,10 +262,13 @@ echo ""
 echo "Image tag: ${DOCKER_IMAGE_TAG}"
 echo ""
 echo "Built with:"
-echo " llama.cpp: ${LLAMA_HASH}"
-echo " whisper.cpp: ${WHISPER_HASH}"
+echo " llama.cpp: ${LLAMA_HASH}"
+echo " whisper.cpp: ${WHISPER_HASH}"
 echo " stable-diffusion.cpp: ${SD_HASH}"
-echo " llama-swap: $(docker run --rm --entrypoint cat "${DOCKER_IMAGE_TAG}" /versions.txt | grep llama-swap | cut -d' ' -f2-)"
+if [[ "$BACKEND" == "cuda" ]]; then
+    echo " ik_llama.cpp: ${IK_LLAMA_HASH}"
+fi
+echo " llama-swap: $(docker run --rm --entrypoint cat "${DOCKER_IMAGE_TAG}" /versions.txt | grep llama-swap | cut -d' ' -f2-)"
 echo ""
 if [[ "$BACKEND" == "vulkan" ]]; then
     echo "Run with:"
diff --git a/docker/unified/install-ik-llama.sh b/docker/unified/install-ik-llama.sh
new file mode 100644
index 00000000..f78ecd2b
--- /dev/null
+++ b/docker/unified/install-ik-llama.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+# Install ik_llama.cpp - clone, build, and install binaries
+# Usage: ./install-ik-llama.sh [commit-hash|tag|branch]   (defaults to "main")
+# Note: CUDA only; always built against builder-base-cuda
+set -e
+
+COMMIT_HASH="${1:-main}"
+
+mkdir -p /install/bin
+
+# Clone and checkout (init-based so cache-mounted build dir doesn't break clone)
+echo "=== Cloning ik_llama.cpp at ${COMMIT_HASH} ==="
+mkdir -p /src/ik_llama.cpp
+cd /src/ik_llama.cpp
+if [ ! -d .git ]; then
+    git init
+    git remote add origin https://github.com/ikawrakow/ik_llama.cpp.git
+fi
+git fetch --depth=1 origin "${COMMIT_HASH}"
+git checkout FETCH_HEAD
+
+CMAKE_FLAGS=(
+    -DGGML_NATIVE=OFF
+    -DBUILD_SHARED_LIBS=OFF
+    -DCMAKE_BUILD_TYPE=Release
+    -DCMAKE_C_COMPILER_LAUNCHER=ccache
+    -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
+    -DGGML_CUDA=ON
+    "-DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES:-60;61;75;86;89}"
+    "-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler"
+    "-DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda -Wl,--allow-shlib-undefined"
+)
+
+rm -rf build/CMakeCache.txt build/CMakeFiles 2>/dev/null || true
+
+echo "=== Building ik_llama.cpp ==="
+cmake -B build "${CMAKE_FLAGS[@]}"
+cmake --build build --config Release -j"$(nproc)" --target llama-server
+
+if [ ! -f "build/bin/llama-server" ]; then
+    echo "FATAL: llama-server not found in build/bin/" >&2
+    exit 1
+fi
+
+# Install as ik-llama-server to avoid collision with llama.cpp's llama-server
+cp "build/bin/llama-server" "/install/bin/ik-llama-server"
+echo "=== ik_llama.cpp build complete ==="
+ls -la /install/bin/