docker/unified: add ik_llama.cpp to CUDA container (#620)
This commit is contained in:
@@ -18,6 +18,10 @@ on:
|
||||
description: "stable-diffusion.cpp commit hash, tag, or branch"
|
||||
required: false
|
||||
default: "master"
|
||||
ik_llama_ref:
|
||||
description: "ik_llama.cpp commit hash, tag, or branch (CUDA only)"
|
||||
required: false
|
||||
default: "main"
|
||||
llama_swap_version:
|
||||
description: "llama-swap version (e.g. v198, latest, main)"
|
||||
required: false
|
||||
@@ -100,6 +104,7 @@ jobs:
|
||||
LLAMA_REF: ${{ inputs.llama_cpp_ref || 'master' }}
|
||||
WHISPER_REF: ${{ inputs.whisper_ref || 'master' }}
|
||||
SD_REF: ${{ inputs.sd_ref || 'master' }}
|
||||
IK_LLAMA_REF: ${{ inputs.ik_llama_ref || 'main' }}
|
||||
LS_VERSION: ${{ inputs.llama_swap_version || 'main' }}
|
||||
DOCKER_IMAGE_TAG: ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }}
|
||||
# When running under act, use the local builder that has warm ccache.
|
||||
|
||||
@@ -12,7 +12,7 @@ ARG BACKEND=cuda
|
||||
|
||||
# ── Builder bases ──────────────────────────────────────────────────────
|
||||
|
||||
FROM nvidia/cuda:12.4.0-devel-ubuntu22.04 AS builder-base-cuda
|
||||
FROM nvidia/cuda:12.9.1-devel-ubuntu24.04 AS builder-base-cuda
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
ENV CMAKE_CUDA_ARCHITECTURES="60;61;75;86;89"
|
||||
@@ -29,7 +29,7 @@ WORKDIR /build
|
||||
|
||||
# ──
|
||||
|
||||
FROM ubuntu:26.04 AS builder-base-vulkan
|
||||
FROM ubuntu:24.04 AS builder-base-vulkan
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
ENV CCACHE_DIR=/ccache
|
||||
@@ -78,6 +78,27 @@ RUN --mount=type=cache,id=ccache-${BACKEND},target=/ccache \
|
||||
--mount=type=cache,id=llama-${BACKEND},target=/src/llama.cpp/build \
|
||||
BACKEND=${BACKEND} bash /build/install-llama.sh "${LLAMA_COMMIT_HASH}"
|
||||
|
||||
# ── Build ik_llama.cpp (CUDA only) ────────────────────────────────────
#
# One named stage per backend, plus an alias stage that selects between
# them at build time:
#   - ik-llama-cuda  : real build of ik_llama.cpp (from builder-base-cuda)
#   - ik-llama-vulkan: no-op that only provides an empty /install/bin
# BuildKit only evaluates the selected branch, so vulkan builds never
# run the ik-llama-cuda stage or compile ik_llama.cpp.

FROM builder-base-vulkan AS ik-llama-vulkan
# Create the directory so that copying /install/bin/ out of the selected
# stage works even though there is nothing to copy for vulkan.
RUN mkdir -p /install/bin

FROM builder-base-cuda AS ik-llama-cuda
# Ref (commit hash, tag, or branch) handed to install-ik-llama.sh.
ARG IK_LLAMA_COMMIT_HASH=main
COPY install-ik-llama.sh /build/
# Cache mounts keep the ccache directory and the CMake build tree between
# builds; neither ends up in an image layer.
RUN --mount=type=cache,id=ccache-cuda,target=/ccache \
    --mount=type=cache,id=ik-llama-cuda,target=/src/ik_llama.cpp/build \
    bash /build/install-ik-llama.sh "${IK_LLAMA_COMMIT_HASH}"

# ${BACKEND} in a FROM line is resolved only from ARGs declared before the
# first FROM of the Dockerfile; an ARG declared inside a stage has no effect
# here, so none is redeclared.
# NOTE(review): relies on the global `ARG BACKEND=cuda` near the top of this
# Dockerfile — confirm it is declared before the first FROM.
FROM ik-llama-${BACKEND} AS ik-llama-build
|
||||
|
||||
# ── Download llama-swap release binary ────────────────────────────────
|
||||
|
||||
FROM builder-base AS llama-swap-download
|
||||
@@ -87,14 +108,14 @@ RUN bash /build/install-llama-swap.sh "${LS_VERSION}"
|
||||
|
||||
# ── Runtime bases ─────────────────────────────────────────────────────
|
||||
|
||||
FROM nvidia/cuda:12.4.0-runtime-ubuntu22.04 AS runtime-cuda
|
||||
FROM nvidia/cuda:12.9.1-runtime-ubuntu24.04 AS runtime-cuda
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"
|
||||
ENV PATH="/usr/local/bin:${PATH}"
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libgomp1 python3 curl ca-certificates git \
|
||||
libgomp1 python3 curl ca-certificates \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# CUDA stub drivers for container compatibility
|
||||
@@ -103,14 +124,14 @@ COPY --from=builder-base-cuda /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/
|
||||
|
||||
# ──
|
||||
|
||||
FROM ubuntu:26.04 AS runtime-vulkan
|
||||
FROM ubuntu:24.04 AS runtime-vulkan
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
ENV PATH="/usr/local/bin:${PATH}"
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libgomp1 libvulkan1 mesa-vulkan-drivers \
|
||||
python3 curl ca-certificates git \
|
||||
python3 curl ca-certificates \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# ── Select runtime base by BACKEND ────────────────────────────────────
|
||||
@@ -121,6 +142,7 @@ ARG BACKEND=cuda
|
||||
ARG LLAMA_COMMIT_HASH=unknown
|
||||
ARG WHISPER_COMMIT_HASH=unknown
|
||||
ARG SD_COMMIT_HASH=unknown
|
||||
ARG IK_LLAMA_COMMIT_HASH=unknown
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
python3-numpy python3-sentencepiece \
|
||||
@@ -147,6 +169,9 @@ COPY --from=sd-build /install/lib/ /usr/local/lib/
|
||||
COPY --from=llama-build /install/bin/llama-server /usr/local/bin/
|
||||
COPY --from=llama-build /install/bin/llama-cli /usr/local/bin/
|
||||
|
||||
# Copy ik-llama-server (CUDA only; empty copy for vulkan)
|
||||
COPY --from=ik-llama-build /install/bin/ /usr/local/bin/
|
||||
|
||||
# Copy llama-swap binary
|
||||
COPY --from=llama-swap-download /install/bin/llama-swap /usr/local/bin/
|
||||
COPY --from=llama-swap-download /install/llama-swap-version /tmp/
|
||||
@@ -159,6 +184,7 @@ COPY config.example.yaml /etc/llama-swap/config/config.yaml
|
||||
RUN echo "llama.cpp: ${LLAMA_COMMIT_HASH}" > /versions.txt && \
|
||||
echo "whisper.cpp: ${WHISPER_COMMIT_HASH}" >> /versions.txt && \
|
||||
echo "stable-diffusion.cpp: ${SD_COMMIT_HASH}" >> /versions.txt && \
|
||||
echo "ik_llama.cpp: ${IK_LLAMA_COMMIT_HASH}" >> /versions.txt && \
|
||||
echo "llama-swap: $(cat /tmp/llama-swap-version)" >> /versions.txt && \
|
||||
echo "backend: ${BACKEND}" >> /versions.txt && \
|
||||
echo "build_timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" >> /versions.txt
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
# WHISPER_REF=v1.0.0 ./build-image.sh --vulkan # Pin whisper.cpp to a tag
|
||||
# SD_REF=master ./build-image.sh --cuda # Pin stable-diffusion.cpp to a branch
|
||||
# LS_VERSION=170 ./build-image.sh --cuda # Override llama-swap version
|
||||
# IK_LLAMA_REF=main ./build-image.sh --cuda # Pin ik_llama.cpp to main branch (CUDA only)
|
||||
#
|
||||
|
||||
set -euo pipefail
|
||||
@@ -43,6 +44,7 @@ for arg in "$@"; do
|
||||
echo " LLAMA_REF Pin llama.cpp to a commit, tag, or branch"
|
||||
echo " WHISPER_REF Pin whisper.cpp to a commit, tag, or branch"
|
||||
echo " SD_REF Pin stable-diffusion.cpp to a commit, tag, or branch"
|
||||
echo " IK_LLAMA_REF Pin ik_llama.cpp to a commit, tag, or branch (CUDA only)"
|
||||
echo " LS_VERSION Override llama-swap version (e.g., '170' or 'latest')"
|
||||
exit 0
|
||||
;;
|
||||
@@ -63,6 +65,7 @@ LLAMA_REPO="https://github.com/ggml-org/llama.cpp.git"
|
||||
WHISPER_REPO="https://github.com/ggml-org/whisper.cpp.git"
|
||||
SD_REPO="https://github.com/leejet/stable-diffusion.cpp.git"
|
||||
LLAMA_SWAP_REPO="https://github.com/mostlygeek/llama-swap.git"
|
||||
IK_LLAMA_REPO="https://github.com/ikawrakow/ik_llama.cpp.git"
|
||||
|
||||
# Resolve a git ref (commit hash, tag, or branch) to a full commit hash.
|
||||
# Requires only: git, network access to the remote.
|
||||
@@ -152,6 +155,24 @@ else
|
||||
echo "stable-diffusion.cpp: latest HEAD: ${SD_HASH}"
|
||||
fi
|
||||
|
||||
# Resolve ik_llama.cpp ref (CUDA only).
# Non-CUDA backends record a placeholder hash; CUDA builds resolve either the
# caller-supplied IK_LLAMA_REF or, when it is unset/empty, the latest commit
# reported by get_latest_hash.
if [[ "$BACKEND" != "cuda" ]]; then
    IK_LLAMA_HASH="n/a"
    echo "ik_llama.cpp: skipped (vulkan build)"
elif [[ -n "${IK_LLAMA_REF:-}" ]]; then
    IK_LLAMA_HASH=$(resolve_ref "${IK_LLAMA_REPO}" "${IK_LLAMA_REF}") || exit 1
    echo "ik_llama.cpp: ${IK_LLAMA_REF} -> ${IK_LLAMA_HASH}"
else
    IK_LLAMA_HASH=$(get_latest_hash "${IK_LLAMA_REPO}")
    # An empty result means the lookup produced no hash; abort before building.
    if [[ -z "${IK_LLAMA_HASH}" ]]; then
        echo "ERROR: Could not determine latest commit for ik_llama.cpp" >&2
        exit 1
    fi
    echo "ik_llama.cpp: latest HEAD: ${IK_LLAMA_HASH}"
fi
|
||||
|
||||
# Resolve llama-swap ref
|
||||
if [[ -n "${LS_VERSION:-}" ]]; then
|
||||
LS_HASH=$(resolve_ref "${LLAMA_SWAP_REPO}" "${LS_VERSION}") || exit 1
|
||||
@@ -178,6 +199,7 @@ BUILD_ARGS=(
|
||||
--build-arg "LLAMA_COMMIT_HASH=${LLAMA_HASH}"
|
||||
--build-arg "WHISPER_COMMIT_HASH=${WHISPER_HASH}"
|
||||
--build-arg "SD_COMMIT_HASH=${SD_HASH}"
|
||||
--build-arg "IK_LLAMA_COMMIT_HASH=${IK_LLAMA_HASH}"
|
||||
--build-arg "LS_VERSION=${LS_HASH}"
|
||||
-t "${DOCKER_IMAGE_TAG}"
|
||||
-f "${SCRIPT_DIR}/Dockerfile"
|
||||
@@ -203,8 +225,13 @@ echo "Verifying build artifacts..."
|
||||
echo "=========================================="
|
||||
echo ""
|
||||
|
||||
EXPECTED_BINARIES=(llama-server llama-cli whisper-server whisper-cli sd-server sd-cli llama-swap)
|
||||
if [[ "$BACKEND" == "cuda" ]]; then
|
||||
EXPECTED_BINARIES+=(ik-llama-server)
|
||||
fi
|
||||
|
||||
MISSING_BINARIES=()
|
||||
for binary in llama-server llama-cli whisper-server whisper-cli sd-server sd-cli llama-swap; do
|
||||
for binary in "${EXPECTED_BINARIES[@]}"; do
|
||||
if ! docker run --rm --entrypoint which "${DOCKER_IMAGE_TAG}" "${binary}" >/dev/null 2>&1; then
|
||||
MISSING_BINARIES+=("${binary}")
|
||||
fi
|
||||
@@ -221,7 +248,11 @@ if [[ ${#MISSING_BINARIES[@]} -gt 0 ]]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "All expected binaries verified: llama-server, llama-cli, whisper-server, whisper-cli, sd-server, sd-cli, llama-swap"
|
||||
VERIFIED_LIST="llama-server, llama-cli, whisper-server, whisper-cli, sd-server, sd-cli, llama-swap"
|
||||
if [[ "$BACKEND" == "cuda" ]]; then
|
||||
VERIFIED_LIST="${VERIFIED_LIST}, ik-llama-server"
|
||||
fi
|
||||
echo "All expected binaries verified: ${VERIFIED_LIST}"
|
||||
|
||||
echo ""
|
||||
echo "=========================================="
|
||||
@@ -231,10 +262,13 @@ echo ""
|
||||
echo "Image tag: ${DOCKER_IMAGE_TAG}"
|
||||
echo ""
|
||||
echo "Built with:"
|
||||
echo " llama.cpp: ${LLAMA_HASH}"
|
||||
echo " whisper.cpp: ${WHISPER_HASH}"
|
||||
echo " llama.cpp: ${LLAMA_HASH}"
|
||||
echo " whisper.cpp: ${WHISPER_HASH}"
|
||||
echo " stable-diffusion.cpp: ${SD_HASH}"
|
||||
echo " llama-swap: $(docker run --rm --entrypoint cat "${DOCKER_IMAGE_TAG}" /versions.txt | grep llama-swap | cut -d' ' -f2-)"
|
||||
if [[ "$BACKEND" == "cuda" ]]; then
|
||||
echo " ik_llama.cpp: ${IK_LLAMA_HASH}"
|
||||
fi
|
||||
echo " llama-swap: $(docker run --rm --entrypoint cat "${DOCKER_IMAGE_TAG}" /versions.txt | grep llama-swap | cut -d' ' -f2-)"
|
||||
echo ""
|
||||
if [[ "$BACKEND" == "vulkan" ]]; then
|
||||
echo "Run with:"
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
#!/bin/bash
# Install ik_llama.cpp - clone, build, and install binaries
#
# Usage: ./install-ik-llama.sh <commit_hash>
#   commit_hash   Commit hash, tag, or branch to fetch (default: main)
#
# Environment:
#   CMAKE_CUDA_ARCHITECTURES   Semicolon-separated CUDA architectures
#                              (default: 60;61;75;86;89)
#
# Output: /install/bin/ik-llama-server
#
# Note: CUDA only; always built against builder-base-cuda
set -euo pipefail

COMMIT_HASH="${1:-main}"
REPO_URL="https://github.com/ikawrakow/ik_llama.cpp.git"
SRC_DIR="/src/ik_llama.cpp"
INSTALL_DIR="/install/bin"
BUILT_BINARY="build/bin/llama-server"

mkdir -p "${INSTALL_DIR}"

# Clone and checkout (init-based so cache-mounted build dir doesn't break clone:
# the source directory may already contain build/ before any git metadata exists)
echo "=== Cloning ik_llama.cpp at ${COMMIT_HASH} ==="
mkdir -p "${SRC_DIR}"
cd "${SRC_DIR}"
if [ ! -d .git ]; then
    git init
    git remote add origin "${REPO_URL}"
fi
# Shallow-fetch only the requested ref, then check out exactly what was fetched.
git fetch --depth=1 origin "${COMMIT_HASH}"
git checkout FETCH_HEAD

CMAKE_FLAGS=(
    -DGGML_NATIVE=OFF
    -DBUILD_SHARED_LIBS=OFF
    -DCMAKE_BUILD_TYPE=Release
    -DCMAKE_C_COMPILER_LAUNCHER=ccache
    -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
    -DGGML_CUDA=ON
    "-DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES:-60;61;75;86;89}"
    "-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler"
    # Link against the libcuda stub shipped with the CUDA toolkit and tolerate
    # undefined symbols in shared libraries at link time.
    # NOTE(review): presumably because no real driver library is present in
    # the build image — confirm.
    "-DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda -Wl,--allow-shlib-undefined"
)

# Drop the CMake configuration left in the (possibly reused) build directory so
# the configure step below starts fresh; best-effort, never fatal.
rm -rf build/CMakeCache.txt build/CMakeFiles 2>/dev/null || true

echo "=== Building ik_llama.cpp ==="
cmake -B build "${CMAKE_FLAGS[@]}"
cmake --build build --config Release -j"$(nproc)" --target llama-server

if [ ! -f "${BUILT_BINARY}" ]; then
    echo "FATAL: llama-server not found in build/bin/" >&2
    exit 1
fi

# Install as ik-llama-server to avoid collision with llama.cpp's llama-server;
# install(1) sets the executable mode explicitly instead of relying on cp.
install -m 0755 "${BUILT_BINARY}" "${INSTALL_DIR}/ik-llama-server"
echo "=== ik_llama.cpp build complete ==="
ls -la "${INSTALL_DIR}/"
|
||||
Reference in New Issue
Block a user