docker/unified: add ik_llama.cpp to CUDA container (#620)

This commit is contained in:
Benson Wong
2026-04-03 15:16:30 +08:00
committed by GitHub
parent 955900972a
commit 1dd1aadf93
4 changed files with 124 additions and 11 deletions
+32 -6
View File
@@ -12,7 +12,7 @@ ARG BACKEND=cuda
# ── Builder bases ──────────────────────────────────────────────────────
FROM nvidia/cuda:12.4.0-devel-ubuntu22.04 AS builder-base-cuda
FROM nvidia/cuda:12.9.1-devel-ubuntu24.04 AS builder-base-cuda
ENV DEBIAN_FRONTEND=noninteractive
ENV CMAKE_CUDA_ARCHITECTURES="60;61;75;86;89"
@@ -29,7 +29,7 @@ WORKDIR /build
# ──
FROM ubuntu:26.04 AS builder-base-vulkan
FROM ubuntu:24.04 AS builder-base-vulkan
ENV DEBIAN_FRONTEND=noninteractive
ENV CCACHE_DIR=/ccache
@@ -78,6 +78,27 @@ RUN --mount=type=cache,id=ccache-${BACKEND},target=/ccache \
--mount=type=cache,id=llama-${BACKEND},target=/src/llama.cpp/build \
BACKEND=${BACKEND} bash /build/install-llama.sh "${LLAMA_COMMIT_HASH}"
# ── Build ik_llama.cpp (CUDA only) ────────────────────────────────────
#
# Two named stages allow ARG BACKEND to select at build time:
# - ik-llama-cuda : real build (from builder-base-cuda)
# - ik-llama-vulkan: no-op (empty /install/bin, skips CUDA pull entirely)
# BuildKit only evaluates the selected branch, so vulkan builds never
# pull nvidia/cuda:*-devel or compile ik_llama.cpp.
FROM builder-base-vulkan AS ik-llama-vulkan
RUN mkdir -p /install/bin
FROM builder-base-cuda AS ik-llama-cuda
ARG IK_LLAMA_COMMIT_HASH=main
COPY install-ik-llama.sh /build/
RUN --mount=type=cache,id=ccache-cuda,target=/ccache \
--mount=type=cache,id=ik-llama-cuda,target=/src/ik_llama.cpp/build \
bash /build/install-ik-llama.sh "${IK_LLAMA_COMMIT_HASH}"
ARG BACKEND=cuda
FROM ik-llama-${BACKEND} AS ik-llama-build
# ── Download llama-swap release binary ────────────────────────────────
FROM builder-base AS llama-swap-download
@@ -87,14 +108,14 @@ RUN bash /build/install-llama-swap.sh "${LS_VERSION}"
# ── Runtime bases ─────────────────────────────────────────────────────
FROM nvidia/cuda:12.4.0-runtime-ubuntu22.04 AS runtime-cuda
FROM nvidia/cuda:12.9.1-runtime-ubuntu24.04 AS runtime-cuda
ENV DEBIAN_FRONTEND=noninteractive
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"
ENV PATH="/usr/local/bin:${PATH}"
RUN apt-get update && apt-get install -y --no-install-recommends \
libgomp1 python3 curl ca-certificates git \
libgomp1 python3 curl ca-certificates \
&& rm -rf /var/lib/apt/lists/*
# CUDA stub drivers for container compatibility
@@ -103,14 +124,14 @@ COPY --from=builder-base-cuda /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/
# ──
FROM ubuntu:26.04 AS runtime-vulkan
FROM ubuntu:24.04 AS runtime-vulkan
ENV DEBIAN_FRONTEND=noninteractive
ENV PATH="/usr/local/bin:${PATH}"
RUN apt-get update && apt-get install -y --no-install-recommends \
libgomp1 libvulkan1 mesa-vulkan-drivers \
python3 curl ca-certificates git \
python3 curl ca-certificates \
&& rm -rf /var/lib/apt/lists/*
# ── Select runtime base by BACKEND ────────────────────────────────────
@@ -121,6 +142,7 @@ ARG BACKEND=cuda
ARG LLAMA_COMMIT_HASH=unknown
ARG WHISPER_COMMIT_HASH=unknown
ARG SD_COMMIT_HASH=unknown
ARG IK_LLAMA_COMMIT_HASH=unknown
RUN apt-get update && apt-get install -y --no-install-recommends \
python3-numpy python3-sentencepiece \
@@ -147,6 +169,9 @@ COPY --from=sd-build /install/lib/ /usr/local/lib/
COPY --from=llama-build /install/bin/llama-server /usr/local/bin/
COPY --from=llama-build /install/bin/llama-cli /usr/local/bin/
# Copy ik-llama-server (CUDA only; empty copy for vulkan)
COPY --from=ik-llama-build /install/bin/ /usr/local/bin/
# Copy llama-swap binary
COPY --from=llama-swap-download /install/bin/llama-swap /usr/local/bin/
COPY --from=llama-swap-download /install/llama-swap-version /tmp/
@@ -159,6 +184,7 @@ COPY config.example.yaml /etc/llama-swap/config/config.yaml
RUN echo "llama.cpp: ${LLAMA_COMMIT_HASH}" > /versions.txt && \
echo "whisper.cpp: ${WHISPER_COMMIT_HASH}" >> /versions.txt && \
echo "stable-diffusion.cpp: ${SD_COMMIT_HASH}" >> /versions.txt && \
echo "ik_llama.cpp: ${IK_LLAMA_COMMIT_HASH}" >> /versions.txt && \
echo "llama-swap: $(cat /tmp/llama-swap-version)" >> /versions.txt && \
echo "backend: ${BACKEND}" >> /versions.txt && \
echo "build_timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" >> /versions.txt