docker/unified: add ik_llama.cpp to CUDA container (#620)
This commit is contained in:
@@ -12,7 +12,7 @@ ARG BACKEND=cuda
|
||||
|
||||
# ── Builder bases ──────────────────────────────────────────────────────
|
||||
|
||||
FROM nvidia/cuda:12.4.0-devel-ubuntu22.04 AS builder-base-cuda
|
||||
FROM nvidia/cuda:12.9.1-devel-ubuntu24.04 AS builder-base-cuda
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
ENV CMAKE_CUDA_ARCHITECTURES="60;61;75;86;89"
|
||||
@@ -29,7 +29,7 @@ WORKDIR /build
|
||||
|
||||
# ──
|
||||
|
||||
FROM ubuntu:26.04 AS builder-base-vulkan
|
||||
FROM ubuntu:24.04 AS builder-base-vulkan
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
ENV CCACHE_DIR=/ccache
|
||||
@@ -78,6 +78,27 @@ RUN --mount=type=cache,id=ccache-${BACKEND},target=/ccache \
|
||||
--mount=type=cache,id=llama-${BACKEND},target=/src/llama.cpp/build \
|
||||
BACKEND=${BACKEND} bash /build/install-llama.sh "${LLAMA_COMMIT_HASH}"
|
||||
|
||||
# ── Build ik_llama.cpp (CUDA only) ────────────────────────────────────
|
||||
#
|
||||
# Two named stages let ARG BACKEND select which variant is used at build time:
|
||||
# - ik-llama-cuda : real build (from builder-base-cuda)
|
||||
# - ik-llama-vulkan: no-op (empty /install/bin, skips CUDA pull entirely)
|
||||
# BuildKit only evaluates the selected branch, so vulkan builds never
|
||||
# pull nvidia/cuda:*-devel or compile ik_llama.cpp.
|
||||
|
||||
# No-op variant for the vulkan backend: it only creates an empty /install/bin
# so the later COPY from ik-llama-build has a source directory to copy.
FROM builder-base-vulkan AS ik-llama-vulkan
|
||||
RUN mkdir -p /install/bin
|
||||
|
||||
# Real variant: runs install-ik-llama.sh on top of the CUDA builder base.
FROM builder-base-cuda AS ik-llama-cuda
|
||||
# Passed to install-ik-llama.sh as its first argument; defaults to "main".
ARG IK_LLAMA_COMMIT_HASH=main
|
||||
COPY install-ik-llama.sh /build/
|
||||
# Cache mounts: /ccache uses id ccache-cuda (the same id that
# ccache-${BACKEND} yields for BACKEND=cuda), and the ik_llama.cpp build
# directory gets its own cache id.
# NOTE(review): presumably the script installs its outputs under /install,
# since the build directory is a cache mount — confirm in install-ik-llama.sh.
RUN --mount=type=cache,id=ccache-cuda,target=/ccache \
|
||||
--mount=type=cache,id=ik-llama-cuda,target=/src/ik_llama.cpp/build \
|
||||
bash /build/install-ik-llama.sh "${IK_LLAMA_COMMIT_HASH}"
|
||||
|
||||
# NOTE(review): this ARG is declared inside the ik-llama-cuda stage. The FROM
# below presumably resolves ${BACKEND} from an ARG BACKEND declared before the
# first FROM of the file — confirm that global declaration exists.
ARG BACKEND=cuda
|
||||
# Alias whichever ik-llama-<backend> stage matches BACKEND as ik-llama-build.
FROM ik-llama-${BACKEND} AS ik-llama-build
|
||||
|
||||
# ── Download llama-swap release binary ────────────────────────────────
|
||||
|
||||
FROM builder-base AS llama-swap-download
|
||||
@@ -87,14 +108,14 @@ RUN bash /build/install-llama-swap.sh "${LS_VERSION}"
|
||||
|
||||
# ── Runtime bases ─────────────────────────────────────────────────────
|
||||
|
||||
FROM nvidia/cuda:12.4.0-runtime-ubuntu22.04 AS runtime-cuda
|
||||
FROM nvidia/cuda:12.9.1-runtime-ubuntu24.04 AS runtime-cuda
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"
|
||||
ENV PATH="/usr/local/bin:${PATH}"
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libgomp1 python3 curl ca-certificates git \
|
||||
libgomp1 python3 curl ca-certificates \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# CUDA stub drivers for container compatibility
|
||||
@@ -103,14 +124,14 @@ COPY --from=builder-base-cuda /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/
|
||||
|
||||
# ──
|
||||
|
||||
FROM ubuntu:26.04 AS runtime-vulkan
|
||||
FROM ubuntu:24.04 AS runtime-vulkan
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
ENV PATH="/usr/local/bin:${PATH}"
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libgomp1 libvulkan1 mesa-vulkan-drivers \
|
||||
python3 curl ca-certificates git \
|
||||
python3 curl ca-certificates \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# ── Select runtime base by BACKEND ────────────────────────────────────
|
||||
@@ -121,6 +142,7 @@ ARG BACKEND=cuda
|
||||
ARG LLAMA_COMMIT_HASH=unknown
|
||||
ARG WHISPER_COMMIT_HASH=unknown
|
||||
ARG SD_COMMIT_HASH=unknown
|
||||
ARG IK_LLAMA_COMMIT_HASH=unknown
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
python3-numpy python3-sentencepiece \
|
||||
@@ -147,6 +169,9 @@ COPY --from=sd-build /install/lib/ /usr/local/lib/
|
||||
COPY --from=llama-build /install/bin/llama-server /usr/local/bin/
|
||||
COPY --from=llama-build /install/bin/llama-cli /usr/local/bin/
|
||||
|
||||
# Copy everything from ik-llama-build's /install/bin/ (ik_llama.cpp binaries on CUDA; empty directory on vulkan)
|
||||
COPY --from=ik-llama-build /install/bin/ /usr/local/bin/
|
||||
|
||||
# Copy llama-swap binary
|
||||
COPY --from=llama-swap-download /install/bin/llama-swap /usr/local/bin/
|
||||
COPY --from=llama-swap-download /install/llama-swap-version /tmp/
|
||||
@@ -159,6 +184,7 @@ COPY config.example.yaml /etc/llama-swap/config/config.yaml
|
||||
RUN echo "llama.cpp: ${LLAMA_COMMIT_HASH}" > /versions.txt && \
|
||||
echo "whisper.cpp: ${WHISPER_COMMIT_HASH}" >> /versions.txt && \
|
||||
echo "stable-diffusion.cpp: ${SD_COMMIT_HASH}" >> /versions.txt && \
|
||||
echo "ik_llama.cpp: ${IK_LLAMA_COMMIT_HASH}" >> /versions.txt && \
|
||||
echo "llama-swap: $(cat /tmp/llama-swap-version)" >> /versions.txt && \
|
||||
echo "backend: ${BACKEND}" >> /versions.txt && \
|
||||
echo "build_timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" >> /versions.txt
|
||||
|
||||
Reference in New Issue
Block a user