.github,docker/unified: improve caching and fix bugs (#598)
- set up a GHA scheduled job to build the container nightly - enable pushing llama-swap:unified and llama-swap:unified-YYYY-MM-DD images to ghcr.io - tidy up the Dockerfile to use a non-root user and llama-swap as the entry point
This commit is contained in:
@@ -1,24 +1,27 @@
|
|||||||
name: Build Unified Docker Image
|
name: Build Unified Docker Image
|
||||||
|
|
||||||
on:
|
on:
|
||||||
|
schedule:
|
||||||
|
- cron: "37 5 * * *"
|
||||||
|
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
inputs:
|
inputs:
|
||||||
llama_cpp_ref:
|
llama_cpp_ref:
|
||||||
description: "llama.cpp commit hash, tag, or branch"
|
description: "llama.cpp commit hash, tag, or branch"
|
||||||
required: false
|
required: false
|
||||||
default: "b8468"
|
default: "master"
|
||||||
whisper_ref:
|
whisper_ref:
|
||||||
description: "whisper.cpp commit hash, tag, or branch"
|
description: "whisper.cpp commit hash, tag, or branch"
|
||||||
required: false
|
required: false
|
||||||
default: "v1.8.4"
|
default: "master"
|
||||||
sd_ref:
|
sd_ref:
|
||||||
description: "stable-diffusion.cpp commit hash, tag, or branch"
|
description: "stable-diffusion.cpp commit hash, tag, or branch"
|
||||||
required: false
|
required: false
|
||||||
default: "545fac4"
|
default: "master"
|
||||||
llama_swap_version:
|
llama_swap_version:
|
||||||
description: "llama-swap version (e.g. v198, latest)"
|
description: "llama-swap version (e.g. v198, latest, main)"
|
||||||
required: false
|
required: false
|
||||||
default: "v198"
|
default: "main"
|
||||||
|
|
||||||
permissions:
|
permissions:
|
||||||
contents: read
|
contents: read
|
||||||
@@ -50,9 +53,8 @@ jobs:
|
|||||||
if: ${{ !env.ACT }}
|
if: ${{ !env.ACT }}
|
||||||
uses: docker/setup-buildx-action@v3
|
uses: docker/setup-buildx-action@v3
|
||||||
|
|
||||||
# Disabled until ready to publish
|
|
||||||
- name: Log in to GitHub Container Registry
|
- name: Log in to GitHub Container Registry
|
||||||
if: false
|
if: ${{ !env.ACT }}
|
||||||
uses: docker/login-action@v3
|
uses: docker/login-action@v3
|
||||||
with:
|
with:
|
||||||
registry: ghcr.io
|
registry: ghcr.io
|
||||||
@@ -61,10 +63,10 @@ jobs:
|
|||||||
|
|
||||||
- name: Build unified Docker image
|
- name: Build unified Docker image
|
||||||
env:
|
env:
|
||||||
LLAMA_REF: ${{ inputs.llama_cpp_ref }}
|
LLAMA_REF: ${{ inputs.llama_cpp_ref || 'master' }}
|
||||||
WHISPER_REF: ${{ inputs.whisper_ref }}
|
WHISPER_REF: ${{ inputs.whisper_ref || 'master' }}
|
||||||
SD_REF: ${{ inputs.sd_ref }}
|
SD_REF: ${{ inputs.sd_ref || 'master' }}
|
||||||
LS_VERSION: ${{ inputs.llama_swap_version }}
|
LS_VERSION: ${{ inputs.llama_swap_version || 'main' }}
|
||||||
DOCKER_IMAGE_TAG: ghcr.io/mostlygeek/llama-swap:unified
|
DOCKER_IMAGE_TAG: ghcr.io/mostlygeek/llama-swap:unified
|
||||||
# When running under act, use the local builder that has warm ccache.
|
# When running under act, use the local builder that has warm ccache.
|
||||||
# On GitHub Actions, BUILDX_BUILDER is unset so docker uses the builder
|
# On GitHub Actions, BUILDX_BUILDER is unset so docker uses the builder
|
||||||
@@ -74,7 +76,9 @@ jobs:
|
|||||||
chmod +x docker/unified/build-image.sh
|
chmod +x docker/unified/build-image.sh
|
||||||
docker/unified/build-image.sh
|
docker/unified/build-image.sh
|
||||||
|
|
||||||
# Disabled until ready to publish
|
|
||||||
- name: Push to GitHub Container Registry
|
- name: Push to GitHub Container Registry
|
||||||
if: false
|
if: ${{ !env.ACT }}
|
||||||
run: docker push ghcr.io/mostlygeek/llama-swap:unified
|
run: |
|
||||||
|
docker push ghcr.io/mostlygeek/llama-swap:unified
|
||||||
|
docker tag ghcr.io/mostlygeek/llama-swap:unified ghcr.io/mostlygeek/llama-swap:unified-$(date -u +%Y-%m-%d)
|
||||||
|
docker push ghcr.io/mostlygeek/llama-swap:unified-$(date -u +%Y-%m-%d)
|
||||||
|
|||||||
@@ -74,6 +74,11 @@ COPY --from=builder-base /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/
|
|||||||
|
|
||||||
RUN pip3 install --no-cache-dir numpy sentencepiece
|
RUN pip3 install --no-cache-dir numpy sentencepiece
|
||||||
|
|
||||||
|
# Create llama-swap user and config directory
|
||||||
|
RUN useradd --system --no-create-home --shell /sbin/nologin llama-swap && \
|
||||||
|
mkdir -p /etc/llama-swap/config && \
|
||||||
|
chown -R llama-swap:llama-swap /etc/llama-swap
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
# Copy whisper.cpp binaries and libraries
|
# Copy whisper.cpp binaries and libraries
|
||||||
@@ -97,17 +102,7 @@ COPY --from=llama-swap-download /install/llama-swap-version /tmp/
|
|||||||
|
|
||||||
RUN ldconfig
|
RUN ldconfig
|
||||||
|
|
||||||
# Convenience symlinks
|
COPY config.example.yaml /etc/llama-swap/config/config.yaml
|
||||||
RUN ln -sf /usr/local/bin/llama-cli /usr/local/bin/llama && \
|
|
||||||
ln -sf /usr/local/bin/whisper-cli /usr/local/bin/whisper && \
|
|
||||||
ln -sf /usr/local/bin/sd-cli /usr/local/bin/stable-diffusion
|
|
||||||
|
|
||||||
# Validate all binaries exist
|
|
||||||
RUN set -e && \
|
|
||||||
for bin in llama-server llama-cli whisper-server whisper-cli sd-server sd-cli llama-swap; do \
|
|
||||||
test -x /usr/local/bin/$bin || { echo "FATAL: $bin missing from /usr/local/bin"; exit 1; }; \
|
|
||||||
done && \
|
|
||||||
echo "All binaries validated successfully"
|
|
||||||
|
|
||||||
# Version tracking
|
# Version tracking
|
||||||
RUN echo "llama.cpp: ${LLAMA_COMMIT_HASH}" > /versions.txt && \
|
RUN echo "llama.cpp: ${LLAMA_COMMIT_HASH}" > /versions.txt && \
|
||||||
@@ -118,4 +113,6 @@ RUN echo "llama.cpp: ${LLAMA_COMMIT_HASH}" > /versions.txt && \
|
|||||||
echo "build_timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" >> /versions.txt
|
echo "build_timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" >> /versions.txt
|
||||||
|
|
||||||
WORKDIR /models
|
WORKDIR /models
|
||||||
CMD ["bash"]
|
USER llama-swap
|
||||||
|
ENTRYPOINT ["llama-swap"]
|
||||||
|
CMD ["-config", "/etc/llama-swap/config/config.yaml", "-listen", "0.0.0.0:8080"]
|
||||||
|
|||||||
@@ -42,6 +42,7 @@ DOCKER_IMAGE_TAG="${DOCKER_IMAGE_TAG:-llama-swap:unified}"
|
|||||||
LLAMA_REPO="https://github.com/ggml-org/llama.cpp.git"
|
LLAMA_REPO="https://github.com/ggml-org/llama.cpp.git"
|
||||||
WHISPER_REPO="https://github.com/ggml-org/whisper.cpp.git"
|
WHISPER_REPO="https://github.com/ggml-org/whisper.cpp.git"
|
||||||
SD_REPO="https://github.com/leejet/stable-diffusion.cpp.git"
|
SD_REPO="https://github.com/leejet/stable-diffusion.cpp.git"
|
||||||
|
LLAMA_SWAP_REPO="https://github.com/mostlygeek/llama-swap.git"
|
||||||
|
|
||||||
# Resolve a git ref (commit hash, tag, or branch) to a full commit hash.
|
# Resolve a git ref (commit hash, tag, or branch) to a full commit hash.
|
||||||
# Requires only: git, network access to the remote.
|
# Requires only: git, network access to the remote.
|
||||||
@@ -131,9 +132,18 @@ else
|
|||||||
echo "stable-diffusion.cpp: latest HEAD: ${SD_HASH}"
|
echo "stable-diffusion.cpp: latest HEAD: ${SD_HASH}"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Resolve llama-swap version
|
# Resolve llama-swap ref
|
||||||
LS_VER="${LS_VERSION:-latest}"
|
if [[ -n "${LS_VERSION:-}" ]]; then
|
||||||
echo "llama-swap: ${LS_VER}"
|
LS_HASH=$(resolve_ref "${LLAMA_SWAP_REPO}" "${LS_VERSION}") || exit 1
|
||||||
|
echo "llama-swap: ${LS_VERSION} -> ${LS_HASH}"
|
||||||
|
else
|
||||||
|
LS_HASH=$(get_latest_hash "${LLAMA_SWAP_REPO}")
|
||||||
|
if [[ -z "${LS_HASH}" ]]; then
|
||||||
|
echo "ERROR: Could not determine latest commit for llama-swap" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo "llama-swap: latest HEAD: ${LS_HASH}"
|
||||||
|
fi
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "=========================================="
|
echo "=========================================="
|
||||||
@@ -147,7 +157,7 @@ BUILD_ARGS=(
|
|||||||
--build-arg "LLAMA_COMMIT_HASH=${LLAMA_HASH}"
|
--build-arg "LLAMA_COMMIT_HASH=${LLAMA_HASH}"
|
||||||
--build-arg "WHISPER_COMMIT_HASH=${WHISPER_HASH}"
|
--build-arg "WHISPER_COMMIT_HASH=${WHISPER_HASH}"
|
||||||
--build-arg "SD_COMMIT_HASH=${SD_HASH}"
|
--build-arg "SD_COMMIT_HASH=${SD_HASH}"
|
||||||
--build-arg "LS_VERSION=${LS_VER}"
|
--build-arg "LS_VERSION=${LS_HASH}"
|
||||||
-t "${DOCKER_IMAGE_TAG}"
|
-t "${DOCKER_IMAGE_TAG}"
|
||||||
-f "${SCRIPT_DIR}/Dockerfile"
|
-f "${SCRIPT_DIR}/Dockerfile"
|
||||||
)
|
)
|
||||||
@@ -155,6 +165,13 @@ BUILD_ARGS=(
|
|||||||
if [[ "$NO_CACHE" == true ]]; then
|
if [[ "$NO_CACHE" == true ]]; then
|
||||||
BUILD_ARGS+=(--no-cache)
|
BUILD_ARGS+=(--no-cache)
|
||||||
echo "Note: Building without cache"
|
echo "Note: Building without cache"
|
||||||
|
elif [[ "${GITHUB_ACTIONS:-}" == "true" && "${ACT:-}" != "true" ]]; then
|
||||||
|
CACHE_REF="ghcr.io/mostlygeek/llama-swap:unified-cache"
|
||||||
|
BUILD_ARGS+=(
|
||||||
|
--cache-from "type=registry,ref=${CACHE_REF}"
|
||||||
|
--cache-to "type=registry,ref=${CACHE_REF},mode=max"
|
||||||
|
)
|
||||||
|
echo "Note: Using registry cache (${CACHE_REF})"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
DOCKER_BUILDKIT=1 docker buildx build --load "${BUILD_ARGS[@]}" "${SCRIPT_DIR}"
|
DOCKER_BUILDKIT=1 docker buildx build --load "${BUILD_ARGS[@]}" "${SCRIPT_DIR}"
|
||||||
@@ -167,7 +184,7 @@ echo ""
|
|||||||
|
|
||||||
MISSING_BINARIES=()
|
MISSING_BINARIES=()
|
||||||
for binary in llama-server llama-cli whisper-server whisper-cli sd-server sd-cli llama-swap; do
|
for binary in llama-server llama-cli whisper-server whisper-cli sd-server sd-cli llama-swap; do
|
||||||
if ! docker run --rm "${DOCKER_IMAGE_TAG}" which "${binary}" >/dev/null 2>&1; then
|
if ! docker run --rm --entrypoint which "${DOCKER_IMAGE_TAG}" "${binary}" >/dev/null 2>&1; then
|
||||||
MISSING_BINARIES+=("${binary}")
|
MISSING_BINARIES+=("${binary}")
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
@@ -196,7 +213,7 @@ echo "Built with:"
|
|||||||
echo " llama.cpp: ${LLAMA_HASH}"
|
echo " llama.cpp: ${LLAMA_HASH}"
|
||||||
echo " whisper.cpp: ${WHISPER_HASH}"
|
echo " whisper.cpp: ${WHISPER_HASH}"
|
||||||
echo " stable-diffusion.cpp: ${SD_HASH}"
|
echo " stable-diffusion.cpp: ${SD_HASH}"
|
||||||
echo " llama-swap: $(docker run --rm "${DOCKER_IMAGE_TAG}" cat /versions.txt | grep llama-swap | cut -d' ' -f2-)"
|
echo " llama-swap: $(docker run --rm --entrypoint cat "${DOCKER_IMAGE_TAG}" /versions.txt | grep llama-swap | cut -d' ' -f2-)"
|
||||||
echo ""
|
echo ""
|
||||||
echo "Run with:"
|
echo "Run with:"
|
||||||
echo " docker run -it --rm --gpus all ${DOCKER_IMAGE_TAG}"
|
echo " docker run -it --rm --gpus all ${DOCKER_IMAGE_TAG}"
|
||||||
|
|||||||
@@ -0,0 +1,33 @@
|
|||||||
|
# placeholder example configuration
|
||||||
|
healthCheckTimeout: 300
|
||||||
|
logRequests: true
|
||||||
|
|
||||||
|
models:
|
||||||
|
"llama":
|
||||||
|
cmd: >
|
||||||
|
llama-server
|
||||||
|
-hf bartowski/Qwen2.5-0.5B-Instruct-GGUF:Q4_K_M
|
||||||
|
--port ${PORT}
|
||||||
|
|
||||||
|
"whisper":
|
||||||
|
checkEndpoint: /v1/audio/transcriptions/
|
||||||
|
cmd: >
|
||||||
|
whisper-server
|
||||||
|
--port ${PORT}
|
||||||
|
--m /models/whisper.bin
|
||||||
|
--flash-attn
|
||||||
|
--request-path /v1/audio/transcriptions --inference-path ""
|
||||||
|
|
||||||
|
"image":
|
||||||
|
checkEndpoint: /
|
||||||
|
cmd: |
|
||||||
|
/app/sd-server
|
||||||
|
--listen-port 9999
|
||||||
|
--diffusion-fa
|
||||||
|
--diffusion-model /models/z_image_turbo-Q8_0.gguf
|
||||||
|
--vae /models/ae.safetensors
|
||||||
|
--llm /models/qwen3-4b-instruct-2507-q8_0.gguf
|
||||||
|
--offload-to-cpu
|
||||||
|
--cfg-scale 1.0
|
||||||
|
--height 512 --width 512
|
||||||
|
--steps 8
|
||||||
@@ -5,12 +5,27 @@
|
|||||||
set -e
|
set -e
|
||||||
|
|
||||||
VERSION="${1:-latest}"
|
VERSION="${1:-latest}"
|
||||||
# Strip leading 'v' prefix so both "198" and "v198" work
|
|
||||||
VERSION="${VERSION#v}"
|
|
||||||
REPO="mostlygeek/llama-swap"
|
REPO="mostlygeek/llama-swap"
|
||||||
|
|
||||||
mkdir -p /install/bin
|
mkdir -p /install/bin
|
||||||
|
|
||||||
|
# If a full commit hash is given, find the release tag that points to it
|
||||||
|
if echo "${VERSION}" | grep -qE '^[0-9a-f]{40}$'; then
|
||||||
|
echo "=== Resolving commit ${VERSION:0:7} to release tag ==="
|
||||||
|
TAG=$(git ls-remote --tags "https://github.com/${REPO}.git" 2>/dev/null \
|
||||||
|
| grep "^${VERSION}" | sed 's|.*refs/tags/||' | grep -v '\^{}' | head -1)
|
||||||
|
if [ -n "${TAG}" ]; then
|
||||||
|
echo "Resolved to tag: ${TAG}"
|
||||||
|
VERSION="${TAG#v}"
|
||||||
|
else
|
||||||
|
echo "No release tag found for commit ${VERSION:0:7}, using latest"
|
||||||
|
VERSION="latest"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Strip leading 'v' prefix so both "198" and "v198" work
|
||||||
|
VERSION="${VERSION#v}"
|
||||||
|
|
||||||
# Resolve "latest" to actual version number
|
# Resolve "latest" to actual version number
|
||||||
if [ "$VERSION" = "latest" ]; then
|
if [ "$VERSION" = "latest" ]; then
|
||||||
echo "=== Resolving latest llama-swap release ==="
|
echo "=== Resolving latest llama-swap release ==="
|
||||||
|
|||||||
Reference in New Issue
Block a user