Improve container build reliability (#457)
* docker: add .env usage in build-container.sh * .github,docker: add rocm, improve logging * .github,CLAUDE.md: fix workflow and update guidelines Update containers workflow to only push images when triggered manually or on schedule, not on workflow file changes. - add push trigger for workflow file changes in containers.yml - update push condition to skip on regular push events - update CLAUDE.md commit message guidelines * docker: remove comma in build-container.sh * .github,docker: improve container build workflow Add pagination support for fetching llama.cpp tags and improve debugging. - add build-container.sh to workflow trigger paths - implement fetch_llama_tag() with pagination support - replace .env with local testing instructions - add DEBUG_ABORT_BUILD flag for testing
This commit is contained in:
@@ -10,12 +10,18 @@ on:
|
|||||||
# Allows manual triggering of the workflow
|
# Allows manual triggering of the workflow
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
|
||||||
|
# Run on workflow file changes (without pushing)
|
||||||
|
push:
|
||||||
|
paths:
|
||||||
|
- '.github/workflows/containers.yml'
|
||||||
|
- 'docker/build-container.sh'
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build-and-push:
|
build-and-push:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
platform: [intel, cuda, vulkan, cpu, musa]
|
platform: [intel, cuda, vulkan, cpu, musa, rocm]
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
@@ -31,7 +37,7 @@ jobs:
|
|||||||
- name: Run build-container
|
- name: Run build-container
|
||||||
env:
|
env:
|
||||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
run: ./docker/build-container.sh ${{ matrix.platform }} true
|
run: ./docker/build-container.sh ${{ matrix.platform }} ${{ github.event_name != 'push' }}
|
||||||
|
|
||||||
# note make sure mostlygeek/llama-swap has admin rights to the llama-swap package
|
# note make sure mostlygeek/llama-swap has admin rights to the llama-swap package
|
||||||
# see: https://github.com/actions/delete-package-versions/issues/74
|
# see: https://github.com/actions/delete-package-versions/issues/74
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ llama-swap is a light weight, transparent proxy server that provides automatic m
|
|||||||
- when summarizing changes only include details that require further action
|
- when summarizing changes only include details that require further action
|
||||||
- just say "Done." when there is no further action
|
- just say "Done." when there is no further action
|
||||||
- use `gh` to create PRs and load issues
|
- use `gh` to create PRs and load issues
|
||||||
- do not mention "created by claude" in commit messages
|
- do include Co-Authored-By or created by when committing changes or creating PRs
|
||||||
- keep PR descriptions short and focused on changes.
|
- keep PR descriptions short and focused on changes.
|
||||||
- never include a test plan
|
- never include a test plan
|
||||||
|
|
||||||
|
|||||||
+79
-14
@@ -2,21 +2,37 @@
|
|||||||
|
|
||||||
cd $(dirname "$0")
|
cd $(dirname "$0")
|
||||||
|
|
||||||
|
# use this to test locally, example:
|
||||||
|
# GITHUB_TOKEN=$(gh auth token) LOG_DEBUG=1 DEBUG_ABORT_BUILD=1 ./docker/build-container.sh rocm
|
||||||
|
# you need read:package scope on the token. Generate a personal access token with
|
||||||
|
# the scopes: gist, read:org, repo, write:packages
|
||||||
|
# then: gh auth login (and copy/paste the new token)
|
||||||
|
|
||||||
|
log_debug() {
|
||||||
|
if [ "$LOG_DEBUG" = "1" ]; then
|
||||||
|
echo "[DEBUG] $*"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
log_info() {
|
||||||
|
echo "[INFO] $*"
|
||||||
|
}
|
||||||
|
|
||||||
ARCH=$1
|
ARCH=$1
|
||||||
PUSH_IMAGES=${2:-false}
|
PUSH_IMAGES=${2:-false}
|
||||||
|
|
||||||
# List of allowed architectures
|
# List of allowed architectures
|
||||||
ALLOWED_ARCHS=("intel" "vulkan" "musa" "cuda" "cpu")
|
ALLOWED_ARCHS=("intel" "vulkan" "musa" "cuda" "cpu" "rocm")
|
||||||
|
|
||||||
# Check if ARCH is in the allowed list
|
# Check if ARCH is in the allowed list
|
||||||
if [[ ! " ${ALLOWED_ARCHS[@]} " =~ " ${ARCH} " ]]; then
|
if [[ ! " ${ALLOWED_ARCHS[@]} " =~ " ${ARCH} " ]]; then
|
||||||
echo "Error: ARCH must be one of the following: ${ALLOWED_ARCHS[@]}"
|
log_info "Error: ARCH must be one of the following: ${ALLOWED_ARCHS[@]}"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Check if GITHUB_TOKEN is set and not empty
|
# Check if GITHUB_TOKEN is set and not empty
|
||||||
if [[ -z "$GITHUB_TOKEN" ]]; then
|
if [[ -z "$GITHUB_TOKEN" ]]; then
|
||||||
echo "Error: GITHUB_TOKEN is not set or is empty."
|
log_info "Error: GITHUB_TOKEN is not set or is empty."
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@@ -32,25 +48,74 @@ LS_REPO=${GITHUB_REPOSITORY:-mostlygeek/llama-swap}
|
|||||||
# have to strip out the 'v' due to .tar.gz file naming
|
# have to strip out the 'v' due to .tar.gz file naming
|
||||||
LS_VER=$(curl -s https://api.github.com/repos/${LS_REPO}/releases/latest | jq -r .tag_name | sed 's/v//')
|
LS_VER=$(curl -s https://api.github.com/repos/${LS_REPO}/releases/latest | jq -r .tag_name | sed 's/v//')
|
||||||
|
|
||||||
|
# Fetches the most recent llama.cpp tag matching the given prefix
|
||||||
|
# Handles pagination to search beyond the first 100 results
|
||||||
|
# $1 - tag_prefix (e.g., "server" or "server-vulkan")
|
||||||
|
# Returns: the version number extracted from the tag
|
||||||
|
fetch_llama_tag() {
|
||||||
|
local tag_prefix=$1
|
||||||
|
local page=1
|
||||||
|
local per_page=100
|
||||||
|
|
||||||
|
while true; do
|
||||||
|
log_debug "Fetching page $page for tag prefix: $tag_prefix"
|
||||||
|
|
||||||
|
local response=$(curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \
|
||||||
|
"https://api.github.com/users/ggml-org/packages/container/llama.cpp/versions?per_page=${per_page}&page=${page}")
|
||||||
|
|
||||||
|
# Check for API errors
|
||||||
|
if echo "$response" | jq -e '.message' > /dev/null 2>&1; then
|
||||||
|
local error_msg=$(echo "$response" | jq -r '.message')
|
||||||
|
log_info "GitHub API error: $error_msg"
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Check if response is empty array (no more pages)
|
||||||
|
if [ "$(echo "$response" | jq 'length')" -eq 0 ]; then
|
||||||
|
log_debug "No more pages (empty response)"
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Extract matching tag from this page
|
||||||
|
local found_tag=$(echo "$response" | jq -r \
|
||||||
|
".[] | select(.metadata.container.tags[]? | startswith(\"$tag_prefix\")) | .metadata.container.tags[] | select(startswith(\"$tag_prefix\"))" \
|
||||||
|
| sort -r | head -n1)
|
||||||
|
|
||||||
|
if [ -n "$found_tag" ]; then
|
||||||
|
log_debug "Found tag: $found_tag on page $page"
|
||||||
|
echo "$found_tag" | awk -F '-' '{print $NF}'
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
page=$((page + 1))
|
||||||
|
|
||||||
|
# Safety limit to prevent infinite loops
|
||||||
|
if [ $page -gt 50 ]; then
|
||||||
|
log_info "Reached pagination safety limit (50 pages)"
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
if [ "$ARCH" == "cpu" ]; then
|
if [ "$ARCH" == "cpu" ]; then
|
||||||
# cpu only containers just use the server tag
|
LCPP_TAG=$(fetch_llama_tag "server")
|
||||||
LCPP_TAG=$(curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \
|
|
||||||
"https://api.github.com/users/ggml-org/packages/container/llama.cpp/versions" \
|
|
||||||
| jq -r '.[] | select(.metadata.container.tags[] | startswith("server")) | .metadata.container.tags[]' \
|
|
||||||
| sort -r | head -n1 | awk -F '-' '{print $3}')
|
|
||||||
BASE_TAG=server-${LCPP_TAG}
|
BASE_TAG=server-${LCPP_TAG}
|
||||||
else
|
else
|
||||||
LCPP_TAG=$(curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \
|
LCPP_TAG=$(fetch_llama_tag "server-${ARCH}")
|
||||||
"https://api.github.com/users/ggml-org/packages/container/llama.cpp/versions" \
|
|
||||||
| jq -r --arg arch "$ARCH" '.[] | select(.metadata.container.tags[] | startswith("server-\($arch)")) | .metadata.container.tags[]' \
|
|
||||||
| sort -r | head -n1 | awk -F '-' '{print $3}')
|
|
||||||
BASE_TAG=server-${ARCH}-${LCPP_TAG}
|
BASE_TAG=server-${ARCH}-${LCPP_TAG}
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Abort if LCPP_TAG is empty.
|
# Abort if LCPP_TAG is empty.
|
||||||
if [[ -z "$LCPP_TAG" ]]; then
|
if [[ -z "$LCPP_TAG" ]]; then
|
||||||
echo "Abort: Could not find llama-server container for arch: $ARCH"
|
log_info "Abort: Could not find llama-server container for arch: $ARCH"
|
||||||
exit 1
|
exit 1
|
||||||
|
else
|
||||||
|
log_info "LCPP_TAG: $LCPP_TAG"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ ! -z "$DEBUG_ABORT_BUILD" ]]; then
|
||||||
|
log_info "Abort: DEBUG_ABORT_BUILD set"
|
||||||
|
exit 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
for CONTAINER_TYPE in non-root root; do
|
for CONTAINER_TYPE in non-root root; do
|
||||||
@@ -68,7 +133,7 @@ for CONTAINER_TYPE in non-root root; do
|
|||||||
USER_HOME=/app
|
USER_HOME=/app
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "Building $CONTAINER_TYPE $CONTAINER_TAG $LS_VER"
|
log_info "Building $CONTAINER_TYPE $CONTAINER_TAG $LS_VER"
|
||||||
docker build -f llama-swap.Containerfile --build-arg BASE_TAG=${BASE_TAG} --build-arg LS_VER=${LS_VER} --build-arg UID=${USER_UID} \
|
docker build -f llama-swap.Containerfile --build-arg BASE_TAG=${BASE_TAG} --build-arg LS_VER=${LS_VER} --build-arg UID=${USER_UID} \
|
||||||
--build-arg LS_REPO=${LS_REPO} --build-arg GID=${USER_GID} --build-arg USER_HOME=${USER_HOME} -t ${CONTAINER_TAG} -t ${CONTAINER_LATEST} \
|
--build-arg LS_REPO=${LS_REPO} --build-arg GID=${USER_GID} --build-arg USER_HOME=${USER_HOME} -t ${CONTAINER_TAG} -t ${CONTAINER_LATEST} \
|
||||||
--build-arg BASE_IMAGE=${BASE_IMAGE} .
|
--build-arg BASE_IMAGE=${BASE_IMAGE} .
|
||||||
|
|||||||
Reference in New Issue
Block a user