Compare commits
24 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| d87f0ce2c5 | |||
| 06bc6a614c | |||
| a37b4866d8 | |||
| 981910d734 | |||
| a185efe37e | |||
| 1dd1aadf93 | |||
| 955900972a | |||
| c2c8cfaf81 | |||
| 1e440770ea | |||
| c794273c83 | |||
| 6574a52cbb | |||
| 8fabc75634 | |||
| e5e7391b6d | |||
| 2c282dccad | |||
| 916d13f5bd | |||
| a3725e7d09 | |||
| 15bd55d3a9 | |||
| c3c258a55d | |||
| 29a38fde0d | |||
| d569681daa | |||
| 24efdb76b1 | |||
| cc77139ff8 | |||
| 390a35bf93 | |||
| 181f71ca11 |
@@ -4,11 +4,15 @@ on:
|
||||
pull_request:
|
||||
paths:
|
||||
- "config-schema.json"
|
||||
- "config.example.yaml"
|
||||
- ".github/workflows/config-schema.yml"
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- "config-schema.json"
|
||||
- "config.example.yaml"
|
||||
- ".github/workflows/config-schema.yml"
|
||||
|
||||
workflow_dispatch:
|
||||
|
||||
@@ -39,3 +43,14 @@ jobs:
|
||||
fi
|
||||
|
||||
echo "✓ config-schema.json is valid"
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.x"
|
||||
|
||||
- name: Install check-jsonschema
|
||||
run: pip install check-jsonschema
|
||||
|
||||
- name: Validate config.example.yaml against schema
|
||||
run: check-jsonschema --schemafile config-schema.json config.example.yaml
|
||||
|
||||
@@ -29,7 +29,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
platform: [intel, cuda, vulkan, cpu, musa, rocm]
|
||||
platform: [intel, cuda, cuda13, vulkan, cpu, musa, rocm]
|
||||
fail-fast: false
|
||||
steps:
|
||||
- name: Checkout code
|
||||
|
||||
@@ -36,7 +36,7 @@ jobs:
|
||||
- name: Set up Go
|
||||
uses: actions/setup-go@v4
|
||||
with:
|
||||
go-version: '1.23'
|
||||
go-version-file: go.mod
|
||||
|
||||
# Only run in this linux based runner
|
||||
- name: Check Formatting
|
||||
@@ -51,7 +51,7 @@ jobs:
|
||||
uses: actions/cache/restore@v4
|
||||
with:
|
||||
path: ./build
|
||||
key: ${{ runner.os }}-simple-responder-${{ hashFiles('misc/simple-responder/simple-responder.go') }}
|
||||
key: ${{ runner.os }}-simple-responder-${{ hashFiles('cmd/simple-responder/simple-responder.go') }}
|
||||
|
||||
# necessary for testing proxy/Process swapping
|
||||
- name: Create simple-responder
|
||||
@@ -67,4 +67,4 @@ jobs:
|
||||
key: ${{ runner.os }}-simple-responder-${{ hashFiles('misc/simple-responder/simple-responder.go') }}
|
||||
|
||||
- name: Test all
|
||||
run: make test-all
|
||||
run: make test-all
|
||||
|
||||
@@ -0,0 +1,133 @@
|
||||
name: Build Unified Docker Image
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: "37 5 * * *"
|
||||
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
llama_cpp_ref:
|
||||
description: "llama.cpp commit hash, tag, or branch"
|
||||
required: false
|
||||
default: "master"
|
||||
whisper_ref:
|
||||
description: "whisper.cpp commit hash, tag, or branch"
|
||||
required: false
|
||||
default: "master"
|
||||
sd_ref:
|
||||
description: "stable-diffusion.cpp commit hash, tag, or branch"
|
||||
required: false
|
||||
default: "master"
|
||||
ik_llama_ref:
|
||||
description: "ik_llama.cpp commit hash, tag, or branch (CUDA only)"
|
||||
required: false
|
||||
default: "main"
|
||||
llama_swap_version:
|
||||
description: "llama-swap version (e.g. v198, latest, main)"
|
||||
required: false
|
||||
default: "main"
|
||||
build_cuda:
|
||||
description: "Build CUDA image"
|
||||
type: boolean
|
||||
required: false
|
||||
default: true
|
||||
build_vulkan:
|
||||
description: "Build Vulkan image"
|
||||
type: boolean
|
||||
required: false
|
||||
default: true
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
|
||||
jobs:
|
||||
setup:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
||||
steps:
|
||||
- id: set-matrix
|
||||
run: |
|
||||
# Assemble the list of backends to build and publish it as a compact JSON
# array in this step's `matrix` output.
backends=()
# schedule uses defaults (build both); workflow_dispatch respects inputs
if [[ "${{ github.event_name }}" == "schedule" ]] || [[ "${{ inputs.build_cuda }}" == "true" ]]; then
  backends+=("cuda")
fi
if [[ "${{ github.event_name }}" == "schedule" ]] || [[ "${{ inputs.build_vulkan }}" == "true" ]]; then
  backends+=("vulkan")
fi
# Build the array from positional arguments so an empty selection is `[]`.
# Piping `printf '%s\n'` with zero arguments into `jq -R` emits one blank
# line and produces `[""]`, which a `!= '[]'` check downstream cannot detect.
matrix=$(jq -cn '$ARGS.positional' --args "${backends[@]}")
echo "matrix=$matrix" >> "$GITHUB_OUTPUT"
|
||||
|
||||
build:
|
||||
needs: setup
|
||||
if: ${{ needs.setup.outputs.matrix != '[]' }}
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
backend: ${{ fromJSON(needs.setup.outputs.matrix) }}
|
||||
variant:
|
||||
- name: root
|
||||
uid: "0"
|
||||
suffix: ""
|
||||
- name: rootless
|
||||
uid: "10001"
|
||||
suffix: "-rootless"
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Free up disk space
|
||||
run: |
|
||||
echo "Before cleanup:"
|
||||
df -h
|
||||
sudo rm -rf /usr/share/dotnet
|
||||
sudo rm -rf /usr/local/lib/android
|
||||
sudo rm -rf /opt/ghc
|
||||
sudo rm -rf /opt/hostedtoolcache/CodeQL
|
||||
sudo docker system prune -af
|
||||
echo "After cleanup:"
|
||||
df -h
|
||||
|
||||
# On GitHub Actions runners, create a fresh builder.
|
||||
# When running locally under act, skip this and reuse the existing
|
||||
# llama-swap-builder (which has ccache warm) to avoid exhausting disk.
|
||||
- name: Set up Docker Buildx
|
||||
if: ${{ !env.ACT }}
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Log in to GitHub Container Registry
|
||||
if: ${{ !env.ACT }}
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Build unified Docker image (${{ matrix.backend }}, ${{ matrix.variant.name }})
|
||||
env:
|
||||
LLAMA_REF: ${{ inputs.llama_cpp_ref || 'master' }}
|
||||
WHISPER_REF: ${{ inputs.whisper_ref || 'master' }}
|
||||
SD_REF: ${{ inputs.sd_ref || 'master' }}
|
||||
IK_LLAMA_REF: ${{ inputs.ik_llama_ref || 'main' }}
|
||||
LS_VERSION: ${{ inputs.llama_swap_version || 'main' }}
|
||||
RUN_UID: ${{ matrix.variant.uid }}
|
||||
DOCKER_IMAGE_TAG: ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }}${{ matrix.variant.suffix }}
|
||||
# When running under act, use the local builder that has warm ccache.
|
||||
# On GitHub Actions, BUILDX_BUILDER is unset so docker uses the builder
|
||||
# created by setup-buildx-action above.
|
||||
BUILDX_BUILDER: ${{ env.ACT == 'true' && 'llama-swap-builder' || '' }}
|
||||
run: |
|
||||
chmod +x docker/unified/build-image.sh
|
||||
docker/unified/build-image.sh --${{ matrix.backend }}
|
||||
|
||||
- name: Push to GitHub Container Registry
|
||||
if: ${{ !env.ACT }}
|
||||
run: |
|
||||
TAG="ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }}${{ matrix.variant.suffix }}"
|
||||
docker push "${TAG}"
|
||||
DATE_TAG=$(date -u +%Y-%m-%d)
|
||||
docker tag "${TAG}" "${TAG}-${DATE_TAG}"
|
||||
docker push "${TAG}-${DATE_TAG}"
|
||||
@@ -21,6 +21,7 @@ llama-swap is a light weight, transparent proxy server that provides automatic m
|
||||
|
||||
- Follow test naming conventions like `TestProxyManager_<test name>`, `TestProcessGroup_<test name>`, etc.
|
||||
- Use `go test -v -run <name pattern for new tests>` to run any new tests you've written.
|
||||
- Run `gofmt -l .` before committing to verify formatting. Fix any reported files with `gofmt -w <file>`.
|
||||
- Use `make test-dev` after running new tests for a quick overall test run. This runs `go test` and `staticcheck`. Fix any static checking errors. Use this only when changes are made to any code under the `proxy/` directory.
|
||||
- Use `make test-all` before completing work. This includes long running concurrency tests.
|
||||
|
||||
|
||||
@@ -51,7 +51,7 @@ mac: ui
|
||||
linux: ui
|
||||
@echo "Building Linux binary..."
|
||||
GOOS=linux GOARCH=amd64 go build -ldflags="-X main.commit=${GIT_HASH} -X main.version=local_${GIT_HASH} -X main.date=${BUILD_DATE}" -o $(BUILD_DIR)/$(APP_NAME)-linux-amd64
|
||||
GOOS=linux GOARCH=arm64 go build -ldflags="-X main.commit=${GIT_HASH} -X main.version=local_${GIT_HASH} -X main.date=${BUILD_DATE}" -o $(BUILD_DIR)/$(APP_NAME)-linux-arm64
|
||||
#GOOS=linux GOARCH=arm64 go build -ldflags="-X main.commit=${GIT_HASH} -X main.version=local_${GIT_HASH} -X main.date=${BUILD_DATE}" -o $(BUILD_DIR)/$(APP_NAME)-linux-arm64
|
||||
|
||||
# Build Windows binary
|
||||
windows: ui
|
||||
|
||||
@@ -32,6 +32,10 @@ Built in Go for performance and simplicity, llama-swap has zero dependencies and
|
||||
- `v1/rerank`, `v1/reranking`, `/rerank`
|
||||
- `/infill` - for code infilling
|
||||
- `/completion` - for completion endpoint
|
||||
- ✅ SDAPI via [stable-diffusion.cpp's server](https://github.com/leejet/stable-diffusion.cpp/tree/master/examples/server)
|
||||
- `/sdapi/v1/txt2img`
|
||||
- `/sdapi/v1/img2img`
|
||||
- `/sdapi/v1/loras` - requires `model` in request body to fetch the correct loras
|
||||
- ✅ llama-swap API
|
||||
- `/ui` - web UI
|
||||
- `/upstream/:model_id` - direct access to upstream server ([demo](https://github.com/mostlygeek/llama-swap/pull/31))
|
||||
|
||||
@@ -274,6 +274,43 @@ func main() {
|
||||
c.String(200, fmt.Sprintf("%s %s", c.Request.Method, c.Request.URL.Path))
|
||||
})
|
||||
|
||||
// SD API endpoints
|
||||
r.POST("/sdapi/v1/txt2img", func(c *gin.Context) {
|
||||
body, err := io.ReadAll(c.Request.Body)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to read request body"})
|
||||
return
|
||||
}
|
||||
defer c.Request.Body.Close()
|
||||
|
||||
modelName := gjson.GetBytes(body, "model").String()
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"model": modelName,
|
||||
"images": []string{},
|
||||
})
|
||||
})
|
||||
|
||||
r.POST("/sdapi/v1/img2img", func(c *gin.Context) {
|
||||
body, err := io.ReadAll(c.Request.Body)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to read request body"})
|
||||
return
|
||||
}
|
||||
defer c.Request.Body.Close()
|
||||
|
||||
modelName := gjson.GetBytes(body, "model").String()
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"model": modelName,
|
||||
"images": []string{},
|
||||
})
|
||||
})
|
||||
|
||||
r.GET("/sdapi/v1/loras", func(c *gin.Context) {
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"loras": []string{},
|
||||
})
|
||||
})
|
||||
|
||||
address := "127.0.0.1:" + *port // Address with the specified port
|
||||
|
||||
srv := &http.Server{
|
||||
|
||||
+81
-4
@@ -39,6 +39,43 @@
|
||||
},
|
||||
"default": {},
|
||||
"description": "A dictionary of string substitutions. Macros are reusable snippets used in model cmd, cmdStop, proxy, checkEndpoint, filters.stripParams. Macro names must be <64 chars, match ^[a-zA-Z0-9_-]+$, and not be PORT or MODEL_ID. Values can be string, number, or boolean. Macros can reference other macros defined before them."
|
||||
},
|
||||
"timeouts": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"connect": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"default": 30,
|
||||
"description": "TCP connection timeout in seconds. Set to 0 to disable (not recommended)."
|
||||
},
|
||||
"responseHeader": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"default": 60,
|
||||
"description": "Time to wait for response headers in seconds. Set to 0 to disable (not recommended)."
|
||||
},
|
||||
"tlsHandshake": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"default": 10,
|
||||
"description": "TLS handshake timeout in seconds. Set to 0 to disable (not recommended)."
|
||||
},
|
||||
"expectContinue": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"default": 1,
|
||||
"description": "Expect-Continue timeout in seconds. Set to 0 to disable (not recommended)."
|
||||
},
|
||||
"idleConn": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"default": 90,
|
||||
"description": "Idle connection timeout in seconds. Set to 0 to disable (not recommended)."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"description": "Timeout settings for proxy connections."
|
||||
}
|
||||
},
|
||||
"properties": {
|
||||
@@ -48,6 +85,12 @@
|
||||
"default": 120,
|
||||
"description": "Number of seconds to wait for a model to be ready to serve requests."
|
||||
},
|
||||
"globalTTL": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"default": 0,
|
||||
"description": "Default TTL for all models in seconds, 0 means no TTL and models will never be automatically unloaded"
|
||||
},
|
||||
"logLevel": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
@@ -177,9 +220,9 @@
|
||||
},
|
||||
"ttl": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"default": 0,
|
||||
"description": "Automatically unload the model after ttl seconds. 0 disables unloading. Must be >0 to enable."
|
||||
"minimum": -1,
|
||||
"default": -1,
|
||||
"description": "Automatically unload the model after ttl seconds. -1 uses the global TTL value, 0 disables unloading. Must be >0 to enable."
|
||||
},
|
||||
"useModelName": {
|
||||
"type": "string",
|
||||
@@ -235,6 +278,9 @@
|
||||
"type": "boolean",
|
||||
"default": false,
|
||||
"description": "If true the model will not show up in /v1/models responses. It can still be used as normal in API requests."
|
||||
},
|
||||
"timeouts": {
|
||||
"$ref": "#/definitions/timeouts"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -361,6 +407,37 @@
|
||||
"additionalProperties": false,
|
||||
"default": {},
|
||||
"description": "Dictionary of filter settings for peer requests. Supports stripParams and setParams."
|
||||
},
|
||||
"timeouts": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"connect": {
|
||||
"type": "integer",
|
||||
"minimum": 1,
|
||||
"default": 30,
|
||||
"description": "TCP connection timeout in seconds."
|
||||
},
|
||||
"responseHeader": {
|
||||
"type": "integer",
|
||||
"minimum": 1,
|
||||
"default": 60,
|
||||
"description": "Time to wait for response headers in seconds."
|
||||
},
|
||||
"tlsHandshake": {
|
||||
"type": "integer",
|
||||
"minimum": 1,
|
||||
"default": 10,
|
||||
"description": "TLS handshake timeout in seconds."
|
||||
},
|
||||
"idleConn": {
|
||||
"type": "integer",
|
||||
"minimum": 1,
|
||||
"default": 90,
|
||||
"description": "Idle connection timeout in seconds."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"description": "Timeout settings for proxy connections to this peer."
|
||||
}
|
||||
}
|
||||
},
|
||||
@@ -368,4 +445,4 @@
|
||||
"description": "A dictionary of remote peers and models they provide. Peers can be another llama-swap or any server that provides the /v1/ generative API endpoints supported by llama-swap."
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
+34
-2
@@ -75,6 +75,11 @@ sendLoadingState: true
|
||||
# all fields except for Id so chat UIs can use the alias equivalent to the original.
|
||||
includeAliasesInList: false
|
||||
|
||||
# globalTTL: the default TTL in seconds before unloading a model
|
||||
# - optional, default: 0 (never automatically unload)
|
||||
# - must be >= 0
|
||||
globalTTL: 0
|
||||
|
||||
# macros: a dictionary of string substitutions
|
||||
# - optional, default: empty dictionary
|
||||
# - macros are reusable snippets
|
||||
@@ -180,8 +185,10 @@ models:
|
||||
checkEndpoint: /custom-endpoint
|
||||
|
||||
# ttl: automatically unload the model after ttl seconds
|
||||
# - optional, default: 0
|
||||
# - ttl values must be a value greater than 0
|
||||
# - optional, default: -1 (use global default)
|
||||
# - ttl must be -1 or a value greater than or equal to 0
|
||||
# - a ttl of -1 will use the global TTL value as the default
|
||||
# - a ttl of 0 will mean never unload
|
||||
# - a value of 0 disables automatic unloading of the model
|
||||
ttl: 60
|
||||
|
||||
@@ -277,6 +284,21 @@ models:
|
||||
# - optional, default: undefined (use global setting)
|
||||
sendLoadingState: false
|
||||
|
||||
# timeouts: configure proxy connection timeouts for this model
|
||||
# - optional, defaults shown below
|
||||
# - useful for models running on slower hardware that need longer timeouts
|
||||
# - connect: TCP connection timeout in seconds
|
||||
# - responseHeader: time to wait for response headers in seconds
|
||||
# (increasing this helps avoid 502 errors on slow hardware)
|
||||
# - tlsHandshake: TLS handshake timeout in seconds
|
||||
# - idleConn: idle connection timeout in seconds
|
||||
# - set any value to 0 to disable that timeout (not recommended)
|
||||
timeouts:
|
||||
connect: 30
|
||||
responseHeader: 60
|
||||
tlsHandshake: 10
|
||||
idleConn: 90
|
||||
|
||||
# Unlisted model example:
|
||||
"qwen-unlisted":
|
||||
# unlisted: boolean, true or false
|
||||
@@ -419,6 +441,16 @@ peers:
|
||||
- z-ai/glm-4.7
|
||||
- moonshotai/kimi-k2-0905
|
||||
- minimax/minimax-m2.1
|
||||
# timeouts: configure proxy connection timeouts for this peer
|
||||
# - optional, defaults shown below
|
||||
# - useful when the peer runs on slower hardware
|
||||
# - set any value to 0 to disable that timeout (not recommended)
|
||||
timeouts:
|
||||
connect: 30
|
||||
responseHeader: 60
|
||||
tlsHandshake: 10
|
||||
idleConn: 90
|
||||
|
||||
# filters: a dictionary of filter settings for peer requests
|
||||
# - optional, default: empty dictionary
|
||||
# - same capabilities as model filters (stripParams, setParams)
|
||||
|
||||
@@ -27,7 +27,7 @@ ARCH=$1
|
||||
PUSH_IMAGES=${2:-false}
|
||||
|
||||
# List of allowed architectures
|
||||
ALLOWED_ARCHS=("intel" "vulkan" "musa" "cuda" "cpu" "rocm")
|
||||
ALLOWED_ARCHS=("intel" "vulkan" "musa" "cuda" "cuda13" "cpu" "rocm")
|
||||
|
||||
# Check if ARCH is in the allowed list
|
||||
if [[ ! " ${ALLOWED_ARCHS[@]} " =~ " ${ARCH} " ]]; then
|
||||
|
||||
Executable
+305
@@ -0,0 +1,305 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Build script for llama-swap-docker with commit hash pinning
|
||||
#
|
||||
# Usage:
|
||||
# ./build-image.sh --cuda # Build CUDA image
|
||||
# ./build-image.sh --vulkan # Build Vulkan image
|
||||
# ./build-image.sh --cuda --no-cache # Build CUDA image without cache
|
||||
# LLAMA_COMMIT_HASH=abc123 ./build-image.sh --cuda # Override llama.cpp commit
|
||||
# LLAMA_COMMIT_HASH=b8429 ./build-image.sh --vulkan # Override llama.cpp release tag (vulkan uses prebuilt binaries)
|
||||
# WHISPER_COMMIT_HASH=def456 ./build-image.sh --vulkan # Override whisper.cpp commit
|
||||
# SD_COMMIT_HASH=ghi789 ./build-image.sh --cuda # Override stable-diffusion.cpp commit
|
||||
#
|
||||
# Features:
|
||||
# - Auto-detects latest commit hashes from git repos
|
||||
# - Builds llama-swap from local source code
|
||||
# - Allows environment variable overrides for reproducible builds
|
||||
# - Cache-friendly: changing commit hash busts cache appropriately
|
||||
# - Supports both CUDA and Vulkan backends (requires explicit flag)
|
||||
#
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# Parse command line arguments
|
||||
BACKEND=""
|
||||
NO_CACHE=false
|
||||
|
||||
# Print the option and environment-variable reference. Shared by the
# "no arguments" error path, --help, and the unknown-option error path.
print_usage() {
    echo "Usage: ./build-image.sh --cuda|--vulkan [--no-cache]"
    echo ""
    echo "Options:"
    echo " --cuda Build CUDA image (NVIDIA GPUs)"
    echo " --vulkan Build Vulkan image (AMD GPUs and compatible hardware)"
    echo " --no-cache Force rebuild without using Docker cache"
    echo " --help, -h Show this help message"
    echo ""
    echo "Environment variables:"
    echo " DOCKER_IMAGE_TAG Set custom image tag (default: llama-swap:cuda or llama-swap:vulkan)"
    echo " LLAMA_COMMIT_HASH Override llama.cpp commit hash"
    echo " WHISPER_COMMIT_HASH Override whisper.cpp commit hash"
    echo " SD_COMMIT_HASH Override stable-diffusion.cpp commit hash"
}

# A backend flag is mandatory; with no arguments at all, explain and fail.
if [[ $# -eq 0 ]]; then
    echo "Error: No backend specified. Please use --cuda or --vulkan."
    echo ""
    print_usage
    exit 1
fi

# Flags may appear in any order. If both backends are given, the last one wins.
for arg in "$@"; do
    case "$arg" in
        --cuda)
            BACKEND="cuda"
            ;;
        --vulkan)
            BACKEND="vulkan"
            ;;
        --no-cache)
            NO_CACHE=true
            ;;
        --help|-h)
            print_usage
            exit 0
            ;;
        *)
            # Reject typos (e.g. --nocache) instead of silently ignoring them
            # and building with settings the caller did not ask for.
            echo "Error: Unknown option: ${arg}" >&2
            echo "" >&2
            print_usage >&2
            exit 1
            ;;
    esac
done
|
||||
|
||||
# Validate backend selection
|
||||
if [[ -z "$BACKEND" ]]; then
|
||||
echo "Error: No backend specified. Please use --cuda or --vulkan."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Configuration
|
||||
if [[ -n "${DOCKER_IMAGE_TAG:-}" ]]; then
|
||||
# User provided a custom tag, use it as-is
|
||||
:
|
||||
elif [[ "$BACKEND" == "vulkan" ]]; then
|
||||
DOCKER_IMAGE_TAG="llama-swap:vulkan"
|
||||
else
|
||||
DOCKER_IMAGE_TAG="llama-swap:cuda"
|
||||
fi
|
||||
DOCKER_BUILDKIT="${DOCKER_BUILDKIT:-1}"
|
||||
|
||||
# Single unified Dockerfile, backend selected via build arg
|
||||
DOCKERFILE="Dockerfile"
|
||||
if [[ "$BACKEND" == "vulkan" ]]; then
|
||||
echo "Building for: Vulkan (AMD GPUs and compatible hardware)"
|
||||
else
|
||||
echo "Building for: CUDA (NVIDIA GPUs)"
|
||||
fi
|
||||
|
||||
# Git repository URLs
|
||||
LLAMA_REPO="https://github.com/ggml-org/llama.cpp.git"
|
||||
WHISPER_REPO="https://github.com/ggml-org/whisper.cpp.git"
|
||||
SD_REPO="https://github.com/leejet/stable-diffusion.cpp.git"
|
||||
|
||||
# Print the commit hash at the tip of a branch on a remote git repository.
#
# Arguments:
#   $1 - repository URL
#   $2 - branch name (default: master)
# Output:
#   The hash on stdout, or an empty result when the remote cannot be queried
#   or no ref matches.
# Returns:
#   Always 0. The script runs under `set -euo pipefail`, so a non-zero status
#   here would abort it at the caller's assignment, before the caller's own
#   empty-result check can print its error message.
get_latest_commit() {
    local repo_url="$1"
    local branch="${2:-master}"
    local refs

    refs=$(git ls-remote --heads "${repo_url}" "${branch}" 2>/dev/null) || refs=""

    # Keep only the first matching ref; the hash is the first tab-separated field.
    printf '%s\n' "${refs}" | head -1 | cut -f1
}
|
||||
|
||||
# Print the name of the branch to track on a remote: "master" if it exists,
# otherwise "main", otherwise "master" as a last-resort fallback.
#
# Arguments:
#   $1 - repository URL
#
# `git ls-remote` exits 0 even when no ref matches the pattern, so
# `--exit-code` is required to make the existence check meaningful (it exits
# with status 2 when nothing matches). Full ref names are used so that a
# branch such as `feature/master` cannot satisfy the check.
get_default_branch() {
    local repo_url="$1"
    local candidate

    for candidate in master main; do
        if git ls-remote --exit-code --heads "${repo_url}" "refs/heads/${candidate}" &>/dev/null; then
            echo "${candidate}"
            return 0
        fi
    done

    echo "master" # fallback
}
|
||||
|
||||
# Print the tag name of the latest GitHub release of a repository.
#
# Arguments:
#   $1 - repository in "owner/name" form
# Output:
#   The tag on stdout, or an empty result when the API request fails or the
#   response contains no "tag_name" field.
# Returns:
#   Always 0. Under `set -euo pipefail` a failing curl or grep would abort the
#   script at the caller's assignment, skipping the caller's own error message
#   for an empty result.
get_latest_release_tag() {
    local owner_repo="$1"
    local release_json

    release_json=$(curl -fsSL "https://api.github.com/repos/${owner_repo}/releases/latest") || release_json=""

    # The field is the 4th double-quote-delimited token on the "tag_name" line.
    printf '%s\n' "${release_json}" | grep '"tag_name"' | head -1 | cut -d'"' -f4 || true
}
|
||||
|
||||
echo "=========================================="
|
||||
echo "llama-swap-docker Build Script"
|
||||
echo "=========================================="
|
||||
echo ""
|
||||
|
||||
# Determine commit hashes / release tags - use env vars or auto-detect
|
||||
# For vulkan builds, llama and sd use GitHub release tags (prebuilt binaries).
|
||||
# For cuda builds (or whisper on any backend), use git commit hashes.
|
||||
if [[ -n "${LLAMA_COMMIT_HASH:-}" ]]; then
|
||||
LLAMA_HASH="${LLAMA_COMMIT_HASH}"
|
||||
echo "llama.cpp: Using provided version: ${LLAMA_HASH}"
|
||||
elif [[ "$BACKEND" == "vulkan" ]]; then
|
||||
LLAMA_HASH=$(get_latest_release_tag "ggml-org/llama.cpp")
|
||||
if [[ -z "${LLAMA_HASH}" ]]; then
|
||||
echo "ERROR: Could not determine latest release tag for llama.cpp" >&2
|
||||
exit 1
|
||||
fi
|
||||
echo "llama.cpp: Auto-detected latest release tag: ${LLAMA_HASH}"
|
||||
else
|
||||
LLAMA_BRANCH=$(get_default_branch "${LLAMA_REPO}")
|
||||
LLAMA_HASH=$(get_latest_commit "${LLAMA_REPO}" "${LLAMA_BRANCH}")
|
||||
if [[ -z "${LLAMA_HASH}" ]]; then
|
||||
echo "ERROR: Could not determine latest commit for llama.cpp" >&2
|
||||
exit 1
|
||||
fi
|
||||
echo "llama.cpp: Auto-detected latest commit (${LLAMA_BRANCH}): ${LLAMA_HASH}"
|
||||
fi
|
||||
|
||||
if [[ -n "${WHISPER_COMMIT_HASH:-}" ]]; then
|
||||
WHISPER_HASH="${WHISPER_COMMIT_HASH}"
|
||||
echo "whisper.cpp: Using provided commit hash: ${WHISPER_HASH}"
|
||||
else
|
||||
WHISPER_BRANCH=$(get_default_branch "${WHISPER_REPO}")
|
||||
WHISPER_HASH=$(get_latest_commit "${WHISPER_REPO}" "${WHISPER_BRANCH}")
|
||||
if [[ -z "${WHISPER_HASH}" ]]; then
|
||||
echo "ERROR: Could not determine latest commit for whisper.cpp" >&2
|
||||
exit 1
|
||||
fi
|
||||
echo "whisper.cpp: Auto-detected latest commit (${WHISPER_BRANCH}): ${WHISPER_HASH}"
|
||||
fi
|
||||
|
||||
if [[ -n "${SD_COMMIT_HASH:-}" ]]; then
|
||||
SD_HASH="${SD_COMMIT_HASH}"
|
||||
echo "stable-diffusion.cpp: Using provided version: ${SD_HASH}"
|
||||
elif [[ "$BACKEND" == "vulkan" ]]; then
|
||||
SD_HASH=$(get_latest_release_tag "leejet/stable-diffusion.cpp")
|
||||
if [[ -z "${SD_HASH}" ]]; then
|
||||
echo "ERROR: Could not determine latest release tag for stable-diffusion.cpp" >&2
|
||||
exit 1
|
||||
fi
|
||||
echo "stable-diffusion.cpp: Auto-detected latest release tag: ${SD_HASH}"
|
||||
else
|
||||
SD_BRANCH=$(get_default_branch "${SD_REPO}")
|
||||
SD_HASH=$(get_latest_commit "${SD_REPO}" "${SD_BRANCH}")
|
||||
if [[ -z "${SD_HASH}" ]]; then
|
||||
echo "ERROR: Could not determine latest commit for stable-diffusion.cpp" >&2
|
||||
exit 1
|
||||
fi
|
||||
echo "stable-diffusion.cpp: Auto-detected latest commit (${SD_BRANCH}): ${SD_HASH}"
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "=========================================="
|
||||
echo "Starting Docker build..."
|
||||
echo "=========================================="
|
||||
echo ""
|
||||
|
||||
# Build the Docker image with commit hashes as build args
|
||||
# Build context is the repository root (..) so the Dockerfile can access Go source
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
|
||||
BUILD_ARGS=(
|
||||
--build-arg "BACKEND=${BACKEND}"
|
||||
--build-arg "LLAMA_COMMIT_HASH=${LLAMA_HASH}"
|
||||
--build-arg "WHISPER_COMMIT_HASH=${WHISPER_HASH}"
|
||||
--build-arg "SD_COMMIT_HASH=${SD_HASH}"
|
||||
-t "${DOCKER_IMAGE_TAG}"
|
||||
-f "${SCRIPT_DIR}/${DOCKERFILE}"
|
||||
)
|
||||
|
||||
if [[ "$NO_CACHE" == true ]]; then
|
||||
BUILD_ARGS+=(--no-cache)
|
||||
echo "Note: Building without cache"
|
||||
fi
|
||||
|
||||
# Use docker buildx with a custom builder for parallelism control
|
||||
# The legacy DOCKER_BUILDKIT=1 docker build doesn't respect BUILDKIT_MAX_PARALLELISM env var
|
||||
# We need to use a custom builder with a buildkitd.toml config file
|
||||
BUILDER_NAME="llama-swap-builder"
|
||||
|
||||
# Create the custom builder if it does not exist yet; otherwise reuse the existing one as-is (its config is not re-checked)
|
||||
if ! docker buildx inspect "$BUILDER_NAME" >/dev/null 2>&1; then
|
||||
echo "Creating custom buildx builder with max-parallelism=1..."
|
||||
|
||||
# Create buildkitd.toml config file
|
||||
cat > buildkitd.toml << 'BUILDKIT_EOF'
|
||||
[worker.oci]
|
||||
max-parallelism = 1
|
||||
BUILDKIT_EOF
|
||||
|
||||
# Create the builder with the config
|
||||
docker buildx create --name "$BUILDER_NAME" \
|
||||
--driver docker-container \
|
||||
--buildkitd-config buildkitd.toml \
|
||||
--use
|
||||
else
|
||||
# Switch to our builder
|
||||
docker buildx use "$BUILDER_NAME"
|
||||
fi
|
||||
|
||||
echo "Building with sequential stages (one at a time), each using all CPU cores..."
|
||||
echo "Using builder: $BUILDER_NAME"
|
||||
|
||||
# Use docker buildx build with --load to load the image into Docker
|
||||
# The --builder flag ensures we use our custom builder with max-parallelism=1
|
||||
# Build context is the repository root so we can access Go source files
|
||||
docker buildx build --builder "$BUILDER_NAME" --load "${BUILD_ARGS[@]}" "${REPO_ROOT}"
|
||||
|
||||
echo ""
|
||||
echo "=========================================="
|
||||
echo "Verifying build artifacts..."
|
||||
echo "=========================================="
|
||||
echo ""
|
||||
|
||||
# Verify all expected binaries exist in the image
|
||||
MISSING_BINARIES=()
|
||||
|
||||
for binary in llama-server llama-cli whisper-server whisper-cli sd-server sd-cli llama-swap; do
|
||||
if ! docker run --rm "${DOCKER_IMAGE_TAG}" which "${binary}" >/dev/null 2>&1; then
|
||||
MISSING_BINARIES+=("${binary}")
|
||||
fi
|
||||
done
|
||||
|
||||
if [[ ${#MISSING_BINARIES[@]} -gt 0 ]]; then
|
||||
echo "ERROR: Build succeeded but the following binaries are missing from the image:"
|
||||
for binary in "${MISSING_BINARIES[@]}"; do
|
||||
echo " - ${binary}"
|
||||
done
|
||||
echo ""
|
||||
echo "This usually indicates a build stage failure. Try running with --no-cache flag:"
|
||||
echo " ./build-image.sh --vulkan --no-cache"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "All expected binaries verified: llama-server, llama-cli, whisper-server, whisper-cli, sd-server, sd-cli, llama-swap"
|
||||
|
||||
echo ""
|
||||
echo "=========================================="
|
||||
echo "Build complete!"
|
||||
echo "=========================================="
|
||||
echo ""
|
||||
echo "Image tag: ${DOCKER_IMAGE_TAG}"
|
||||
echo ""
|
||||
echo "Built with:"
|
||||
echo " llama.cpp: ${LLAMA_HASH}"
|
||||
echo " whisper.cpp: ${WHISPER_HASH}"
|
||||
echo " stable-diffusion.cpp: ${SD_HASH}"
|
||||
echo " llama-swap: $(docker run --rm "${DOCKER_IMAGE_TAG}" cat /versions.txt | grep llama-swap | cut -d' ' -f2-)"
|
||||
echo ""
|
||||
if [[ "$BACKEND" == "vulkan" ]]; then
|
||||
echo "Run with:"
|
||||
echo " docker run -it --rm --device /dev/dri:/dev/dri ${DOCKER_IMAGE_TAG}"
|
||||
echo ""
|
||||
echo "Note: For AMD GPUs, you may also need to mount render devices:"
|
||||
echo " docker run -it --rm --device /dev/dri:/dev/dri --group-add video ${DOCKER_IMAGE_TAG}"
|
||||
else
|
||||
echo "Run with:"
|
||||
echo " docker run -it --rm --gpus all ${DOCKER_IMAGE_TAG}"
|
||||
fi
|
||||
@@ -0,0 +1,203 @@
|
||||
# Unified multi-stage Dockerfile for AI inference tools
|
||||
# Supports CUDA and Vulkan backends via BACKEND build arg
|
||||
#
|
||||
# Usage:
|
||||
# docker buildx build --build-arg BACKEND=cuda -t llama-swap:unified-cuda .
|
||||
# docker buildx build --build-arg BACKEND=vulkan -t llama-swap:unified-vulkan .
|
||||
# docker buildx build --build-arg BACKEND=cuda --build-arg CMAKE_CUDA_ARCHITECTURES="86;89" -t llama-swap:unified-cuda .
|
||||
#
|
||||
# Each project has its own install script that handles cloning, building,
|
||||
# and installing binaries. Build stages are independent for cache efficiency.
|
||||
|
||||
ARG BACKEND=cuda
|
||||
|
||||
# ── Builder bases ──────────────────────────────────────────────────────
|
||||
|
||||
FROM nvidia/cuda:12.9.1-devel-ubuntu24.04 AS builder-base-cuda
|
||||
|
||||
ARG CMAKE_CUDA_ARCHITECTURES="60;61;75;86;89"
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
ENV CMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}
|
||||
ENV CCACHE_DIR=/ccache
|
||||
ENV CCACHE_MAXSIZE=2G
|
||||
ENV PATH="/usr/lib/ccache:${PATH}"
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
build-essential cmake git python3 python3-pip libssl-dev \
|
||||
curl ca-certificates ccache make wget \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /build
|
||||
|
||||
# ──
|
||||
|
||||
FROM ubuntu:24.04 AS builder-base-vulkan
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
ENV CCACHE_DIR=/ccache
|
||||
ENV CCACHE_MAXSIZE=2G
|
||||
ENV PATH="/usr/lib/ccache:${PATH}"
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
build-essential cmake git python3 python3-pip libssl-dev \
|
||||
curl ca-certificates ccache make wget software-properties-common \
|
||||
libvulkan-dev glslang-tools spirv-tools vulkan-validationlayers glslc \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /build
|
||||
|
||||
# ── Select builder base by BACKEND ────────────────────────────────────
|
||||
|
||||
FROM builder-base-${BACKEND} AS builder-base
|
||||
|
||||
# ── Build whisper.cpp (fastest build, run first) ──────────────────────
|
||||
|
||||
FROM builder-base AS whisper-build
|
||||
ARG BACKEND=cuda
|
||||
ARG WHISPER_COMMIT_HASH=master
|
||||
COPY install-whisper.sh /build/
|
||||
RUN --mount=type=cache,id=ccache-${BACKEND},target=/ccache \
|
||||
--mount=type=cache,id=whisper-${BACKEND},target=/src/whisper.cpp/build \
|
||||
BACKEND=${BACKEND} bash /build/install-whisper.sh "${WHISPER_COMMIT_HASH}"
|
||||
|
||||
# ── Build stable-diffusion.cpp ────────────────────────────────────────
|
||||
|
||||
FROM builder-base AS sd-build
|
||||
ARG BACKEND=cuda
|
||||
ARG SD_COMMIT_HASH=master
|
||||
COPY install-sd.sh /build/
|
||||
RUN --mount=type=cache,id=ccache-${BACKEND},target=/ccache \
|
||||
--mount=type=cache,id=sd-${BACKEND},target=/src/stable-diffusion.cpp/build \
|
||||
BACKEND=${BACKEND} bash /build/install-sd.sh "${SD_COMMIT_HASH}"
|
||||
|
||||
# ── Build llama.cpp (slowest build, run last) ─────────────────────────
|
||||
|
||||
FROM builder-base AS llama-build
|
||||
ARG BACKEND=cuda
|
||||
ARG LLAMA_COMMIT_HASH=master
|
||||
COPY install-llama.sh /build/
|
||||
RUN --mount=type=cache,id=ccache-${BACKEND},target=/ccache \
|
||||
--mount=type=cache,id=llama-${BACKEND},target=/src/llama.cpp/build \
|
||||
BACKEND=${BACKEND} bash /build/install-llama.sh "${LLAMA_COMMIT_HASH}"
|
||||
|
||||
# ── Build ik_llama.cpp (CUDA only) ────────────────────────────────────
|
||||
#
|
||||
# Two named stages allow ARG BACKEND to select at build time:
|
||||
# - ik-llama-cuda : real build (from builder-base-cuda)
|
||||
# - ik-llama-vulkan: no-op (empty /install/bin, skips CUDA pull entirely)
|
||||
# BuildKit only evaluates the selected branch, so vulkan builds never
|
||||
# pull nvidia/cuda:*-devel or compile ik_llama.cpp.
|
||||
|
||||
FROM builder-base-vulkan AS ik-llama-vulkan
|
||||
RUN mkdir -p /install/bin
|
||||
|
||||
FROM builder-base-cuda AS ik-llama-cuda
|
||||
ARG IK_LLAMA_COMMIT_HASH=main
|
||||
COPY install-ik-llama.sh /build/
|
||||
RUN --mount=type=cache,id=ccache-cuda,target=/ccache \
|
||||
--mount=type=cache,id=ik-llama-cuda,target=/src/ik_llama.cpp/build \
|
||||
bash /build/install-ik-llama.sh "${IK_LLAMA_COMMIT_HASH}"
|
||||
|
||||
ARG BACKEND=cuda
|
||||
FROM ik-llama-${BACKEND} AS ik-llama-build
|
||||
|
||||
# ── Download llama-swap release binary ────────────────────────────────
|
||||
|
||||
FROM builder-base AS llama-swap-download
|
||||
ARG LS_VERSION=latest
|
||||
COPY install-llama-swap.sh /build/
|
||||
RUN bash /build/install-llama-swap.sh "${LS_VERSION}"
|
||||
|
||||
# ── Runtime bases ─────────────────────────────────────────────────────
|
||||
|
||||
FROM nvidia/cuda:12.9.1-runtime-ubuntu24.04 AS runtime-cuda
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"
|
||||
ENV PATH="/usr/local/bin:${PATH}"
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libgomp1 python3 curl ca-certificates \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# CUDA stub drivers for container compatibility
|
||||
COPY --from=builder-base-cuda /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so
|
||||
COPY --from=builder-base-cuda /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1
|
||||
|
||||
# ──
|
||||
|
||||
FROM ubuntu:24.04 AS runtime-vulkan
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
ENV PATH="/usr/local/bin:${PATH}"
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libgomp1 libvulkan1 mesa-vulkan-drivers \
|
||||
python3 curl ca-certificates \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# ── Select runtime base by BACKEND ────────────────────────────────────
|
||||
|
||||
FROM runtime-${BACKEND} AS runtime
|
||||
|
||||
ARG BACKEND=cuda
|
||||
ARG LLAMA_COMMIT_HASH=unknown
|
||||
ARG WHISPER_COMMIT_HASH=unknown
|
||||
ARG SD_COMMIT_HASH=unknown
|
||||
ARG IK_LLAMA_COMMIT_HASH=unknown
|
||||
ARG RUN_UID=0
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
python3-numpy python3-sentencepiece \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Create non-root user when RUN_UID != 0.
# Ubuntu 24.04 based images already ship an "ubuntu" account with UID/GID 1000,
# so an unconditional groupadd/useradd fails for the most common RUN_UID value.
# The group and user are therefore only created when the requested numeric id
# is not already present; an existing account with that id is reused as-is.
# /etc/llama-swap is always handed to RUN_UID so the config directory is
# owned by the account the container runs as.
RUN if [ "$RUN_UID" != "0" ]; then \
        { getent group "$RUN_UID" >/dev/null || \
            groupadd --system --gid "$RUN_UID" llama-swap; } && \
        { getent passwd "$RUN_UID" >/dev/null || \
            useradd --system --uid "$RUN_UID" --gid "$RUN_UID" \
                --home /app --shell /sbin/nologin llama-swap; }; \
    fi && \
    mkdir -p /etc/llama-swap/config && \
    chown -R ${RUN_UID}:${RUN_UID} /etc/llama-swap
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy whisper.cpp binaries and libraries
|
||||
COPY --from=whisper-build /install/bin/whisper-server /usr/local/bin/
|
||||
COPY --from=whisper-build /install/bin/whisper-cli /usr/local/bin/
|
||||
COPY --from=whisper-build /install/lib/ /usr/local/lib/
|
||||
|
||||
# Copy stable-diffusion.cpp binaries and libraries
|
||||
COPY --from=sd-build /install/bin/sd-server /usr/local/bin/
|
||||
COPY --from=sd-build /install/bin/sd-cli /usr/local/bin/
|
||||
COPY --from=sd-build /install/lib/ /usr/local/lib/
|
||||
|
||||
# Copy llama.cpp binaries (statically linked)
|
||||
COPY --from=llama-build /install/bin/llama-server /usr/local/bin/
|
||||
COPY --from=llama-build /install/bin/llama-cli /usr/local/bin/
|
||||
|
||||
# Copy ik-llama-server (CUDA only; empty copy for vulkan)
|
||||
COPY --from=ik-llama-build /install/bin/ /usr/local/bin/
|
||||
|
||||
# Copy llama-swap binary
|
||||
COPY --from=llama-swap-download /install/bin/llama-swap /usr/local/bin/
|
||||
COPY --from=llama-swap-download /install/llama-swap-version /tmp/
|
||||
|
||||
RUN ldconfig
|
||||
|
||||
COPY config.example.yaml /etc/llama-swap/config/config.yaml
|
||||
|
||||
# Version tracking
|
||||
RUN echo "llama.cpp: ${LLAMA_COMMIT_HASH}" > /versions.txt && \
|
||||
echo "whisper.cpp: ${WHISPER_COMMIT_HASH}" >> /versions.txt && \
|
||||
echo "stable-diffusion.cpp: ${SD_COMMIT_HASH}" >> /versions.txt && \
|
||||
echo "ik_llama.cpp: ${IK_LLAMA_COMMIT_HASH}" >> /versions.txt && \
|
||||
echo "llama-swap: $(cat /tmp/llama-swap-version)" >> /versions.txt && \
|
||||
echo "backend: ${BACKEND}" >> /versions.txt && \
|
||||
echo "build_timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" >> /versions.txt
|
||||
|
||||
RUN mkdir -p /models && chown ${RUN_UID}:${RUN_UID} /models
|
||||
WORKDIR /models
|
||||
USER ${RUN_UID}
|
||||
ENTRYPOINT ["llama-swap"]
|
||||
CMD ["-config", "/etc/llama-swap/config/config.yaml", "-listen", "0.0.0.0:8080"]
|
||||
@@ -0,0 +1,8 @@
|
||||
# Unified Docker Container
|
||||
|
||||
These scripts create a custom llama-swap container that contains:
|
||||
|
||||
- llama-server for LLMs, rerank and embedding model support
|
||||
- sd-server (stable-diffusion.cpp) for image generation
|
||||
- whisper-server (whisper.cpp) for automatic speech recognition (ASR)
|
||||
|
||||
Executable
+283
@@ -0,0 +1,283 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Build script for unified container with version pinning
|
||||
#
|
||||
# Usage:
|
||||
# ./build-image.sh --cuda # Build CUDA image
|
||||
# ./build-image.sh --vulkan # Build Vulkan image
|
||||
# ./build-image.sh --cuda --no-cache # Build without cache
|
||||
# LLAMA_REF=b1234 ./build-image.sh --vulkan # Pin llama.cpp to a commit hash
|
||||
# LLAMA_REF=v1.2.3 ./build-image.sh --cuda # Pin llama.cpp to a tag
|
||||
# WHISPER_REF=v1.0.0 ./build-image.sh --vulkan # Pin whisper.cpp to a tag
|
||||
# SD_REF=master ./build-image.sh --cuda # Pin stable-diffusion.cpp to a branch
|
||||
# LS_VERSION=170 ./build-image.sh --cuda # Override llama-swap version
|
||||
# IK_LLAMA_REF=main ./build-image.sh --cuda # Pin ik_llama.cpp to main branch (CUDA only)
|
||||
#
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
BACKEND=""
|
||||
NO_CACHE=false
|
||||
|
||||
# Parse command-line flags.
#   --cuda / --vulkan  select the backend (the last one given wins)
#   --no-cache         force a rebuild without the Docker layer cache
#   --help, -h         print usage and exit 0
# Anything else is reported on stderr instead of being dropped silently, so a
# mistyped flag (e.g. "--no-cahce") no longer goes unnoticed. The build still
# proceeds, which keeps existing invocations working.
for arg in "$@"; do
    case $arg in
        --cuda)
            BACKEND="cuda"
            ;;
        --vulkan)
            BACKEND="vulkan"
            ;;
        --no-cache)
            NO_CACHE=true
            ;;
        --help|-h)
            echo "Usage: ./build-image.sh --cuda|--vulkan [--no-cache]"
            echo ""
            echo "Options:"
            echo " --cuda Build CUDA image (NVIDIA GPUs)"
            echo " --vulkan Build Vulkan image (AMD GPUs and compatible hardware)"
            echo " --no-cache Force rebuild without using Docker cache"
            echo " --help, -h Show this help message"
            echo ""
            echo "Environment variables:"
            echo " DOCKER_IMAGE_TAG Set custom image tag (default: llama-swap:unified-cuda or llama-swap:unified-vulkan)"
            echo " LLAMA_REF Pin llama.cpp to a commit, tag, or branch"
            echo " WHISPER_REF Pin whisper.cpp to a commit, tag, or branch"
            echo " SD_REF Pin stable-diffusion.cpp to a commit, tag, or branch"
            echo " IK_LLAMA_REF Pin ik_llama.cpp to a commit, tag, or branch (CUDA only)"
            echo " LS_VERSION Override llama-swap version (e.g., '170' or 'latest')"
            exit 0
            ;;
        *)
            echo "Warning: ignoring unknown argument '${arg}' (see --help)" >&2
            ;;
    esac
done
|
||||
|
||||
if [[ -z "$BACKEND" ]]; then
|
||||
echo "Error: No backend specified. Please use --cuda or --vulkan."
|
||||
echo ""
|
||||
echo "Usage: ./build-image.sh --cuda|--vulkan [--no-cache]"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
DOCKER_IMAGE_TAG="${DOCKER_IMAGE_TAG:-llama-swap:unified-${BACKEND}}"
|
||||
|
||||
# Git repository URLs
|
||||
LLAMA_REPO="https://github.com/ggml-org/llama.cpp.git"
|
||||
WHISPER_REPO="https://github.com/ggml-org/whisper.cpp.git"
|
||||
SD_REPO="https://github.com/leejet/stable-diffusion.cpp.git"
|
||||
LLAMA_SWAP_REPO="https://github.com/mostlygeek/llama-swap.git"
|
||||
IK_LLAMA_REPO="https://github.com/ikawrakow/ik_llama.cpp.git"
|
||||
|
||||
# Resolve a git ref (commit hash, tag, or branch) to a full object hash.
# Requires only: git, network access to the remote.
#
# Arguments:
#   $1  repository URL (anything `git ls-remote` accepts)
#   $2  ref: full 40-char SHA, tag name, branch name, or 7+ char short hash
# Output:
#   the resolved hash on stdout
# Returns:
#   0 on success; 1 (with an explanation on stderr) when nothing matches.
#
# Resolution order:
#   1. a full 40-char SHA is returned unchanged, without contacting the remote
#   2. an exact tag match, then an exact branch match
#   3. a 7+ char hex prefix of any hash the remote advertises
# For a tag the hash is the one advertised for refs/tags/<ref> itself (not the
# peeled "^{}" entry).
resolve_ref() {
    local repo_url="$1"
    local ref="$2"
    local hash refs

    # Full 40-char SHA — use as-is
    if [[ "${ref}" =~ ^[0-9a-f]{40}$ ]]; then
        echo "${ref}"
        return
    fi

    # Try tag then branch (exact match). ls-remote lists refs/heads/* before
    # refs/tags/*, so taking the first output line would prefer the branch;
    # select by exact ref name instead so a tag wins when both exist.
    refs=$(git ls-remote "${repo_url}" "refs/tags/${ref}" "refs/heads/${ref}" 2>/dev/null || true)
    hash=$(awk -v tag="refs/tags/${ref}" -v branch="refs/heads/${ref}" '
        $2 == tag    { t = $1 }
        $2 == branch { b = $1 }
        END {
            if (t != "") print t
            else if (b != "") print b
        }' <<<"${refs}")
    if [[ -n "${hash}" ]]; then
        echo "${hash}"
        return
    fi

    # Short hash (7+ chars): scan all refs for a SHA with this prefix
    if [[ "${ref}" =~ ^[0-9a-f]{7,}$ ]]; then
        hash=$(git ls-remote "${repo_url}" 2>/dev/null | grep "^${ref}" | head -1 | cut -f1 || true)
        if [[ -n "${hash}" ]]; then
            echo "${hash}"
            return
        fi
    fi

    echo "ERROR: Could not resolve ref '${ref}' for ${repo_url}" >&2
    if [[ "${ref}" =~ ^[0-9a-f]+$ && ${#ref} -lt 7 ]]; then
        echo " Short hashes must be at least 7 characters (got ${#ref})." >&2
    else
        echo " Tried: tag, branch, git ls-remote prefix match" >&2
    fi
    echo " Use a full 40-char SHA, a tag name, a branch name, or a 7+ char short hash." >&2
    return 1
}
|
||||
|
||||
# Resolve HEAD of a repo without needing to know the default branch name.
#
# Arguments:
#   $1  repository URL
# Output:
#   the hash HEAD points at, or nothing when the remote cannot be queried
# Returns:
#   always 0. The script runs under `set -euo pipefail`, so a failing
#   `git ls-remote` would otherwise abort the script inside the caller's
#   `VAR=$(get_latest_hash ...)` assignment, before the caller's own
#   empty-result check can print its error message. Callers detect failure by
#   testing for empty output.
get_latest_hash() {
    git ls-remote "${1}" HEAD 2>/dev/null | head -1 | cut -f1 || true
}
|
||||
|
||||
echo "=========================================="
|
||||
echo "llama-swap Unified Build (${BACKEND})"
|
||||
echo "=========================================="
|
||||
echo ""
|
||||
|
||||
# Resolve one component to a hash and store it in a named variable.
#
# Usage: resolve_component OUT_VAR LABEL REPO_URL [REF]
#   OUT_VAR   name of the global variable that receives the hash
#   LABEL     component name used in progress and error messages
#   REPO_URL  git remote to query
#   REF       optional pin (commit, tag, or branch); empty means remote HEAD
#
# Prints a one-line summary on stdout. Exits the script with status 1 when the
# ref cannot be resolved or the remote HEAD cannot be determined.
resolve_component() {
    local out_var="$1"
    local label="$2"
    local repo_url="$3"
    local ref="${4:-}"
    local hash

    if [[ -n "${ref}" ]]; then
        hash=$(resolve_ref "${repo_url}" "${ref}") || exit 1
        echo "${label}: ${ref} -> ${hash}"
    else
        # "|| true": under `set -e`/pipefail a failed lookup must reach the
        # explicit error below instead of aborting silently in the assignment.
        hash=$(get_latest_hash "${repo_url}") || true
        if [[ -z "${hash}" ]]; then
            echo "ERROR: Could not determine latest commit for ${label}" >&2
            exit 1
        fi
        echo "${label}: latest HEAD: ${hash}"
    fi

    printf -v "${out_var}" '%s' "${hash}"
}

# Resolve llama.cpp, whisper.cpp and stable-diffusion.cpp refs
resolve_component LLAMA_HASH "llama.cpp" "${LLAMA_REPO}" "${LLAMA_REF:-}"
resolve_component WHISPER_HASH "whisper.cpp" "${WHISPER_REPO}" "${WHISPER_REF:-}"
resolve_component SD_HASH "stable-diffusion.cpp" "${SD_REPO}" "${SD_REF:-}"

# Resolve ik_llama.cpp ref (CUDA only)
if [[ "$BACKEND" == "cuda" ]]; then
    resolve_component IK_LLAMA_HASH "ik_llama.cpp" "${IK_LLAMA_REPO}" "${IK_LLAMA_REF:-}"
else
    IK_LLAMA_HASH="n/a"
    echo "ik_llama.cpp: skipped (vulkan build)"
fi

# Resolve llama-swap ref.
# LS_VERSION is documented as a release number ("170", "v170") or "latest".
# Those are not branches, and the bare number is not a tag name either, so
# resolve_ref would reject them. install-llama-swap.sh accepts all three forms
# directly, so pass them through untouched. The 6-digit cap keeps 7+ digit
# strings eligible for short-hash resolution.
if [[ "${LS_VERSION:-}" == "latest" || "${LS_VERSION:-}" =~ ^v?[0-9]{1,6}$ ]]; then
    LS_HASH="${LS_VERSION}"
    echo "llama-swap: release ${LS_VERSION}"
else
    resolve_component LS_HASH "llama-swap" "${LLAMA_SWAP_REPO}" "${LS_VERSION:-}"
fi
|
||||
|
||||
echo ""
|
||||
echo "=========================================="
|
||||
echo "Starting Docker build..."
|
||||
echo "=========================================="
|
||||
echo ""
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
|
||||
BUILD_ARGS=(
|
||||
--build-arg "BACKEND=${BACKEND}"
|
||||
--build-arg "LLAMA_COMMIT_HASH=${LLAMA_HASH}"
|
||||
--build-arg "WHISPER_COMMIT_HASH=${WHISPER_HASH}"
|
||||
--build-arg "SD_COMMIT_HASH=${SD_HASH}"
|
||||
--build-arg "IK_LLAMA_COMMIT_HASH=${IK_LLAMA_HASH}"
|
||||
--build-arg "LS_VERSION=${LS_HASH}"
|
||||
--build-arg "RUN_UID=${RUN_UID:-0}"
|
||||
-t "${DOCKER_IMAGE_TAG}"
|
||||
-f "${SCRIPT_DIR}/Dockerfile"
|
||||
)
|
||||
|
||||
if [[ "$NO_CACHE" == true ]]; then
|
||||
BUILD_ARGS+=(--no-cache)
|
||||
echo "Note: Building without cache"
|
||||
elif [[ "${GITHUB_ACTIONS:-}" == "true" && "${ACT:-}" != "true" ]]; then
|
||||
CACHE_REF="ghcr.io/mostlygeek/llama-swap:unified-${BACKEND}-cache"
|
||||
BUILD_ARGS+=(
|
||||
--cache-from "type=registry,ref=${CACHE_REF}"
|
||||
--cache-to "type=registry,ref=${CACHE_REF},mode=max"
|
||||
)
|
||||
echo "Note: Using registry cache (${CACHE_REF})"
|
||||
fi
|
||||
|
||||
DOCKER_BUILDKIT=1 docker buildx build --load "${BUILD_ARGS[@]}" "${SCRIPT_DIR}"
|
||||
|
||||
echo ""
|
||||
echo "=========================================="
|
||||
echo "Verifying build artifacts..."
|
||||
echo "=========================================="
|
||||
echo ""
|
||||
|
||||
# Binaries every image must provide; CUDA images additionally ship
# ik-llama-server.
EXPECTED_BINARIES=(llama-server llama-cli whisper-server whisper-cli sd-server sd-cli llama-swap)
[[ "$BACKEND" != "cuda" ]] || EXPECTED_BINARIES+=(ik-llama-server)

# Probe the freshly built image once per binary. The image entrypoint is
# overridden with `which`, so a non-zero exit means the binary is not on PATH.
MISSING_BINARIES=()
for candidate in "${EXPECTED_BINARIES[@]}"; do
    docker run --rm --entrypoint which "${DOCKER_IMAGE_TAG}" "${candidate}" >/dev/null 2>&1 \
        || MISSING_BINARIES+=("${candidate}")
done

# Report everything that is missing in one go, then abort.
if (( ${#MISSING_BINARIES[@]} > 0 )); then
    echo "ERROR: Build succeeded but the following binaries are missing:"
    printf ' - %s\n' "${MISSING_BINARIES[@]}"
    echo ""
    echo "Try running with --no-cache flag:"
    echo " ./build-image.sh --${BACKEND} --no-cache"
    exit 1
fi

# Human-readable, comma-separated form of the verified list.
printf -v VERIFIED_LIST '%s, ' "${EXPECTED_BINARIES[@]}"
VERIFIED_LIST="${VERIFIED_LIST%, }"
echo "All expected binaries verified: ${VERIFIED_LIST}"
|
||||
|
||||
echo ""
|
||||
echo "=========================================="
|
||||
echo "Build complete!"
|
||||
echo "=========================================="
|
||||
echo ""
|
||||
echo "Image tag: ${DOCKER_IMAGE_TAG}"
|
||||
echo ""
|
||||
echo "Built with:"
|
||||
echo " llama.cpp: ${LLAMA_HASH}"
|
||||
echo " whisper.cpp: ${WHISPER_HASH}"
|
||||
echo " stable-diffusion.cpp: ${SD_HASH}"
|
||||
if [[ "$BACKEND" == "cuda" ]]; then
|
||||
echo " ik_llama.cpp: ${IK_LLAMA_HASH}"
|
||||
fi
|
||||
echo " llama-swap: $(docker run --rm --entrypoint cat "${DOCKER_IMAGE_TAG}" /versions.txt | grep llama-swap | cut -d' ' -f2-)"
|
||||
echo ""
|
||||
if [[ "$BACKEND" == "vulkan" ]]; then
|
||||
echo "Run with:"
|
||||
echo " docker run -it --rm --device /dev/dri:/dev/dri ${DOCKER_IMAGE_TAG}"
|
||||
echo ""
|
||||
echo "Note: For AMD GPUs, you may also need:"
|
||||
echo " docker run -it --rm --device /dev/dri:/dev/dri --group-add video ${DOCKER_IMAGE_TAG}"
|
||||
else
|
||||
echo "Run with:"
|
||||
echo " docker run -it --rm --gpus all ${DOCKER_IMAGE_TAG}"
|
||||
fi
|
||||
@@ -0,0 +1,33 @@
|
||||
# placeholder example configuration
|
||||
healthCheckTimeout: 300
|
||||
logRequests: true
|
||||
|
||||
models:
|
||||
"llama":
|
||||
cmd: >
|
||||
llama-server
|
||||
-hf bartowski/Qwen2.5-0.5B-Instruct-GGUF:Q4_K_M
|
||||
--port ${PORT}
|
||||
|
||||
"whisper":
|
||||
checkEndpoint: /v1/audio/transcriptions/
|
||||
cmd: >
|
||||
whisper-server
|
||||
--port ${PORT}
|
||||
--model /models/whisper.bin
|
||||
--flash-attn
|
||||
--request-path /v1/audio/transcriptions --inference-path ""
|
||||
|
||||
"image":
|
||||
checkEndpoint: /
|
||||
cmd: |
|
||||
sd-server
|
||||
--listen-port ${PORT}
|
||||
--diffusion-fa
|
||||
--diffusion-model /models/z_image_turbo-Q8_0.gguf
|
||||
--vae /models/ae.safetensors
|
||||
--llm /models/qwen3-4b-instruct-2507-q8_0.gguf
|
||||
--offload-to-cpu
|
||||
--cfg-scale 1.0
|
||||
--height 512 --width 512
|
||||
--steps 8
|
||||
@@ -0,0 +1,48 @@
|
||||
#!/bin/bash
|
||||
# Install ik_llama.cpp - clone, build, and install binaries
|
||||
# Usage: ./install-ik-llama.sh <commit_hash>
|
||||
# Note: CUDA only; always built against builder-base-cuda
|
||||
set -e
|
||||
|
||||
COMMIT_HASH="${1:-main}"
|
||||
|
||||
mkdir -p /install/bin
|
||||
|
||||
# Clone and checkout (init-based so cache-mounted build dir doesn't break clone)
|
||||
echo "=== Cloning ik_llama.cpp at ${COMMIT_HASH} ==="
|
||||
mkdir -p /src/ik_llama.cpp
|
||||
cd /src/ik_llama.cpp
|
||||
if [ ! -d .git ]; then
|
||||
git init
|
||||
git remote add origin https://github.com/ikawrakow/ik_llama.cpp.git
|
||||
fi
|
||||
git fetch --depth=1 origin "${COMMIT_HASH}"
|
||||
git checkout FETCH_HEAD
|
||||
|
||||
CMAKE_FLAGS=(
|
||||
-DGGML_NATIVE=OFF
|
||||
-DBUILD_SHARED_LIBS=OFF
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DCMAKE_C_COMPILER_LAUNCHER=ccache
|
||||
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache
|
||||
-DGGML_CUDA=ON
|
||||
"-DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES:?CMAKE_CUDA_ARCHITECTURES must be set}"
|
||||
"-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler"
|
||||
"-DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda -Wl,--allow-shlib-undefined"
|
||||
)
|
||||
|
||||
rm -rf build/CMakeCache.txt build/CMakeFiles 2>/dev/null || true
|
||||
|
||||
echo "=== Building ik_llama.cpp ==="
|
||||
cmake -B build "${CMAKE_FLAGS[@]}"
|
||||
cmake --build build --config Release -j"$(nproc)" --target llama-server
|
||||
|
||||
if [ ! -f "build/bin/llama-server" ]; then
|
||||
echo "FATAL: llama-server not found in build/bin/" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Install as ik-llama-server to avoid collision with llama.cpp's llama-server
|
||||
cp "build/bin/llama-server" "/install/bin/ik-llama-server"
|
||||
echo "=== ik_llama.cpp build complete ==="
|
||||
ls -la /install/bin/
|
||||
Executable
+59
@@ -0,0 +1,59 @@
|
||||
#!/bin/bash
|
||||
# Install llama-swap - download latest release binary from GitHub
|
||||
# Usage: ./install-llama-swap.sh [version]
|
||||
# version: release version number (e.g., "170") or "latest" (default)
|
||||
set -e
|
||||
|
||||
VERSION="${1:-latest}"
|
||||
REPO="mostlygeek/llama-swap"
|
||||
|
||||
mkdir -p /install/bin
|
||||
|
||||
# If a full commit hash is given, find the release tag that points to it.
# `git ls-remote --tags` prints "<hash>\trefs/tags/<name>" for every tag, plus
# a peeled "<hash>\trefs/tags/<name>^{}" line for annotated tags. For an
# annotated tag the commit hash appears only on the peeled line, so the "^{}"
# suffix is stripped rather than the line being discarded. When no tag matches
# the hash, fall back to the latest release.
if echo "${VERSION}" | grep -qE '^[0-9a-f]{40}$'; then
    echo "=== Resolving commit ${VERSION:0:7} to release tag ==="
    TAG=$(git ls-remote --tags "https://github.com/${REPO}.git" 2>/dev/null \
        | grep "^${VERSION}" | sed -e 's|.*refs/tags/||' -e 's|\^{}$||' | head -1)
    if [ -n "${TAG}" ]; then
        echo "Resolved to tag: ${TAG}"
        VERSION="${TAG#v}"
    else
        echo "No release tag found for commit ${VERSION:0:7}, using latest"
        VERSION="latest"
    fi
fi
|
||||
|
||||
# Strip leading 'v' prefix so both "198" and "v198" work
|
||||
VERSION="${VERSION#v}"
|
||||
|
||||
# Resolve "latest" to actual version number
|
||||
if [ "$VERSION" = "latest" ]; then
|
||||
echo "=== Resolving latest llama-swap release ==="
|
||||
VERSION=$(curl -fsSL "https://api.github.com/repos/${REPO}/releases/latest" \
|
||||
| grep '"tag_name"' | head -1 | cut -d'"' -f4 | sed 's/^v//')
|
||||
if [ -z "$VERSION" ]; then
|
||||
echo "FATAL: Could not determine latest release version" >&2
|
||||
exit 1
|
||||
fi
|
||||
echo "Latest version: ${VERSION}"
|
||||
fi
|
||||
|
||||
# Download and extract
|
||||
URL="https://github.com/${REPO}/releases/download/v${VERSION}/llama-swap_${VERSION}_linux_amd64.tar.gz"
|
||||
echo "=== Downloading llama-swap v${VERSION} ==="
|
||||
echo "URL: $URL"
|
||||
curl -fSL -o /tmp/llama-swap.tar.gz "$URL"
|
||||
tar -xzf /tmp/llama-swap.tar.gz -C /install/bin/
|
||||
rm /tmp/llama-swap.tar.gz
|
||||
|
||||
# Validate
|
||||
if [ ! -x "/install/bin/llama-swap" ]; then
|
||||
echo "FATAL: llama-swap binary not found or not executable" >&2
|
||||
ls -la /install/bin/ >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "$VERSION" > /install/llama-swap-version
|
||||
|
||||
echo "=== llama-swap v${VERSION} installed ==="
|
||||
ls -la /install/bin/llama-swap
|
||||
Executable
+63
@@ -0,0 +1,63 @@
|
||||
#!/bin/bash
|
||||
# Install llama.cpp - clone, build, and install binaries
|
||||
# Usage: BACKEND=cuda|vulkan ./install-llama.sh <commit_hash>
|
||||
set -e
|
||||
|
||||
COMMIT_HASH="${1:-master}"
|
||||
BACKEND="${BACKEND:-cuda}"
|
||||
|
||||
mkdir -p /install/bin
|
||||
|
||||
# Clone and checkout (init-based so cache-mounted /src/llama.cpp/build dir doesn't break clone)
|
||||
echo "=== Cloning llama.cpp at ${COMMIT_HASH} ==="
|
||||
mkdir -p /src/llama.cpp
|
||||
cd /src/llama.cpp
|
||||
if [ ! -d .git ]; then
|
||||
git init
|
||||
git remote add origin https://github.com/ggml-org/llama.cpp.git
|
||||
fi
|
||||
git fetch --depth=1 origin "${COMMIT_HASH}"
|
||||
git checkout FETCH_HEAD
|
||||
|
||||
# Common cmake flags
|
||||
CMAKE_FLAGS=(
|
||||
-DGGML_NATIVE=OFF
|
||||
-DBUILD_SHARED_LIBS=OFF
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DCMAKE_C_COMPILER_LAUNCHER=ccache
|
||||
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache
|
||||
-DLLAMA_BUILD_TESTS=OFF
|
||||
)
|
||||
|
||||
if [ "$BACKEND" = "cuda" ]; then
|
||||
CMAKE_FLAGS+=(
|
||||
-DGGML_CUDA=ON
|
||||
-DGGML_VULKAN=OFF
|
||||
"-DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES:?CMAKE_CUDA_ARCHITECTURES must be set}"
|
||||
"-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler"
|
||||
"-DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda"
|
||||
)
|
||||
elif [ "$BACKEND" = "vulkan" ]; then
|
||||
CMAKE_FLAGS+=(
|
||||
-DGGML_CUDA=OFF
|
||||
-DGGML_VULKAN=ON
|
||||
)
|
||||
fi
|
||||
|
||||
TARGETS=(llama-cli llama-server)
|
||||
|
||||
rm -rf build/CMakeCache.txt build/CMakeFiles 2>/dev/null || true
|
||||
|
||||
echo "=== Building llama.cpp for ${BACKEND} ==="
|
||||
cmake -B build "${CMAKE_FLAGS[@]}"
|
||||
cmake --build build --config Release -j"$(nproc)" --target "${TARGETS[@]}"
|
||||
|
||||
for bin in "${TARGETS[@]}"; do
|
||||
if [ ! -f "build/bin/$bin" ]; then
|
||||
echo "FATAL: $bin not found in build/bin/" >&2
|
||||
exit 1
|
||||
fi
|
||||
cp "build/bin/$bin" "/install/bin/"
|
||||
done
|
||||
echo "=== llama.cpp build complete ==="
|
||||
ls -la /install/bin/
|
||||
Executable
+68
@@ -0,0 +1,68 @@
|
||||
#!/bin/bash
|
||||
# Install stable-diffusion.cpp - clone, build, and install binaries and library
|
||||
# Usage: BACKEND=cuda|vulkan ./install-sd.sh <commit_hash>
|
||||
set -e
|
||||
|
||||
COMMIT_HASH="${1:-master}"
|
||||
BACKEND="${BACKEND:-cuda}"
|
||||
|
||||
mkdir -p /install/bin /install/lib
|
||||
|
||||
# Clone and checkout (init-based so cache-mounted /src/stable-diffusion.cpp/build dir doesn't break clone)
|
||||
echo "=== Cloning stable-diffusion.cpp at ${COMMIT_HASH} ==="
|
||||
mkdir -p /src/stable-diffusion.cpp
|
||||
cd /src/stable-diffusion.cpp
|
||||
if [ ! -d .git ]; then
|
||||
git init
|
||||
git remote add origin https://github.com/leejet/stable-diffusion.cpp.git
|
||||
fi
|
||||
git fetch --depth=1 origin "${COMMIT_HASH}"
|
||||
git checkout FETCH_HEAD
|
||||
git submodule update --init --recursive --depth=1
|
||||
|
||||
# Common cmake flags
|
||||
CMAKE_FLAGS=(
|
||||
-DGGML_NATIVE=OFF
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DCMAKE_C_COMPILER_LAUNCHER=ccache
|
||||
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache
|
||||
-DSD_BUILD_EXAMPLES=ON
|
||||
)
|
||||
|
||||
if [ "$BACKEND" = "cuda" ]; then
|
||||
CMAKE_FLAGS+=(
|
||||
-DGGML_CUDA=ON
|
||||
-DGGML_VULKAN=OFF
|
||||
"-DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES:?CMAKE_CUDA_ARCHITECTURES must be set}"
|
||||
"-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler"
|
||||
"-DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda"
|
||||
"-DCMAKE_SHARED_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda"
|
||||
-DSD_CUDA=ON
|
||||
)
|
||||
elif [ "$BACKEND" = "vulkan" ]; then
|
||||
CMAKE_FLAGS+=(
|
||||
-DGGML_CUDA=OFF
|
||||
-DGGML_VULKAN=ON
|
||||
-DSD_VULKAN=ON
|
||||
)
|
||||
fi
|
||||
|
||||
TARGETS=(stable-diffusion sd-cli sd-server)
|
||||
|
||||
rm -rf build/CMakeCache.txt build/CMakeFiles 2>/dev/null || true
|
||||
|
||||
echo "=== Building stable-diffusion.cpp for ${BACKEND} ==="
|
||||
cmake -B build "${CMAKE_FLAGS[@]}"
|
||||
cmake --build build --config Release -j"$(nproc)" --target "${TARGETS[@]}"
|
||||
|
||||
for bin in sd-cli sd-server; do
|
||||
if [ ! -f "build/bin/$bin" ]; then
|
||||
echo "FATAL: $bin not found in build/bin/" >&2
|
||||
exit 1
|
||||
fi
|
||||
cp "build/bin/$bin" "/install/bin/"
|
||||
done
|
||||
find build -name "*.so*" -type f -exec cp {} /install/lib/ \;
|
||||
|
||||
echo "=== stable-diffusion.cpp build complete ==="
|
||||
ls -la /install/bin/ /install/lib/
|
||||
Executable
+64
@@ -0,0 +1,64 @@
|
||||
#!/bin/bash
|
||||
# Install whisper.cpp - clone, build, and install binaries
|
||||
# Usage: BACKEND=cuda|vulkan ./install-whisper.sh <commit_hash>
|
||||
set -e
|
||||
|
||||
COMMIT_HASH="${1:-master}"
|
||||
BACKEND="${BACKEND:-cuda}"
|
||||
|
||||
mkdir -p /install/bin /install/lib
|
||||
|
||||
# Clone and checkout (init-based so cache-mounted /src/whisper.cpp/build dir doesn't break clone)
|
||||
echo "=== Cloning whisper.cpp at ${COMMIT_HASH} ==="
|
||||
mkdir -p /src/whisper.cpp
|
||||
cd /src/whisper.cpp
|
||||
if [ ! -d .git ]; then
|
||||
git init
|
||||
git remote add origin https://github.com/ggml-org/whisper.cpp.git
|
||||
fi
|
||||
git fetch --depth=1 origin "${COMMIT_HASH}"
|
||||
git checkout FETCH_HEAD
|
||||
|
||||
# Common cmake flags
|
||||
CMAKE_FLAGS=(
|
||||
-DGGML_NATIVE=OFF
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DCMAKE_C_COMPILER_LAUNCHER=ccache
|
||||
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache
|
||||
)
|
||||
|
||||
if [ "$BACKEND" = "cuda" ]; then
|
||||
CMAKE_FLAGS+=(
|
||||
-DGGML_CUDA=ON
|
||||
-DGGML_VULKAN=OFF
|
||||
"-DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES:?CMAKE_CUDA_ARCHITECTURES must be set}"
|
||||
"-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler"
|
||||
"-DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda"
|
||||
"-DCMAKE_SHARED_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs -lcuda"
|
||||
)
|
||||
elif [ "$BACKEND" = "vulkan" ]; then
|
||||
CMAKE_FLAGS+=(
|
||||
-DGGML_CUDA=OFF
|
||||
-DGGML_VULKAN=ON
|
||||
)
|
||||
fi
|
||||
|
||||
TARGETS=(whisper-cli whisper-server)
|
||||
|
||||
rm -rf build/CMakeCache.txt build/CMakeFiles 2>/dev/null || true
|
||||
|
||||
echo "=== Building whisper.cpp for ${BACKEND} ==="
|
||||
cmake -B build "${CMAKE_FLAGS[@]}"
|
||||
cmake --build build --config Release -j"$(nproc)" --target "${TARGETS[@]}"
|
||||
|
||||
for bin in "${TARGETS[@]}"; do
|
||||
if [ ! -f "build/bin/$bin" ]; then
|
||||
echo "FATAL: $bin not found in build/bin/" >&2
|
||||
exit 1
|
||||
fi
|
||||
cp "build/bin/$bin" "/install/bin/"
|
||||
done
|
||||
find build -name "*.so*" -type f -exec cp {} /install/lib/ \;
|
||||
|
||||
echo "=== whisper.cpp build complete ==="
|
||||
ls -la /install/bin/
|
||||
@@ -319,6 +319,29 @@ models:
|
||||
# - recommended to be omitted and the default used
|
||||
concurrencyLimit: 0
|
||||
|
||||
# timeouts: configure proxy connection timeouts for this model
|
||||
# - optional, defaults shown below
|
||||
# - useful for models on slower hardware that need longer timeouts
|
||||
# - increase responseHeader to avoid "timeout awaiting response headers" errors
|
||||
# - set any value to 0 to disable that timeout (not recommended)
|
||||
timeouts:
|
||||
# connect: TCP connection timeout in seconds
|
||||
# - default: 30
|
||||
connect: 30
|
||||
|
||||
# responseHeader: time to wait for response headers in seconds
|
||||
# - default: 60
|
||||
# - for slow image generation or large models, consider increasing to 300+ seconds
|
||||
responseHeader: 60
|
||||
|
||||
# tlsHandshake: TLS handshake timeout in seconds
|
||||
# - default: 10
|
||||
tlsHandshake: 10
|
||||
|
||||
# idleConn: idle connection timeout in seconds
|
||||
# - default: 90
|
||||
idleConn: 90
|
||||
|
||||
# sendLoadingState: overrides the global sendLoadingState setting for this model
|
||||
# - optional, default: undefined (use global setting)
|
||||
sendLoadingState: false
|
||||
@@ -444,6 +467,17 @@ peers:
|
||||
# - required
|
||||
# - requested path to llama-swap will be appended to the end of the proxy value
|
||||
proxy: http://192.168.1.23
|
||||
|
||||
# timeouts: configure proxy connection timeouts for this peer
|
||||
# - optional, defaults shown below
|
||||
# - useful when the peer runs on slower hardware
|
||||
# - set any value to 0 to disable that timeout (not recommended)
|
||||
timeouts:
|
||||
connect: 30
|
||||
responseHeader: 60
|
||||
tlsHandshake: 10
|
||||
idleConn: 90
|
||||
|
||||
# models: a list of models served by the peer
|
||||
# - required
|
||||
models:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
module github.com/mostlygeek/llama-swap
|
||||
|
||||
go 1.25.4
|
||||
go 1.26.1
|
||||
|
||||
require (
|
||||
github.com/billziss-gh/golib v0.2.0
|
||||
|
||||
@@ -124,6 +124,7 @@ type Config struct {
|
||||
LogToStdout string `yaml:"logToStdout"`
|
||||
MetricsMaxInMemory int `yaml:"metricsMaxInMemory"`
|
||||
CaptureBuffer int `yaml:"captureBuffer"`
|
||||
GlobalTTL int `yaml:"globalTTL"`
|
||||
Models map[string]ModelConfig `yaml:"models"` /* key is model ID */
|
||||
Profiles map[string][]string `yaml:"profiles"`
|
||||
Groups map[string]GroupConfig `yaml:"groups"` /* key is group ID */
|
||||
@@ -203,6 +204,7 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
||||
LogToStdout: LogToStdoutProxy,
|
||||
MetricsMaxInMemory: 1000,
|
||||
CaptureBuffer: 5,
|
||||
GlobalTTL: 0,
|
||||
}
|
||||
if err = yaml.Unmarshal([]byte(yamlStr), &config); err != nil {
|
||||
return Config{}, err
|
||||
@@ -216,6 +218,10 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
||||
return Config{}, fmt.Errorf("startPort must be greater than 1")
|
||||
}
|
||||
|
||||
if config.GlobalTTL < 0 {
|
||||
return Config{}, fmt.Errorf("globalTTL must be >= 0")
|
||||
}
|
||||
|
||||
switch config.LogToStdout {
|
||||
case LogToStdoutProxy, LogToStdoutUpstream, LogToStdoutBoth, LogToStdoutNone:
|
||||
default:
|
||||
@@ -255,6 +261,15 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
||||
modelConfig.Cmd = StripComments(modelConfig.Cmd)
|
||||
modelConfig.CmdStop = StripComments(modelConfig.CmdStop)
|
||||
|
||||
// fall back to globalTTL when the model did not set its own ttl (still the default sentinel value)
|
||||
if modelConfig.UnloadAfter == MODEL_CONFIG_DEFAULT_TTL {
|
||||
modelConfig.UnloadAfter = config.GlobalTTL
|
||||
}
|
||||
|
||||
if modelConfig.UnloadAfter < 0 {
|
||||
return Config{}, fmt.Errorf("model %s: invalid TTL value %d", modelId, modelConfig.UnloadAfter)
|
||||
}
|
||||
|
||||
// Validate model macros
|
||||
for _, macro := range modelConfig.Macros {
|
||||
if err = validateMacro(macro.Name, macro.Value); err != nil {
|
||||
@@ -293,6 +308,8 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
||||
modelConfig.Proxy = strings.ReplaceAll(modelConfig.Proxy, macroSlug, macroStr)
|
||||
modelConfig.CheckEndpoint = strings.ReplaceAll(modelConfig.CheckEndpoint, macroSlug, macroStr)
|
||||
modelConfig.Filters.StripParams = strings.ReplaceAll(modelConfig.Filters.StripParams, macroSlug, macroStr)
|
||||
modelConfig.Name = strings.ReplaceAll(modelConfig.Name, macroSlug, macroStr)
|
||||
modelConfig.Description = strings.ReplaceAll(modelConfig.Description, macroSlug, macroStr)
|
||||
|
||||
// Substitute macros in SetParamsByID keys and values
|
||||
if len(modelConfig.Filters.SetParamsByID) > 0 {
|
||||
@@ -336,6 +353,8 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
||||
modelConfig.Cmd = strings.ReplaceAll(modelConfig.Cmd, macroSlug, macroStr)
|
||||
modelConfig.CmdStop = strings.ReplaceAll(modelConfig.CmdStop, macroSlug, macroStr)
|
||||
modelConfig.Proxy = strings.ReplaceAll(modelConfig.Proxy, macroSlug, macroStr)
|
||||
modelConfig.Name = strings.ReplaceAll(modelConfig.Name, macroSlug, macroStr)
|
||||
modelConfig.Description = strings.ReplaceAll(modelConfig.Description, macroSlug, macroStr)
|
||||
|
||||
if len(modelConfig.Metadata) > 0 {
|
||||
result, err := substituteMacroInValue(modelConfig.Metadata, "PORT", nextPort)
|
||||
@@ -355,6 +374,8 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
||||
"proxy": modelConfig.Proxy,
|
||||
"checkEndpoint": modelConfig.CheckEndpoint,
|
||||
"filters.stripParams": modelConfig.Filters.StripParams,
|
||||
"name": modelConfig.Name,
|
||||
"description": modelConfig.Description,
|
||||
}
|
||||
|
||||
for fieldName, fieldValue := range fieldMap {
|
||||
|
||||
@@ -187,6 +187,13 @@ groups:
|
||||
Name: "Model 1",
|
||||
Description: "This is model 1",
|
||||
SendLoadingState: &modelLoadingState,
|
||||
Timeouts: TimeoutsConfig{
|
||||
Connect: 30,
|
||||
ResponseHeader: 60,
|
||||
TLSHandshake: 10,
|
||||
ExpectContinue: 1,
|
||||
IdleConn: 90,
|
||||
},
|
||||
},
|
||||
"model2": {
|
||||
Cmd: "path/to/server --arg1 one",
|
||||
@@ -195,6 +202,13 @@ groups:
|
||||
Env: []string{},
|
||||
CheckEndpoint: "/",
|
||||
SendLoadingState: &modelLoadingState,
|
||||
Timeouts: TimeoutsConfig{
|
||||
Connect: 30,
|
||||
ResponseHeader: 60,
|
||||
TLSHandshake: 10,
|
||||
ExpectContinue: 1,
|
||||
IdleConn: 90,
|
||||
},
|
||||
},
|
||||
"model3": {
|
||||
Cmd: "path/to/cmd --arg1 one",
|
||||
@@ -203,6 +217,13 @@ groups:
|
||||
Env: []string{},
|
||||
CheckEndpoint: "/",
|
||||
SendLoadingState: &modelLoadingState,
|
||||
Timeouts: TimeoutsConfig{
|
||||
Connect: 30,
|
||||
ResponseHeader: 60,
|
||||
TLSHandshake: 10,
|
||||
ExpectContinue: 1,
|
||||
IdleConn: 90,
|
||||
},
|
||||
},
|
||||
"model4": {
|
||||
Cmd: "path/to/cmd --arg1 one",
|
||||
@@ -211,6 +232,13 @@ groups:
|
||||
Aliases: []string{},
|
||||
Env: []string{},
|
||||
SendLoadingState: &modelLoadingState,
|
||||
Timeouts: TimeoutsConfig{
|
||||
Connect: 30,
|
||||
ResponseHeader: 60,
|
||||
TLSHandshake: 10,
|
||||
ExpectContinue: 1,
|
||||
IdleConn: 90,
|
||||
},
|
||||
},
|
||||
},
|
||||
HealthCheckTimeout: 15,
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestConfig_GroupMemberIsUnique(t *testing.T) {
|
||||
@@ -848,6 +849,71 @@ func TestConfig_APIKeys_EnvMacros(t *testing.T) {
|
||||
})
|
||||
}
|
||||
|
||||
func TestConfig_GlobalTTL(t *testing.T) {
|
||||
t.Run("globalTTL sets default for models", func(t *testing.T) {
|
||||
content := `
|
||||
globalTTL: 300
|
||||
models:
|
||||
model1:
|
||||
cmd: server --port ${PORT}
|
||||
`
|
||||
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 300, config.GlobalTTL)
|
||||
assert.Equal(t, 300, config.Models["model1"].UnloadAfter)
|
||||
})
|
||||
|
||||
t.Run("model ttl=0 overrides globalTTL", func(t *testing.T) {
|
||||
content := `
|
||||
globalTTL: 300
|
||||
models:
|
||||
model1:
|
||||
cmd: server --port ${PORT}
|
||||
ttl: 0
|
||||
`
|
||||
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 0, config.Models["model1"].UnloadAfter)
|
||||
})
|
||||
|
||||
t.Run("model explicit ttl overrides globalTTL", func(t *testing.T) {
|
||||
content := `
|
||||
globalTTL: 300
|
||||
models:
|
||||
model1:
|
||||
cmd: server --port ${PORT}
|
||||
ttl: 600
|
||||
`
|
||||
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 600, config.Models["model1"].UnloadAfter)
|
||||
})
|
||||
|
||||
t.Run("globalTTL defaults to 0", func(t *testing.T) {
|
||||
content := `
|
||||
models:
|
||||
model1:
|
||||
cmd: server --port ${PORT}
|
||||
`
|
||||
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 0, config.GlobalTTL)
|
||||
assert.Equal(t, 0, config.Models["model1"].UnloadAfter)
|
||||
})
|
||||
|
||||
t.Run("negative globalTTL rejected", func(t *testing.T) {
|
||||
content := `
|
||||
globalTTL: -1
|
||||
models:
|
||||
model1:
|
||||
cmd: server --port ${PORT}
|
||||
`
|
||||
_, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
assert.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "globalTTL must be >= 0")
|
||||
})
|
||||
}
|
||||
|
||||
func TestConfig_EnvMacros(t *testing.T) {
|
||||
t.Run("basic env substitution in cmd", func(t *testing.T) {
|
||||
t.Setenv("TEST_MODEL_PATH", "/opt/models")
|
||||
@@ -1373,3 +1439,108 @@ models:
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
func TestConfig_TimeoutsParsing(t *testing.T) {
|
||||
configYaml := `
|
||||
models:
|
||||
model1:
|
||||
cmd: test-server --port ${PORT}
|
||||
timeouts:
|
||||
connect: 45
|
||||
responseHeader: 120
|
||||
`
|
||||
|
||||
config, err := LoadConfigFromReader(strings.NewReader(configYaml))
|
||||
require.NoError(t, err)
|
||||
|
||||
modelConfig, found := config.Models["model1"]
|
||||
require.True(t, found, "model1 should exist in config")
|
||||
|
||||
assert.Equal(t, 45, modelConfig.Timeouts.Connect)
|
||||
assert.Equal(t, 120, modelConfig.Timeouts.ResponseHeader)
|
||||
}
|
||||
|
||||
func TestConfig_TimeoutsDefaults(t *testing.T) {
|
||||
configYaml := `
|
||||
models:
|
||||
model1:
|
||||
cmd: test-server --port ${PORT}
|
||||
`
|
||||
|
||||
config, err := LoadConfigFromReader(strings.NewReader(configYaml))
|
||||
require.NoError(t, err)
|
||||
|
||||
modelConfig, found := config.Models["model1"]
|
||||
require.True(t, found, "model1 should exist in config")
|
||||
|
||||
// Default values should be set during unmarshaling
|
||||
assert.Equal(t, 30, modelConfig.Timeouts.Connect)
|
||||
assert.Equal(t, 60, modelConfig.Timeouts.ResponseHeader)
|
||||
assert.Equal(t, 10, modelConfig.Timeouts.TLSHandshake)
|
||||
assert.Equal(t, 1, modelConfig.Timeouts.ExpectContinue)
|
||||
assert.Equal(t, 90, modelConfig.Timeouts.IdleConn)
|
||||
}
|
||||
|
||||
func TestConfig_TimeoutsZeroAllowed(t *testing.T) {
|
||||
configYaml := `
|
||||
models:
|
||||
model1:
|
||||
cmd: test-server --port ${PORT}
|
||||
timeouts:
|
||||
connect: 0
|
||||
responseHeader: 0
|
||||
`
|
||||
|
||||
config, err := LoadConfigFromReader(strings.NewReader(configYaml))
|
||||
require.NoError(t, err)
|
||||
|
||||
modelConfig, found := config.Models["model1"]
|
||||
require.True(t, found, "model1 should exist in config")
|
||||
|
||||
// Explicit 0 should be preserved (disables timeout)
|
||||
assert.Equal(t, 0, modelConfig.Timeouts.Connect)
|
||||
assert.Equal(t, 0, modelConfig.Timeouts.ResponseHeader)
|
||||
}
|
||||
|
||||
func TestConfig_PeerTimeoutsParsing(t *testing.T) {
|
||||
configYaml := `
|
||||
peers:
|
||||
peer1:
|
||||
proxy: http://example.com
|
||||
models: [model1]
|
||||
timeouts:
|
||||
connect: 45
|
||||
responseHeader: 120
|
||||
`
|
||||
|
||||
config, err := LoadConfigFromReader(strings.NewReader(configYaml))
|
||||
require.NoError(t, err)
|
||||
|
||||
peerConfig, found := config.Peers["peer1"]
|
||||
require.True(t, found, "peer1 should exist in config")
|
||||
|
||||
assert.Equal(t, 45, peerConfig.Timeouts.Connect)
|
||||
assert.Equal(t, 120, peerConfig.Timeouts.ResponseHeader)
|
||||
}
|
||||
|
||||
func TestConfig_PeerTimeoutsDefaults(t *testing.T) {
|
||||
configYaml := `
|
||||
peers:
|
||||
peer1:
|
||||
proxy: http://example.com
|
||||
models: [model1]
|
||||
`
|
||||
|
||||
config, err := LoadConfigFromReader(strings.NewReader(configYaml))
|
||||
require.NoError(t, err)
|
||||
|
||||
peerConfig, found := config.Peers["peer1"]
|
||||
require.True(t, found, "peer1 should exist in config")
|
||||
|
||||
// Default values should be set during unmarshaling
|
||||
assert.Equal(t, 30, peerConfig.Timeouts.Connect)
|
||||
assert.Equal(t, 60, peerConfig.Timeouts.ResponseHeader)
|
||||
assert.Equal(t, 10, peerConfig.Timeouts.TLSHandshake)
|
||||
assert.Equal(t, 1, peerConfig.Timeouts.ExpectContinue)
|
||||
assert.Equal(t, 90, peerConfig.Timeouts.IdleConn)
|
||||
}
|
||||
|
||||
@@ -173,6 +173,13 @@ groups:
|
||||
Env: []string{"VAR1=value1", "VAR2=value2"},
|
||||
CheckEndpoint: "/health",
|
||||
SendLoadingState: &modelLoadingState,
|
||||
Timeouts: TimeoutsConfig{
|
||||
Connect: 30,
|
||||
ResponseHeader: 60,
|
||||
TLSHandshake: 10,
|
||||
ExpectContinue: 1,
|
||||
IdleConn: 90,
|
||||
},
|
||||
},
|
||||
"model2": {
|
||||
Cmd: "path/to/server --arg1 one",
|
||||
@@ -182,6 +189,13 @@ groups:
|
||||
Env: []string{},
|
||||
CheckEndpoint: "/",
|
||||
SendLoadingState: &modelLoadingState,
|
||||
Timeouts: TimeoutsConfig{
|
||||
Connect: 30,
|
||||
ResponseHeader: 60,
|
||||
TLSHandshake: 10,
|
||||
ExpectContinue: 1,
|
||||
IdleConn: 90,
|
||||
},
|
||||
},
|
||||
"model3": {
|
||||
Cmd: "path/to/cmd --arg1 one",
|
||||
@@ -191,6 +205,13 @@ groups:
|
||||
Env: []string{},
|
||||
CheckEndpoint: "/",
|
||||
SendLoadingState: &modelLoadingState,
|
||||
Timeouts: TimeoutsConfig{
|
||||
Connect: 30,
|
||||
ResponseHeader: 60,
|
||||
TLSHandshake: 10,
|
||||
ExpectContinue: 1,
|
||||
IdleConn: 90,
|
||||
},
|
||||
},
|
||||
"model4": {
|
||||
Cmd: "path/to/cmd --arg1 one",
|
||||
@@ -200,6 +221,13 @@ groups:
|
||||
Aliases: []string{},
|
||||
Env: []string{},
|
||||
SendLoadingState: &modelLoadingState,
|
||||
Timeouts: TimeoutsConfig{
|
||||
Connect: 30,
|
||||
ResponseHeader: 60,
|
||||
TLSHandshake: 10,
|
||||
ExpectContinue: 1,
|
||||
IdleConn: 90,
|
||||
},
|
||||
},
|
||||
},
|
||||
HealthCheckTimeout: 15,
|
||||
|
||||
@@ -104,6 +104,62 @@ models:
|
||||
assert.Contains(t, err.Error(), "self-reference")
|
||||
}
|
||||
|
||||
// Test macro substitution in name and description fields
|
||||
func TestConfig_MacroInNameAndDescription(t *testing.T) {
|
||||
content := `
|
||||
startPort: 10000
|
||||
macros:
|
||||
"VARIANT": "Q4_K_M"
|
||||
"FAMILY": "llama"
|
||||
|
||||
models:
|
||||
my-model:
|
||||
cmd: echo ok
|
||||
proxy: http://localhost:8080
|
||||
name: "${FAMILY} ${VARIANT}"
|
||||
description: "A ${FAMILY} model in ${VARIANT} format"
|
||||
`
|
||||
|
||||
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, "llama Q4_K_M", config.Models["my-model"].Name)
|
||||
assert.Equal(t, "A llama model in Q4_K_M format", config.Models["my-model"].Description)
|
||||
}
|
||||
|
||||
// Test MODEL_ID macro in name and description fields
|
||||
func TestConfig_ModelIDInNameAndDescription(t *testing.T) {
|
||||
content := `
|
||||
startPort: 10000
|
||||
models:
|
||||
llama-3b:
|
||||
cmd: echo ok
|
||||
proxy: http://localhost:8080
|
||||
name: "Model: ${MODEL_ID}"
|
||||
description: "Running ${MODEL_ID}"
|
||||
`
|
||||
|
||||
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, "Model: llama-3b", config.Models["llama-3b"].Name)
|
||||
assert.Equal(t, "Running llama-3b", config.Models["llama-3b"].Description)
|
||||
}
|
||||
|
||||
// Test unknown macro in name or description returns an error
|
||||
func TestConfig_UnknownMacroInNameDescription(t *testing.T) {
|
||||
content := `
|
||||
startPort: 10000
|
||||
models:
|
||||
test:
|
||||
cmd: echo ok
|
||||
proxy: http://localhost:8080
|
||||
name: "Model ${UNDEFINED}"
|
||||
`
|
||||
|
||||
_, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
assert.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "UNDEFINED")
|
||||
}
|
||||
|
||||
// Test undefined macro reference error
|
||||
func TestConfig_UndefinedMacroReference(t *testing.T) {
|
||||
content := `
|
||||
|
||||
@@ -5,6 +5,19 @@ import (
|
||||
"runtime"
|
||||
)
|
||||
|
||||
// MODEL_CONFIG_DEFAULT_TTL is the sentinel stored in a model's UnloadAfter
// field when the model did not specify its own ttl.
const MODEL_CONFIG_DEFAULT_TTL = -1
|
||||
|
||||
// TimeoutsConfig groups the timeout settings used for proxy connections.
// Every value is expressed in whole seconds; 0 turns that timeout off,
// which is not recommended.
type TimeoutsConfig struct {
	// Connect is the connection timeout (yaml: connect).
	Connect int `yaml:"connect"`

	// ResponseHeader is how long to wait for response headers
	// (yaml: responseHeader).
	ResponseHeader int `yaml:"responseHeader"`

	// TLSHandshake is the TLS handshake timeout (yaml: tlsHandshake).
	TLSHandshake int `yaml:"tlsHandshake"`

	// ExpectContinue is the expect-continue timeout (yaml: expectContinue).
	// NOTE(review): presumably applied as http.Transport.ExpectContinueTimeout,
	// where 0 sends the request body immediately rather than waiting — confirm.
	ExpectContinue int `yaml:"expectContinue"`

	// IdleConn is the idle connection timeout (yaml: idleConn).
	IdleConn int `yaml:"idleConn"`
}
|
||||
|
||||
type ModelConfig struct {
|
||||
Cmd string `yaml:"cmd"`
|
||||
CmdStop string `yaml:"cmdStop"`
|
||||
@@ -36,6 +49,9 @@ type ModelConfig struct {
|
||||
|
||||
// override global setting
|
||||
SendLoadingState *bool `yaml:"sendLoadingState"`
|
||||
|
||||
// Timeout settings for proxy connections
|
||||
Timeouts TimeoutsConfig `yaml:"timeouts"`
|
||||
}
|
||||
|
||||
func (m *ModelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
@@ -47,12 +63,19 @@ func (m *ModelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
Aliases: []string{},
|
||||
Env: []string{},
|
||||
CheckEndpoint: "/health",
|
||||
UnloadAfter: 0,
|
||||
UnloadAfter: MODEL_CONFIG_DEFAULT_TTL, // use GlobalTTL
|
||||
Unlisted: false,
|
||||
UseModelName: "",
|
||||
ConcurrencyLimit: 0,
|
||||
Name: "",
|
||||
Description: "",
|
||||
Timeouts: TimeoutsConfig{
|
||||
Connect: 30,
|
||||
ResponseHeader: 60,
|
||||
TLSHandshake: 10,
|
||||
ExpectContinue: 1,
|
||||
IdleConn: 90,
|
||||
},
|
||||
}
|
||||
|
||||
// the default cmdStop to taskkill /f /t /pid ${PID}
|
||||
|
||||
@@ -12,6 +12,9 @@ type PeerConfig struct {
|
||||
ApiKey string `yaml:"apiKey"`
|
||||
Models []string `yaml:"models"`
|
||||
Filters Filters `yaml:"filters"`
|
||||
|
||||
// Timeout settings for proxy connections
|
||||
Timeouts TimeoutsConfig `yaml:"timeouts"`
|
||||
}
|
||||
|
||||
func (c *PeerConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
@@ -21,6 +24,13 @@ func (c *PeerConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
ApiKey: "",
|
||||
Models: []string{},
|
||||
Filters: Filters{},
|
||||
Timeouts: TimeoutsConfig{
|
||||
Connect: 30,
|
||||
ResponseHeader: 60,
|
||||
TLSHandshake: 10,
|
||||
ExpectContinue: 1,
|
||||
IdleConn: 90,
|
||||
},
|
||||
}
|
||||
|
||||
if err := unmarshal(&defaults); err != nil {
|
||||
|
||||
@@ -350,6 +350,11 @@ func processStreamingResponse(modelID string, start time.Time, body []byte) (Tok
|
||||
usage := parsed.Get("usage")
|
||||
timings := parsed.Get("timings")
|
||||
|
||||
// v1/responses format nests usage under response.usage
|
||||
if !usage.Exists() {
|
||||
usage = parsed.Get("response.usage")
|
||||
}
|
||||
|
||||
if usage.Exists() || timings.Exists() {
|
||||
return parseMetrics(modelID, start, usage, timings)
|
||||
}
|
||||
@@ -360,6 +365,8 @@ func processStreamingResponse(modelID string, start time.Time, body []byte) (Tok
|
||||
}
|
||||
|
||||
func parseMetrics(modelID string, start time.Time, usage, timings gjson.Result) (TokenMetrics, error) {
|
||||
wallDurationMs := int(time.Since(start).Milliseconds())
|
||||
|
||||
// default values
|
||||
cachedTokens := -1 // unknown or missing data
|
||||
outputTokens := 0
|
||||
@@ -368,7 +375,7 @@ func parseMetrics(modelID string, start time.Time, usage, timings gjson.Result)
|
||||
// timings data
|
||||
tokensPerSecond := -1.0
|
||||
promptPerSecond := -1.0
|
||||
durationMs := int(time.Since(start).Milliseconds())
|
||||
durationMs := wallDurationMs
|
||||
|
||||
if usage.Exists() {
|
||||
if pt := usage.Get("prompt_tokens"); pt.Exists() {
|
||||
@@ -397,7 +404,10 @@ func parseMetrics(modelID string, start time.Time, usage, timings gjson.Result)
|
||||
outputTokens = int(timings.Get("predicted_n").Int())
|
||||
promptPerSecond = timings.Get("prompt_per_second").Float()
|
||||
tokensPerSecond = timings.Get("predicted_per_second").Float()
|
||||
durationMs = int(timings.Get("prompt_ms").Float() + timings.Get("predicted_ms").Float())
|
||||
timingsDurationMs := int(timings.Get("prompt_ms").Float() + timings.Get("predicted_ms").Float())
|
||||
if timingsDurationMs > durationMs {
|
||||
durationMs = timingsDurationMs
|
||||
}
|
||||
|
||||
if cachedValue := timings.Get("cache_n"); cachedValue.Exists() {
|
||||
cachedTokens = int(cachedValue.Int())
|
||||
@@ -503,9 +513,9 @@ func filterAcceptEncoding(acceptEncoding string) string {
|
||||
supported := map[string]bool{"gzip": true, "deflate": true}
|
||||
var filtered []string
|
||||
|
||||
for _, part := range strings.Split(acceptEncoding, ",") {
|
||||
for part := range strings.SplitSeq(acceptEncoding, ",") {
|
||||
// Parse encoding and optional quality value (e.g., "gzip;q=1.0")
|
||||
encoding := strings.TrimSpace(strings.Split(part, ";")[0])
|
||||
encoding, _, _ := strings.Cut(strings.TrimSpace(part), ";")
|
||||
if supported[strings.ToLower(encoding)] {
|
||||
filtered = append(filtered, strings.TrimSpace(part))
|
||||
}
|
||||
|
||||
@@ -14,6 +14,7 @@ import (
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/mostlygeek/llama-swap/event"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/tidwall/gjson"
|
||||
)
|
||||
|
||||
func TestMetricsMonitor_AddMetrics(t *testing.T) {
|
||||
@@ -570,6 +571,27 @@ func TestMetricsMonitor_Concurrent(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestMetricsMonitor_ParseMetrics(t *testing.T) {
|
||||
t.Run("keeps wall clock duration when timings underreport request time", func(t *testing.T) {
|
||||
start := time.Now().Add(-5 * time.Second)
|
||||
usage := gjson.Parse(`{"prompt_tokens": 5, "completion_tokens": 1}`)
|
||||
timings := gjson.Parse(`{
|
||||
"prompt_n": 5,
|
||||
"predicted_n": 1,
|
||||
"prompt_per_second": 10.0,
|
||||
"predicted_per_second": 2.0,
|
||||
"prompt_ms": 5.0,
|
||||
"predicted_ms": 15.0
|
||||
}`)
|
||||
|
||||
metrics, err := parseMetrics("test-model", start, usage, timings)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 5, metrics.InputTokens)
|
||||
assert.Equal(t, 1, metrics.OutputTokens)
|
||||
assert.Equal(t, 10.0, metrics.PromptPerSecond)
|
||||
assert.Equal(t, 2.0, metrics.TokensPerSecond)
|
||||
assert.GreaterOrEqual(t, metrics.DurationMs, 5000)
|
||||
})
|
||||
|
||||
t.Run("prefers timings over usage data", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||
|
||||
@@ -709,6 +731,35 @@ data: [DONE]
|
||||
assert.Equal(t, 0, metrics[0].OutputTokens)
|
||||
})
|
||||
|
||||
t.Run("v1/responses format with nested response.usage", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||
|
||||
// v1/responses SSE format: usage is nested under response.usage
|
||||
responseBody := "event: response.completed\n" +
|
||||
`data: {"type":"response.completed","response":{"id":"resp_abc","object":"response","created_at":1773416985,"status":"completed","model":"test-model","output":[],"usage":{"input_tokens":17,"output_tokens":23,"total_tokens":40}}}` +
|
||||
"\n\n"
|
||||
|
||||
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
||||
w.Header().Set("Content-Type", "text/event-stream")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte(responseBody))
|
||||
return nil
|
||||
}
|
||||
|
||||
req := httptest.NewRequest("POST", "/v1/responses", nil)
|
||||
rec := httptest.NewRecorder()
|
||||
ginCtx, _ := gin.CreateTestContext(rec)
|
||||
|
||||
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
||||
assert.NoError(t, err)
|
||||
|
||||
metrics := mm.getMetrics()
|
||||
assert.Equal(t, 1, len(metrics))
|
||||
assert.Equal(t, "test-model", metrics[0].Model)
|
||||
assert.Equal(t, 17, metrics[0].InputTokens)
|
||||
assert.Equal(t, 23, metrics[0].OutputTokens)
|
||||
})
|
||||
|
||||
t.Run("handles empty streaming response records minimal metrics", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||
|
||||
|
||||
+17
-15
@@ -34,23 +34,25 @@ func NewPeerProxy(peers config.PeerDictionaryConfig, proxyLogger *LogMonitor) (*
|
||||
}
|
||||
sort.Strings(peerIDs)
|
||||
|
||||
// Create a shared transport with reasonable timeouts for peer connections
|
||||
// these can be tuned with feedback later
|
||||
peerTransport := &http.Transport{
|
||||
DialContext: (&net.Dialer{
|
||||
Timeout: 30 * time.Second, // Connection timeout
|
||||
KeepAlive: 30 * time.Second,
|
||||
}).DialContext,
|
||||
TLSHandshakeTimeout: 10 * time.Second,
|
||||
ResponseHeaderTimeout: 60 * time.Second, // Time to wait for response headers
|
||||
ExpectContinueTimeout: 1 * time.Second,
|
||||
MaxIdleConns: 100,
|
||||
MaxIdleConnsPerHost: 10,
|
||||
IdleConnTimeout: 90 * time.Second,
|
||||
}
|
||||
|
||||
for _, peerID := range peerIDs {
|
||||
peer := peers[peerID]
|
||||
|
||||
// Create a transport with per-peer timeout configuration
|
||||
peerTransport := &http.Transport{
|
||||
Proxy: http.ProxyFromEnvironment,
|
||||
DialContext: (&net.Dialer{
|
||||
Timeout: time.Duration(peer.Timeouts.Connect) * time.Second,
|
||||
KeepAlive: 30 * time.Second,
|
||||
}).DialContext,
|
||||
TLSHandshakeTimeout: time.Duration(peer.Timeouts.TLSHandshake) * time.Second,
|
||||
ResponseHeaderTimeout: time.Duration(peer.Timeouts.ResponseHeader) * time.Second,
|
||||
ExpectContinueTimeout: time.Duration(peer.Timeouts.ExpectContinue) * time.Second,
|
||||
ForceAttemptHTTP2: true,
|
||||
MaxIdleConns: 100,
|
||||
MaxIdleConnsPerHost: 10,
|
||||
IdleConnTimeout: time.Duration(peer.Timeouts.IdleConn) * time.Second,
|
||||
}
|
||||
|
||||
// Create reverse proxy for this peer
|
||||
reverseProxy := httputil.NewSingleHostReverseProxy(peer.ProxyURL)
|
||||
reverseProxy.Transport = peerTransport
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"net/url"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/mostlygeek/llama-swap/proxy/config"
|
||||
"github.com/stretchr/testify/assert"
|
||||
@@ -266,3 +267,45 @@ func TestProxyRequest_SSEHeaderModification(t *testing.T) {
|
||||
// The X-Accel-Buffering header should be set to "no" for SSE
|
||||
assert.Equal(t, "no", w.Header().Get("X-Accel-Buffering"))
|
||||
}
|
||||
|
||||
func TestNewPeerProxy_CustomTimeouts(t *testing.T) {
|
||||
proxyURL, _ := url.Parse("http://localhost:8080")
|
||||
|
||||
peers := config.PeerDictionaryConfig{
|
||||
"test-peer": config.PeerConfig{
|
||||
Proxy: "http://localhost:8080",
|
||||
ProxyURL: proxyURL,
|
||||
Models: []string{"model1"},
|
||||
Timeouts: config.TimeoutsConfig{
|
||||
Connect: 45,
|
||||
ResponseHeader: 300,
|
||||
TLSHandshake: 15,
|
||||
ExpectContinue: 2,
|
||||
IdleConn: 120,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
peerProxy, err := NewPeerProxy(peers, testLogger)
|
||||
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, peerProxy)
|
||||
assert.True(t, peerProxy.HasPeerModel("model1"))
|
||||
|
||||
// Verify the timeout values are actually applied to the transport
|
||||
member, found := peerProxy.proxyMap["model1"]
|
||||
require.True(t, found, "model1 should exist in proxyMap")
|
||||
assert.NotNil(t, member.reverseProxy)
|
||||
assert.NotNil(t, member.reverseProxy.Transport)
|
||||
|
||||
transport, ok := member.reverseProxy.Transport.(*http.Transport)
|
||||
require.True(t, ok, "Transport should be *http.Transport")
|
||||
|
||||
// Verify all timeout values are correctly applied
|
||||
assert.Equal(t, 300*time.Second, transport.ResponseHeaderTimeout)
|
||||
assert.Equal(t, 15*time.Second, transport.TLSHandshakeTimeout)
|
||||
assert.Equal(t, 2*time.Second, transport.ExpectContinueTimeout)
|
||||
assert.Equal(t, 120*time.Second, transport.IdleConnTimeout)
|
||||
// ForceAttemptHTTP2 should be enabled
|
||||
assert.True(t, transport.ForceAttemptHTTP2)
|
||||
}
|
||||
|
||||
@@ -96,6 +96,24 @@ func NewProcess(ID string, healthCheckTimeout int, config config.ModelConfig, pr
|
||||
var reverseProxy *httputil.ReverseProxy
|
||||
if proxyURL != nil {
|
||||
reverseProxy = httputil.NewSingleHostReverseProxy(proxyURL)
|
||||
|
||||
// Create custom transport with configured timeouts
|
||||
transport := &http.Transport{
|
||||
Proxy: http.ProxyFromEnvironment,
|
||||
DialContext: (&net.Dialer{
|
||||
Timeout: time.Duration(config.Timeouts.Connect) * time.Second,
|
||||
KeepAlive: 30 * time.Second,
|
||||
}).DialContext,
|
||||
TLSHandshakeTimeout: time.Duration(config.Timeouts.TLSHandshake) * time.Second,
|
||||
ResponseHeaderTimeout: time.Duration(config.Timeouts.ResponseHeader) * time.Second,
|
||||
ExpectContinueTimeout: time.Duration(config.Timeouts.ExpectContinue) * time.Second,
|
||||
ForceAttemptHTTP2: true,
|
||||
MaxIdleConns: 100,
|
||||
MaxIdleConnsPerHost: 10,
|
||||
IdleConnTimeout: time.Duration(config.Timeouts.IdleConn) * time.Second,
|
||||
}
|
||||
reverseProxy.Transport = transport
|
||||
|
||||
reverseProxy.ModifyResponse = func(resp *http.Response) error {
|
||||
// prevent nginx from buffering streaming responses (e.g., SSE)
|
||||
if strings.Contains(strings.ToLower(resp.Header.Get("Content-Type")), "text/event-stream") {
|
||||
|
||||
+47
-10
@@ -2,6 +2,7 @@ package proxy
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
@@ -117,12 +118,12 @@ func TestProcess_UnloadAfterTTL(t *testing.T) {
|
||||
}
|
||||
|
||||
expectedMessage := "I_sense_imminent_danger"
|
||||
config := getTestSimpleResponderConfig(expectedMessage)
|
||||
assert.Equal(t, 0, config.UnloadAfter)
|
||||
config.UnloadAfter = 3 // seconds
|
||||
assert.Equal(t, 3, config.UnloadAfter)
|
||||
conf := getTestSimpleResponderConfig(expectedMessage)
|
||||
assert.Equal(t, config.MODEL_CONFIG_DEFAULT_TTL, conf.UnloadAfter)
|
||||
conf.UnloadAfter = 3 // seconds
|
||||
assert.Equal(t, 3, conf.UnloadAfter)
|
||||
|
||||
process := NewProcess("ttl_test", 2, config, debugLogger, debugLogger)
|
||||
process := NewProcess("ttl_test", 2, conf, debugLogger, debugLogger)
|
||||
defer process.Stop()
|
||||
|
||||
// this should take 4 seconds
|
||||
@@ -159,12 +160,12 @@ func TestProcess_LowTTLValue(t *testing.T) {
|
||||
t.Skip("skipping test, edit process_test.go to run it ")
|
||||
}
|
||||
|
||||
config := getTestSimpleResponderConfig("fast_ttl")
|
||||
assert.Equal(t, 0, config.UnloadAfter)
|
||||
config.UnloadAfter = 1 // second
|
||||
assert.Equal(t, 1, config.UnloadAfter)
|
||||
conf := getTestSimpleResponderConfig("fast_ttl")
|
||||
assert.Equal(t, config.MODEL_CONFIG_DEFAULT_TTL, conf.UnloadAfter)
|
||||
conf.UnloadAfter = 1 // second
|
||||
assert.Equal(t, 1, conf.UnloadAfter)
|
||||
|
||||
process := NewProcess("ttl", 2, config, debugLogger, debugLogger)
|
||||
process := NewProcess("ttl", 2, conf, debugLogger, debugLogger)
|
||||
defer process.Stop()
|
||||
|
||||
for i := 0; i < 100; i++ {
|
||||
@@ -569,3 +570,39 @@ func (w *panicOnWriteResponseWriter) Write(b []byte) (int, error) {
|
||||
}
|
||||
return w.ResponseRecorder.Write(b)
|
||||
}
|
||||
|
||||
func TestProcess_CustomTimeouts(t *testing.T) {
|
||||
modelConfig := config.ModelConfig{
|
||||
Cmd: "echo test",
|
||||
Proxy: "http://localhost:8080",
|
||||
CheckEndpoint: "/health",
|
||||
Timeouts: config.TimeoutsConfig{
|
||||
Connect: 45,
|
||||
ResponseHeader: 120,
|
||||
TLSHandshake: 15,
|
||||
ExpectContinue: 2,
|
||||
IdleConn: 120,
|
||||
},
|
||||
}
|
||||
|
||||
debugLogger := NewLogMonitorWriter(io.Discard)
|
||||
process := NewProcess("test-model", 30, modelConfig, debugLogger, debugLogger)
|
||||
|
||||
// Verify the process was created successfully
|
||||
assert.NotNil(t, process)
|
||||
assert.Equal(t, "test-model", process.ID)
|
||||
assert.NotNil(t, process.reverseProxy)
|
||||
assert.NotNil(t, process.reverseProxy.Transport)
|
||||
|
||||
// Verify it's using http.Transport (not some other type)
|
||||
transport, ok := process.reverseProxy.Transport.(*http.Transport)
|
||||
assert.True(t, ok, "Transport should be *http.Transport")
|
||||
assert.NotNil(t, transport)
|
||||
|
||||
// Verify the timeouts are correctly applied
|
||||
assert.Equal(t, 120*time.Second, transport.ResponseHeaderTimeout)
|
||||
assert.Equal(t, 15*time.Second, transport.TLSHandshakeTimeout)
|
||||
assert.Equal(t, 2*time.Second, transport.ExpectContinueTimeout)
|
||||
assert.Equal(t, 120*time.Second, transport.IdleConnTimeout)
|
||||
assert.True(t, transport.ForceAttemptHTTP2)
|
||||
}
|
||||
|
||||
@@ -346,6 +346,11 @@ func (pm *ProxyManager) setupGinEngine() {
|
||||
pm.ginEngine.POST("/v1/images/generations", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
||||
pm.ginEngine.POST("/v1/images/edits", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyOAIPostFormHandler)
|
||||
|
||||
// sd.cpp /sdapi/v1 endpoints
|
||||
pm.ginEngine.POST("/sdapi/v1/txt2img", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
||||
pm.ginEngine.POST("/sdapi/v1/img2img", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
||||
pm.ginEngine.GET("/sdapi/v1/loras", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyGETModelHandler)
|
||||
|
||||
pm.ginEngine.GET("/v1/models", pm.apiKeyAuth(), pm.listModelsHandler)
|
||||
|
||||
// in proxymanager_loghandlers.go
|
||||
|
||||
@@ -730,7 +730,7 @@ func TestProxyManager_RunningEndpoint(t *testing.T) {
|
||||
// Verify extended fields are present
|
||||
assert.NotEmpty(t, response.Running[0].Cmd, "cmd should be populated")
|
||||
assert.NotEmpty(t, response.Running[0].Proxy, "proxy should be populated")
|
||||
assert.Equal(t, 0, response.Running[0].TTL, "ttl should default to 0")
|
||||
assert.Equal(t, -1, response.Running[0].TTL, "ttl should default to -1 (use globalTTL)")
|
||||
})
|
||||
}
|
||||
|
||||
@@ -1659,3 +1659,82 @@ models:
|
||||
assert.Equal(t, "no", w.Header().Get("X-Accel-Buffering"))
|
||||
})
|
||||
}
|
||||
|
||||
func TestProxyManager_SdApiTxt2ImgRouting(t *testing.T) {
|
||||
conf := config.AddDefaultGroupToConfig(config.Config{
|
||||
HealthCheckTimeout: 15,
|
||||
Models: map[string]config.ModelConfig{
|
||||
"sd-model": getTestSimpleResponderConfig("sd-model"),
|
||||
},
|
||||
LogLevel: "error",
|
||||
})
|
||||
|
||||
proxy := New(conf)
|
||||
defer proxy.StopProcesses(StopWaitForInflightRequest)
|
||||
|
||||
t.Run("successful txt2img with model", func(t *testing.T) {
|
||||
reqBody := `{"model":"sd-model","prompt":"a cat"}`
|
||||
req := httptest.NewRequest("POST", "/sdapi/v1/txt2img", bytes.NewBufferString(reqBody))
|
||||
w := CreateTestResponseRecorder()
|
||||
|
||||
proxy.ServeHTTP(w, req)
|
||||
assert.Equal(t, http.StatusOK, w.Code)
|
||||
assert.Contains(t, w.Body.String(), "sd-model")
|
||||
})
|
||||
|
||||
t.Run("successful img2img with model", func(t *testing.T) {
|
||||
reqBody := `{"model":"sd-model","prompt":"a cat","init_images":[]}`
|
||||
req := httptest.NewRequest("POST", "/sdapi/v1/img2img", bytes.NewBufferString(reqBody))
|
||||
w := CreateTestResponseRecorder()
|
||||
|
||||
proxy.ServeHTTP(w, req)
|
||||
assert.Equal(t, http.StatusOK, w.Code)
|
||||
assert.Contains(t, w.Body.String(), "sd-model")
|
||||
})
|
||||
|
||||
t.Run("missing model returns 400", func(t *testing.T) {
|
||||
reqBody := `{"prompt":"a cat"}`
|
||||
req := httptest.NewRequest("POST", "/sdapi/v1/txt2img", bytes.NewBufferString(reqBody))
|
||||
w := CreateTestResponseRecorder()
|
||||
|
||||
proxy.ServeHTTP(w, req)
|
||||
assert.Equal(t, http.StatusBadRequest, w.Code)
|
||||
assert.Contains(t, w.Body.String(), "missing or invalid 'model' key")
|
||||
})
|
||||
}
|
||||
|
||||
func TestProxyManager_SdApiGetLoras(t *testing.T) {
|
||||
conf := config.AddDefaultGroupToConfig(config.Config{
|
||||
HealthCheckTimeout: 15,
|
||||
Models: map[string]config.ModelConfig{
|
||||
"sd-model": getTestSimpleResponderConfig("sd-model"),
|
||||
},
|
||||
LogLevel: "error",
|
||||
})
|
||||
|
||||
proxy := New(conf)
|
||||
defer proxy.StopProcesses(StopWaitForInflightRequest)
|
||||
|
||||
t.Run("successful GET loras with model query param", func(t *testing.T) {
|
||||
req := httptest.NewRequest("GET", "/sdapi/v1/loras?model=sd-model", nil)
|
||||
w := CreateTestResponseRecorder()
|
||||
proxy.ServeHTTP(w, req)
|
||||
assert.Equal(t, http.StatusOK, w.Code)
|
||||
})
|
||||
|
||||
t.Run("missing model query param returns 400", func(t *testing.T) {
|
||||
req := httptest.NewRequest("GET", "/sdapi/v1/loras", nil)
|
||||
w := CreateTestResponseRecorder()
|
||||
proxy.ServeHTTP(w, req)
|
||||
assert.Equal(t, http.StatusBadRequest, w.Code)
|
||||
assert.Contains(t, w.Body.String(), "missing required 'model' query parameter")
|
||||
})
|
||||
|
||||
t.Run("unknown model returns 400", func(t *testing.T) {
|
||||
req := httptest.NewRequest("GET", "/sdapi/v1/loras?model=nonexistent", nil)
|
||||
w := CreateTestResponseRecorder()
|
||||
proxy.ServeHTTP(w, req)
|
||||
assert.Equal(t, http.StatusBadRequest, w.Code)
|
||||
assert.Contains(t, w.Body.String(), "could not find suitable handler")
|
||||
})
|
||||
}
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
legacy-peer-deps=true
|
||||
Generated
+979
-1189
File diff suppressed because it is too large
Load Diff
@@ -12,18 +12,18 @@
|
||||
"test:watch": "vitest"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@sveltejs/vite-plugin-svelte": "^5.0.3",
|
||||
"@sveltejs/vite-plugin-svelte": "^7.0.0",
|
||||
"@tailwindcss/vite": "^4.1.8",
|
||||
"@tsconfig/svelte": "^5.0.4",
|
||||
"@types/hast": "^3.0.4",
|
||||
"@types/node": "^25.1.0",
|
||||
"svelte": "^5.19.0",
|
||||
"svelte": "^5.46.4",
|
||||
"svelte-check": "^4.1.4",
|
||||
"tailwindcss": "^4.1.8",
|
||||
"typescript": "~5.8.3",
|
||||
"vite": "^6.3.5",
|
||||
"vite-plugin-compression2": "^2.4.0",
|
||||
"vitest": "^4.0.18"
|
||||
"vite": "^8.0.0",
|
||||
"vite-plugin-compression2": "^2.5.1",
|
||||
"vitest": "^4.1.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"highlight.js": "^11.11.1",
|
||||
|
||||
@@ -116,6 +116,47 @@
|
||||
cancelEdit();
|
||||
}
|
||||
}
|
||||
|
||||
const COPY_SVG = `<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect width="14" height="14" x="8" y="8" rx="2" ry="2"/><path d="M4 16c-1.1 0-2-.9-2-2V4c0-1.1.9-2 2-2h10c1.1 0 2 .9 2 2"/></svg>`;
|
||||
const CHECK_SVG = `<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M20 6 9 17l-5-5"/></svg>`;
|
||||
|
||||
/**
 * Svelte action that adds a "Copy code" button to every `<pre>` inside `node`.
 *
 * A MutationObserver re-runs the scan whenever descendants change, so blocks
 * rendered later also get a button. Each `<pre>` is tagged with
 * `data-copy-btn` once processed, which keeps the observer from re-adding
 * buttons in response to its own DOM insertions.
 *
 * @param node - Container element whose `<pre>` descendants get a button.
 * @returns An action handle whose `destroy` disconnects the observer.
 */
function codeBlockCopy(node: HTMLElement) {
  /**
   * Writes `text` to the clipboard. Uses the async Clipboard API in secure
   * contexts and a hidden-textarea `execCommand('copy')` fallback otherwise.
   * Throws when the copy did not happen, so the caller never reports a
   * false success.
   */
  async function copyText(text: string): Promise<void> {
    if (navigator.clipboard && window.isSecureContext) {
      await navigator.clipboard.writeText(text);
      return;
    }
    const ta = document.createElement('textarea');
    ta.value = text;
    ta.style.cssText = 'position:fixed;left:-9999px';
    document.body.appendChild(ta);
    try {
      ta.select();
      // execCommand signals failure by returning false rather than throwing.
      if (!document.execCommand('copy')) {
        throw new Error('document.execCommand("copy") returned false');
      }
    } finally {
      // Always remove the helper element, even when the copy failed.
      document.body.removeChild(ta);
    }
  }

  function attachButtons() {
    node.querySelectorAll<HTMLPreElement>('pre:not([data-copy-btn])').forEach(pre => {
      pre.setAttribute('data-copy-btn', 'true');
      const btn = document.createElement('button');
      btn.className = 'code-copy-btn';
      btn.title = 'Copy code';
      btn.innerHTML = COPY_SVG;

      // Pending "revert to copy icon" timer for this button, if any.
      let resetTimer: ReturnType<typeof setTimeout> | undefined;

      btn.addEventListener('click', async () => {
        const text = pre.querySelector('code')?.textContent ?? pre.textContent ?? '';
        try {
          await copyText(text);
          btn.innerHTML = CHECK_SVG;
          btn.classList.add('copied');
          // Restart the countdown so an earlier click cannot revert the icon early.
          clearTimeout(resetTimer);
          resetTimer = setTimeout(() => {
            btn.innerHTML = COPY_SVG;
            btn.classList.remove('copied');
          }, 2000);
        } catch (e) {
          console.error('copy failed', e);
        }
      });
      pre.appendChild(btn);
    });
  }

  attachButtons();
  const mo = new MutationObserver(attachButtons);
  mo.observe(node, { childList: true, subtree: true });
  return { destroy: () => mo.disconnect() };
}
|
||||
</script>
|
||||
|
||||
<div class="flex {role === 'user' ? 'justify-end' : 'justify-start'} mb-4">
|
||||
@@ -174,7 +215,7 @@
|
||||
{#if showRaw}
|
||||
<div class="whitespace-pre-wrap font-mono text-sm">{textContent}</div>
|
||||
{:else}
|
||||
<div class="prose prose-sm dark:prose-invert max-w-none">
|
||||
<div class="prose prose-sm dark:prose-invert max-w-none" use:codeBlockCopy>
|
||||
{#each renderedParts.blocks as block (block.id)}
|
||||
{@html block.html}
|
||||
{/each}
|
||||
@@ -299,14 +340,42 @@
|
||||
|
||||
<style>
|
||||
.prose :global(pre) {
|
||||
position: relative;
|
||||
background-color: var(--color-surface);
|
||||
border: 1px solid var(--color-border, rgba(128, 128, 128, 0.2));
|
||||
border-radius: 0.375rem;
|
||||
padding: 0.75rem;
|
||||
padding-right: 2.5rem;
|
||||
overflow-x: auto;
|
||||
margin: 0.5rem 0;
|
||||
}
|
||||
|
||||
.prose :global(.code-copy-btn) {
|
||||
position: absolute;
|
||||
top: 0.375rem;
|
||||
right: 0.375rem;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
padding: 0.25rem;
|
||||
border-radius: 0.25rem;
|
||||
border: 1px solid var(--color-border);
|
||||
background: var(--color-surface);
|
||||
color: var(--color-txtsecondary);
|
||||
cursor: pointer;
|
||||
transition: background-color 0.15s;
|
||||
line-height: 0;
|
||||
}
|
||||
|
||||
.prose :global(.code-copy-btn:hover) {
|
||||
background: var(--color-secondary);
|
||||
}
|
||||
|
||||
.prose :global(.code-copy-btn.copied) {
|
||||
color: var(--color-success);
|
||||
opacity: 1;
|
||||
}
|
||||
|
||||
.prose :global(code) {
|
||||
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace;
|
||||
font-size: 0.875em;
|
||||
|
||||
@@ -2,26 +2,91 @@
|
||||
import { models } from "../../stores/api";
|
||||
import { persistentStore } from "../../stores/persistent";
|
||||
import { generateImage } from "../../lib/imageApi";
|
||||
import { generateSdImage, fetchSdLoras } from "../../lib/sdApi";
|
||||
import { playgroundStores } from "../../stores/playgroundActivity";
|
||||
import ModelSelector from "./ModelSelector.svelte";
|
||||
import ExpandableTextarea from "./ExpandableTextarea.svelte";
|
||||
import type { ImageApiMode, SdApiLora, SdApiLoraRef } from "../../lib/types";
|
||||
|
||||
const selectedModelStore = persistentStore<string>("playground-image-model", "");
|
||||
const selectedSizeStore = persistentStore<string>("playground-image-size", "1024x1024");
|
||||
const apiModeStore = persistentStore<ImageApiMode>("playground-image-api-mode", "openai");
|
||||
|
||||
// SDAPI persistent settings
|
||||
const sdNegativePromptStore = persistentStore<string>("playground-sdapi-negative-prompt", "");
|
||||
const sdStepsStore = persistentStore<number>("playground-sdapi-steps", 20);
|
||||
const sdCfgScaleStore = persistentStore<number>("playground-sdapi-cfg-scale", 7);
|
||||
const sdSeedStore = persistentStore<number>("playground-sdapi-seed", -1);
|
||||
const sdSamplerStore = persistentStore<string>("playground-sdapi-sampler", "");
|
||||
const sdSchedulerStore = persistentStore<string>("playground-sdapi-scheduler", "");
|
||||
const sdBatchSizeStore = persistentStore<number>("playground-sdapi-batch-size", 1);
|
||||
|
||||
let prompt = $state("");
|
||||
let isGenerating = $state(false);
|
||||
let generatedImage = $state<string | null>(null);
|
||||
let generatedImages = $state<string[]>([]);
|
||||
let error = $state<string | null>(null);
|
||||
let abortController = $state<AbortController | null>(null);
|
||||
let showFullscreen = $state(false);
|
||||
let fullscreenIndex = $state(0);
|
||||
let showSettings = $state(false);
|
||||
|
||||
// SDAPI lora state
|
||||
let availableLoras = $state<SdApiLora[]>([]);
|
||||
let selectedLoras = $state<SdApiLoraRef[]>([]);
|
||||
let isLoadingLoras = $state(false);
|
||||
let lorasLoaded = $state(false);
|
||||
let loraError = $state<string | null>(null);
|
||||
|
||||
let hasModels = $derived($models.some((m) => !m.unlisted));
|
||||
let isSdapi = $derived($apiModeStore === "sdapi");
|
||||
|
||||
$effect(() => {
|
||||
playgroundStores.imageGenerating.set(isGenerating);
|
||||
});
|
||||
|
||||
/**
 * Fetches the LoRA list for the currently selected model and stores it in
 * `availableLoras`. Does nothing when no model is selected or a load is
 * already running. On failure the list is emptied and `loraError` receives
 * the error message.
 */
async function loadLoras() {
  const model = $selectedModelStore;
  if (isLoadingLoras || !model) {
    return;
  }

  loraError = null;
  isLoadingLoras = true;
  try {
    availableLoras = await fetchSdLoras(model);
    lorasLoaded = true;
  } catch (cause) {
    let message = "Failed to load LoRAs";
    if (cause instanceof Error) {
      message = cause.message;
    }
    availableLoras = [];
    lorasLoaded = false;
    loraError = message;
  } finally {
    isLoadingLoras = false;
  }
}
|
||||
|
||||
/**
 * Change handler for the "Add a LoRA..." dropdown. Appends the chosen LoRA to
 * `selectedLoras` with a multiplier of 1.0, provided it is a known LoRA that
 * is not already selected, then resets the dropdown to its placeholder.
 */
function addLora(event: Event) {
  const dropdown = event.target as HTMLSelectElement;
  const chosenPath = dropdown.value;
  if (!chosenPath) {
    return;
  }

  const match = availableLoras.find((candidate) => candidate.path === chosenPath);
  const alreadySelected = selectedLoras.some((entry) => entry.path === chosenPath);
  if (match && !alreadySelected) {
    const added = { path: match.path, multiplier: 1.0 };
    selectedLoras = [...selectedLoras, added];
  }

  // Return to the placeholder option so the same dropdown can be reused.
  dropdown.value = "";
}
|
||||
|
||||
/** Drops the entry with the given `path` from `selectedLoras`. */
function removeLora(path: string) {
  const remaining = selectedLoras.filter((entry) => entry.path !== path);
  selectedLoras = remaining;
}
|
||||
|
||||
/**
 * Replaces `selectedLoras` with a copy in which the entry matching `path`
 * carries the new `multiplier`; all other entries are kept as they are.
 */
function updateLoraMultiplier(path: string, multiplier: number) {
  selectedLoras = selectedLoras.map((entry) => {
    if (entry.path !== path) {
      return entry;
    }
    return { ...entry, multiplier };
  });
}
|
||||
|
||||
/**
 * Returns the display name of the LoRA with the given `path`, falling back to
 * the path itself when it is not in `availableLoras` (or has no name).
 */
function getLoraName(path: string): string {
  const match = availableLoras.find((candidate) => candidate.path === path);
  return match?.name ?? path;
}
|
||||
|
||||
async function generate() {
|
||||
const trimmedPrompt = prompt.trim();
|
||||
if (!trimmedPrompt || !$selectedModelStore || isGenerating) return;
|
||||
@@ -31,19 +96,44 @@
|
||||
abortController = new AbortController();
|
||||
|
||||
try {
|
||||
const response = await generateImage(
|
||||
$selectedModelStore,
|
||||
trimmedPrompt,
|
||||
$selectedSizeStore,
|
||||
abortController.signal
|
||||
);
|
||||
if (isSdapi) {
|
||||
const [w, h] = $selectedSizeStore.split("x").map(Number);
|
||||
const request = {
|
||||
model: $selectedModelStore,
|
||||
prompt: trimmedPrompt,
|
||||
negative_prompt: $sdNegativePromptStore || undefined,
|
||||
width: w,
|
||||
height: h,
|
||||
steps: $sdStepsStore,
|
||||
cfg_scale: $sdCfgScaleStore,
|
||||
seed: $sdSeedStore,
|
||||
batch_size: $sdBatchSizeStore,
|
||||
sampler_name: $sdSamplerStore || undefined,
|
||||
scheduler: $sdSchedulerStore || undefined,
|
||||
lora: selectedLoras.length > 0 ? selectedLoras : undefined,
|
||||
};
|
||||
|
||||
if (response.data && response.data.length > 0) {
|
||||
const imageData = response.data[0];
|
||||
if (imageData.b64_json) {
|
||||
generatedImage = `data:image/png;base64,${imageData.b64_json}`;
|
||||
} else if (imageData.url) {
|
||||
generatedImage = imageData.url;
|
||||
const response = await generateSdImage(request, abortController.signal);
|
||||
if (response.images && response.images.length > 0) {
|
||||
generatedImages = response.images.map(
|
||||
(img) => `data:image/png;base64,${img}`
|
||||
);
|
||||
}
|
||||
} else {
|
||||
const response = await generateImage(
|
||||
$selectedModelStore,
|
||||
trimmedPrompt,
|
||||
$selectedSizeStore,
|
||||
abortController.signal
|
||||
);
|
||||
|
||||
if (response.data && response.data.length > 0) {
|
||||
const imageData = response.data[0];
|
||||
if (imageData.b64_json) {
|
||||
generatedImages = [`data:image/png;base64,${imageData.b64_json}`];
|
||||
} else if (imageData.url) {
|
||||
generatedImages = [imageData.url];
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
@@ -63,28 +153,29 @@
|
||||
}
|
||||
|
||||
function clearImage() {
|
||||
generatedImage = null;
|
||||
generatedImages = [];
|
||||
error = null;
|
||||
prompt = "";
|
||||
}
|
||||
|
||||
function downloadImage() {
|
||||
if (!generatedImage) return;
|
||||
function downloadImage(index: number = 0) {
|
||||
const img = generatedImages[index];
|
||||
if (!img) return;
|
||||
|
||||
const link = document.createElement("a");
|
||||
link.href = generatedImage;
|
||||
link.download = `generated-image-${Date.now()}.png`;
|
||||
link.href = img;
|
||||
link.download = `generated-image-${Date.now()}-${index}.png`;
|
||||
document.body.appendChild(link);
|
||||
link.click();
|
||||
document.body.removeChild(link);
|
||||
}
|
||||
|
||||
function openFullscreen() {
|
||||
function openFullscreen(index: number = 0) {
|
||||
fullscreenIndex = index;
|
||||
showFullscreen = true;
|
||||
}
|
||||
|
||||
function closeFullscreen(event?: MouseEvent) {
|
||||
// Only close if clicking the background, not the image
|
||||
if (event && event.target !== event.currentTarget) {
|
||||
return;
|
||||
}
|
||||
@@ -100,9 +191,19 @@
|
||||
</script>
|
||||
|
||||
<div class="flex flex-col h-full">
|
||||
<!-- Model selector -->
|
||||
<!-- Model selector and mode toggle -->
|
||||
<div class="shrink-0 flex flex-wrap gap-2 mb-4">
|
||||
<ModelSelector bind:value={$selectedModelStore} placeholder="Select an image model..." disabled={isGenerating} />
|
||||
|
||||
<select
|
||||
class="px-3 py-2 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
|
||||
bind:value={$apiModeStore}
|
||||
disabled={isGenerating}
|
||||
>
|
||||
<option value="openai">OpenAI</option>
|
||||
<option value="sdapi">SDAPI</option>
|
||||
</select>
|
||||
|
||||
<select
|
||||
class="px-3 py-2 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
|
||||
bind:value={$selectedSizeStore}
|
||||
@@ -123,8 +224,166 @@
|
||||
<option value="1024x1792">1024x1792 (SDXL)</option>
|
||||
</optgroup>
|
||||
</select>
|
||||
|
||||
{#if isSdapi}
|
||||
<button
|
||||
class="px-3 py-2 rounded border border-gray-200 dark:border-white/10 bg-surface hover:bg-secondary-hover transition-colors"
|
||||
onclick={() => showSettings = !showSettings}
|
||||
>
|
||||
{showSettings ? "Hide Settings" : "Settings"}
|
||||
</button>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
<!-- SDAPI Settings Panel -->
|
||||
{#if isSdapi && showSettings}
|
||||
<div class="shrink-0 mb-4 p-4 rounded border border-gray-200 dark:border-white/10 bg-surface">
|
||||
<div class="grid grid-cols-2 md:grid-cols-4 gap-3 mb-3">
|
||||
<label class="flex flex-col gap-1">
|
||||
<span class="text-xs text-txtsecondary">Steps</span>
|
||||
<input
|
||||
type="number"
|
||||
class="px-2 py-1 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
|
||||
bind:value={$sdStepsStore}
|
||||
min="1"
|
||||
max="150"
|
||||
/>
|
||||
</label>
|
||||
<label class="flex flex-col gap-1">
|
||||
<span class="text-xs text-txtsecondary">CFG Scale</span>
|
||||
<input
|
||||
type="number"
|
||||
class="px-2 py-1 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
|
||||
bind:value={$sdCfgScaleStore}
|
||||
min="1"
|
||||
max="30"
|
||||
step="0.5"
|
||||
/>
|
||||
</label>
|
||||
<label class="flex flex-col gap-1">
|
||||
<span class="text-xs text-txtsecondary">Seed (-1 = random)</span>
|
||||
<input
|
||||
type="number"
|
||||
class="px-2 py-1 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
|
||||
bind:value={$sdSeedStore}
|
||||
min="-1"
|
||||
/>
|
||||
</label>
|
||||
<label class="flex flex-col gap-1">
|
||||
<span class="text-xs text-txtsecondary">Batch Size</span>
|
||||
<input
|
||||
type="number"
|
||||
class="px-2 py-1 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
|
||||
bind:value={$sdBatchSizeStore}
|
||||
min="1"
|
||||
max="8"
|
||||
/>
|
||||
</label>
|
||||
<label class="flex flex-col gap-1">
|
||||
<span class="text-xs text-txtsecondary">Sampler</span>
|
||||
<select
|
||||
class="px-2 py-1 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
|
||||
bind:value={$sdSamplerStore}
|
||||
>
|
||||
<option value="">Default</option>
|
||||
<option value="euler_a">euler_a</option>
|
||||
<option value="euler">euler</option>
|
||||
<option value="heun">heun</option>
|
||||
<option value="dpm2">dpm2</option>
|
||||
<option value="dpmpp2s_a">dpmpp2s_a</option>
|
||||
<option value="dpmpp2m">dpmpp2m</option>
|
||||
<option value="dpmpp2mv2">dpmpp2mv2</option>
|
||||
<option value="ipndm">ipndm</option>
|
||||
<option value="ipndm_v">ipndm_v</option>
|
||||
<option value="lcm">lcm</option>
|
||||
<option value="ddim_trailing">ddim_trailing</option>
|
||||
<option value="tcd">tcd</option>
|
||||
</select>
|
||||
</label>
|
||||
<label class="flex flex-col gap-1">
|
||||
<span class="text-xs text-txtsecondary">Scheduler</span>
|
||||
<select
|
||||
class="px-2 py-1 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
|
||||
bind:value={$sdSchedulerStore}
|
||||
>
|
||||
<option value="">Auto for model</option>
|
||||
<option value="discrete">discrete</option>
|
||||
<option value="karras">karras</option>
|
||||
<option value="exponential">exponential</option>
|
||||
<option value="ays">ays</option>
|
||||
<option value="gits">gits</option>
|
||||
</select>
|
||||
</label>
|
||||
</div>
|
||||
|
||||
<label class="flex flex-col gap-1 mb-3">
|
||||
<span class="text-xs text-txtsecondary">Negative Prompt</span>
|
||||
<textarea
|
||||
class="px-2 py-1 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary resize-y text-sm"
|
||||
bind:value={$sdNegativePromptStore}
|
||||
rows="2"
|
||||
placeholder="Elements to avoid..."
|
||||
></textarea>
|
||||
</label>
|
||||
|
||||
<!-- LoRA Selection -->
|
||||
<div>
|
||||
<span class="text-xs text-txtsecondary block mb-1">LoRAs</span>
|
||||
<div class="flex items-center gap-2 mb-2">
|
||||
<button
|
||||
class="px-3 py-1.5 text-sm rounded border border-gray-200 dark:border-white/10 bg-surface hover:bg-secondary-hover transition-colors disabled:opacity-50"
|
||||
onclick={loadLoras}
|
||||
disabled={!$selectedModelStore || isLoadingLoras}
|
||||
>
|
||||
{isLoadingLoras ? "Loading..." : lorasLoaded ? "Reload LoRAs" : "Load LoRAs"}
|
||||
</button>
|
||||
{#if lorasLoaded && availableLoras.length > 0}
|
||||
<select
|
||||
class="flex-1 px-2 py-1.5 text-sm rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
|
||||
onchange={addLora}
|
||||
>
|
||||
<option value="">Add a LoRA...</option>
|
||||
{#each availableLoras.filter((l) => !selectedLoras.some((s) => s.path === l.path)) as lora}
|
||||
<option value={lora.path}>{lora.name}</option>
|
||||
{/each}
|
||||
</select>
|
||||
{/if}
|
||||
</div>
|
||||
{#if loraError}
|
||||
<p class="text-xs text-red-500 mb-1">{loraError}</p>
|
||||
{/if}
|
||||
{#if lorasLoaded && availableLoras.length === 0}
|
||||
<p class="text-xs text-txtsecondary">No LoRAs available</p>
|
||||
{/if}
|
||||
{#if selectedLoras.length > 0}
|
||||
<div class="flex flex-col gap-1.5">
|
||||
{#each selectedLoras as lora}
|
||||
<div class="flex items-center gap-2 text-sm">
|
||||
<span class="flex-1 truncate">{getLoraName(lora.path)}</span>
|
||||
<input
|
||||
type="number"
|
||||
class="w-20 px-1.5 py-1 text-xs rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-1 focus:ring-primary"
|
||||
value={lora.multiplier}
|
||||
oninput={(e) => updateLoraMultiplier(lora.path, parseFloat((e.target as HTMLInputElement).value) || 1)}
|
||||
min="0"
|
||||
max="2"
|
||||
step="0.1"
|
||||
/>
|
||||
<button
|
||||
class="px-1.5 py-0.5 text-xs rounded border border-gray-200 dark:border-white/10 hover:bg-red-500 hover:text-white hover:border-red-500 transition-colors"
|
||||
onclick={() => removeLora(lora.path)}
|
||||
aria-label="Remove LoRA"
|
||||
>
|
||||
x
|
||||
</button>
|
||||
</div>
|
||||
{/each}
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
<!-- Empty state for no models configured -->
|
||||
{#if !hasModels}
|
||||
<div class="flex-1 flex items-center justify-center text-txtsecondary">
|
||||
@@ -143,22 +402,50 @@
|
||||
<p class="font-medium">Error</p>
|
||||
<p class="text-sm mt-1">{error}</p>
|
||||
</div>
|
||||
{:else if generatedImage}
|
||||
{:else if generatedImages.length > 1}
|
||||
<!-- Grid for multiple images (batch) -->
|
||||
<div class="grid grid-cols-2 gap-2 p-2 w-full h-full overflow-auto">
|
||||
{#each generatedImages as img, i}
|
||||
<div class="relative flex items-center justify-center">
|
||||
<button
|
||||
class="p-0 border-0 bg-transparent cursor-pointer"
|
||||
onclick={() => openFullscreen(i)}
|
||||
aria-label="View fullscreen"
|
||||
>
|
||||
<img
|
||||
src={img}
|
||||
alt="AI generated content {i + 1}"
|
||||
class="max-w-full max-h-full object-contain hover:opacity-90 transition-opacity"
|
||||
/>
|
||||
</button>
|
||||
<button
|
||||
class="absolute bottom-2 right-2 p-1.5 bg-black/60 hover:bg-black/80 text-white rounded-full transition-colors"
|
||||
onclick={(e) => { e.stopPropagation(); downloadImage(i); }}
|
||||
aria-label="Download image"
|
||||
>
|
||||
<svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M4 16v1a3 3 0 003 3h10a3 3 0 003-3v-1m-4-4l-4 4m0 0l-4-4m4 4V4"></path>
|
||||
</svg>
|
||||
</button>
|
||||
</div>
|
||||
{/each}
|
||||
</div>
|
||||
{:else if generatedImages.length === 1}
|
||||
<div class="relative max-w-full max-h-full flex items-center justify-center">
|
||||
<button
|
||||
class="p-0 border-0 bg-transparent cursor-pointer"
|
||||
onclick={openFullscreen}
|
||||
onclick={() => openFullscreen(0)}
|
||||
aria-label="View fullscreen"
|
||||
>
|
||||
<img
|
||||
src={generatedImage}
|
||||
src={generatedImages[0]}
|
||||
alt="AI generated content"
|
||||
class="max-w-full max-h-full object-contain hover:opacity-90 transition-opacity"
|
||||
/>
|
||||
</button>
|
||||
<button
|
||||
class="absolute bottom-2 right-2 p-2 bg-black/60 hover:bg-black/80 text-white rounded-full transition-colors"
|
||||
onclick={(e) => { e.stopPropagation(); downloadImage(); }}
|
||||
onclick={(e) => { e.stopPropagation(); downloadImage(0); }}
|
||||
aria-label="Download image"
|
||||
>
|
||||
<svg class="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
@@ -198,7 +485,7 @@
|
||||
<button
|
||||
class="btn flex-1 md:flex-none"
|
||||
onclick={clearImage}
|
||||
disabled={!generatedImage && !error && !prompt.trim()}
|
||||
disabled={generatedImages.length === 0 && !error && !prompt.trim()}
|
||||
>
|
||||
Clear
|
||||
</button>
|
||||
@@ -209,7 +496,7 @@
|
||||
</div>
|
||||
|
||||
<!-- Fullscreen dialog -->
|
||||
{#if showFullscreen && generatedImage}
|
||||
{#if showFullscreen && generatedImages[fullscreenIndex]}
|
||||
<div
|
||||
class="fixed inset-0 bg-black/90 z-50 flex items-center justify-center p-4"
|
||||
onclick={(e) => closeFullscreen(e)}
|
||||
@@ -226,7 +513,7 @@
|
||||
×
|
||||
</button>
|
||||
<img
|
||||
src={generatedImage}
|
||||
src={generatedImages[fullscreenIndex]}
|
||||
alt="AI generated content"
|
||||
class="max-w-full max-h-full object-contain pointer-events-none"
|
||||
/>
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
import type { SdApiTxt2ImgRequest, SdApiResponse, SdApiLora } from "./types";
|
||||
|
||||
/**
 * Sends `request` as a JSON POST to `/sdapi/v1/txt2img`.
 *
 * @param request - Generation parameters, serialized verbatim as the body.
 * @param signal - Optional abort signal forwarded to `fetch`.
 * @returns The parsed JSON response body (not validated here).
 * @throws Error carrying the HTTP status and response text when the response
 *   status is not OK.
 */
export async function generateSdImage(
  request: SdApiTxt2ImgRequest,
  signal?: AbortSignal
): Promise<SdApiResponse> {
  const payload = JSON.stringify(request);
  const res = await fetch("/sdapi/v1/txt2img", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: payload,
    signal,
  });

  if (res.ok) {
    return res.json();
  }

  const detail = await res.text();
  throw new Error(`SDAPI error: ${res.status} - ${detail}`);
}
|
||||
|
||||
/**
 * Requests the LoRA list from `/sdapi/v1/loras`, passing the model as the
 * URL-encoded `model` query parameter.
 *
 * @param model - Model identifier placed in the query string.
 * @param signal - Optional abort signal forwarded to `fetch`.
 * @returns The parsed JSON response body (not validated here).
 * @throws Error carrying the HTTP status and response text when the response
 *   status is not OK.
 */
export async function fetchSdLoras(
  model: string,
  signal?: AbortSignal
): Promise<SdApiLora[]> {
  const query = encodeURIComponent(model);
  const res = await fetch(`/sdapi/v1/loras?model=${query}`, { signal });

  if (res.ok) {
    return res.json();
  }

  const detail = await res.text();
  throw new Error(`SDAPI loras error: ${res.status} - ${detail}`);
}
|
||||
@@ -115,6 +115,40 @@ export interface ImageGenerationResponse {
|
||||
}>;
|
||||
}
|
||||
|
||||
// SDAPI types (stable-diffusion.cpp)
|
||||
export type ImageApiMode = "openai" | "sdapi";
|
||||
|
||||
/** One LoRA entry as returned by `GET /sdapi/v1/loras`. */
export interface SdApiLora {
  /** Label shown to the user in the LoRA picker. */
  name: string;
  /** Value used to identify the LoRA when selecting and referencing it. */
  path: string;
}
|
||||
|
||||
/** A LoRA chosen for a generation request, together with its weight. */
export interface SdApiLoraRef {
  /** `path` of the selected `SdApiLora`. */
  path: string;
  /** Weight applied to the LoRA; the playground UI starts it at 1.0. */
  multiplier: number;
}
|
||||
|
||||
/**
 * JSON body posted to `/sdapi/v1/txt2img`. Only `prompt` is required by this
 * type; every other field may be omitted.
 */
export interface SdApiTxt2ImgRequest {
  /** Model to generate with. */
  model?: string;
  /** Text description of the desired image. */
  prompt: string;
  /** Elements the image should avoid. */
  negative_prompt?: string;
  /** Output width. */
  width?: number;
  /** Output height. */
  height?: number;
  /** Number of sampling steps. */
  steps?: number;
  /** CFG scale value. */
  cfg_scale?: number;
  /** Generation seed; the playground UI labels -1 as "random". */
  seed?: number;
  /** Number of images requested in one call. */
  batch_size?: number;
  /** Sampler name, e.g. "euler_a". */
  sampler_name?: string;
  /** Scheduler name, e.g. "karras". */
  scheduler?: string;
  /** LoRAs to apply, each with its multiplier. */
  lora?: SdApiLoraRef[];
}
|
||||
|
||||
/** Response body of `/sdapi/v1/txt2img`. */
export interface SdApiResponse {
  /** Generated images; the playground renders each as a base64 PNG data URL. */
  images: string[];
  /** Parameter map returned by the server; shape is not constrained here. */
  parameters: Record<string, unknown>;
  /** Additional information string returned by the server. */
  info: string;
}
|
||||
|
||||
export interface AudioTranscriptionRequest {
|
||||
file: File;
|
||||
model: string;
|
||||
|
||||
@@ -62,7 +62,7 @@ export function enableAPIEvents(enabled: boolean): void {
|
||||
const newModels = JSON.parse(message.data) as Model[];
|
||||
// Sort models by name and id
|
||||
newModels.sort((a, b) => {
|
||||
return (a.name + a.id).localeCompare(b.name + b.id);
|
||||
return (a.name + a.id).localeCompare(b.name + b.id, undefined, { numeric : true} );
|
||||
});
|
||||
models.set(newModels);
|
||||
break;
|
||||
|
||||
@@ -32,6 +32,7 @@ export default defineConfig({
|
||||
"/upstream": "http://localhost:8080",
|
||||
"/unload": "http://localhost:8080",
|
||||
"/v1": "http://localhost:8080",
|
||||
"/sdapi": "http://localhost:8080",
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user