Compare commits

..

2 Commits

Author SHA1 Message Date
Benson Wong a9d840ffd7 proxy,proxy/config: restore timeouts to pre PR 619 (#648)
Reset the default ResponseHeader timeout to 0 (no timeout) which was set
to 60 seconds in PR #619.

Fixes #647
2026-04-11 20:42:13 -07:00
Benson Wong 7b2b82777f docker/unified: derive rootless image from root container (#644)
Build the root image once, then derive the rootless variant from it
using a small inline Dockerfile that adds the non-root user and chowns
the writable directories. This halves the number of CI jobs (4 → 2) and
eliminates the redundant full CUDA compilation for the rootless variant.

- remove RUN_UID build arg from build-image.sh
- derive rootless image inline after root build completes
- collapse variant matrix out of unified-docker.yml
- push both root and rootless tags in a single CI job

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-10 22:59:54 -07:00
11 changed files with 108 additions and 97 deletions
+12 -14
View File
@@ -68,13 +68,6 @@ jobs:
fail-fast: false
matrix:
backend: ${{ fromJSON(needs.setup.outputs.matrix) }}
variant:
- name: root
uid: "0"
suffix: ""
- name: rootless
uid: "10001"
suffix: "-rootless"
steps:
- name: Checkout code
uses: actions/checkout@v4
@@ -106,15 +99,14 @@ jobs:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build unified Docker image (${{ matrix.backend }}, ${{ matrix.variant.name }})
- name: Build unified Docker image (${{ matrix.backend }})
env:
LLAMA_REF: ${{ inputs.llama_cpp_ref || 'master' }}
WHISPER_REF: ${{ inputs.whisper_ref || 'master' }}
SD_REF: ${{ inputs.sd_ref || 'master' }}
IK_LLAMA_REF: ${{ inputs.ik_llama_ref || 'main' }}
LS_VERSION: ${{ inputs.llama_swap_version || 'main' }}
RUN_UID: ${{ matrix.variant.uid }}
DOCKER_IMAGE_TAG: ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }}${{ matrix.variant.suffix }}
DOCKER_IMAGE_TAG: ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }}
# When running under act, use the local builder that has warm ccache.
# On GitHub Actions, BUILDX_BUILDER is unset so docker uses the builder
# created by setup-buildx-action above.
@@ -126,8 +118,14 @@ jobs:
- name: Push to GitHub Container Registry
if: ${{ !env.ACT }}
run: |
TAG="ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }}${{ matrix.variant.suffix }}"
docker push "${TAG}"
BASE_TAG="ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }}"
DATE_TAG=$(date -u +%Y-%m-%d)
docker tag "${TAG}" "${TAG}-${DATE_TAG}"
docker push "${TAG}-${DATE_TAG}"
docker push "${BASE_TAG}"
docker tag "${BASE_TAG}" "${BASE_TAG}-${DATE_TAG}"
docker push "${BASE_TAG}-${DATE_TAG}"
ROOTLESS_TAG="${BASE_TAG}-rootless"
docker push "${ROOTLESS_TAG}"
docker tag "${ROOTLESS_TAG}" "${ROOTLESS_TAG}-${DATE_TAG}"
docker push "${ROOTLESS_TAG}-${DATE_TAG}"
+23 -11
View File
@@ -47,31 +47,37 @@
"type": "integer",
"minimum": 0,
"default": 30,
"description": "TCP connection timeout in seconds. Set to 0 to disable (not recommended)."
"description": "TCP connection timeout in seconds. Set to 0 to disable."
},
"keepalive": {
"type": "integer",
"minimum": 0,
"default": 30,
"description": "TCP keepalive timeout in seconds. Set to 0 to disable."
},
"responseHeader": {
"type": "integer",
"minimum": 0,
"default": 60,
"description": "Time to wait for response headers in seconds. Set to 0 to disable (not recommended)."
"default": 0,
"description": "Time to wait for response headers in seconds. Set to 0 to disable."
},
"tlsHandshake": {
"type": "integer",
"minimum": 0,
"default": 10,
"description": "TLS handshake timeout in seconds. Set to 0 to disable (not recommended)."
"description": "TLS handshake timeout in seconds. Set to 0 to disable."
},
"expectContinue": {
"type": "integer",
"minimum": 0,
"default": 1,
"description": "Expect-Continue timeout in seconds. Set to 0 to disable (not recommended)."
"description": "Expect-Continue timeout in seconds. Set to 0 to disable."
},
"idleConn": {
"type": "integer",
"minimum": 0,
"default": 90,
"description": "Idle connection timeout in seconds. Set to 0 to disable (not recommended)."
"description": "Idle connection timeout in seconds. Set to 0 to disable."
}
},
"additionalProperties": false,
@@ -413,25 +419,31 @@
"properties": {
"connect": {
"type": "integer",
"minimum": 1,
"minimum": 0,
"default": 30,
"description": "TCP connection timeout in seconds."
},
"keepalive": {
"type": "integer",
"minimum": 0,
"default": 30,
"description": "TCP keepalive connection timeout in seconds."
},
"responseHeader": {
"type": "integer",
"minimum": 1,
"default": 60,
"minimum": 0,
"default": 0,
"description": "Time to wait for response headers in seconds."
},
"tlsHandshake": {
"type": "integer",
"minimum": 1,
"minimum": 0,
"default": 10,
"description": "TLS handshake timeout in seconds."
},
"idleConn": {
"type": "integer",
"minimum": 1,
"minimum": 0,
"default": 90,
"description": "Idle connection timeout in seconds."
}
+7 -5
View File
@@ -287,14 +287,15 @@ models:
# timeouts: configure proxy connection timeouts for this model
# - optional, defaults shown below
# - useful for models running on slower hardware that need longer timeouts
# - connect: TCP connection timeout in seconds
# - responseHeader: time to wait for response headers in seconds
# (increasing this helps avoid 502 errors on slow hardware)
# - tlsHandshake: TLS handshake timeout in seconds
# - idleConn: idle connection timeout in seconds
# - connect: TCP dial connection timeout in seconds, default: 30 seconds
# - keepalive: TCP connection keepalive timeout, default: 30 seconds
# - responseHeader: time to wait for response headers in seconds, default: 0 (no timeout)
# - tlsHandshake: TLS handshake timeout in seconds, default: 10 seconds
# - idleConn: idle connection timeout in seconds, default: 90 seconds
# - set any value to 0 to disable that timeout (not recommended)
timeouts:
connect: 30
keepalive: 0
responseHeader: 60
tlsHandshake: 10
idleConn: 90
@@ -447,6 +448,7 @@ peers:
# - set any value to 0 to disable that timeout (not recommended)
timeouts:
connect: 30
keepalive: 30
responseHeader: 60
tlsHandshake: 10
idleConn: 90
+22 -2
View File
@@ -201,7 +201,6 @@ BUILD_ARGS=(
--build-arg "SD_COMMIT_HASH=${SD_HASH}"
--build-arg "IK_LLAMA_COMMIT_HASH=${IK_LLAMA_HASH}"
--build-arg "LS_VERSION=${LS_HASH}"
--build-arg "RUN_UID=${RUN_UID:-0}"
-t "${DOCKER_IMAGE_TAG}"
-f "${SCRIPT_DIR}/Dockerfile"
)
@@ -255,12 +254,33 @@ if [[ "$BACKEND" == "cuda" ]]; then
fi
echo "All expected binaries verified: ${VERIFIED_LIST}"
echo ""
echo "=========================================="
echo "Building rootless image..."
echo "=========================================="
echo ""
ROOTLESS_TAG="${DOCKER_IMAGE_TAG}-rootless"
docker buildx build --load -t "${ROOTLESS_TAG}" - <<EOF
FROM ${DOCKER_IMAGE_TAG}
USER root
RUN groupadd --system --gid 10001 llama-swap && \\
useradd --system --uid 10001 --gid 10001 \\
--home /app --shell /sbin/nologin llama-swap && \\
chown -R 10001:10001 /etc/llama-swap /models
USER 10001
EOF
echo "Rootless image built: ${ROOTLESS_TAG}"
echo ""
echo "=========================================="
echo "Build complete!"
echo "=========================================="
echo ""
echo "Image tag: ${DOCKER_IMAGE_TAG}"
echo "Image tags:"
echo " ${DOCKER_IMAGE_TAG}"
echo " ${ROOTLESS_TAG}"
echo ""
echo "Built with:"
echo " llama.cpp: ${LLAMA_HASH}"
+13 -28
View File
@@ -163,6 +163,15 @@ groups:
modelLoadingState := false
defaultTimeout := TimeoutsConfig{
Connect: 30,
KeepAlive: 30,
ResponseHeader: 0,
TLSHandshake: 10,
ExpectContinue: 1,
IdleConn: 90,
}
expected := Config{
LogLevel: "info",
LogTimeFormat: "",
@@ -187,13 +196,7 @@ groups:
Name: "Model 1",
Description: "This is model 1",
SendLoadingState: &modelLoadingState,
Timeouts: TimeoutsConfig{
Connect: 30,
ResponseHeader: 60,
TLSHandshake: 10,
ExpectContinue: 1,
IdleConn: 90,
},
Timeouts: defaultTimeout,
},
"model2": {
Cmd: "path/to/server --arg1 one",
@@ -202,13 +205,7 @@ groups:
Env: []string{},
CheckEndpoint: "/",
SendLoadingState: &modelLoadingState,
Timeouts: TimeoutsConfig{
Connect: 30,
ResponseHeader: 60,
TLSHandshake: 10,
ExpectContinue: 1,
IdleConn: 90,
},
Timeouts: defaultTimeout,
},
"model3": {
Cmd: "path/to/cmd --arg1 one",
@@ -217,13 +214,7 @@ groups:
Env: []string{},
CheckEndpoint: "/",
SendLoadingState: &modelLoadingState,
Timeouts: TimeoutsConfig{
Connect: 30,
ResponseHeader: 60,
TLSHandshake: 10,
ExpectContinue: 1,
IdleConn: 90,
},
Timeouts: defaultTimeout,
},
"model4": {
Cmd: "path/to/cmd --arg1 one",
@@ -232,13 +223,7 @@ groups:
Aliases: []string{},
Env: []string{},
SendLoadingState: &modelLoadingState,
Timeouts: TimeoutsConfig{
Connect: 30,
ResponseHeader: 60,
TLSHandshake: 10,
ExpectContinue: 1,
IdleConn: 90,
},
Timeouts: defaultTimeout,
},
},
HealthCheckTimeout: 15,
+1 -1
View File
@@ -1475,7 +1475,7 @@ models:
// Default values should be set during unmarshaling
assert.Equal(t, 30, modelConfig.Timeouts.Connect)
assert.Equal(t, 60, modelConfig.Timeouts.ResponseHeader)
assert.Equal(t, 0, modelConfig.Timeouts.ResponseHeader)
assert.Equal(t, 10, modelConfig.Timeouts.TLSHandshake)
assert.Equal(t, 1, modelConfig.Timeouts.ExpectContinue)
assert.Equal(t, 90, modelConfig.Timeouts.IdleConn)
+13 -28
View File
@@ -155,6 +155,15 @@ groups:
modelLoadingState := false
defaultTimeout := TimeoutsConfig{
Connect: 30,
KeepAlive: 30,
ResponseHeader: 0,
TLSHandshake: 10,
ExpectContinue: 1,
IdleConn: 90,
}
expected := Config{
LogLevel: "info",
LogTimeFormat: "",
@@ -173,13 +182,7 @@ groups:
Env: []string{"VAR1=value1", "VAR2=value2"},
CheckEndpoint: "/health",
SendLoadingState: &modelLoadingState,
Timeouts: TimeoutsConfig{
Connect: 30,
ResponseHeader: 60,
TLSHandshake: 10,
ExpectContinue: 1,
IdleConn: 90,
},
Timeouts: defaultTimeout,
},
"model2": {
Cmd: "path/to/server --arg1 one",
@@ -189,13 +192,7 @@ groups:
Env: []string{},
CheckEndpoint: "/",
SendLoadingState: &modelLoadingState,
Timeouts: TimeoutsConfig{
Connect: 30,
ResponseHeader: 60,
TLSHandshake: 10,
ExpectContinue: 1,
IdleConn: 90,
},
Timeouts: defaultTimeout,
},
"model3": {
Cmd: "path/to/cmd --arg1 one",
@@ -205,13 +202,7 @@ groups:
Env: []string{},
CheckEndpoint: "/",
SendLoadingState: &modelLoadingState,
Timeouts: TimeoutsConfig{
Connect: 30,
ResponseHeader: 60,
TLSHandshake: 10,
ExpectContinue: 1,
IdleConn: 90,
},
Timeouts: defaultTimeout,
},
"model4": {
Cmd: "path/to/cmd --arg1 one",
@@ -221,13 +212,7 @@ groups:
Aliases: []string{},
Env: []string{},
SendLoadingState: &modelLoadingState,
Timeouts: TimeoutsConfig{
Connect: 30,
ResponseHeader: 60,
TLSHandshake: 10,
ExpectContinue: 1,
IdleConn: 90,
},
Timeouts: defaultTimeout,
},
},
HealthCheckTimeout: 15,
+11 -6
View File
@@ -10,12 +10,14 @@ const (
)
// TimeoutsConfig holds timeout settings for proxy connections
// 0 = no timeout
type TimeoutsConfig struct {
Connect int `yaml:"connect"` // seconds, 0 = no timeout (not recommended)
ResponseHeader int `yaml:"responseHeader"` // seconds, 0 = no timeout (not recommended)
TLSHandshake int `yaml:"tlsHandshake"` // seconds, 0 = no timeout (not recommended)
ExpectContinue int `yaml:"expectContinue"` // seconds, 0 = no timeout (not recommended)
IdleConn int `yaml:"idleConn"` // seconds, 0 = no timeout (not recommended)
Connect int `yaml:"connect"`
KeepAlive int `yaml:"keepalive"`
ResponseHeader int `yaml:"responseHeader"`
TLSHandshake int `yaml:"tlsHandshake"`
ExpectContinue int `yaml:"expectContinue"`
IdleConn int `yaml:"idleConn"`
}
type ModelConfig struct {
@@ -69,9 +71,12 @@ func (m *ModelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
ConcurrencyLimit: 0,
Name: "",
Description: "",
// matches http.DefaultTransport
Timeouts: TimeoutsConfig{
Connect: 30,
ResponseHeader: 60,
KeepAlive: 30,
ResponseHeader: 0,
TLSHandshake: 10,
ExpectContinue: 1,
IdleConn: 90,
+4
View File
@@ -24,8 +24,12 @@ func (c *PeerConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
ApiKey: "",
Models: []string{},
Filters: Filters{},
// mostly matches http.DefaultTransport but with a 60s ResponseHeader timeout
// to match the pre PR #619 functionality
Timeouts: TimeoutsConfig{
Connect: 30,
KeepAlive: 30,
ResponseHeader: 60,
TLSHandshake: 10,
ExpectContinue: 1,
+1 -1
View File
@@ -42,7 +42,7 @@ func NewPeerProxy(peers config.PeerDictionaryConfig, proxyLogger *LogMonitor) (*
Proxy: http.ProxyFromEnvironment,
DialContext: (&net.Dialer{
Timeout: time.Duration(peer.Timeouts.Connect) * time.Second,
KeepAlive: 30 * time.Second,
KeepAlive: time.Duration(peer.Timeouts.KeepAlive) * time.Second,
}).DialContext,
TLSHandshakeTimeout: time.Duration(peer.Timeouts.TLSHandshake) * time.Second,
ResponseHeaderTimeout: time.Duration(peer.Timeouts.ResponseHeader) * time.Second,
+1 -1
View File
@@ -102,7 +102,7 @@ func NewProcess(ID string, healthCheckTimeout int, config config.ModelConfig, pr
Proxy: http.ProxyFromEnvironment,
DialContext: (&net.Dialer{
Timeout: time.Duration(config.Timeouts.Connect) * time.Second,
KeepAlive: 30 * time.Second,
KeepAlive: time.Duration(config.Timeouts.KeepAlive) * time.Second,
}).DialContext,
TLSHandshakeTimeout: time.Duration(config.Timeouts.TLSHandshake) * time.Second,
ResponseHeaderTimeout: time.Duration(config.Timeouts.ResponseHeader) * time.Second,