Compare commits

...

2 Commits

Author SHA1 Message Date
Benson Wong a9d840ffd7 proxy,proxy/config: restore timeouts to pre PR 619 (#648)
Reset the default ResponseHeader timeout to 0 (no timeout) which was set
to 60 seconds in PR #619.

Fixes #647
2026-04-11 20:42:13 -07:00
Benson Wong 7b2b82777f docker/unified: derive rootless image from root container (#644)
Build the root image once, then derive the rootless variant from it
using a small inline Dockerfile that adds the non-root user and chowns
the writable directories. This halves the number of CI jobs (4 → 2) and
eliminates the redundant full CUDA compilation for the rootless variant.

- remove RUN_UID build arg from build-image.sh
- derive rootless image inline after root build completes
- collapse variant matrix out of unified-docker.yml
- push both root and rootless tags in a single CI job

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-10 22:59:54 -07:00
11 changed files with 108 additions and 97 deletions
+12 -14
View File
@@ -68,13 +68,6 @@ jobs:
fail-fast: false fail-fast: false
matrix: matrix:
backend: ${{ fromJSON(needs.setup.outputs.matrix) }} backend: ${{ fromJSON(needs.setup.outputs.matrix) }}
variant:
- name: root
uid: "0"
suffix: ""
- name: rootless
uid: "10001"
suffix: "-rootless"
steps: steps:
- name: Checkout code - name: Checkout code
uses: actions/checkout@v4 uses: actions/checkout@v4
@@ -106,15 +99,14 @@ jobs:
username: ${{ github.actor }} username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }} password: ${{ secrets.GITHUB_TOKEN }}
- name: Build unified Docker image (${{ matrix.backend }}, ${{ matrix.variant.name }}) - name: Build unified Docker image (${{ matrix.backend }})
env: env:
LLAMA_REF: ${{ inputs.llama_cpp_ref || 'master' }} LLAMA_REF: ${{ inputs.llama_cpp_ref || 'master' }}
WHISPER_REF: ${{ inputs.whisper_ref || 'master' }} WHISPER_REF: ${{ inputs.whisper_ref || 'master' }}
SD_REF: ${{ inputs.sd_ref || 'master' }} SD_REF: ${{ inputs.sd_ref || 'master' }}
IK_LLAMA_REF: ${{ inputs.ik_llama_ref || 'main' }} IK_LLAMA_REF: ${{ inputs.ik_llama_ref || 'main' }}
LS_VERSION: ${{ inputs.llama_swap_version || 'main' }} LS_VERSION: ${{ inputs.llama_swap_version || 'main' }}
RUN_UID: ${{ matrix.variant.uid }} DOCKER_IMAGE_TAG: ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }}
DOCKER_IMAGE_TAG: ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }}${{ matrix.variant.suffix }}
# When running under act, use the local builder that has warm ccache. # When running under act, use the local builder that has warm ccache.
# On GitHub Actions, BUILDX_BUILDER is unset so docker uses the builder # On GitHub Actions, BUILDX_BUILDER is unset so docker uses the builder
# created by setup-buildx-action above. # created by setup-buildx-action above.
@@ -126,8 +118,14 @@ jobs:
- name: Push to GitHub Container Registry - name: Push to GitHub Container Registry
if: ${{ !env.ACT }} if: ${{ !env.ACT }}
run: | run: |
TAG="ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }}${{ matrix.variant.suffix }}" BASE_TAG="ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }}"
docker push "${TAG}"
DATE_TAG=$(date -u +%Y-%m-%d) DATE_TAG=$(date -u +%Y-%m-%d)
docker tag "${TAG}" "${TAG}-${DATE_TAG}"
docker push "${TAG}-${DATE_TAG}" docker push "${BASE_TAG}"
docker tag "${BASE_TAG}" "${BASE_TAG}-${DATE_TAG}"
docker push "${BASE_TAG}-${DATE_TAG}"
ROOTLESS_TAG="${BASE_TAG}-rootless"
docker push "${ROOTLESS_TAG}"
docker tag "${ROOTLESS_TAG}" "${ROOTLESS_TAG}-${DATE_TAG}"
docker push "${ROOTLESS_TAG}-${DATE_TAG}"
+23 -11
View File
@@ -47,31 +47,37 @@
"type": "integer", "type": "integer",
"minimum": 0, "minimum": 0,
"default": 30, "default": 30,
"description": "TCP connection timeout in seconds. Set to 0 to disable (not recommended)." "description": "TCP connection timeout in seconds. Set to 0 to disable."
},
"keepalive": {
"type": "integer",
"minimum": 0,
"default": 30,
"description": "TCP keepalive timeout in seconds. Set to 0 to disable."
}, },
"responseHeader": { "responseHeader": {
"type": "integer", "type": "integer",
"minimum": 0, "minimum": 0,
"default": 60, "default": 0,
"description": "Time to wait for response headers in seconds. Set to 0 to disable (not recommended)." "description": "Time to wait for response headers in seconds. Set to 0 to disable."
}, },
"tlsHandshake": { "tlsHandshake": {
"type": "integer", "type": "integer",
"minimum": 0, "minimum": 0,
"default": 10, "default": 10,
"description": "TLS handshake timeout in seconds. Set to 0 to disable (not recommended)." "description": "TLS handshake timeout in seconds. Set to 0 to disable."
}, },
"expectContinue": { "expectContinue": {
"type": "integer", "type": "integer",
"minimum": 0, "minimum": 0,
"default": 1, "default": 1,
"description": "Expect-Continue timeout in seconds. Set to 0 to disable (not recommended)." "description": "Expect-Continue timeout in seconds. Set to 0 to disable."
}, },
"idleConn": { "idleConn": {
"type": "integer", "type": "integer",
"minimum": 0, "minimum": 0,
"default": 90, "default": 90,
"description": "Idle connection timeout in seconds. Set to 0 to disable (not recommended)." "description": "Idle connection timeout in seconds. Set to 0 to disable."
} }
}, },
"additionalProperties": false, "additionalProperties": false,
@@ -413,25 +419,31 @@
"properties": { "properties": {
"connect": { "connect": {
"type": "integer", "type": "integer",
"minimum": 1, "minimum": 0,
"default": 30, "default": 30,
"description": "TCP connection timeout in seconds." "description": "TCP connection timeout in seconds."
}, },
"keepalive": {
"type": "integer",
"minimum": 0,
"default": 30,
"description": "TCP keepalive connection timeout in seconds."
},
"responseHeader": { "responseHeader": {
"type": "integer", "type": "integer",
"minimum": 1, "minimum": 0,
"default": 60, "default": 0,
"description": "Time to wait for response headers in seconds." "description": "Time to wait for response headers in seconds."
}, },
"tlsHandshake": { "tlsHandshake": {
"type": "integer", "type": "integer",
"minimum": 1, "minimum": 0,
"default": 10, "default": 10,
"description": "TLS handshake timeout in seconds." "description": "TLS handshake timeout in seconds."
}, },
"idleConn": { "idleConn": {
"type": "integer", "type": "integer",
"minimum": 1, "minimum": 0,
"default": 90, "default": 90,
"description": "Idle connection timeout in seconds." "description": "Idle connection timeout in seconds."
} }
+7 -5
View File
@@ -287,14 +287,15 @@ models:
# timeouts: configure proxy connection timeouts for this model # timeouts: configure proxy connection timeouts for this model
# - optional, defaults shown below # - optional, defaults shown below
# - useful for models running on slower hardware that need longer timeouts # - useful for models running on slower hardware that need longer timeouts
# - connect: TCP connection timeout in seconds # - connect: TCP dial connection timeout in seconds, default: 30 seconds
# - responseHeader: time to wait for response headers in seconds # - keepalive: TCP connection keepalive timeout in seconds, default: 30 seconds
# (increasing this helps avoid 502 errors on slow hardware) # - responseHeader: time to wait for response headers in seconds, default: 0 (no timeout)
# - tlsHandshake: TLS handshake timeout in seconds # - tlsHandshake: TLS handshake timeout in seconds, default: 10 seconds
# - idleConn: idle connection timeout in seconds # - idleConn: idle connection timeout in seconds, default: 90 seconds
# - set any value to 0 to disable that timeout (not recommended) # - set any value to 0 to disable that timeout (not recommended)
timeouts: timeouts:
connect: 30 connect: 30
keepalive: 0
responseHeader: 60 responseHeader: 60
tlsHandshake: 10 tlsHandshake: 10
idleConn: 90 idleConn: 90
@@ -447,6 +448,7 @@ peers:
# - set any value to 0 to disable that timeout (not recommended) # - set any value to 0 to disable that timeout (not recommended)
timeouts: timeouts:
connect: 30 connect: 30
keepalive: 30
responseHeader: 60 responseHeader: 60
tlsHandshake: 10 tlsHandshake: 10
idleConn: 90 idleConn: 90
+22 -2
View File
@@ -201,7 +201,6 @@ BUILD_ARGS=(
--build-arg "SD_COMMIT_HASH=${SD_HASH}" --build-arg "SD_COMMIT_HASH=${SD_HASH}"
--build-arg "IK_LLAMA_COMMIT_HASH=${IK_LLAMA_HASH}" --build-arg "IK_LLAMA_COMMIT_HASH=${IK_LLAMA_HASH}"
--build-arg "LS_VERSION=${LS_HASH}" --build-arg "LS_VERSION=${LS_HASH}"
--build-arg "RUN_UID=${RUN_UID:-0}"
-t "${DOCKER_IMAGE_TAG}" -t "${DOCKER_IMAGE_TAG}"
-f "${SCRIPT_DIR}/Dockerfile" -f "${SCRIPT_DIR}/Dockerfile"
) )
@@ -255,12 +254,33 @@ if [[ "$BACKEND" == "cuda" ]]; then
fi fi
echo "All expected binaries verified: ${VERIFIED_LIST}" echo "All expected binaries verified: ${VERIFIED_LIST}"
echo ""
echo "=========================================="
echo "Building rootless image..."
echo "=========================================="
echo ""
ROOTLESS_TAG="${DOCKER_IMAGE_TAG}-rootless"
docker buildx build --load -t "${ROOTLESS_TAG}" - <<EOF
FROM ${DOCKER_IMAGE_TAG}
USER root
RUN groupadd --system --gid 10001 llama-swap && \\
useradd --system --uid 10001 --gid 10001 \\
--home /app --shell /sbin/nologin llama-swap && \\
chown -R 10001:10001 /etc/llama-swap /models
USER 10001
EOF
echo "Rootless image built: ${ROOTLESS_TAG}"
echo "" echo ""
echo "==========================================" echo "=========================================="
echo "Build complete!" echo "Build complete!"
echo "==========================================" echo "=========================================="
echo "" echo ""
echo "Image tag: ${DOCKER_IMAGE_TAG}" echo "Image tags:"
echo " ${DOCKER_IMAGE_TAG}"
echo " ${ROOTLESS_TAG}"
echo "" echo ""
echo "Built with:" echo "Built with:"
echo " llama.cpp: ${LLAMA_HASH}" echo " llama.cpp: ${LLAMA_HASH}"
+13 -28
View File
@@ -163,6 +163,15 @@ groups:
modelLoadingState := false modelLoadingState := false
defaultTimeout := TimeoutsConfig{
Connect: 30,
KeepAlive: 30,
ResponseHeader: 0,
TLSHandshake: 10,
ExpectContinue: 1,
IdleConn: 90,
}
expected := Config{ expected := Config{
LogLevel: "info", LogLevel: "info",
LogTimeFormat: "", LogTimeFormat: "",
@@ -187,13 +196,7 @@ groups:
Name: "Model 1", Name: "Model 1",
Description: "This is model 1", Description: "This is model 1",
SendLoadingState: &modelLoadingState, SendLoadingState: &modelLoadingState,
Timeouts: TimeoutsConfig{ Timeouts: defaultTimeout,
Connect: 30,
ResponseHeader: 60,
TLSHandshake: 10,
ExpectContinue: 1,
IdleConn: 90,
},
}, },
"model2": { "model2": {
Cmd: "path/to/server --arg1 one", Cmd: "path/to/server --arg1 one",
@@ -202,13 +205,7 @@ groups:
Env: []string{}, Env: []string{},
CheckEndpoint: "/", CheckEndpoint: "/",
SendLoadingState: &modelLoadingState, SendLoadingState: &modelLoadingState,
Timeouts: TimeoutsConfig{ Timeouts: defaultTimeout,
Connect: 30,
ResponseHeader: 60,
TLSHandshake: 10,
ExpectContinue: 1,
IdleConn: 90,
},
}, },
"model3": { "model3": {
Cmd: "path/to/cmd --arg1 one", Cmd: "path/to/cmd --arg1 one",
@@ -217,13 +214,7 @@ groups:
Env: []string{}, Env: []string{},
CheckEndpoint: "/", CheckEndpoint: "/",
SendLoadingState: &modelLoadingState, SendLoadingState: &modelLoadingState,
Timeouts: TimeoutsConfig{ Timeouts: defaultTimeout,
Connect: 30,
ResponseHeader: 60,
TLSHandshake: 10,
ExpectContinue: 1,
IdleConn: 90,
},
}, },
"model4": { "model4": {
Cmd: "path/to/cmd --arg1 one", Cmd: "path/to/cmd --arg1 one",
@@ -232,13 +223,7 @@ groups:
Aliases: []string{}, Aliases: []string{},
Env: []string{}, Env: []string{},
SendLoadingState: &modelLoadingState, SendLoadingState: &modelLoadingState,
Timeouts: TimeoutsConfig{ Timeouts: defaultTimeout,
Connect: 30,
ResponseHeader: 60,
TLSHandshake: 10,
ExpectContinue: 1,
IdleConn: 90,
},
}, },
}, },
HealthCheckTimeout: 15, HealthCheckTimeout: 15,
+1 -1
View File
@@ -1475,7 +1475,7 @@ models:
// Default values should be set during unmarshaling // Default values should be set during unmarshaling
assert.Equal(t, 30, modelConfig.Timeouts.Connect) assert.Equal(t, 30, modelConfig.Timeouts.Connect)
assert.Equal(t, 60, modelConfig.Timeouts.ResponseHeader) assert.Equal(t, 0, modelConfig.Timeouts.ResponseHeader)
assert.Equal(t, 10, modelConfig.Timeouts.TLSHandshake) assert.Equal(t, 10, modelConfig.Timeouts.TLSHandshake)
assert.Equal(t, 1, modelConfig.Timeouts.ExpectContinue) assert.Equal(t, 1, modelConfig.Timeouts.ExpectContinue)
assert.Equal(t, 90, modelConfig.Timeouts.IdleConn) assert.Equal(t, 90, modelConfig.Timeouts.IdleConn)
+13 -28
View File
@@ -155,6 +155,15 @@ groups:
modelLoadingState := false modelLoadingState := false
defaultTimeout := TimeoutsConfig{
Connect: 30,
KeepAlive: 30,
ResponseHeader: 0,
TLSHandshake: 10,
ExpectContinue: 1,
IdleConn: 90,
}
expected := Config{ expected := Config{
LogLevel: "info", LogLevel: "info",
LogTimeFormat: "", LogTimeFormat: "",
@@ -173,13 +182,7 @@ groups:
Env: []string{"VAR1=value1", "VAR2=value2"}, Env: []string{"VAR1=value1", "VAR2=value2"},
CheckEndpoint: "/health", CheckEndpoint: "/health",
SendLoadingState: &modelLoadingState, SendLoadingState: &modelLoadingState,
Timeouts: TimeoutsConfig{ Timeouts: defaultTimeout,
Connect: 30,
ResponseHeader: 60,
TLSHandshake: 10,
ExpectContinue: 1,
IdleConn: 90,
},
}, },
"model2": { "model2": {
Cmd: "path/to/server --arg1 one", Cmd: "path/to/server --arg1 one",
@@ -189,13 +192,7 @@ groups:
Env: []string{}, Env: []string{},
CheckEndpoint: "/", CheckEndpoint: "/",
SendLoadingState: &modelLoadingState, SendLoadingState: &modelLoadingState,
Timeouts: TimeoutsConfig{ Timeouts: defaultTimeout,
Connect: 30,
ResponseHeader: 60,
TLSHandshake: 10,
ExpectContinue: 1,
IdleConn: 90,
},
}, },
"model3": { "model3": {
Cmd: "path/to/cmd --arg1 one", Cmd: "path/to/cmd --arg1 one",
@@ -205,13 +202,7 @@ groups:
Env: []string{}, Env: []string{},
CheckEndpoint: "/", CheckEndpoint: "/",
SendLoadingState: &modelLoadingState, SendLoadingState: &modelLoadingState,
Timeouts: TimeoutsConfig{ Timeouts: defaultTimeout,
Connect: 30,
ResponseHeader: 60,
TLSHandshake: 10,
ExpectContinue: 1,
IdleConn: 90,
},
}, },
"model4": { "model4": {
Cmd: "path/to/cmd --arg1 one", Cmd: "path/to/cmd --arg1 one",
@@ -221,13 +212,7 @@ groups:
Aliases: []string{}, Aliases: []string{},
Env: []string{}, Env: []string{},
SendLoadingState: &modelLoadingState, SendLoadingState: &modelLoadingState,
Timeouts: TimeoutsConfig{ Timeouts: defaultTimeout,
Connect: 30,
ResponseHeader: 60,
TLSHandshake: 10,
ExpectContinue: 1,
IdleConn: 90,
},
}, },
}, },
HealthCheckTimeout: 15, HealthCheckTimeout: 15,
+11 -6
View File
@@ -10,12 +10,14 @@ const (
) )
// TimeoutsConfig holds timeout settings for proxy connections // TimeoutsConfig holds timeout settings for proxy connections
// 0 = no timeout
type TimeoutsConfig struct { type TimeoutsConfig struct {
Connect int `yaml:"connect"` // seconds, 0 = no timeout (not recommended) Connect int `yaml:"connect"`
ResponseHeader int `yaml:"responseHeader"` // seconds, 0 = no timeout (not recommended) KeepAlive int `yaml:"keepalive"`
TLSHandshake int `yaml:"tlsHandshake"` // seconds, 0 = no timeout (not recommended) ResponseHeader int `yaml:"responseHeader"`
ExpectContinue int `yaml:"expectContinue"` // seconds, 0 = no timeout (not recommended) TLSHandshake int `yaml:"tlsHandshake"`
IdleConn int `yaml:"idleConn"` // seconds, 0 = no timeout (not recommended) ExpectContinue int `yaml:"expectContinue"`
IdleConn int `yaml:"idleConn"`
} }
type ModelConfig struct { type ModelConfig struct {
@@ -69,9 +71,12 @@ func (m *ModelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
ConcurrencyLimit: 0, ConcurrencyLimit: 0,
Name: "", Name: "",
Description: "", Description: "",
// matches http.DefaultTransport
Timeouts: TimeoutsConfig{ Timeouts: TimeoutsConfig{
Connect: 30, Connect: 30,
ResponseHeader: 60, KeepAlive: 30,
ResponseHeader: 0,
TLSHandshake: 10, TLSHandshake: 10,
ExpectContinue: 1, ExpectContinue: 1,
IdleConn: 90, IdleConn: 90,
+4
View File
@@ -24,8 +24,12 @@ func (c *PeerConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
ApiKey: "", ApiKey: "",
Models: []string{}, Models: []string{},
Filters: Filters{}, Filters: Filters{},
// mostly matches http.DefaultTransport but with a 60s ResponseHeader timeout
// to match the pre PR #619 functionality
Timeouts: TimeoutsConfig{ Timeouts: TimeoutsConfig{
Connect: 30, Connect: 30,
KeepAlive: 30,
ResponseHeader: 60, ResponseHeader: 60,
TLSHandshake: 10, TLSHandshake: 10,
ExpectContinue: 1, ExpectContinue: 1,
+1 -1
View File
@@ -42,7 +42,7 @@ func NewPeerProxy(peers config.PeerDictionaryConfig, proxyLogger *LogMonitor) (*
Proxy: http.ProxyFromEnvironment, Proxy: http.ProxyFromEnvironment,
DialContext: (&net.Dialer{ DialContext: (&net.Dialer{
Timeout: time.Duration(peer.Timeouts.Connect) * time.Second, Timeout: time.Duration(peer.Timeouts.Connect) * time.Second,
KeepAlive: 30 * time.Second, KeepAlive: time.Duration(peer.Timeouts.KeepAlive) * time.Second,
}).DialContext, }).DialContext,
TLSHandshakeTimeout: time.Duration(peer.Timeouts.TLSHandshake) * time.Second, TLSHandshakeTimeout: time.Duration(peer.Timeouts.TLSHandshake) * time.Second,
ResponseHeaderTimeout: time.Duration(peer.Timeouts.ResponseHeader) * time.Second, ResponseHeaderTimeout: time.Duration(peer.Timeouts.ResponseHeader) * time.Second,
+1 -1
View File
@@ -102,7 +102,7 @@ func NewProcess(ID string, healthCheckTimeout int, config config.ModelConfig, pr
Proxy: http.ProxyFromEnvironment, Proxy: http.ProxyFromEnvironment,
DialContext: (&net.Dialer{ DialContext: (&net.Dialer{
Timeout: time.Duration(config.Timeouts.Connect) * time.Second, Timeout: time.Duration(config.Timeouts.Connect) * time.Second,
KeepAlive: 30 * time.Second, KeepAlive: time.Duration(config.Timeouts.KeepAlive) * time.Second,
}).DialContext, }).DialContext,
TLSHandshakeTimeout: time.Duration(config.Timeouts.TLSHandshake) * time.Second, TLSHandshakeTimeout: time.Duration(config.Timeouts.TLSHandshake) * time.Second,
ResponseHeaderTimeout: time.Duration(config.Timeouts.ResponseHeader) * time.Second, ResponseHeaderTimeout: time.Duration(config.Timeouts.ResponseHeader) * time.Second,