Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| a9d840ffd7 | |||
| 7b2b82777f |
@@ -68,13 +68,6 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
backend: ${{ fromJSON(needs.setup.outputs.matrix) }}
|
||||
variant:
|
||||
- name: root
|
||||
uid: "0"
|
||||
suffix: ""
|
||||
- name: rootless
|
||||
uid: "10001"
|
||||
suffix: "-rootless"
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
@@ -106,15 +99,14 @@ jobs:
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Build unified Docker image (${{ matrix.backend }}, ${{ matrix.variant.name }})
|
||||
- name: Build unified Docker image (${{ matrix.backend }})
|
||||
env:
|
||||
LLAMA_REF: ${{ inputs.llama_cpp_ref || 'master' }}
|
||||
WHISPER_REF: ${{ inputs.whisper_ref || 'master' }}
|
||||
SD_REF: ${{ inputs.sd_ref || 'master' }}
|
||||
IK_LLAMA_REF: ${{ inputs.ik_llama_ref || 'main' }}
|
||||
LS_VERSION: ${{ inputs.llama_swap_version || 'main' }}
|
||||
RUN_UID: ${{ matrix.variant.uid }}
|
||||
DOCKER_IMAGE_TAG: ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }}${{ matrix.variant.suffix }}
|
||||
DOCKER_IMAGE_TAG: ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }}
|
||||
# When running under act, use the local builder that has warm ccache.
|
||||
# On GitHub Actions, BUILDX_BUILDER is unset so docker uses the builder
|
||||
# created by setup-buildx-action above.
|
||||
@@ -126,8 +118,14 @@ jobs:
|
||||
- name: Push to GitHub Container Registry
|
||||
if: ${{ !env.ACT }}
|
||||
run: |
|
||||
TAG="ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }}${{ matrix.variant.suffix }}"
|
||||
docker push "${TAG}"
|
||||
BASE_TAG="ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }}"
|
||||
DATE_TAG=$(date -u +%Y-%m-%d)
|
||||
docker tag "${TAG}" "${TAG}-${DATE_TAG}"
|
||||
docker push "${TAG}-${DATE_TAG}"
|
||||
|
||||
docker push "${BASE_TAG}"
|
||||
docker tag "${BASE_TAG}" "${BASE_TAG}-${DATE_TAG}"
|
||||
docker push "${BASE_TAG}-${DATE_TAG}"
|
||||
|
||||
ROOTLESS_TAG="${BASE_TAG}-rootless"
|
||||
docker push "${ROOTLESS_TAG}"
|
||||
docker tag "${ROOTLESS_TAG}" "${ROOTLESS_TAG}-${DATE_TAG}"
|
||||
docker push "${ROOTLESS_TAG}-${DATE_TAG}"
|
||||
|
||||
+23
-11
@@ -47,31 +47,37 @@
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"default": 30,
|
||||
"description": "TCP connection timeout in seconds. Set to 0 to disable (not recommended)."
|
||||
"description": "TCP connection timeout in seconds. Set to 0 to disable."
|
||||
},
|
||||
"keepalive": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"default": 30,
|
||||
"description": "TCP keepalive timeout in seconds. Set to 0 to disable."
|
||||
},
|
||||
"responseHeader": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"default": 60,
|
||||
"description": "Time to wait for response headers in seconds. Set to 0 to disable (not recommended)."
|
||||
"default": 0,
|
||||
"description": "Time to wait for response headers in seconds. Set to 0 to disable."
|
||||
},
|
||||
"tlsHandshake": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"default": 10,
|
||||
"description": "TLS handshake timeout in seconds. Set to 0 to disable (not recommended)."
|
||||
"description": "TLS handshake timeout in seconds. Set to 0 to disable."
|
||||
},
|
||||
"expectContinue": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"default": 1,
|
||||
"description": "Expect-Continue timeout in seconds. Set to 0 to disable (not recommended)."
|
||||
"description": "Expect-Continue timeout in seconds. Set to 0 to disable."
|
||||
},
|
||||
"idleConn": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"default": 90,
|
||||
"description": "Idle connection timeout in seconds. Set to 0 to disable (not recommended)."
|
||||
"description": "Idle connection timeout in seconds. Set to 0 to disable."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
@@ -413,25 +419,31 @@
|
||||
"properties": {
|
||||
"connect": {
|
||||
"type": "integer",
|
||||
"minimum": 1,
|
||||
"minimum": 0,
|
||||
"default": 30,
|
||||
"description": "TCP connection timeout in seconds."
|
||||
},
|
||||
"keepalive": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"default": 30,
|
||||
"description": "TCP keepalive connection timeout in seconds."
|
||||
},
|
||||
"responseHeader": {
|
||||
"type": "integer",
|
||||
"minimum": 1,
|
||||
"default": 60,
|
||||
"minimum": 0,
|
||||
"default": 0,
|
||||
"description": "Time to wait for response headers in seconds."
|
||||
},
|
||||
"tlsHandshake": {
|
||||
"type": "integer",
|
||||
"minimum": 1,
|
||||
"minimum": 0,
|
||||
"default": 10,
|
||||
"description": "TLS handshake timeout in seconds."
|
||||
},
|
||||
"idleConn": {
|
||||
"type": "integer",
|
||||
"minimum": 1,
|
||||
"minimum": 0,
|
||||
"default": 90,
|
||||
"description": "Idle connection timeout in seconds."
|
||||
}
|
||||
|
||||
+7
-5
@@ -287,14 +287,15 @@ models:
|
||||
# timeouts: configure proxy connection timeouts for this model
|
||||
# - optional, defaults shown below
|
||||
# - useful for models running on slower hardware that need longer timeouts
|
||||
# - connect: TCP connection timeout in seconds
|
||||
# - responseHeader: time to wait for response headers in seconds
|
||||
# (increasing this helps avoid 502 errors on slow hardware)
|
||||
# - tlsHandshake: TLS handshake timeout in seconds
|
||||
# - idleConn: idle connection timeout in seconds
|
||||
# - connect: TCP dial connection timeout in seconds, default: 30 seconds
|
||||
# - keepalive: TCP connection keepalive timeout, default: 30 seconds
|
||||
# - responseHeader: time to wait for response headers in seconds, default: 0 (no timeout)
|
||||
# - tlsHandshake: TLS handshake timeout in seconds, default: 10 seconds
|
||||
# - idleConn: idle connection timeout in seconds, default: 90 seconds
|
||||
# - set any value to 0 to disable that timeout (not recommended)
|
||||
timeouts:
|
||||
connect: 30
|
||||
keepalive: 0
|
||||
responseHeader: 60
|
||||
tlsHandshake: 10
|
||||
idleConn: 90
|
||||
@@ -447,6 +448,7 @@ peers:
|
||||
# - set any value to 0 to disable that timeout (not recommended)
|
||||
timeouts:
|
||||
connect: 30
|
||||
keepalive: 30
|
||||
responseHeader: 60
|
||||
tlsHandshake: 10
|
||||
idleConn: 90
|
||||
|
||||
@@ -201,7 +201,6 @@ BUILD_ARGS=(
|
||||
--build-arg "SD_COMMIT_HASH=${SD_HASH}"
|
||||
--build-arg "IK_LLAMA_COMMIT_HASH=${IK_LLAMA_HASH}"
|
||||
--build-arg "LS_VERSION=${LS_HASH}"
|
||||
--build-arg "RUN_UID=${RUN_UID:-0}"
|
||||
-t "${DOCKER_IMAGE_TAG}"
|
||||
-f "${SCRIPT_DIR}/Dockerfile"
|
||||
)
|
||||
@@ -255,12 +254,33 @@ if [[ "$BACKEND" == "cuda" ]]; then
|
||||
fi
|
||||
echo "All expected binaries verified: ${VERIFIED_LIST}"
|
||||
|
||||
echo ""
|
||||
echo "=========================================="
|
||||
echo "Building rootless image..."
|
||||
echo "=========================================="
|
||||
echo ""
|
||||
|
||||
ROOTLESS_TAG="${DOCKER_IMAGE_TAG}-rootless"
|
||||
docker buildx build --load -t "${ROOTLESS_TAG}" - <<EOF
|
||||
FROM ${DOCKER_IMAGE_TAG}
|
||||
USER root
|
||||
RUN groupadd --system --gid 10001 llama-swap && \\
|
||||
useradd --system --uid 10001 --gid 10001 \\
|
||||
--home /app --shell /sbin/nologin llama-swap && \\
|
||||
chown -R 10001:10001 /etc/llama-swap /models
|
||||
USER 10001
|
||||
EOF
|
||||
|
||||
echo "Rootless image built: ${ROOTLESS_TAG}"
|
||||
|
||||
echo ""
|
||||
echo "=========================================="
|
||||
echo "Build complete!"
|
||||
echo "=========================================="
|
||||
echo ""
|
||||
echo "Image tag: ${DOCKER_IMAGE_TAG}"
|
||||
echo "Image tags:"
|
||||
echo " ${DOCKER_IMAGE_TAG}"
|
||||
echo " ${ROOTLESS_TAG}"
|
||||
echo ""
|
||||
echo "Built with:"
|
||||
echo " llama.cpp: ${LLAMA_HASH}"
|
||||
|
||||
@@ -163,6 +163,15 @@ groups:
|
||||
|
||||
modelLoadingState := false
|
||||
|
||||
defaultTimeout := TimeoutsConfig{
|
||||
Connect: 30,
|
||||
KeepAlive: 30,
|
||||
ResponseHeader: 0,
|
||||
TLSHandshake: 10,
|
||||
ExpectContinue: 1,
|
||||
IdleConn: 90,
|
||||
}
|
||||
|
||||
expected := Config{
|
||||
LogLevel: "info",
|
||||
LogTimeFormat: "",
|
||||
@@ -187,13 +196,7 @@ groups:
|
||||
Name: "Model 1",
|
||||
Description: "This is model 1",
|
||||
SendLoadingState: &modelLoadingState,
|
||||
Timeouts: TimeoutsConfig{
|
||||
Connect: 30,
|
||||
ResponseHeader: 60,
|
||||
TLSHandshake: 10,
|
||||
ExpectContinue: 1,
|
||||
IdleConn: 90,
|
||||
},
|
||||
Timeouts: defaultTimeout,
|
||||
},
|
||||
"model2": {
|
||||
Cmd: "path/to/server --arg1 one",
|
||||
@@ -202,13 +205,7 @@ groups:
|
||||
Env: []string{},
|
||||
CheckEndpoint: "/",
|
||||
SendLoadingState: &modelLoadingState,
|
||||
Timeouts: TimeoutsConfig{
|
||||
Connect: 30,
|
||||
ResponseHeader: 60,
|
||||
TLSHandshake: 10,
|
||||
ExpectContinue: 1,
|
||||
IdleConn: 90,
|
||||
},
|
||||
Timeouts: defaultTimeout,
|
||||
},
|
||||
"model3": {
|
||||
Cmd: "path/to/cmd --arg1 one",
|
||||
@@ -217,13 +214,7 @@ groups:
|
||||
Env: []string{},
|
||||
CheckEndpoint: "/",
|
||||
SendLoadingState: &modelLoadingState,
|
||||
Timeouts: TimeoutsConfig{
|
||||
Connect: 30,
|
||||
ResponseHeader: 60,
|
||||
TLSHandshake: 10,
|
||||
ExpectContinue: 1,
|
||||
IdleConn: 90,
|
||||
},
|
||||
Timeouts: defaultTimeout,
|
||||
},
|
||||
"model4": {
|
||||
Cmd: "path/to/cmd --arg1 one",
|
||||
@@ -232,13 +223,7 @@ groups:
|
||||
Aliases: []string{},
|
||||
Env: []string{},
|
||||
SendLoadingState: &modelLoadingState,
|
||||
Timeouts: TimeoutsConfig{
|
||||
Connect: 30,
|
||||
ResponseHeader: 60,
|
||||
TLSHandshake: 10,
|
||||
ExpectContinue: 1,
|
||||
IdleConn: 90,
|
||||
},
|
||||
Timeouts: defaultTimeout,
|
||||
},
|
||||
},
|
||||
HealthCheckTimeout: 15,
|
||||
|
||||
@@ -1475,7 +1475,7 @@ models:
|
||||
|
||||
// Default values should be set during unmarshaling
|
||||
assert.Equal(t, 30, modelConfig.Timeouts.Connect)
|
||||
assert.Equal(t, 60, modelConfig.Timeouts.ResponseHeader)
|
||||
assert.Equal(t, 0, modelConfig.Timeouts.ResponseHeader)
|
||||
assert.Equal(t, 10, modelConfig.Timeouts.TLSHandshake)
|
||||
assert.Equal(t, 1, modelConfig.Timeouts.ExpectContinue)
|
||||
assert.Equal(t, 90, modelConfig.Timeouts.IdleConn)
|
||||
|
||||
@@ -155,6 +155,15 @@ groups:
|
||||
|
||||
modelLoadingState := false
|
||||
|
||||
defaultTimeout := TimeoutsConfig{
|
||||
Connect: 30,
|
||||
KeepAlive: 30,
|
||||
ResponseHeader: 0,
|
||||
TLSHandshake: 10,
|
||||
ExpectContinue: 1,
|
||||
IdleConn: 90,
|
||||
}
|
||||
|
||||
expected := Config{
|
||||
LogLevel: "info",
|
||||
LogTimeFormat: "",
|
||||
@@ -173,13 +182,7 @@ groups:
|
||||
Env: []string{"VAR1=value1", "VAR2=value2"},
|
||||
CheckEndpoint: "/health",
|
||||
SendLoadingState: &modelLoadingState,
|
||||
Timeouts: TimeoutsConfig{
|
||||
Connect: 30,
|
||||
ResponseHeader: 60,
|
||||
TLSHandshake: 10,
|
||||
ExpectContinue: 1,
|
||||
IdleConn: 90,
|
||||
},
|
||||
Timeouts: defaultTimeout,
|
||||
},
|
||||
"model2": {
|
||||
Cmd: "path/to/server --arg1 one",
|
||||
@@ -189,13 +192,7 @@ groups:
|
||||
Env: []string{},
|
||||
CheckEndpoint: "/",
|
||||
SendLoadingState: &modelLoadingState,
|
||||
Timeouts: TimeoutsConfig{
|
||||
Connect: 30,
|
||||
ResponseHeader: 60,
|
||||
TLSHandshake: 10,
|
||||
ExpectContinue: 1,
|
||||
IdleConn: 90,
|
||||
},
|
||||
Timeouts: defaultTimeout,
|
||||
},
|
||||
"model3": {
|
||||
Cmd: "path/to/cmd --arg1 one",
|
||||
@@ -205,13 +202,7 @@ groups:
|
||||
Env: []string{},
|
||||
CheckEndpoint: "/",
|
||||
SendLoadingState: &modelLoadingState,
|
||||
Timeouts: TimeoutsConfig{
|
||||
Connect: 30,
|
||||
ResponseHeader: 60,
|
||||
TLSHandshake: 10,
|
||||
ExpectContinue: 1,
|
||||
IdleConn: 90,
|
||||
},
|
||||
Timeouts: defaultTimeout,
|
||||
},
|
||||
"model4": {
|
||||
Cmd: "path/to/cmd --arg1 one",
|
||||
@@ -221,13 +212,7 @@ groups:
|
||||
Aliases: []string{},
|
||||
Env: []string{},
|
||||
SendLoadingState: &modelLoadingState,
|
||||
Timeouts: TimeoutsConfig{
|
||||
Connect: 30,
|
||||
ResponseHeader: 60,
|
||||
TLSHandshake: 10,
|
||||
ExpectContinue: 1,
|
||||
IdleConn: 90,
|
||||
},
|
||||
Timeouts: defaultTimeout,
|
||||
},
|
||||
},
|
||||
HealthCheckTimeout: 15,
|
||||
|
||||
@@ -10,12 +10,14 @@ const (
|
||||
)
|
||||
|
||||
// TimeoutsConfig holds timeout settings for proxy connections
|
||||
// 0 = no timeout
|
||||
type TimeoutsConfig struct {
|
||||
Connect int `yaml:"connect"` // seconds, 0 = no timeout (not recommended)
|
||||
ResponseHeader int `yaml:"responseHeader"` // seconds, 0 = no timeout (not recommended)
|
||||
TLSHandshake int `yaml:"tlsHandshake"` // seconds, 0 = no timeout (not recommended)
|
||||
ExpectContinue int `yaml:"expectContinue"` // seconds, 0 = no timeout (not recommended)
|
||||
IdleConn int `yaml:"idleConn"` // seconds, 0 = no timeout (not recommended)
|
||||
Connect int `yaml:"connect"`
|
||||
KeepAlive int `yaml:"keepalive"`
|
||||
ResponseHeader int `yaml:"responseHeader"`
|
||||
TLSHandshake int `yaml:"tlsHandshake"`
|
||||
ExpectContinue int `yaml:"expectContinue"`
|
||||
IdleConn int `yaml:"idleConn"`
|
||||
}
|
||||
|
||||
type ModelConfig struct {
|
||||
@@ -69,9 +71,12 @@ func (m *ModelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
ConcurrencyLimit: 0,
|
||||
Name: "",
|
||||
Description: "",
|
||||
|
||||
// matches http.DefaultTransport
|
||||
Timeouts: TimeoutsConfig{
|
||||
Connect: 30,
|
||||
ResponseHeader: 60,
|
||||
KeepAlive: 30,
|
||||
ResponseHeader: 0,
|
||||
TLSHandshake: 10,
|
||||
ExpectContinue: 1,
|
||||
IdleConn: 90,
|
||||
|
||||
@@ -24,8 +24,12 @@ func (c *PeerConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
ApiKey: "",
|
||||
Models: []string{},
|
||||
Filters: Filters{},
|
||||
|
||||
// mostly matches http.DefaultTransport but with a 60s ResponseHeader timeout
|
||||
// to match the pre PR #619 functionality
|
||||
Timeouts: TimeoutsConfig{
|
||||
Connect: 30,
|
||||
KeepAlive: 30,
|
||||
ResponseHeader: 60,
|
||||
TLSHandshake: 10,
|
||||
ExpectContinue: 1,
|
||||
|
||||
+1
-1
@@ -42,7 +42,7 @@ func NewPeerProxy(peers config.PeerDictionaryConfig, proxyLogger *LogMonitor) (*
|
||||
Proxy: http.ProxyFromEnvironment,
|
||||
DialContext: (&net.Dialer{
|
||||
Timeout: time.Duration(peer.Timeouts.Connect) * time.Second,
|
||||
KeepAlive: 30 * time.Second,
|
||||
KeepAlive: time.Duration(peer.Timeouts.KeepAlive) * time.Second,
|
||||
}).DialContext,
|
||||
TLSHandshakeTimeout: time.Duration(peer.Timeouts.TLSHandshake) * time.Second,
|
||||
ResponseHeaderTimeout: time.Duration(peer.Timeouts.ResponseHeader) * time.Second,
|
||||
|
||||
+1
-1
@@ -102,7 +102,7 @@ func NewProcess(ID string, healthCheckTimeout int, config config.ModelConfig, pr
|
||||
Proxy: http.ProxyFromEnvironment,
|
||||
DialContext: (&net.Dialer{
|
||||
Timeout: time.Duration(config.Timeouts.Connect) * time.Second,
|
||||
KeepAlive: 30 * time.Second,
|
||||
KeepAlive: time.Duration(config.Timeouts.KeepAlive) * time.Second,
|
||||
}).DialContext,
|
||||
TLSHandshakeTimeout: time.Duration(config.Timeouts.TLSHandshake) * time.Second,
|
||||
ResponseHeaderTimeout: time.Duration(config.Timeouts.ResponseHeader) * time.Second,
|
||||
|
||||
Reference in New Issue
Block a user