proxy: add configurable HTTP timeouts for models and peers (#619)
Add configurable HTTP timeout settings to both models and peers to support installations that require longer timeouts than the current hardcoded defaults. Closes #618
This commit is contained in:
@@ -21,6 +21,7 @@ llama-swap is a light weight, transparent proxy server that provides automatic m
|
|||||||
|
|
||||||
- Follow test naming conventions like `TestProxyManager_<test name>`, `TestProcessGroup_<test name>`, etc.
|
- Follow test naming conventions like `TestProxyManager_<test name>`, `TestProcessGroup_<test name>`, etc.
|
||||||
- Use `go test -v -run <name pattern for new tests>` to run any new tests you've written.
|
- Use `go test -v -run <name pattern for new tests>` to run any new tests you've written.
|
||||||
|
- Run `gofmt -l .` before committing to verify formatting. Fix any reported files with `gofmt -w <file>`.
|
||||||
- Use `make test-dev` after running new tests for a quick overall test run. This runs `go test` and `staticcheck`. Fix any static checking errors. Use this only when changes are made to any code under the `proxy/` directory
|
- Use `make test-dev` after running new tests for a quick overall test run. This runs `go test` and `staticcheck`. Fix any static checking errors. Use this only when changes are made to any code under the `proxy/` directory
|
||||||
- Use `make test-all` before completing work. This includes long running concurrency tests.
|
- Use `make test-all` before completing work. This includes long running concurrency tests.
|
||||||
|
|
||||||
|
|||||||
@@ -39,6 +39,43 @@
|
|||||||
},
|
},
|
||||||
"default": {},
|
"default": {},
|
||||||
"description": "A dictionary of string substitutions. Macros are reusable snippets used in model cmd, cmdStop, proxy, checkEndpoint, filters.stripParams. Macro names must be <64 chars, match ^[a-zA-Z0-9_-]+$, and not be PORT or MODEL_ID. Values can be string, number, or boolean. Macros can reference other macros defined before them."
|
"description": "A dictionary of string substitutions. Macros are reusable snippets used in model cmd, cmdStop, proxy, checkEndpoint, filters.stripParams. Macro names must be <64 chars, match ^[a-zA-Z0-9_-]+$, and not be PORT or MODEL_ID. Values can be string, number, or boolean. Macros can reference other macros defined before them."
|
||||||
|
},
|
||||||
|
"timeouts": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"connect": {
|
||||||
|
"type": "integer",
|
||||||
|
"minimum": 0,
|
||||||
|
"default": 30,
|
||||||
|
"description": "TCP connection timeout in seconds. Set to 0 to disable (not recommended)."
|
||||||
|
},
|
||||||
|
"responseHeader": {
|
||||||
|
"type": "integer",
|
||||||
|
"minimum": 0,
|
||||||
|
"default": 60,
|
||||||
|
"description": "Time to wait for response headers in seconds. Set to 0 to disable (not recommended)."
|
||||||
|
},
|
||||||
|
"tlsHandshake": {
|
||||||
|
"type": "integer",
|
||||||
|
"minimum": 0,
|
||||||
|
"default": 10,
|
||||||
|
"description": "TLS handshake timeout in seconds. Set to 0 to disable (not recommended)."
|
||||||
|
},
|
||||||
|
"expectContinue": {
|
||||||
|
"type": "integer",
|
||||||
|
"minimum": 0,
|
||||||
|
"default": 1,
|
||||||
|
"description": "Expect-Continue timeout in seconds. Set to 0 to disable (not recommended)."
|
||||||
|
},
|
||||||
|
"idleConn": {
|
||||||
|
"type": "integer",
|
||||||
|
"minimum": 0,
|
||||||
|
"default": 90,
|
||||||
|
"description": "Idle connection timeout in seconds. Set to 0 to disable (not recommended)."
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"description": "Timeout settings for proxy connections."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"properties": {
|
"properties": {
|
||||||
@@ -241,6 +278,9 @@
|
|||||||
"type": "boolean",
|
"type": "boolean",
|
||||||
"default": false,
|
"default": false,
|
||||||
"description": "If true the model will not show up in /v1/models responses. It can still be used as normal in API requests."
|
"description": "If true the model will not show up in /v1/models responses. It can still be used as normal in API requests."
|
||||||
|
},
|
||||||
|
"timeouts": {
|
||||||
|
"$ref": "#/definitions/timeouts"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -367,6 +407,37 @@
|
|||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
"default": {},
|
"default": {},
|
||||||
"description": "Dictionary of filter settings for peer requests. Supports stripParams and setParams."
|
"description": "Dictionary of filter settings for peer requests. Supports stripParams and setParams."
|
||||||
|
},
|
||||||
|
"timeouts": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"connect": {
|
||||||
|
"type": "integer",
|
||||||
|
"minimum": 1,
|
||||||
|
"default": 30,
|
||||||
|
"description": "TCP connection timeout in seconds."
|
||||||
|
},
|
||||||
|
"responseHeader": {
|
||||||
|
"type": "integer",
|
||||||
|
"minimum": 1,
|
||||||
|
"default": 60,
|
||||||
|
"description": "Time to wait for response headers in seconds."
|
||||||
|
},
|
||||||
|
"tlsHandshake": {
|
||||||
|
"type": "integer",
|
||||||
|
"minimum": 1,
|
||||||
|
"default": 10,
|
||||||
|
"description": "TLS handshake timeout in seconds."
|
||||||
|
},
|
||||||
|
"idleConn": {
|
||||||
|
"type": "integer",
|
||||||
|
"minimum": 1,
|
||||||
|
"default": 90,
|
||||||
|
"description": "Idle connection timeout in seconds."
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"description": "Timeout settings for proxy connections to this peer."
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -284,6 +284,21 @@ models:
|
|||||||
# - optional, default: undefined (use global setting)
|
# - optional, default: undefined (use global setting)
|
||||||
sendLoadingState: false
|
sendLoadingState: false
|
||||||
|
|
||||||
|
# timeouts: configure proxy connection timeouts for this model
|
||||||
|
# - optional, defaults shown below
|
||||||
|
# - useful for models running on slower hardware that need longer timeouts
|
||||||
|
# - connect: TCP connection timeout in seconds
|
||||||
|
# - responseHeader: time to wait for response headers in seconds
|
||||||
|
# (increasing this helps avoid 502 errors on slow hardware)
|
||||||
|
# - tlsHandshake: TLS handshake timeout in seconds
|
||||||
|
# - idleConn: idle connection timeout in seconds
|
||||||
|
# - set any value to 0 to disable that timeout (not recommended)
|
||||||
|
timeouts:
|
||||||
|
connect: 30
|
||||||
|
responseHeader: 60
|
||||||
|
tlsHandshake: 10
|
||||||
|
idleConn: 90
|
||||||
|
|
||||||
# Unlisted model example:
|
# Unlisted model example:
|
||||||
"qwen-unlisted":
|
"qwen-unlisted":
|
||||||
# unlisted: boolean, true or false
|
# unlisted: boolean, true or false
|
||||||
@@ -426,6 +441,16 @@ peers:
|
|||||||
- z-ai/glm-4.7
|
- z-ai/glm-4.7
|
||||||
- moonshotai/kimi-k2-0905
|
- moonshotai/kimi-k2-0905
|
||||||
- minimax/minimax-m2.1
|
- minimax/minimax-m2.1
|
||||||
|
# timeouts: configure proxy connection timeouts for this peer
|
||||||
|
# - optional, defaults shown below
|
||||||
|
# - useful when the peer runs on slower hardware
|
||||||
|
# - set any value to 0 to disable that timeout (not recommended)
|
||||||
|
timeouts:
|
||||||
|
connect: 30
|
||||||
|
responseHeader: 60
|
||||||
|
tlsHandshake: 10
|
||||||
|
idleConn: 90
|
||||||
|
|
||||||
# filters: a dictionary of filter settings for peer requests
|
# filters: a dictionary of filter settings for peer requests
|
||||||
# - optional, default: empty dictionary
|
# - optional, default: empty dictionary
|
||||||
# - same capabilities as model filters (stripParams, setParams)
|
# - same capabilities as model filters (stripParams, setParams)
|
||||||
|
|||||||
@@ -319,6 +319,29 @@ models:
|
|||||||
# - recommended to be omitted and the default used
|
# - recommended to be omitted and the default used
|
||||||
concurrencyLimit: 0
|
concurrencyLimit: 0
|
||||||
|
|
||||||
|
# timeouts: configure proxy connection timeouts for this model
|
||||||
|
# - optional, defaults shown below
|
||||||
|
# - useful for models on slower hardware that need longer timeouts
|
||||||
|
# - increase responseHeader to avoid "timeout awaiting response headers" errors
|
||||||
|
# - set any value to 0 to disable that timeout (not recommended)
|
||||||
|
timeouts:
|
||||||
|
# connect: TCP connection timeout in seconds
|
||||||
|
# - default: 30
|
||||||
|
connect: 30
|
||||||
|
|
||||||
|
# responseHeader: time to wait for response headers in seconds
|
||||||
|
# - default: 60
|
||||||
|
# - for slow image generation or large models, consider increasing to 300+ seconds
|
||||||
|
responseHeader: 60
|
||||||
|
|
||||||
|
# tlsHandshake: TLS handshake timeout in seconds
|
||||||
|
# - default: 10
|
||||||
|
tlsHandshake: 10
|
||||||
|
|
||||||
|
# idleConn: idle connection timeout in seconds
|
||||||
|
# - default: 90
|
||||||
|
idleConn: 90
|
||||||
|
|
||||||
# sendLoadingState: overrides the global sendLoadingState setting for this model
|
# sendLoadingState: overrides the global sendLoadingState setting for this model
|
||||||
# - optional, default: undefined (use global setting)
|
# - optional, default: undefined (use global setting)
|
||||||
sendLoadingState: false
|
sendLoadingState: false
|
||||||
@@ -444,6 +467,17 @@ peers:
|
|||||||
# - required
|
# - required
|
||||||
# - requested path to llama-swap will be appended to the end of the proxy value
|
# - requested path to llama-swap will be appended to the end of the proxy value
|
||||||
proxy: http://192.168.1.23
|
proxy: http://192.168.1.23
|
||||||
|
|
||||||
|
# timeouts: configure proxy connection timeouts for this peer
|
||||||
|
# - optional, defaults shown below
|
||||||
|
# - useful when the peer runs on slower hardware
|
||||||
|
# - set any value to 0 to disable that timeout (not recommended)
|
||||||
|
timeouts:
|
||||||
|
connect: 30
|
||||||
|
responseHeader: 60
|
||||||
|
tlsHandshake: 10
|
||||||
|
idleConn: 90
|
||||||
|
|
||||||
# models: a list of models served by the peer
|
# models: a list of models served by the peer
|
||||||
# - required
|
# - required
|
||||||
models:
|
models:
|
||||||
|
|||||||
@@ -187,6 +187,13 @@ groups:
|
|||||||
Name: "Model 1",
|
Name: "Model 1",
|
||||||
Description: "This is model 1",
|
Description: "This is model 1",
|
||||||
SendLoadingState: &modelLoadingState,
|
SendLoadingState: &modelLoadingState,
|
||||||
|
Timeouts: TimeoutsConfig{
|
||||||
|
Connect: 30,
|
||||||
|
ResponseHeader: 60,
|
||||||
|
TLSHandshake: 10,
|
||||||
|
ExpectContinue: 1,
|
||||||
|
IdleConn: 90,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
"model2": {
|
"model2": {
|
||||||
Cmd: "path/to/server --arg1 one",
|
Cmd: "path/to/server --arg1 one",
|
||||||
@@ -195,6 +202,13 @@ groups:
|
|||||||
Env: []string{},
|
Env: []string{},
|
||||||
CheckEndpoint: "/",
|
CheckEndpoint: "/",
|
||||||
SendLoadingState: &modelLoadingState,
|
SendLoadingState: &modelLoadingState,
|
||||||
|
Timeouts: TimeoutsConfig{
|
||||||
|
Connect: 30,
|
||||||
|
ResponseHeader: 60,
|
||||||
|
TLSHandshake: 10,
|
||||||
|
ExpectContinue: 1,
|
||||||
|
IdleConn: 90,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
"model3": {
|
"model3": {
|
||||||
Cmd: "path/to/cmd --arg1 one",
|
Cmd: "path/to/cmd --arg1 one",
|
||||||
@@ -203,6 +217,13 @@ groups:
|
|||||||
Env: []string{},
|
Env: []string{},
|
||||||
CheckEndpoint: "/",
|
CheckEndpoint: "/",
|
||||||
SendLoadingState: &modelLoadingState,
|
SendLoadingState: &modelLoadingState,
|
||||||
|
Timeouts: TimeoutsConfig{
|
||||||
|
Connect: 30,
|
||||||
|
ResponseHeader: 60,
|
||||||
|
TLSHandshake: 10,
|
||||||
|
ExpectContinue: 1,
|
||||||
|
IdleConn: 90,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
"model4": {
|
"model4": {
|
||||||
Cmd: "path/to/cmd --arg1 one",
|
Cmd: "path/to/cmd --arg1 one",
|
||||||
@@ -211,6 +232,13 @@ groups:
|
|||||||
Aliases: []string{},
|
Aliases: []string{},
|
||||||
Env: []string{},
|
Env: []string{},
|
||||||
SendLoadingState: &modelLoadingState,
|
SendLoadingState: &modelLoadingState,
|
||||||
|
Timeouts: TimeoutsConfig{
|
||||||
|
Connect: 30,
|
||||||
|
ResponseHeader: 60,
|
||||||
|
TLSHandshake: 10,
|
||||||
|
ExpectContinue: 1,
|
||||||
|
IdleConn: 90,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
HealthCheckTimeout: 15,
|
HealthCheckTimeout: 15,
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import (
|
|||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestConfig_GroupMemberIsUnique(t *testing.T) {
|
func TestConfig_GroupMemberIsUnique(t *testing.T) {
|
||||||
@@ -1438,3 +1439,108 @@ models:
|
|||||||
})
|
})
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestConfig_TimeoutsParsing(t *testing.T) {
|
||||||
|
configYaml := `
|
||||||
|
models:
|
||||||
|
model1:
|
||||||
|
cmd: test-server --port ${PORT}
|
||||||
|
timeouts:
|
||||||
|
connect: 45
|
||||||
|
responseHeader: 120
|
||||||
|
`
|
||||||
|
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(configYaml))
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
modelConfig, found := config.Models["model1"]
|
||||||
|
require.True(t, found, "model1 should exist in config")
|
||||||
|
|
||||||
|
assert.Equal(t, 45, modelConfig.Timeouts.Connect)
|
||||||
|
assert.Equal(t, 120, modelConfig.Timeouts.ResponseHeader)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestConfig_TimeoutsDefaults(t *testing.T) {
|
||||||
|
configYaml := `
|
||||||
|
models:
|
||||||
|
model1:
|
||||||
|
cmd: test-server --port ${PORT}
|
||||||
|
`
|
||||||
|
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(configYaml))
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
modelConfig, found := config.Models["model1"]
|
||||||
|
require.True(t, found, "model1 should exist in config")
|
||||||
|
|
||||||
|
// Default values should be set during unmarshaling
|
||||||
|
assert.Equal(t, 30, modelConfig.Timeouts.Connect)
|
||||||
|
assert.Equal(t, 60, modelConfig.Timeouts.ResponseHeader)
|
||||||
|
assert.Equal(t, 10, modelConfig.Timeouts.TLSHandshake)
|
||||||
|
assert.Equal(t, 1, modelConfig.Timeouts.ExpectContinue)
|
||||||
|
assert.Equal(t, 90, modelConfig.Timeouts.IdleConn)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestConfig_TimeoutsZeroAllowed(t *testing.T) {
|
||||||
|
configYaml := `
|
||||||
|
models:
|
||||||
|
model1:
|
||||||
|
cmd: test-server --port ${PORT}
|
||||||
|
timeouts:
|
||||||
|
connect: 0
|
||||||
|
responseHeader: 0
|
||||||
|
`
|
||||||
|
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(configYaml))
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
modelConfig, found := config.Models["model1"]
|
||||||
|
require.True(t, found, "model1 should exist in config")
|
||||||
|
|
||||||
|
// Explicit 0 should be preserved (disables timeout)
|
||||||
|
assert.Equal(t, 0, modelConfig.Timeouts.Connect)
|
||||||
|
assert.Equal(t, 0, modelConfig.Timeouts.ResponseHeader)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestConfig_PeerTimeoutsParsing(t *testing.T) {
|
||||||
|
configYaml := `
|
||||||
|
peers:
|
||||||
|
peer1:
|
||||||
|
proxy: http://example.com
|
||||||
|
models: [model1]
|
||||||
|
timeouts:
|
||||||
|
connect: 45
|
||||||
|
responseHeader: 120
|
||||||
|
`
|
||||||
|
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(configYaml))
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
peerConfig, found := config.Peers["peer1"]
|
||||||
|
require.True(t, found, "peer1 should exist in config")
|
||||||
|
|
||||||
|
assert.Equal(t, 45, peerConfig.Timeouts.Connect)
|
||||||
|
assert.Equal(t, 120, peerConfig.Timeouts.ResponseHeader)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestConfig_PeerTimeoutsDefaults(t *testing.T) {
|
||||||
|
configYaml := `
|
||||||
|
peers:
|
||||||
|
peer1:
|
||||||
|
proxy: http://example.com
|
||||||
|
models: [model1]
|
||||||
|
`
|
||||||
|
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(configYaml))
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
peerConfig, found := config.Peers["peer1"]
|
||||||
|
require.True(t, found, "peer1 should exist in config")
|
||||||
|
|
||||||
|
// Default values should be set during unmarshaling
|
||||||
|
assert.Equal(t, 30, peerConfig.Timeouts.Connect)
|
||||||
|
assert.Equal(t, 60, peerConfig.Timeouts.ResponseHeader)
|
||||||
|
assert.Equal(t, 10, peerConfig.Timeouts.TLSHandshake)
|
||||||
|
assert.Equal(t, 1, peerConfig.Timeouts.ExpectContinue)
|
||||||
|
assert.Equal(t, 90, peerConfig.Timeouts.IdleConn)
|
||||||
|
}
|
||||||
|
|||||||
@@ -173,6 +173,13 @@ groups:
|
|||||||
Env: []string{"VAR1=value1", "VAR2=value2"},
|
Env: []string{"VAR1=value1", "VAR2=value2"},
|
||||||
CheckEndpoint: "/health",
|
CheckEndpoint: "/health",
|
||||||
SendLoadingState: &modelLoadingState,
|
SendLoadingState: &modelLoadingState,
|
||||||
|
Timeouts: TimeoutsConfig{
|
||||||
|
Connect: 30,
|
||||||
|
ResponseHeader: 60,
|
||||||
|
TLSHandshake: 10,
|
||||||
|
ExpectContinue: 1,
|
||||||
|
IdleConn: 90,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
"model2": {
|
"model2": {
|
||||||
Cmd: "path/to/server --arg1 one",
|
Cmd: "path/to/server --arg1 one",
|
||||||
@@ -182,6 +189,13 @@ groups:
|
|||||||
Env: []string{},
|
Env: []string{},
|
||||||
CheckEndpoint: "/",
|
CheckEndpoint: "/",
|
||||||
SendLoadingState: &modelLoadingState,
|
SendLoadingState: &modelLoadingState,
|
||||||
|
Timeouts: TimeoutsConfig{
|
||||||
|
Connect: 30,
|
||||||
|
ResponseHeader: 60,
|
||||||
|
TLSHandshake: 10,
|
||||||
|
ExpectContinue: 1,
|
||||||
|
IdleConn: 90,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
"model3": {
|
"model3": {
|
||||||
Cmd: "path/to/cmd --arg1 one",
|
Cmd: "path/to/cmd --arg1 one",
|
||||||
@@ -191,6 +205,13 @@ groups:
|
|||||||
Env: []string{},
|
Env: []string{},
|
||||||
CheckEndpoint: "/",
|
CheckEndpoint: "/",
|
||||||
SendLoadingState: &modelLoadingState,
|
SendLoadingState: &modelLoadingState,
|
||||||
|
Timeouts: TimeoutsConfig{
|
||||||
|
Connect: 30,
|
||||||
|
ResponseHeader: 60,
|
||||||
|
TLSHandshake: 10,
|
||||||
|
ExpectContinue: 1,
|
||||||
|
IdleConn: 90,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
"model4": {
|
"model4": {
|
||||||
Cmd: "path/to/cmd --arg1 one",
|
Cmd: "path/to/cmd --arg1 one",
|
||||||
@@ -200,6 +221,13 @@ groups:
|
|||||||
Aliases: []string{},
|
Aliases: []string{},
|
||||||
Env: []string{},
|
Env: []string{},
|
||||||
SendLoadingState: &modelLoadingState,
|
SendLoadingState: &modelLoadingState,
|
||||||
|
Timeouts: TimeoutsConfig{
|
||||||
|
Connect: 30,
|
||||||
|
ResponseHeader: 60,
|
||||||
|
TLSHandshake: 10,
|
||||||
|
ExpectContinue: 1,
|
||||||
|
IdleConn: 90,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
HealthCheckTimeout: 15,
|
HealthCheckTimeout: 15,
|
||||||
|
|||||||
@@ -9,6 +9,15 @@ const (
|
|||||||
MODEL_CONFIG_DEFAULT_TTL = -1
|
MODEL_CONFIG_DEFAULT_TTL = -1
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// TimeoutsConfig holds timeout settings for proxy connections
|
||||||
|
type TimeoutsConfig struct {
|
||||||
|
Connect int `yaml:"connect"` // seconds, 0 = no timeout (not recommended)
|
||||||
|
ResponseHeader int `yaml:"responseHeader"` // seconds, 0 = no timeout (not recommended)
|
||||||
|
TLSHandshake int `yaml:"tlsHandshake"` // seconds, 0 = no timeout (not recommended)
|
||||||
|
ExpectContinue int `yaml:"expectContinue"` // seconds, 0 = no timeout (not recommended)
|
||||||
|
IdleConn int `yaml:"idleConn"` // seconds, 0 = no timeout (not recommended)
|
||||||
|
}
|
||||||
|
|
||||||
type ModelConfig struct {
|
type ModelConfig struct {
|
||||||
Cmd string `yaml:"cmd"`
|
Cmd string `yaml:"cmd"`
|
||||||
CmdStop string `yaml:"cmdStop"`
|
CmdStop string `yaml:"cmdStop"`
|
||||||
@@ -40,6 +49,9 @@ type ModelConfig struct {
|
|||||||
|
|
||||||
// override global setting
|
// override global setting
|
||||||
SendLoadingState *bool `yaml:"sendLoadingState"`
|
SendLoadingState *bool `yaml:"sendLoadingState"`
|
||||||
|
|
||||||
|
// Timeout settings for proxy connections
|
||||||
|
Timeouts TimeoutsConfig `yaml:"timeouts"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *ModelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
func (m *ModelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||||
@@ -57,6 +69,13 @@ func (m *ModelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
|||||||
ConcurrencyLimit: 0,
|
ConcurrencyLimit: 0,
|
||||||
Name: "",
|
Name: "",
|
||||||
Description: "",
|
Description: "",
|
||||||
|
Timeouts: TimeoutsConfig{
|
||||||
|
Connect: 30,
|
||||||
|
ResponseHeader: 60,
|
||||||
|
TLSHandshake: 10,
|
||||||
|
ExpectContinue: 1,
|
||||||
|
IdleConn: 90,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
// the default cmdStop to taskkill /f /t /pid ${PID}
|
// the default cmdStop to taskkill /f /t /pid ${PID}
|
||||||
|
|||||||
@@ -12,6 +12,9 @@ type PeerConfig struct {
|
|||||||
ApiKey string `yaml:"apiKey"`
|
ApiKey string `yaml:"apiKey"`
|
||||||
Models []string `yaml:"models"`
|
Models []string `yaml:"models"`
|
||||||
Filters Filters `yaml:"filters"`
|
Filters Filters `yaml:"filters"`
|
||||||
|
|
||||||
|
// Timeout settings for proxy connections
|
||||||
|
Timeouts TimeoutsConfig `yaml:"timeouts"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *PeerConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
func (c *PeerConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||||
@@ -21,6 +24,13 @@ func (c *PeerConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
|||||||
ApiKey: "",
|
ApiKey: "",
|
||||||
Models: []string{},
|
Models: []string{},
|
||||||
Filters: Filters{},
|
Filters: Filters{},
|
||||||
|
Timeouts: TimeoutsConfig{
|
||||||
|
Connect: 30,
|
||||||
|
ResponseHeader: 60,
|
||||||
|
TLSHandshake: 10,
|
||||||
|
ExpectContinue: 1,
|
||||||
|
IdleConn: 90,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := unmarshal(&defaults); err != nil {
|
if err := unmarshal(&defaults); err != nil {
|
||||||
|
|||||||
+17
-15
@@ -34,23 +34,25 @@ func NewPeerProxy(peers config.PeerDictionaryConfig, proxyLogger *LogMonitor) (*
|
|||||||
}
|
}
|
||||||
sort.Strings(peerIDs)
|
sort.Strings(peerIDs)
|
||||||
|
|
||||||
// Create a shared transport with reasonable timeouts for peer connections
|
|
||||||
// these can be tuned with feedback later
|
|
||||||
peerTransport := &http.Transport{
|
|
||||||
DialContext: (&net.Dialer{
|
|
||||||
Timeout: 30 * time.Second, // Connection timeout
|
|
||||||
KeepAlive: 30 * time.Second,
|
|
||||||
}).DialContext,
|
|
||||||
TLSHandshakeTimeout: 10 * time.Second,
|
|
||||||
ResponseHeaderTimeout: 60 * time.Second, // Time to wait for response headers
|
|
||||||
ExpectContinueTimeout: 1 * time.Second,
|
|
||||||
MaxIdleConns: 100,
|
|
||||||
MaxIdleConnsPerHost: 10,
|
|
||||||
IdleConnTimeout: 90 * time.Second,
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, peerID := range peerIDs {
|
for _, peerID := range peerIDs {
|
||||||
peer := peers[peerID]
|
peer := peers[peerID]
|
||||||
|
|
||||||
|
// Create a transport with per-peer timeout configuration
|
||||||
|
peerTransport := &http.Transport{
|
||||||
|
Proxy: http.ProxyFromEnvironment,
|
||||||
|
DialContext: (&net.Dialer{
|
||||||
|
Timeout: time.Duration(peer.Timeouts.Connect) * time.Second,
|
||||||
|
KeepAlive: 30 * time.Second,
|
||||||
|
}).DialContext,
|
||||||
|
TLSHandshakeTimeout: time.Duration(peer.Timeouts.TLSHandshake) * time.Second,
|
||||||
|
ResponseHeaderTimeout: time.Duration(peer.Timeouts.ResponseHeader) * time.Second,
|
||||||
|
ExpectContinueTimeout: time.Duration(peer.Timeouts.ExpectContinue) * time.Second,
|
||||||
|
ForceAttemptHTTP2: true,
|
||||||
|
MaxIdleConns: 100,
|
||||||
|
MaxIdleConnsPerHost: 10,
|
||||||
|
IdleConnTimeout: time.Duration(peer.Timeouts.IdleConn) * time.Second,
|
||||||
|
}
|
||||||
|
|
||||||
// Create reverse proxy for this peer
|
// Create reverse proxy for this peer
|
||||||
reverseProxy := httputil.NewSingleHostReverseProxy(peer.ProxyURL)
|
reverseProxy := httputil.NewSingleHostReverseProxy(peer.ProxyURL)
|
||||||
reverseProxy.Transport = peerTransport
|
reverseProxy.Transport = peerTransport
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import (
|
|||||||
"net/url"
|
"net/url"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/mostlygeek/llama-swap/proxy/config"
|
"github.com/mostlygeek/llama-swap/proxy/config"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
@@ -266,3 +267,45 @@ func TestProxyRequest_SSEHeaderModification(t *testing.T) {
|
|||||||
// The X-Accel-Buffering header should be set to "no" for SSE
|
// The X-Accel-Buffering header should be set to "no" for SSE
|
||||||
assert.Equal(t, "no", w.Header().Get("X-Accel-Buffering"))
|
assert.Equal(t, "no", w.Header().Get("X-Accel-Buffering"))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestNewPeerProxy_CustomTimeouts(t *testing.T) {
|
||||||
|
proxyURL, _ := url.Parse("http://localhost:8080")
|
||||||
|
|
||||||
|
peers := config.PeerDictionaryConfig{
|
||||||
|
"test-peer": config.PeerConfig{
|
||||||
|
Proxy: "http://localhost:8080",
|
||||||
|
ProxyURL: proxyURL,
|
||||||
|
Models: []string{"model1"},
|
||||||
|
Timeouts: config.TimeoutsConfig{
|
||||||
|
Connect: 45,
|
||||||
|
ResponseHeader: 300,
|
||||||
|
TLSHandshake: 15,
|
||||||
|
ExpectContinue: 2,
|
||||||
|
IdleConn: 120,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
peerProxy, err := NewPeerProxy(peers, testLogger)
|
||||||
|
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.NotNil(t, peerProxy)
|
||||||
|
assert.True(t, peerProxy.HasPeerModel("model1"))
|
||||||
|
|
||||||
|
// Verify the timeout values are actually applied to the transport
|
||||||
|
member, found := peerProxy.proxyMap["model1"]
|
||||||
|
require.True(t, found, "model1 should exist in proxyMap")
|
||||||
|
assert.NotNil(t, member.reverseProxy)
|
||||||
|
assert.NotNil(t, member.reverseProxy.Transport)
|
||||||
|
|
||||||
|
transport, ok := member.reverseProxy.Transport.(*http.Transport)
|
||||||
|
require.True(t, ok, "Transport should be *http.Transport")
|
||||||
|
|
||||||
|
// Verify all timeout values are correctly applied
|
||||||
|
assert.Equal(t, 300*time.Second, transport.ResponseHeaderTimeout)
|
||||||
|
assert.Equal(t, 15*time.Second, transport.TLSHandshakeTimeout)
|
||||||
|
assert.Equal(t, 2*time.Second, transport.ExpectContinueTimeout)
|
||||||
|
assert.Equal(t, 120*time.Second, transport.IdleConnTimeout)
|
||||||
|
// ForceAttemptHTTP2 should be enabled
|
||||||
|
assert.True(t, transport.ForceAttemptHTTP2)
|
||||||
|
}
|
||||||
|
|||||||
@@ -96,6 +96,24 @@ func NewProcess(ID string, healthCheckTimeout int, config config.ModelConfig, pr
|
|||||||
var reverseProxy *httputil.ReverseProxy
|
var reverseProxy *httputil.ReverseProxy
|
||||||
if proxyURL != nil {
|
if proxyURL != nil {
|
||||||
reverseProxy = httputil.NewSingleHostReverseProxy(proxyURL)
|
reverseProxy = httputil.NewSingleHostReverseProxy(proxyURL)
|
||||||
|
|
||||||
|
// Create custom transport with configured timeouts
|
||||||
|
transport := &http.Transport{
|
||||||
|
Proxy: http.ProxyFromEnvironment,
|
||||||
|
DialContext: (&net.Dialer{
|
||||||
|
Timeout: time.Duration(config.Timeouts.Connect) * time.Second,
|
||||||
|
KeepAlive: 30 * time.Second,
|
||||||
|
}).DialContext,
|
||||||
|
TLSHandshakeTimeout: time.Duration(config.Timeouts.TLSHandshake) * time.Second,
|
||||||
|
ResponseHeaderTimeout: time.Duration(config.Timeouts.ResponseHeader) * time.Second,
|
||||||
|
ExpectContinueTimeout: time.Duration(config.Timeouts.ExpectContinue) * time.Second,
|
||||||
|
ForceAttemptHTTP2: true,
|
||||||
|
MaxIdleConns: 100,
|
||||||
|
MaxIdleConnsPerHost: 10,
|
||||||
|
IdleConnTimeout: time.Duration(config.Timeouts.IdleConn) * time.Second,
|
||||||
|
}
|
||||||
|
reverseProxy.Transport = transport
|
||||||
|
|
||||||
reverseProxy.ModifyResponse = func(resp *http.Response) error {
|
reverseProxy.ModifyResponse = func(resp *http.Response) error {
|
||||||
// prevent nginx from buffering streaming responses (e.g., SSE)
|
// prevent nginx from buffering streaming responses (e.g., SSE)
|
||||||
if strings.Contains(strings.ToLower(resp.Header.Get("Content-Type")), "text/event-stream") {
|
if strings.Contains(strings.ToLower(resp.Header.Get("Content-Type")), "text/event-stream") {
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ package proxy
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/http/httptest"
|
"net/http/httptest"
|
||||||
"os"
|
"os"
|
||||||
@@ -569,3 +570,39 @@ func (w *panicOnWriteResponseWriter) Write(b []byte) (int, error) {
|
|||||||
}
|
}
|
||||||
return w.ResponseRecorder.Write(b)
|
return w.ResponseRecorder.Write(b)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestProcess_CustomTimeouts(t *testing.T) {
|
||||||
|
modelConfig := config.ModelConfig{
|
||||||
|
Cmd: "echo test",
|
||||||
|
Proxy: "http://localhost:8080",
|
||||||
|
CheckEndpoint: "/health",
|
||||||
|
Timeouts: config.TimeoutsConfig{
|
||||||
|
Connect: 45,
|
||||||
|
ResponseHeader: 120,
|
||||||
|
TLSHandshake: 15,
|
||||||
|
ExpectContinue: 2,
|
||||||
|
IdleConn: 120,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
debugLogger := NewLogMonitorWriter(io.Discard)
|
||||||
|
process := NewProcess("test-model", 30, modelConfig, debugLogger, debugLogger)
|
||||||
|
|
||||||
|
// Verify the process was created successfully
|
||||||
|
assert.NotNil(t, process)
|
||||||
|
assert.Equal(t, "test-model", process.ID)
|
||||||
|
assert.NotNil(t, process.reverseProxy)
|
||||||
|
assert.NotNil(t, process.reverseProxy.Transport)
|
||||||
|
|
||||||
|
// Verify it's using http.Transport (not some other type)
|
||||||
|
transport, ok := process.reverseProxy.Transport.(*http.Transport)
|
||||||
|
assert.True(t, ok, "Transport should be *http.Transport")
|
||||||
|
assert.NotNil(t, transport)
|
||||||
|
|
||||||
|
// Verify the timeouts are correctly applied
|
||||||
|
assert.Equal(t, 120*time.Second, transport.ResponseHeaderTimeout)
|
||||||
|
assert.Equal(t, 15*time.Second, transport.TLSHandshakeTimeout)
|
||||||
|
assert.Equal(t, 2*time.Second, transport.ExpectContinueTimeout)
|
||||||
|
assert.Equal(t, 120*time.Second, transport.IdleConnTimeout)
|
||||||
|
assert.True(t, transport.ForceAttemptHTTP2)
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user