proxy: add configurable HTTP timeouts for models and peers (#619)

Add configurable HTTP timeout settings to both models and peers to support installations that require longer timeouts than the current hardcoded defaults. Closes #618
2026-04-06 04:30:27 -07:00
parent 981910d734
commit a37b4866d8
13 changed files with 437 additions and 15 deletions
@@ -21,6 +21,7 @@ llama-swap is a light weight, transparent proxy server that provides automatic m
 - Follow test naming conventions like `TestProxyManager_<test name>`, `TestProcessGroup_<test name>`, etc.
 - Use `go test -v -run <name pattern for new tests>` to run any new tests you've written.
 - Run `gofmt -l .` before committing to verify formatting. Fix any reported files with `gofmt -w <file>`.
 - Use `make test-dev` after running new tests for a quick overall test run. This runs `go test` and `staticcheck`. Fix any static checking errors. Use this only when changes are made to any code under the `proxy/` directory.
 - Use `make test-all` before completing work. This includes long running concurrency tests.
@@ -39,6 +39,43 @@
            },
            "default": {},
            "description": "A dictionary of string substitutions. Macros are reusable snippets used in model cmd, cmdStop, proxy, checkEndpoint, filters.stripParams. Macro names must be <64 chars, match ^[a-zA-Z0-9_-]+$, and not be PORT or MODEL_ID. Values can be string, number, or boolean. Macros can reference other macros defined before them."
        },
        "timeouts": {
            "type": "object",
            "properties": {
                "connect": {
                    "type": "integer",
                    "minimum": 0,
                    "default": 30,
                    "description": "TCP connection timeout in seconds. Set to 0 to disable (not recommended)."
                },
                "responseHeader": {
                    "type": "integer",
                    "minimum": 0,
                    "default": 60,
                    "description": "Time to wait for response headers in seconds. Set to 0 to disable (not recommended)."
                },
                "tlsHandshake": {
                    "type": "integer",
                    "minimum": 0,
                    "default": 10,
                    "description": "TLS handshake timeout in seconds. Set to 0 to disable (not recommended)."
                },
                "expectContinue": {
                    "type": "integer",
                    "minimum": 0,
                    "default": 1,
                    "description": "Expect-Continue timeout in seconds. Set to 0 to disable (not recommended)."
                },
                "idleConn": {
                    "type": "integer",
                    "minimum": 0,
                    "default": 90,
                    "description": "Idle connection timeout in seconds. Set to 0 to disable (not recommended)."
                }
            },
            "additionalProperties": false,
            "description": "Timeout settings for proxy connections."
        }
    },
    "properties": {
@@ -241,6 +278,9 @@
                        "type": "boolean",
                        "default": false,
                        "description": "If true the model will not show up in /v1/models responses. It can still be used as normal in API requests."
                    },
                    "timeouts": {
                        "$ref": "#/definitions/timeouts"
                    }
                }
            }
@@ -367,6 +407,37 @@
                        "additionalProperties": false,
                        "default": {},
                        "description": "Dictionary of filter settings for peer requests. Supports stripParams and setParams."
                    },
                    "timeouts": {
                        "$ref": "#/definitions/timeouts"
                    }
                }
            },
@@ -284,6 +284,21 @@ models:
    # - optional, default: undefined (use global setting)
    sendLoadingState: false
    # timeouts: configure proxy connection timeouts for this model
    # - optional, defaults shown below
    # - useful for models running on slower hardware that need longer timeouts
    # - connect: TCP connection timeout in seconds
    # - responseHeader: time to wait for response headers in seconds
    #   (increasing this helps avoid 502 errors on slow hardware)
    # - tlsHandshake: TLS handshake timeout in seconds
    # - expectContinue: Expect-Continue timeout in seconds (default: 1, not shown below)
    # - idleConn: idle connection timeout in seconds
    # - set any value to 0 to disable that timeout (not recommended)
    timeouts:
      connect: 30
      responseHeader: 60
      tlsHandshake: 10
      idleConn: 90
  # Unlisted model example:
  "qwen-unlisted":
    # unlisted: boolean, true or false
@@ -426,6 +441,16 @@ peers:
      - z-ai/glm-4.7
      - moonshotai/kimi-k2-0905
      - minimax/minimax-m2.1
    # timeouts: configure proxy connection timeouts for this peer
    # - optional, defaults shown below
    # - useful when the peer runs on slower hardware
    # - set any value to 0 to disable that timeout (not recommended)
    timeouts:
      connect: 30
      responseHeader: 60
      tlsHandshake: 10
      idleConn: 90
    # filters: a dictionary of filter settings for peer requests
    # - optional, default: empty dictionary
    # - same capabilities as model filters (stripParams, setParams)
@@ -319,6 +319,29 @@ models:
    # - recommended to be omitted and the default used
    concurrencyLimit: 0
    # timeouts: configure proxy connection timeouts for this model
    # - optional, defaults shown below
    # - useful for models on slower hardware that need longer timeouts
    # - increase responseHeader to avoid "timeout awaiting response headers" errors
    # - set any value to 0 to disable that timeout (not recommended)
    timeouts:
      # connect: TCP connection timeout in seconds
      # - default: 30
      connect: 30
      # responseHeader: time to wait for response headers in seconds
      # - default: 60
      # - for slow image generation or large models, consider increasing to 300+ seconds
      responseHeader: 60
      # tlsHandshake: TLS handshake timeout in seconds
      # - default: 10
      tlsHandshake: 10
      # idleConn: idle connection timeout in seconds
      # - default: 90
      idleConn: 90
    # sendLoadingState: overrides the global sendLoadingState setting for this model
    # - optional, default: undefined (use global setting)
    sendLoadingState: false
@@ -444,6 +467,17 @@ peers:
    # - required
    # - requested path to llama-swap will be appended to the end of the proxy value
    proxy: http://192.168.1.23
    # timeouts: configure proxy connection timeouts for this peer
    # - optional, defaults shown below
    # - useful when the peer runs on slower hardware
    # - set any value to 0 to disable that timeout (not recommended)
    timeouts:
      connect: 30
      responseHeader: 60
      tlsHandshake: 10
      idleConn: 90
    # models: a list of models served by the peer
    # - required
    models:
@@ -187,6 +187,13 @@ groups:
 				Name:             "Model 1",
 				Description:      "This is model 1",
 				SendLoadingState: &modelLoadingState,
 				Timeouts: TimeoutsConfig{
 					Connect:        30,
 					ResponseHeader: 60,
 					TLSHandshake:   10,
 					ExpectContinue: 1,
 					IdleConn:       90,
 				},
 			},
 			"model2": {
 				Cmd:              "path/to/server --arg1 one",
@@ -195,6 +202,13 @@ groups:
 				Env:              []string{},
 				CheckEndpoint:    "/",
 				SendLoadingState: &modelLoadingState,
 				Timeouts: TimeoutsConfig{
 					Connect:        30,
 					ResponseHeader: 60,
 					TLSHandshake:   10,
 					ExpectContinue: 1,
 					IdleConn:       90,
 				},
 			},
 			"model3": {
 				Cmd:              "path/to/cmd --arg1 one",
@@ -203,6 +217,13 @@ groups:
 				Env:              []string{},
 				CheckEndpoint:    "/",
 				SendLoadingState: &modelLoadingState,
 				Timeouts: TimeoutsConfig{
 					Connect:        30,
 					ResponseHeader: 60,
 					TLSHandshake:   10,
 					ExpectContinue: 1,
 					IdleConn:       90,
 				},
 			},
 			"model4": {
 				Cmd:              "path/to/cmd --arg1 one",
@@ -211,6 +232,13 @@ groups:
 				Aliases:          []string{},
 				Env:              []string{},
 				SendLoadingState: &modelLoadingState,
 				Timeouts: TimeoutsConfig{
 					Connect:        30,
 					ResponseHeader: 60,
 					TLSHandshake:   10,
 					ExpectContinue: 1,
 					IdleConn:       90,
 				},
 			},
 		},
 		HealthCheckTimeout: 15,
@@ -6,6 +6,7 @@ import (
 	"testing"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 )
 func TestConfig_GroupMemberIsUnique(t *testing.T) {
@@ -1438,3 +1439,108 @@ models:
 	})
 }
 // TestConfig_TimeoutsParsing verifies that timeouts set on a model replace the
 // defaults, and that timeouts omitted from the same block keep their defaults.
 func TestConfig_TimeoutsParsing(t *testing.T) {
 	configYaml := `
 models:
  model1:
    cmd: test-server --port ${PORT}
    timeouts:
      connect: 45
      responseHeader: 120
 `
 	config, err := LoadConfigFromReader(strings.NewReader(configYaml))
 	require.NoError(t, err)
 	modelConfig, found := config.Models["model1"]
 	require.True(t, found, "model1 should exist in config")
 	// Explicitly configured values replace the defaults
 	assert.Equal(t, 45, modelConfig.Timeouts.Connect)
 	assert.Equal(t, 120, modelConfig.Timeouts.ResponseHeader)
 	// Omitted values must keep their defaults: falling back to 0 would
 	// silently disable those timeouts
 	assert.Equal(t, 10, modelConfig.Timeouts.TLSHandshake)
 	assert.Equal(t, 1, modelConfig.Timeouts.ExpectContinue)
 	assert.Equal(t, 90, modelConfig.Timeouts.IdleConn)
 }
 // TestConfig_TimeoutsDefaults checks that a model declared without a timeouts
 // block is loaded with the default value for every timeout.
 func TestConfig_TimeoutsDefaults(t *testing.T) {
 	const src = `
 models:
  model1:
    cmd: test-server --port ${PORT}
 `
 	cfg, err := LoadConfigFromReader(strings.NewReader(src))
 	require.NoError(t, err)
 	model, ok := cfg.Models["model1"]
 	require.True(t, ok, "model1 should exist in config")
 	// Unmarshaling is expected to have filled in every default
 	want := TimeoutsConfig{
 		Connect:        30,
 		ResponseHeader: 60,
 		TLSHandshake:   10,
 		ExpectContinue: 1,
 		IdleConn:       90,
 	}
 	assert.Equal(t, want, model.Timeouts)
 }
 // TestConfig_TimeoutsZeroAllowed checks that a timeout explicitly set to 0 is
 // loaded as 0, the value that disables the timeout.
 func TestConfig_TimeoutsZeroAllowed(t *testing.T) {
 	const src = `
 models:
  model1:
    cmd: test-server --port ${PORT}
    timeouts:
      connect: 0
      responseHeader: 0
 `
 	cfg, err := LoadConfigFromReader(strings.NewReader(src))
 	require.NoError(t, err)
 	model, ok := cfg.Models["model1"]
 	require.True(t, ok, "model1 should exist in config")
 	// A zero written in the config must survive loading
 	got := model.Timeouts
 	assert.Equal(t, 0, got.Connect)
 	assert.Equal(t, 0, got.ResponseHeader)
 }
 // TestConfig_PeerTimeoutsParsing verifies that timeouts set on a peer replace
 // the defaults, and that timeouts omitted from the same block keep their
 // defaults.
 func TestConfig_PeerTimeoutsParsing(t *testing.T) {
 	configYaml := `
 peers:
  peer1:
    proxy: http://example.com
    models: [model1]
    timeouts:
      connect: 45
      responseHeader: 120
 `
 	config, err := LoadConfigFromReader(strings.NewReader(configYaml))
 	require.NoError(t, err)
 	peerConfig, found := config.Peers["peer1"]
 	require.True(t, found, "peer1 should exist in config")
 	// Explicitly configured values replace the defaults
 	assert.Equal(t, 45, peerConfig.Timeouts.Connect)
 	assert.Equal(t, 120, peerConfig.Timeouts.ResponseHeader)
 	// Omitted values must keep their defaults: falling back to 0 would
 	// silently disable those timeouts
 	assert.Equal(t, 10, peerConfig.Timeouts.TLSHandshake)
 	assert.Equal(t, 1, peerConfig.Timeouts.ExpectContinue)
 	assert.Equal(t, 90, peerConfig.Timeouts.IdleConn)
 }
 // TestConfig_PeerTimeoutsDefaults checks that a peer declared without a
 // timeouts block is loaded with the default value for every timeout.
 func TestConfig_PeerTimeoutsDefaults(t *testing.T) {
 	const src = `
 peers:
  peer1:
    proxy: http://example.com
    models: [model1]
 `
 	cfg, err := LoadConfigFromReader(strings.NewReader(src))
 	require.NoError(t, err)
 	peer, ok := cfg.Peers["peer1"]
 	require.True(t, ok, "peer1 should exist in config")
 	// Unmarshaling is expected to have filled in every default
 	want := TimeoutsConfig{
 		Connect:        30,
 		ResponseHeader: 60,
 		TLSHandshake:   10,
 		ExpectContinue: 1,
 		IdleConn:       90,
 	}
 	assert.Equal(t, want, peer.Timeouts)
 }
@@ -173,6 +173,13 @@ groups:
 				Env:              []string{"VAR1=value1", "VAR2=value2"},
 				CheckEndpoint:    "/health",
 				SendLoadingState: &modelLoadingState,
 				Timeouts: TimeoutsConfig{
 					Connect:        30,
 					ResponseHeader: 60,
 					TLSHandshake:   10,
 					ExpectContinue: 1,
 					IdleConn:       90,
 				},
 			},
 			"model2": {
 				Cmd:              "path/to/server --arg1 one",
@@ -182,6 +189,13 @@ groups:
 				Env:              []string{},
 				CheckEndpoint:    "/",
 				SendLoadingState: &modelLoadingState,
 				Timeouts: TimeoutsConfig{
 					Connect:        30,
 					ResponseHeader: 60,
 					TLSHandshake:   10,
 					ExpectContinue: 1,
 					IdleConn:       90,
 				},
 			},
 			"model3": {
 				Cmd:              "path/to/cmd --arg1 one",
@@ -191,6 +205,13 @@ groups:
 				Env:              []string{},
 				CheckEndpoint:    "/",
 				SendLoadingState: &modelLoadingState,
 				Timeouts: TimeoutsConfig{
 					Connect:        30,
 					ResponseHeader: 60,
 					TLSHandshake:   10,
 					ExpectContinue: 1,
 					IdleConn:       90,
 				},
 			},
 			"model4": {
 				Cmd:              "path/to/cmd --arg1 one",
@@ -200,6 +221,13 @@ groups:
 				Aliases:          []string{},
 				Env:              []string{},
 				SendLoadingState: &modelLoadingState,
 				Timeouts: TimeoutsConfig{
 					Connect:        30,
 					ResponseHeader: 60,
 					TLSHandshake:   10,
 					ExpectContinue: 1,
 					IdleConn:       90,
 				},
 			},
 		},
 		HealthCheckTimeout: 15,
@@ -9,6 +9,15 @@ const (
 	MODEL_CONFIG_DEFAULT_TTL = -1
 )
 // TimeoutsConfig holds the timeout settings, in whole seconds, that are applied
 // to the HTTP transport used for proxy connections. A value of 0 turns the
 // corresponding timeout off, which is not recommended.
 type TimeoutsConfig struct {
 	Connect        int `yaml:"connect"`        // TCP connection timeout in seconds, 0 = no timeout (not recommended)
 	ResponseHeader int `yaml:"responseHeader"` // seconds to wait for response headers, 0 = no timeout (not recommended)
 	TLSHandshake   int `yaml:"tlsHandshake"`   // TLS handshake timeout in seconds, 0 = no timeout (not recommended)
 	ExpectContinue int `yaml:"expectContinue"` // seconds to wait for "100 Continue", 0 = send the request body without waiting
 	IdleConn       int `yaml:"idleConn"`       // seconds an idle connection is kept open, 0 = no limit (not recommended)
 }
 type ModelConfig struct {
 	Cmd           string   `yaml:"cmd"`
 	CmdStop       string   `yaml:"cmdStop"`
@@ -40,6 +49,9 @@ type ModelConfig struct {
 	// override global setting
 	SendLoadingState *bool `yaml:"sendLoadingState"`
 	// Timeout settings for proxy connections
 	Timeouts TimeoutsConfig `yaml:"timeouts"`
 }
 func (m *ModelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
@@ -57,6 +69,13 @@ func (m *ModelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
 		ConcurrencyLimit: 0,
 		Name:             "",
 		Description:      "",
 		Timeouts: TimeoutsConfig{
 			Connect:        30,
 			ResponseHeader: 60,
 			TLSHandshake:   10,
 			ExpectContinue: 1,
 			IdleConn:       90,
 		},
 	}
 	// the default cmdStop to taskkill /f /t /pid ${PID}
@@ -12,6 +12,9 @@ type PeerConfig struct {
 	ApiKey   string   `yaml:"apiKey"`
 	Models   []string `yaml:"models"`
 	Filters  Filters  `yaml:"filters"`
 	// Timeout settings for proxy connections
 	Timeouts TimeoutsConfig `yaml:"timeouts"`
 }
 func (c *PeerConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
@@ -21,6 +24,13 @@ func (c *PeerConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
 		ApiKey:  "",
 		Models:  []string{},
 		Filters: Filters{},
 		Timeouts: TimeoutsConfig{
 			Connect:        30,
 			ResponseHeader: 60,
 			TLSHandshake:   10,
 			ExpectContinue: 1,
 			IdleConn:       90,
 		},
 	}
 	if err := unmarshal(&defaults); err != nil {
@@ -34,23 +34,25 @@ func NewPeerProxy(peers config.PeerDictionaryConfig, proxyLogger *LogMonitor) (*
 	}
 	sort.Strings(peerIDs)
 	// Create a shared transport with reasonable timeouts for peer connections
 	// these can be tuned with feedback later
 	peerTransport := &http.Transport{
 		DialContext: (&net.Dialer{
 			Timeout:   30 * time.Second, // Connection timeout
 			KeepAlive: 30 * time.Second,
 		}).DialContext,
 		TLSHandshakeTimeout:   10 * time.Second,
 		ResponseHeaderTimeout: 60 * time.Second, // Time to wait for response headers
 		ExpectContinueTimeout: 1 * time.Second,
 		MaxIdleConns:          100,
 		MaxIdleConnsPerHost:   10,
 		IdleConnTimeout:       90 * time.Second,
 	}
 	for _, peerID := range peerIDs {
 		peer := peers[peerID]
 		// Create a transport with per-peer timeout configuration
 		peerTransport := &http.Transport{
 			Proxy: http.ProxyFromEnvironment,
 			DialContext: (&net.Dialer{
 				Timeout:   time.Duration(peer.Timeouts.Connect) * time.Second,
 				KeepAlive: 30 * time.Second,
 			}).DialContext,
 			TLSHandshakeTimeout:   time.Duration(peer.Timeouts.TLSHandshake) * time.Second,
 			ResponseHeaderTimeout: time.Duration(peer.Timeouts.ResponseHeader) * time.Second,
 			ExpectContinueTimeout: time.Duration(peer.Timeouts.ExpectContinue) * time.Second,
 			ForceAttemptHTTP2:     true,
 			MaxIdleConns:          100,
 			MaxIdleConnsPerHost:   10,
 			IdleConnTimeout:       time.Duration(peer.Timeouts.IdleConn) * time.Second,
 		}
 		// Create reverse proxy for this peer
 		reverseProxy := httputil.NewSingleHostReverseProxy(peer.ProxyURL)
 		reverseProxy.Transport = peerTransport
@@ -6,6 +6,7 @@ import (
 	"net/url"
 	"strings"
 	"testing"
 	"time"
 	"github.com/mostlygeek/llama-swap/proxy/config"
 	"github.com/stretchr/testify/assert"
@@ -266,3 +267,45 @@ func TestProxyRequest_SSEHeaderModification(t *testing.T) {
 	// The X-Accel-Buffering header should be set to "no" for SSE
 	assert.Equal(t, "no", w.Header().Get("X-Accel-Buffering"))
 }
 func TestNewPeerProxy_CustomTimeouts(t *testing.T) {
 	proxyURL, _ := url.Parse("http://localhost:8080")
 	peers := config.PeerDictionaryConfig{
 		"test-peer": config.PeerConfig{
 			Proxy:    "http://localhost:8080",
 			ProxyURL: proxyURL,
 			Models:   []string{"model1"},
 			Timeouts: config.TimeoutsConfig{
 				Connect:        45,
 				ResponseHeader: 300,
 				TLSHandshake:   15,
 				ExpectContinue: 2,
 				IdleConn:       120,
 			},
 		},
 	}
 	peerProxy, err := NewPeerProxy(peers, testLogger)
 	assert.NoError(t, err)
 	assert.NotNil(t, peerProxy)
 	assert.True(t, peerProxy.HasPeerModel("model1"))
 	// Verify the timeout values are actually applied to the transport
 	member, found := peerProxy.proxyMap["model1"]
 	require.True(t, found, "model1 should exist in proxyMap")
 	assert.NotNil(t, member.reverseProxy)
 	assert.NotNil(t, member.reverseProxy.Transport)
 	transport, ok := member.reverseProxy.Transport.(*http.Transport)
 	require.True(t, ok, "Transport should be *http.Transport")
 	// Verify all timeout values are correctly applied
 	assert.Equal(t, 300*time.Second, transport.ResponseHeaderTimeout)
 	assert.Equal(t, 15*time.Second, transport.TLSHandshakeTimeout)
 	assert.Equal(t, 2*time.Second, transport.ExpectContinueTimeout)
 	assert.Equal(t, 120*time.Second, transport.IdleConnTimeout)
 	// ForceAttemptHTTP2 should be enabled
 	assert.True(t, transport.ForceAttemptHTTP2)
 }
@@ -96,6 +96,24 @@ func NewProcess(ID string, healthCheckTimeout int, config config.ModelConfig, pr
 	var reverseProxy *httputil.ReverseProxy
 	if proxyURL != nil {
 		reverseProxy = httputil.NewSingleHostReverseProxy(proxyURL)
 		// Create custom transport with configured timeouts
 		transport := &http.Transport{
 			Proxy: http.ProxyFromEnvironment,
 			DialContext: (&net.Dialer{
 				Timeout:   time.Duration(config.Timeouts.Connect) * time.Second,
 				KeepAlive: 30 * time.Second,
 			}).DialContext,
 			TLSHandshakeTimeout:   time.Duration(config.Timeouts.TLSHandshake) * time.Second,
 			ResponseHeaderTimeout: time.Duration(config.Timeouts.ResponseHeader) * time.Second,
 			ExpectContinueTimeout: time.Duration(config.Timeouts.ExpectContinue) * time.Second,
 			ForceAttemptHTTP2:     true,
 			MaxIdleConns:          100,
 			MaxIdleConnsPerHost:   10,
 			IdleConnTimeout:       time.Duration(config.Timeouts.IdleConn) * time.Second,
 		}
 		reverseProxy.Transport = transport
 		reverseProxy.ModifyResponse = func(resp *http.Response) error {
 			// prevent nginx from buffering streaming responses (e.g., SSE)
 			if strings.Contains(strings.ToLower(resp.Header.Get("Content-Type")), "text/event-stream") {
@@ -2,6 +2,7 @@ package proxy
 import (
 	"fmt"
 	"io"
 	"net/http"
 	"net/http/httptest"
 	"os"
@@ -569,3 +570,39 @@ func (w *panicOnWriteResponseWriter) Write(b []byte) (int, error) {
 	}
 	return w.ResponseRecorder.Write(b)
 }
 // TestProcess_CustomTimeouts verifies that the timeouts configured on a model
 // are applied to the http.Transport of the process's reverse proxy.
 func TestProcess_CustomTimeouts(t *testing.T) {
 	modelConfig := config.ModelConfig{
 		Cmd:           "echo test",
 		Proxy:         "http://localhost:8080",
 		CheckEndpoint: "/health",
 		Timeouts: config.TimeoutsConfig{
 			Connect:        45,
 			ResponseHeader: 120,
 			TLSHandshake:   15,
 			ExpectContinue: 2,
 			IdleConn:       120,
 		},
 	}
 	debugLogger := NewLogMonitorWriter(io.Discard)
 	process := NewProcess("test-model", 30, modelConfig, debugLogger, debugLogger)
 	// Stop on failure: the checks below dereference process and its proxy
 	if process == nil || process.reverseProxy == nil {
 		t.Fatal("NewProcess should return a process with a reverse proxy")
 	}
 	assert.Equal(t, "test-model", process.ID)
 	// The proxy must use a *http.Transport; abort instead of dereferencing a
 	// nil transport when it does not
 	transport, ok := process.reverseProxy.Transport.(*http.Transport)
 	if !ok || transport == nil {
 		t.Fatalf("Transport should be *http.Transport, got %T", process.reverseProxy.Transport)
 	}
 	// Verify the timeouts that are exposed as transport fields. The connect
 	// timeout is set on the dialer behind DialContext and cannot be read back
 	// from the transport, so it is not asserted here.
 	assert.Equal(t, 120*time.Second, transport.ResponseHeaderTimeout)
 	assert.Equal(t, 15*time.Second, transport.TLSHandshakeTimeout)
 	assert.Equal(t, 2*time.Second, transport.ExpectContinueTimeout)
 	assert.Equal(t, 120*time.Second, transport.IdleConnTimeout)
 	assert.True(t, transport.ForceAttemptHTTP2)
 }