Compare commits

...

3 Commits

Author SHA1 Message Date
Benson Wong 19fb5f35e9 proxy: implement setParamsByID filter (#535)
Add setParamsByID filter that applies different request parameters based
on the requested model ID, enabling per-alias behaviour for a single
loaded model.

- add SetParamsByID field to Filters struct and SanitizedSetParamsByID
method
- substitute ${MODEL_ID} and other macros in setParamsByID keys and
values
- validate no unknown macros remain in keys or values after substitution
- apply setParamsByID in proxyInferenceHandler after setParams (can
override it)
- update config-schema.json with setParamsByID definition
- update UI to show aliases and make them selectable in the Playground

closes #534
2026-02-19 22:21:10 -08:00
Benson Wong b45102bde8 ui: smart auto-scroll in LogPanel (#530)
Pause auto-scroll when the user scrolls up to review logs, and resume
when they scroll back to the bottom.

- add `userScrolledUp` state variable
- add `handleScroll` to detect scroll position with 40px threshold
- guard the auto-scroll effect with `!userScrolledUp`

closes #529
2026-02-18 19:47:37 -08:00
Brian Mendonca 1688bdd1e9 proxy, ui: add pending requests count to the main dashboard (#516)
add a real time counter of pending (inflight) requests to the UI.
2026-02-16 09:41:15 -08:00
16 changed files with 530 additions and 48 deletions
+10 -1
View File
@@ -200,11 +200,20 @@
"additionalProperties": true, "additionalProperties": true,
"default": {}, "default": {},
"description": "Dictionary of parameters to set/override in requests. Useful for enforcing specific parameter values. Protected params like 'model' cannot be overridden. Values can be strings, numbers, booleans, arrays, or objects." "description": "Dictionary of parameters to set/override in requests. Useful for enforcing specific parameter values. Protected params like 'model' cannot be overridden. Values can be strings, numbers, booleans, arrays, or objects."
},
"setParamsByID": {
"type": "object",
"additionalProperties": {
"type": "object",
"additionalProperties": true
},
"default": {},
"description": "Dictionary mapping requested model IDs (or aliases) to parameters to set/override in requests. Applied after setParams and can override those values. Useful with aliases to vary behaviour depending on which alias the client used (e.g. different reasoning_effort per alias). Keys support ${MODEL_ID} macro substitution. Protected params like 'model' cannot be overridden."
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"default": {}, "default": {},
"description": "Dictionary of filter settings. Supports stripParams and setParams." "description": "Dictionary of filter settings. Supports stripParams, setParams, and setParamsByID."
}, },
"metadata": { "metadata": {
"type": "object", "type": "object",
+32 -13
View File
@@ -126,7 +126,7 @@ apiKeys:
# - below are examples of all the settings a model can have # - below are examples of all the settings a model can have
models: models:
# keys are the model names used in API requests # keys are the model names used in API requests
"llama": "gpt-oss-120b":
# macros: a dictionary of string substitutions specific to this model # macros: a dictionary of string substitutions specific to this model
# - optional, default: empty dictionary # - optional, default: empty dictionary
# - macros defined here override macros defined in the global macros section # - macros defined here override macros defined in the global macros section
@@ -143,7 +143,7 @@ models:
cmd: | cmd: |
# ${latest-llama} is a macro that is defined above # ${latest-llama} is a macro that is defined above
${latest-llama} ${latest-llama}
--model path/to/llama-8B-Q4_K_M.gguf --model path/to/gpt-oss-120B.gguf
--ctx-size ${default_ctx} --ctx-size ${default_ctx}
--temperature ${temp} --temperature ${temp}
@@ -151,13 +151,13 @@ models:
# - optional, default: empty string # - optional, default: empty string
# - if set, it will be used in the v1/models API response # - if set, it will be used in the v1/models API response
# - if not set, it will be omitted in the JSON model record # - if not set, it will be omitted in the JSON model record
name: "llama 3.1 8B" name: "gpt-oss 120B"
# description: a description for the model # description: a description for the model
# - optional, default: empty string # - optional, default: empty string
# - if set, it will be used in the v1/models API response # - if set, it will be used in the v1/models API response
# - if not set, it will be omitted in the JSON model record # - if not set, it will be omitted in the JSON model record
description: "A small but capable model used for quick testing" description: "A thinking model from OpenAI"
# env: define an array of environment variables to inject into cmd's environment # env: define an array of environment variables to inject into cmd's environment
# - optional, default: empty array # - optional, default: empty array
@@ -172,14 +172,6 @@ models:
# - if you use a custom port in cmd this *must* be set # - if you use a custom port in cmd this *must* be set
proxy: http://127.0.0.1:8999 proxy: http://127.0.0.1:8999
# aliases: alternative model names that this model configuration is used for
# - optional, default: empty array
# - aliases must be unique globally
# - useful for impersonating a specific model
aliases:
- "gpt-4o-mini"
- "gpt-3.5-turbo"
# checkEndpoint: URL path to check if the server is ready # checkEndpoint: URL path to check if the server is ready
# - optional, default: /health # - optional, default: /health
# - endpoint is expected to return an HTTP 200 response # - endpoint is expected to return an HTTP 200 response
@@ -197,7 +189,7 @@ models:
# - optional, default: "" # - optional, default: ""
# - useful for when the upstream server expects a specific model name that # - useful for when the upstream server expects a specific model name that
# is different from the model's ID # is different from the model's ID
useModelName: "qwen:qwq" useModelName: "openai/gpt-oss-120B"
# filters: a dictionary of filter settings # filters: a dictionary of filter settings
# - optional, default: empty dictionary # - optional, default: empty dictionary
@@ -216,11 +208,38 @@ models:
# - useful for enforcing specific parameter values # - useful for enforcing specific parameter values
# - protected params like "model" cannot be overridden # - protected params like "model" cannot be overridden
# - values can be strings, numbers, booleans, arrays, or objects # - values can be strings, numbers, booleans, arrays, or objects
# - always runs for the model
setParams: setParams:
# Example: enforce specific sampling parameters # Example: enforce specific sampling parameters
temperature: 0.7 temperature: 0.7
top_p: 0.9 top_p: 0.9
# setParamsByID: a dictionary of parameters to set based on the requested model ID
# - optional, default: empty dictionary
# - combine with aliases to create variant behaviour without reloading the model
# - parameters are set in the request body JSON
# - runs after setParams, so its values override those set by setParams
# - protected params like "model" cannot be overridden
# - values can be strings, numbers, booleans, arrays, or objects
# - model aliases will be automatically created for each key
setParamsByID:
"${MODEL_ID}":
chat_template_kwargs:
reasoning_effort: medium
"${MODEL_ID}:high":
chat_template_kwargs:
reasoning_effort: high
"${MODEL_ID}:low":
chat_template_kwargs:
reasoning_effort: low
# aliases: alternative model names that this model configuration is used for
# - optional, default: empty array
# - aliases must be unique globally
# - useful for impersonating a specific model
aliases:
- "gpt-4o-mini"
# metadata: a dictionary of arbitrary values that are included in /v1/models # metadata: a dictionary of arbitrary values that are included in /v1/models
# - optional, default: empty dictionary # - optional, default: empty dictionary
# - while metadata can contain complex types it is recommended to keep it simple # - while metadata can contain complex types it is recommended to keep it simple
+46
View File
@@ -294,6 +294,24 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
modelConfig.CheckEndpoint = strings.ReplaceAll(modelConfig.CheckEndpoint, macroSlug, macroStr) modelConfig.CheckEndpoint = strings.ReplaceAll(modelConfig.CheckEndpoint, macroSlug, macroStr)
modelConfig.Filters.StripParams = strings.ReplaceAll(modelConfig.Filters.StripParams, macroSlug, macroStr) modelConfig.Filters.StripParams = strings.ReplaceAll(modelConfig.Filters.StripParams, macroSlug, macroStr)
// Substitute macros in SetParamsByID keys and values
if len(modelConfig.Filters.SetParamsByID) > 0 {
newSetParamsByID := make(map[string]map[string]any, len(modelConfig.Filters.SetParamsByID))
for key, paramMap := range modelConfig.Filters.SetParamsByID {
newKey := strings.ReplaceAll(key, macroSlug, macroStr)
newValAny, err := substituteMacroInValue(any(paramMap), entry.Name, entry.Value)
if err != nil {
return Config{}, fmt.Errorf("model %s filters.setParamsByID: %s", modelId, err.Error())
}
newParamMap, ok := newValAny.(map[string]any)
if !ok {
return Config{}, fmt.Errorf("model %s filters.setParamsByID: unexpected type after macro substitution", modelId)
}
newSetParamsByID[newKey] = newParamMap
}
modelConfig.Filters.SetParamsByID = newSetParamsByID
}
// Substitute in metadata (type-preserving) // Substitute in metadata (type-preserving)
if len(modelConfig.Metadata) > 0 { if len(modelConfig.Metadata) > 0 {
result, err := substituteMacroInValue(modelConfig.Metadata, entry.Name, entry.Value) result, err := substituteMacroInValue(modelConfig.Metadata, entry.Name, entry.Value)
@@ -359,6 +377,34 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
} }
} }
// Validate SetParamsByID keys and values
for key, paramMap := range modelConfig.Filters.SetParamsByID {
if matches := macroPatternRegex.FindAllStringSubmatch(key, -1); len(matches) > 0 {
return Config{}, fmt.Errorf("unknown macro '${%s}' found in model %s filters.setParamsByID key", matches[0][1], modelId)
}
if err := validateNestedForUnknownMacros(any(paramMap), fmt.Sprintf("model %s filters.setParamsByID[%s]", modelId, key)); err != nil {
return Config{}, err
}
}
// Auto-register setParamsByID keys as aliases (skip the model's own ID)
for key := range modelConfig.Filters.SetParamsByID {
if key == modelId {
continue
}
if _, exists := config.Models[key]; exists {
return Config{}, fmt.Errorf("model %s filters.setParamsByID: key '%s' conflicts with an existing model ID", modelId, key)
}
if existingModel, exists := config.aliases[key]; exists {
if existingModel != modelId {
return Config{}, fmt.Errorf("duplicate alias '%s' in model %s filters.setParamsByID, already used by model %s", key, modelId, existingModel)
}
continue // already registered as explicit alias for this model
}
config.aliases[key] = modelId
modelConfig.Aliases = append(modelConfig.Aliases, key)
}
if _, err := url.Parse(modelConfig.Proxy); err != nil { if _, err := url.Parse(modelConfig.Proxy); err != nil {
return Config{}, fmt.Errorf("model %s: invalid proxy URL: %w", modelId, err) return Config{}, fmt.Errorf("model %s: invalid proxy URL: %w", modelId, err)
} }
+33
View File
@@ -20,6 +20,12 @@ type Filters struct {
// SetParams is a dictionary of parameters to set/override in requests // SetParams is a dictionary of parameters to set/override in requests
// Protected params (like "model") cannot be set // Protected params (like "model") cannot be set
SetParams map[string]any `yaml:"setParams"` SetParams map[string]any `yaml:"setParams"`
// SetParamsByID maps requested model IDs to parameters to set/override in requests.
// Useful with aliases: a single loaded model can behave differently depending on
// which alias the client used. Applied after SetParams, so it can override those values.
// Protected params (like "model") cannot be set.
SetParamsByID map[string]map[string]any `yaml:"setParamsByID"`
} }
// SanitizedStripParams returns a sorted list of parameters to strip, // SanitizedStripParams returns a sorted list of parameters to strip,
@@ -51,6 +57,33 @@ func (f Filters) SanitizedStripParams() []string {
return cleaned return cleaned
} }
// SanitizedSetParamsByID looks up the parameter overrides configured for
// requestedModelID and returns them with every protected param dropped,
// together with the surviving keys in sorted order so callers can apply them
// deterministically.
// Both return values are nil when there is no entry for the ID, the entry is
// empty, or every param in the entry is protected.
func (f Filters) SanitizedSetParamsByID(requestedModelID string) (map[string]any, []string) {
	// Indexing a nil or empty map is safe and yields a nil entry, so the
	// "no config" and "no match" cases collapse into one length check.
	entry := f.SetParamsByID[requestedModelID]
	if len(entry) == 0 {
		return nil, nil
	}

	allowed := make(map[string]any, len(entry))
	for name, val := range entry {
		if !slices.Contains(ProtectedParams, name) {
			allowed[name] = val
		}
	}
	if len(allowed) == 0 {
		return nil, nil
	}

	names := make([]string, 0, len(allowed))
	for name := range allowed {
		names = append(names, name)
	}
	sort.Strings(names)
	return allowed, names
}
// SanitizedSetParams returns a copy of SetParams with protected params removed // SanitizedSetParams returns a copy of SetParams with protected params removed
// and keys sorted for consistent iteration order // and keys sorted for consistent iteration order
func (f Filters) SanitizedSetParams() (map[string]any, []string) { func (f Filters) SanitizedSetParams() (map[string]any, []string) {
+117
View File
@@ -162,6 +162,123 @@ func TestFilters_SanitizedSetParams(t *testing.T) {
} }
} }
// TestFilters_SanitizedSetParamsByID is a table-driven test of
// Filters.SanitizedSetParamsByID. Each case supplies a SetParamsByID map and a
// requested model ID, and states the params map and sorted key slice expected
// back. Cases whose wantParams is nil expect both return values to be nil.
func TestFilters_SanitizedSetParamsByID(t *testing.T) {
tests := []struct {
name string
setParamsByID map[string]map[string]any
requestedModelID string
wantParams map[string]any
wantKeys []string
}{
{
name: "empty SetParamsByID returns nil",
setParamsByID: nil,
requestedModelID: "model1",
wantParams: nil,
wantKeys: nil,
},
{
name: "empty map returns nil",
setParamsByID: map[string]map[string]any{},
requestedModelID: "model1",
wantParams: nil,
wantKeys: nil,
},
{
name: "non-matching model ID returns nil",
setParamsByID: map[string]map[string]any{
"model2": {"temperature": 0.9},
},
requestedModelID: "model1",
wantParams: nil,
wantKeys: nil,
},
{
name: "matching model ID returns correct params",
setParamsByID: map[string]map[string]any{
"model1": {"temperature": 0.7, "top_p": 0.9},
"model2": {"temperature": 0.5},
},
requestedModelID: "model1",
wantParams: map[string]any{
"temperature": 0.7,
"top_p": 0.9,
},
wantKeys: []string{"temperature", "top_p"},
},
{
name: "protected param model is filtered out",
setParamsByID: map[string]map[string]any{
"model1": {
"model": "should-be-filtered",
"temperature": 0.7,
},
},
requestedModelID: "model1",
wantParams: map[string]any{
"temperature": 0.7,
},
wantKeys: []string{"temperature"},
},
{
name: "only protected param returns nil",
setParamsByID: map[string]map[string]any{
"model1": {
"model": "should-be-filtered",
},
},
requestedModelID: "model1",
wantParams: nil,
wantKeys: nil,
},
{
name: "keys are sorted",
setParamsByID: map[string]map[string]any{
"model1": {
"z_param": "z",
"a_param": "a",
"m_param": "m",
},
},
requestedModelID: "model1",
wantParams: map[string]any{
"z_param": "z",
"a_param": "a",
"m_param": "m",
},
wantKeys: []string{"a_param", "m_param", "z_param"},
},
{
name: "alias style key lookup",
setParamsByID: map[string]map[string]any{
"model1:high": {"reasoning_effort": "high"},
"model1:low": {"reasoning_effort": "low"},
},
requestedModelID: "model1:high",
wantParams: map[string]any{
"reasoning_effort": "high",
},
wantKeys: []string{"reasoning_effort"},
},
}
// Run every case as its own subtest so failures are reported by case name.
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
f := Filters{SetParamsByID: tt.setParamsByID}
gotParams, gotKeys := f.SanitizedSetParamsByID(tt.requestedModelID)
// A nil wantParams means "nothing to apply": both results must be nil.
if tt.wantParams == nil {
assert.Nil(t, gotParams)
assert.Nil(t, gotKeys)
return
}
assert.Equal(t, tt.wantKeys, gotKeys)
assert.Equal(t, tt.wantParams, gotParams)
})
}
}
func TestProtectedParams(t *testing.T) { func TestProtectedParams(t *testing.T) {
// Verify that "model" is protected // Verify that "model" is protected
assert.Contains(t, ProtectedParams, "model") assert.Contains(t, ProtectedParams, "model")
+66
View File
@@ -73,6 +73,72 @@ models:
} }
} }
// TestConfig_SetParamsByIDAutoAlias loads a config whose only model declares
// two setParamsByID keys built from ${MODEL_ID}, then checks that both expanded
// keys resolve to the model via RealModelName and appear in its Aliases list.
func TestConfig_SetParamsByIDAutoAlias(t *testing.T) {
content := `
models:
model1:
cmd: path/to/cmd --port ${PORT}
filters:
setParamsByID:
"${MODEL_ID}:high":
reasoning_effort: high
"${MODEL_ID}:low":
reasoning_effort: low
`
cfg, err := LoadConfigFromReader(strings.NewReader(content))
assert.NoError(t, err)
// Keys (other than the model's own ID) should be registered as aliases
realName, found := cfg.RealModelName("model1:high")
assert.True(t, found, "model1:high should be an auto-registered alias")
assert.Equal(t, "model1", realName)
realName, found = cfg.RealModelName("model1:low")
assert.True(t, found, "model1:low should be an auto-registered alias")
assert.Equal(t, "model1", realName)
// Auto-aliases should also appear in modelConfig.Aliases
aliases := cfg.Models["model1"].Aliases
assert.Contains(t, aliases, "model1:high")
assert.Contains(t, aliases, "model1:low")
}
// TestConfig_SetParamsByIDAutoAliasConflictWithModelID expects loading to fail
// when a setParamsByID key on model1 is the ID of another configured model
// (model2), with an error mentioning the model ID conflict.
func TestConfig_SetParamsByIDAutoAliasConflictWithModelID(t *testing.T) {
content := `
models:
model1:
cmd: path/to/cmd --port ${PORT}
filters:
setParamsByID:
model2:
reasoning_effort: high
model2:
cmd: path/to/cmd --port ${PORT}
`
_, err := LoadConfigFromReader(strings.NewReader(content))
assert.ErrorContains(t, err, "conflicts with an existing model ID")
}
// TestConfig_SetParamsByIDAutoAliasConflictWithOtherModel expects loading to
// fail with a "duplicate alias" error when two different models both declare
// the same setParamsByID key ("shared-alias").
func TestConfig_SetParamsByIDAutoAliasConflictWithOtherModel(t *testing.T) {
content := `
models:
model1:
cmd: path/to/cmd --port ${PORT}
filters:
setParamsByID:
"shared-alias":
reasoning_effort: high
model2:
cmd: path/to/cmd --port ${PORT}
filters:
setParamsByID:
"shared-alias":
reasoning_effort: low
`
_, err := LoadConfigFromReader(strings.NewReader(content))
assert.ErrorContains(t, err, "duplicate alias")
}
func TestConfig_ModelFiltersWithSetParams(t *testing.T) { func TestConfig_ModelFiltersWithSetParams(t *testing.T) {
content := ` content := `
models: models:
+9
View File
@@ -8,6 +8,7 @@ const ConfigFileChangedEventID = 0x03
const LogDataEventID = 0x04 const LogDataEventID = 0x04
const TokenMetricsEventID = 0x05 const TokenMetricsEventID = 0x05
const ModelPreloadedEventID = 0x06 const ModelPreloadedEventID = 0x06
const InFlightRequestsEventID = 0x07
type ProcessStateChangeEvent struct { type ProcessStateChangeEvent struct {
ProcessName string ProcessName string
@@ -58,3 +59,11 @@ type ModelPreloadedEvent struct {
func (e ModelPreloadedEvent) Type() uint32 { func (e ModelPreloadedEvent) Type() uint32 {
return ModelPreloadedEventID return ModelPreloadedEventID
} }
// InFlightRequestsEvent is the event payload identified by
// InFlightRequestsEventID; it carries a single request count.
type InFlightRequestsEvent struct {
// Total is the request count reported by this event.
Total int
}
// Type returns InFlightRequestsEventID, the identifier for this event kind.
func (e InFlightRequestsEvent) Type() uint32 {
return InFlightRequestsEventID
}
+76 -19
View File
@@ -28,6 +28,40 @@ const (
type proxyCtxKey string type proxyCtxKey string
// InflightCounter is a mutex-guarded integer tally. Every method takes the
// lock, so a single counter may be shared between goroutines.
type InflightCounter struct {
	mu    sync.Mutex
	total int
}

// newInflightCounter returns a counter whose tally starts at zero.
func newInflightCounter() *InflightCounter {
	return new(InflightCounter)
}

// Current reports the tally as it stands right now.
func (ic *InflightCounter) Current() int {
	ic.mu.Lock()
	defer ic.mu.Unlock()
	return ic.total
}

// Increment adds one to the tally and returns the updated value.
func (ic *InflightCounter) Increment() int {
	ic.mu.Lock()
	defer ic.mu.Unlock()
	ic.total++
	return ic.total
}

// Decrement subtracts one from the tally and returns the updated value.
// A tally that is not positive is left untouched, so it never drops below zero.
func (ic *InflightCounter) Decrement() int {
	ic.mu.Lock()
	defer ic.mu.Unlock()
	if ic.total <= 0 {
		return ic.total
	}
	ic.total--
	return ic.total
}
type ProxyManager struct { type ProxyManager struct {
sync.Mutex sync.Mutex
@@ -43,6 +77,8 @@ type ProxyManager struct {
processGroups map[string]*ProcessGroup processGroups map[string]*ProcessGroup
inFlightCounter *InflightCounter
// shutdown signaling // shutdown signaling
shutdownCtx context.Context shutdownCtx context.Context
shutdownCancel context.CancelFunc shutdownCancel context.CancelFunc
@@ -155,6 +191,8 @@ func New(proxyConfig config.Config) *ProxyManager {
processGroups: make(map[string]*ProcessGroup), processGroups: make(map[string]*ProcessGroup),
inFlightCounter: newInflightCounter(),
shutdownCtx: shutdownCtx, shutdownCtx: shutdownCtx,
shutdownCancel: shutdownCancel, shutdownCancel: shutdownCancel,
@@ -276,37 +314,37 @@ func (pm *ProxyManager) setupGinEngine() {
// Set up routes using the Gin engine // Set up routes using the Gin engine
// Protected routes use pm.apiKeyAuth() middleware // Protected routes use pm.apiKeyAuth() middleware
pm.ginEngine.POST("/v1/chat/completions", pm.apiKeyAuth(), pm.proxyInferenceHandler) pm.ginEngine.POST("/v1/chat/completions", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
pm.ginEngine.POST("/v1/responses", pm.apiKeyAuth(), pm.proxyInferenceHandler) pm.ginEngine.POST("/v1/responses", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
// Support legacy /v1/completions api, see issue #12 // Support legacy /v1/completions api, see issue #12
pm.ginEngine.POST("/v1/completions", pm.apiKeyAuth(), pm.proxyInferenceHandler) pm.ginEngine.POST("/v1/completions", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
// Support anthropic /v1/messages (added https://github.com/ggml-org/llama.cpp/pull/17570) // Support anthropic /v1/messages (added https://github.com/ggml-org/llama.cpp/pull/17570)
pm.ginEngine.POST("/v1/messages", pm.apiKeyAuth(), pm.proxyInferenceHandler) pm.ginEngine.POST("/v1/messages", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
// Support anthropic count_tokens API (Also added in the above PR) // Support anthropic count_tokens API (Also added in the above PR)
pm.ginEngine.POST("/v1/messages/count_tokens", pm.apiKeyAuth(), pm.proxyInferenceHandler) pm.ginEngine.POST("/v1/messages/count_tokens", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
// Support embeddings and reranking // Support embeddings and reranking
pm.ginEngine.POST("/v1/embeddings", pm.apiKeyAuth(), pm.proxyInferenceHandler) pm.ginEngine.POST("/v1/embeddings", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
// llama-server's /reranking endpoint + aliases // llama-server's /reranking endpoint + aliases
pm.ginEngine.POST("/reranking", pm.apiKeyAuth(), pm.proxyInferenceHandler) pm.ginEngine.POST("/reranking", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
pm.ginEngine.POST("/rerank", pm.apiKeyAuth(), pm.proxyInferenceHandler) pm.ginEngine.POST("/rerank", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
pm.ginEngine.POST("/v1/rerank", pm.apiKeyAuth(), pm.proxyInferenceHandler) pm.ginEngine.POST("/v1/rerank", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
pm.ginEngine.POST("/v1/reranking", pm.apiKeyAuth(), pm.proxyInferenceHandler) pm.ginEngine.POST("/v1/reranking", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
// llama-server's /infill endpoint for code infilling // llama-server's /infill endpoint for code infilling
pm.ginEngine.POST("/infill", pm.apiKeyAuth(), pm.proxyInferenceHandler) pm.ginEngine.POST("/infill", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
// llama-server's /completion endpoint // llama-server's /completion endpoint
pm.ginEngine.POST("/completion", pm.apiKeyAuth(), pm.proxyInferenceHandler) pm.ginEngine.POST("/completion", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
// Support audio/speech endpoint // Support audio/speech endpoint
pm.ginEngine.POST("/v1/audio/speech", pm.apiKeyAuth(), pm.proxyInferenceHandler) pm.ginEngine.POST("/v1/audio/speech", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
pm.ginEngine.POST("/v1/audio/voices", pm.apiKeyAuth(), pm.proxyInferenceHandler) pm.ginEngine.POST("/v1/audio/voices", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
pm.ginEngine.GET("/v1/audio/voices", pm.apiKeyAuth(), pm.proxyGETModelHandler) pm.ginEngine.GET("/v1/audio/voices", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyGETModelHandler)
pm.ginEngine.POST("/v1/audio/transcriptions", pm.apiKeyAuth(), pm.proxyOAIPostFormHandler) pm.ginEngine.POST("/v1/audio/transcriptions", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyOAIPostFormHandler)
pm.ginEngine.POST("/v1/images/generations", pm.apiKeyAuth(), pm.proxyInferenceHandler) pm.ginEngine.POST("/v1/images/generations", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
pm.ginEngine.POST("/v1/images/edits", pm.apiKeyAuth(), pm.proxyOAIPostFormHandler) pm.ginEngine.POST("/v1/images/edits", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyOAIPostFormHandler)
pm.ginEngine.GET("/v1/models", pm.apiKeyAuth(), pm.listModelsHandler) pm.ginEngine.GET("/v1/models", pm.apiKeyAuth(), pm.listModelsHandler)
@@ -325,7 +363,7 @@ func (pm *ProxyManager) setupGinEngine() {
pm.ginEngine.GET("/upstream", func(c *gin.Context) { pm.ginEngine.GET("/upstream", func(c *gin.Context) {
c.Redirect(http.StatusFound, "/ui/models") c.Redirect(http.StatusFound, "/ui/models")
}) })
pm.ginEngine.Any("/upstream/*upstreamPath", pm.apiKeyAuth(), pm.proxyToUpstream) pm.ginEngine.Any("/upstream/*upstreamPath", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyToUpstream)
pm.ginEngine.GET("/unload", pm.apiKeyAuth(), pm.unloadAllModelsHandler) pm.ginEngine.GET("/unload", pm.apiKeyAuth(), pm.unloadAllModelsHandler)
pm.ginEngine.GET("/running", pm.apiKeyAuth(), pm.listRunningProcessesHandler) pm.ginEngine.GET("/running", pm.apiKeyAuth(), pm.listRunningProcessesHandler)
pm.ginEngine.GET("/health", func(c *gin.Context) { pm.ginEngine.GET("/health", func(c *gin.Context) {
@@ -389,6 +427,14 @@ func (pm *ProxyManager) setupGinEngine() {
gin.DisableConsoleColor() gin.DisableConsoleColor()
} }
// trackInflight returns Gin middleware that counts a request as in-flight for
// the duration of the downstream handlers. It increments pm.inFlightCounter
// and emits an InFlightRequestsEvent with the new total before calling
// c.Next(), then decrements and emits again once the handlers have returned.
func (pm *ProxyManager) trackInflight() gin.HandlerFunc {
	return func(c *gin.Context) {
		event.Emit(InFlightRequestsEvent{Total: pm.inFlightCounter.Increment()})
		// The decrement must live inside a closure: arguments of a deferred
		// call are evaluated when the defer statement executes, so writing
		// `defer event.Emit(...Decrement())` would decrement immediately,
		// before c.Next(), and later emit a stale total.
		defer func() {
			event.Emit(InFlightRequestsEvent{Total: pm.inFlightCounter.Decrement()})
		}()
		c.Next()
	}
}
// ServeHTTP implements http.Handler interface // ServeHTTP implements http.Handler interface
func (pm *ProxyManager) ServeHTTP(w http.ResponseWriter, r *http.Request) { func (pm *ProxyManager) ServeHTTP(w http.ResponseWriter, r *http.Request) {
pm.ginEngine.ServeHTTP(w, r) pm.ginEngine.ServeHTTP(w, r)
@@ -674,6 +720,17 @@ func (pm *ProxyManager) proxyInferenceHandler(c *gin.Context) {
} }
} }
// setParamsByID: set params based on the requested model ID (runs after setParams, can override it)
setParamsByIDParams, setParamsByIDKeys := pm.config.Models[modelID].Filters.SanitizedSetParamsByID(requestedModel)
for _, key := range setParamsByIDKeys {
pm.proxyLogger.Debugf("<%s> setting param by id: %s", requestedModel, key)
bodyBytes, err = sjson.SetBytes(bodyBytes, key, setParamsByIDParams[key])
if err != nil {
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error setting parameter %s in request", key))
return
}
}
pm.proxyLogger.Debugf("ProxyManager using local Process for model: %s", requestedModel) pm.proxyLogger.Debugf("ProxyManager using local Process for model: %s", requestedModel)
nextHandler = processGroup.ProxyRequest nextHandler = processGroup.ProxyRequest
} else if pm.peerProxy != nil && pm.peerProxy.HasPeerModel(requestedModel) { } else if pm.peerProxy != nil && pm.peerProxy.HasPeerModel(requestedModel) {
+29 -6
View File
@@ -14,12 +14,13 @@ import (
) )
type Model struct { type Model struct {
Id string `json:"id"` Id string `json:"id"`
Name string `json:"name"` Name string `json:"name"`
Description string `json:"description"` Description string `json:"description"`
State string `json:"state"` State string `json:"state"`
Unlisted bool `json:"unlisted"` Unlisted bool `json:"unlisted"`
PeerID string `json:"peerID"` PeerID string `json:"peerID"`
Aliases []string `json:"aliases,omitempty"`
} }
func addApiHandlers(pm *ProxyManager) { func addApiHandlers(pm *ProxyManager) {
@@ -83,6 +84,7 @@ func (pm *ProxyManager) getModelStatus() []Model {
Description: pm.config.Models[modelID].Description, Description: pm.config.Models[modelID].Description,
State: state, State: state,
Unlisted: pm.config.Models[modelID].Unlisted, Unlisted: pm.config.Models[modelID].Unlisted,
Aliases: pm.config.Models[modelID].Aliases,
}) })
} }
@@ -107,6 +109,7 @@ const (
msgTypeModelStatus messageType = "modelStatus" msgTypeModelStatus messageType = "modelStatus"
msgTypeLogData messageType = "logData" msgTypeLogData messageType = "logData"
msgTypeMetrics messageType = "metrics" msgTypeMetrics messageType = "metrics"
msgTypeInFlight messageType = "inflight"
) )
type messageEnvelope struct { type messageEnvelope struct {
@@ -166,6 +169,18 @@ func (pm *ProxyManager) apiSendEvents(c *gin.Context) {
} }
} }
sendInFlight := func(total int) {
jsonData, err := json.Marshal(gin.H{"total": total})
if err == nil {
select {
case sendBuffer <- messageEnvelope{Type: msgTypeInFlight, Data: string(jsonData)}:
case <-ctx.Done():
return
default:
}
}
}
/** /**
* Send updated models list * Send updated models list
*/ */
@@ -193,11 +208,19 @@ func (pm *ProxyManager) apiSendEvents(c *gin.Context) {
sendMetrics([]TokenMetrics{e.Metrics}) sendMetrics([]TokenMetrics{e.Metrics})
})() })()
/**
* Send in-flight request stats related to token stats "Waiting: N" count.
*/
defer event.On(func(e InFlightRequestsEvent) {
sendInFlight(e.Total)
})()
// send initial batch of data // send initial batch of data
sendLogData("proxy", pm.proxyLogger.GetHistory()) sendLogData("proxy", pm.proxyLogger.GetHistory())
sendLogData("upstream", pm.upstreamLogger.GetHistory()) sendLogData("upstream", pm.upstreamLogger.GetHistory())
sendModels() sendModels()
sendMetrics(pm.metricsMonitor.getMetrics()) sendMetrics(pm.metricsMonitor.getMetrics())
sendInFlight(pm.inFlightCounter.Current())
for { for {
select { select {
+55
View File
@@ -1046,6 +1046,61 @@ func TestProxyManager_FiltersStripParams(t *testing.T) {
// t.Logf("%v", response) // t.Logf("%v", response)
} }
// TestProxyManager_FiltersSetParamsByID sends chat-completion requests through
// the proxy for a model ID and two setParamsByID-derived IDs, and checks the
// reasoning_effort value found in the "request_body" field of each response.
// The SRPATH placeholder in the config is replaced with simpleResponderPath.
func TestProxyManager_FiltersSetParamsByID(t *testing.T) {
// no explicit aliases — setParamsByID keys are auto-registered as aliases
configStr := strings.Replace(`
logLevel: error
models:
model1:
cmd: 'SRPATH --port ${PORT} --silent --respond model1'
proxy: "http://127.0.0.1:${PORT}"
filters:
setParams:
reasoning_effort: medium
setParamsByID:
"${MODEL_ID}:high":
reasoning_effort: high
"${MODEL_ID}:low":
reasoning_effort: low
`, "SRPATH", simpleResponderPath, -1)
cfg, err := config.LoadConfigFromReader(strings.NewReader(configStr))
if !assert.NoError(t, err, "invalid test configuration") {
return
}
proxy := New(cfg)
defer proxy.StopProcesses(StopWaitForInflightRequest)
tests := []struct {
requestedModel string
wantEffort string
}{
// setParams applies, no setParamsByID match
{requestedModel: "model1", wantEffort: "medium"},
// setParamsByID overrides setParams
{requestedModel: "model1:high", wantEffort: "high"},
{requestedModel: "model1:low", wantEffort: "low"},
}
for _, tt := range tests {
t.Run(tt.requestedModel, func(t *testing.T) {
// The request body carries only the model field; reasoning_effort
// is expected to come from the configured filters.
reqBody := fmt.Sprintf(`{"model":%q}`, tt.requestedModel)
req := httptest.NewRequest("POST", "/v1/chat/completions", bytes.NewBufferString(reqBody))
w := CreateTestResponseRecorder()
proxy.ServeHTTP(w, req)
assert.Equal(t, http.StatusOK, w.Code)
var response map[string]interface{}
assert.NoError(t, json.Unmarshal(w.Body.Bytes(), &response))
requestBody, _ := response["request_body"].(string)
gotEffort := gjson.Get(requestBody, "reasoning_effort").String()
assert.Equal(t, tt.wantEffort, gotEffort, "reasoning_effort mismatch for model %s", tt.requestedModel)
})
}
}
func TestProxyManager_HealthEndpoint(t *testing.T) { func TestProxyManager_HealthEndpoint(t *testing.T) {
config := config.AddDefaultGroupToConfig(config.Config{ config := config.AddDefaultGroupToConfig(config.Config{
HealthCheckTimeout: 15, HealthCheckTimeout: 15,
+10 -3
View File
@@ -65,10 +65,17 @@
}); });
let preElement: HTMLPreElement; let preElement: HTMLPreElement;
let userScrolledUp = $state(false);
// Auto scroll to bottom when logs change function handleScroll() {
if (!preElement) return;
const { scrollTop, scrollHeight, clientHeight } = preElement;
userScrolledUp = scrollHeight - scrollTop - clientHeight > 40;
}
// Auto scroll to bottom when logs change, unless user has scrolled up
$effect(() => { $effect(() => {
if (preElement && filteredLogs) { if (preElement && filteredLogs && !userScrolledUp) {
preElement.scrollTop = preElement.scrollHeight; preElement.scrollTop = preElement.scrollHeight;
} }
}); });
@@ -127,6 +134,6 @@
{/if} {/if}
</div> </div>
<div class="rounded-lg bg-background font-mono text-sm flex-1 overflow-hidden"> <div class="rounded-lg bg-background font-mono text-sm flex-1 overflow-hidden">
<pre bind:this={preElement} class="{textWrapClass} {fontSizeClass} h-full overflow-auto p-4">{filteredLogs}</pre> <pre bind:this={preElement} onscroll={handleScroll} class="{textWrapClass} {fontSizeClass} h-full overflow-auto p-4">{filteredLogs}</pre>
</div> </div>
</div> </div>
@@ -165,6 +165,9 @@
{#if model.description} {#if model.description}
<p class={model.unlisted ? "text-opacity-70" : ""}><em>{model.description}</em></p> <p class={model.unlisted ? "text-opacity-70" : ""}><em>{model.description}</em></p>
{/if} {/if}
{#if model.aliases && model.aliases.length > 0}
<p class="text-xs text-txtsecondary">Aliases: {model.aliases.join(", ")}</p>
{/if}
</td> </td>
<td class="w-12"> <td class="w-12">
{#if model.state === "stopped"} {#if model.state === "stopped"}
+24 -4
View File
@@ -1,5 +1,5 @@
<script lang="ts"> <script lang="ts">
import { metrics } from "../stores/api"; import { inFlightRequests, metrics } from "../stores/api";
import TokenHistogram from "./TokenHistogram.svelte"; import TokenHistogram from "./TokenHistogram.svelte";
interface HistogramData { interface HistogramData {
@@ -15,7 +15,14 @@
let stats = $derived.by(() => { let stats = $derived.by(() => {
const totalRequests = $metrics.length; const totalRequests = $metrics.length;
if (totalRequests === 0) { if (totalRequests === 0) {
return { totalRequests: 0, totalInputTokens: 0, totalOutputTokens: 0, tokenStats: { p99: "0", p95: "0", p50: "0" }, histogramData: null }; return {
totalRequests: 0,
totalInputTokens: 0,
totalOutputTokens: 0,
inFlightRequests: $inFlightRequests,
tokenStats: { p99: "0", p95: "0", p50: "0" },
histogramData: null,
};
} }
const totalInputTokens = $metrics.reduce((sum, m) => sum + m.input_tokens, 0); const totalInputTokens = $metrics.reduce((sum, m) => sum + m.input_tokens, 0);
@@ -24,7 +31,14 @@
// Calculate token statistics using output_tokens and duration_ms // Calculate token statistics using output_tokens and duration_ms
const validMetrics = $metrics.filter((m) => m.duration_ms > 0 && m.output_tokens > 0); const validMetrics = $metrics.filter((m) => m.duration_ms > 0 && m.output_tokens > 0);
if (validMetrics.length === 0) { if (validMetrics.length === 0) {
return { totalRequests, totalInputTokens, totalOutputTokens, tokenStats: { p99: "0", p95: "0", p50: "0" }, histogramData: null }; return {
totalRequests,
totalInputTokens,
totalOutputTokens,
inFlightRequests: $inFlightRequests,
tokenStats: { p99: "0", p95: "0", p50: "0" },
histogramData: null,
};
} }
// Calculate tokens/second for each valid metric // Calculate tokens/second for each valid metric
@@ -63,6 +77,7 @@
totalRequests, totalRequests,
totalInputTokens, totalInputTokens,
totalOutputTokens, totalOutputTokens,
inFlightRequests: $inFlightRequests,
tokenStats: { tokenStats: {
p99: p99.toFixed(2), p99: p99.toFixed(2),
p95: p95.toFixed(2), p95: p95.toFixed(2),
@@ -95,7 +110,12 @@
<tbody class="bg-surface divide-y divide-card-border-inner"> <tbody class="bg-surface divide-y divide-card-border-inner">
<tr class="hover:bg-secondary"> <tr class="hover:bg-secondary">
<td class="px-4 py-4 text-sm font-semibold text-gray-900 dark:text-white">{stats.totalRequests}</td> <td class="px-4 py-4 text-sm font-semibold text-gray-900 dark:text-white">
<div class="flex flex-col gap-1">
<span class="text-xs font-medium text-gray-500 dark:text-gray-400">Completed: {nf.format(stats.totalRequests)}</span>
<span class="text-xs font-medium text-gray-500 dark:text-gray-400">Waiting: {nf.format(stats.inFlightRequests)}</span>
</div>
</td>
<td class="px-4 py-4 text-sm text-gray-700 dark:text-gray-300 border-l border-gray-200 dark:border-white/10"> <td class="px-4 py-4 text-sm text-gray-700 dark:text-gray-300 border-l border-gray-200 dark:border-white/10">
<div class="flex items-center gap-2"> <div class="flex items-center gap-2">
@@ -25,6 +25,11 @@
<optgroup label="Local"> <optgroup label="Local">
{#each grouped.local as model (model.id)} {#each grouped.local as model (model.id)}
<option value={model.id}>{model.id}</option> <option value={model.id}>{model.id}</option>
{#if model.aliases}
{#each model.aliases as alias (alias)}
<option value={alias}> {alias}</option>
{/each}
{/if}
{/each} {/each}
</optgroup> </optgroup>
{/if} {/if}
+6 -1
View File
@@ -9,6 +9,7 @@ export interface Model {
description: string; description: string;
unlisted: boolean; unlisted: boolean;
peerID: string; peerID: string;
aliases?: string[];
} }
export interface Metrics { export interface Metrics {
@@ -38,8 +39,12 @@ export interface LogData {
data: string; data: string;
} }
/**
 * Decoded payload of an `inflight` API event.
 *
 * The event stream handler parses the envelope's `data` string into this
 * shape and copies `total` into the `inFlightRequests` store.
 */
export interface InFlightStats {
/** Count of pending (in-flight) requests; shown as "Waiting" on the dashboard. */
total: number;
}
export interface APIEventEnvelope { export interface APIEventEnvelope {
type: "modelStatus" | "logData" | "metrics"; type: "modelStatus" | "logData" | "metrics" | "inflight";
data: string; data: string;
} }
+9 -1
View File
@@ -1,5 +1,5 @@
import { writable } from "svelte/store"; import { writable } from "svelte/store";
import type { Model, Metrics, VersionInfo, LogData, APIEventEnvelope, ReqRespCapture } from "../lib/types"; import type { Model, Metrics, VersionInfo, LogData, APIEventEnvelope, ReqRespCapture, InFlightStats } from "../lib/types";
import { connectionState } from "./theme"; import { connectionState } from "./theme";
const LOG_LENGTH_LIMIT = 1024 * 100; /* 100KB of log data */ const LOG_LENGTH_LIMIT = 1024 * 100; /* 100KB of log data */
@@ -9,6 +9,7 @@ export const models = writable<Model[]>([]);
export const proxyLogs = writable<string>(""); export const proxyLogs = writable<string>("");
export const upstreamLogs = writable<string>(""); export const upstreamLogs = writable<string>("");
export const metrics = writable<Metrics[]>([]); export const metrics = writable<Metrics[]>([]);
export const inFlightRequests = writable<number>(0);
export const versionInfo = writable<VersionInfo>({ export const versionInfo = writable<VersionInfo>({
build_date: "unknown", build_date: "unknown",
commit: "unknown", commit: "unknown",
@@ -29,6 +30,7 @@ export function enableAPIEvents(enabled: boolean): void {
apiEventSource?.close(); apiEventSource?.close();
apiEventSource = null; apiEventSource = null;
metrics.set([]); metrics.set([]);
inFlightRequests.set(0);
return; return;
} }
@@ -46,6 +48,7 @@ export function enableAPIEvents(enabled: boolean): void {
proxyLogs.set(""); proxyLogs.set("");
upstreamLogs.set(""); upstreamLogs.set("");
metrics.set([]); metrics.set([]);
inFlightRequests.set(0);
models.set([]); models.set([]);
retryCount = 0; retryCount = 0;
connectionState.set("connected"); connectionState.set("connected");
@@ -83,6 +86,11 @@ export function enableAPIEvents(enabled: boolean): void {
metrics.update((prevMetrics) => [...newMetrics, ...prevMetrics]); metrics.update((prevMetrics) => [...newMetrics, ...prevMetrics]);
break; break;
} }
case "inflight": {
const stats = JSON.parse(message.data) as InFlightStats;
inFlightRequests.set(stats.total ?? 0);
break;
}
} }
} catch (err) { } catch (err) {
console.error(e.data, err); console.error(e.data, err);