From 19fb5f35e937fc0083c6c56bf74e19fc44334fff Mon Sep 17 00:00:00 2001 From: Benson Wong Date: Thu, 19 Feb 2026 22:21:10 -0800 Subject: [PATCH] proxy: implement setParamsByID filter (#535) Add setParamsByID filter that applies different request parameters based on the requested model ID, enabling per-alias behaviour for a single loaded model. - add SetParamsByID field to Filters struct and SanitizedSetParamsByID method - substitute ${MODEL_ID} and other macros in setParamsByID keys and values - validate no unknown macros remain in keys or values after substitution - apply setParamsByID in proxyInferenceHandler after setParams (can override it) - update config-schema.json with setParamsByID definition - update UI to show aliases and make them selectable in the Playground closes #534 --- config-schema.json | 11 +- config.example.yaml | 45 +++++-- proxy/config/config.go | 46 +++++++ proxy/config/filters.go | 33 +++++ proxy/config/filters_test.go | 117 ++++++++++++++++++ proxy/config/model_config_test.go | 66 ++++++++++ proxy/proxymanager.go | 11 ++ proxy/proxymanager_api.go | 14 ++- proxy/proxymanager_test.go | 55 ++++++++ ui-svelte/src/components/ModelsPanel.svelte | 3 + .../playground/ModelSelector.svelte | 5 + ui-svelte/src/lib/types.ts | 1 + 12 files changed, 387 insertions(+), 20 deletions(-) diff --git a/config-schema.json b/config-schema.json index 3de95fc2..58613ced 100644 --- a/config-schema.json +++ b/config-schema.json @@ -200,11 +200,20 @@ "additionalProperties": true, "default": {}, "description": "Dictionary of parameters to set/override in requests. Useful for enforcing specific parameter values. Protected params like 'model' cannot be overridden. Values can be strings, numbers, booleans, arrays, or objects." 
+ }, + "setParamsByID": { + "type": "object", + "additionalProperties": { + "type": "object", + "additionalProperties": true + }, + "default": {}, + "description": "Dictionary mapping requested model IDs (or aliases) to parameters to set/override in requests. Applied after setParams and can override those values. Useful with aliases to vary behaviour depending on which alias the client used (e.g. different reasoning_effort per alias). Keys support ${MODEL_ID} macro substitution. Protected params like 'model' cannot be overridden." } }, "additionalProperties": false, "default": {}, - "description": "Dictionary of filter settings. Supports stripParams and setParams." + "description": "Dictionary of filter settings. Supports stripParams, setParams, and setParamsByID." }, "metadata": { "type": "object", diff --git a/config.example.yaml b/config.example.yaml index fc1fee88..cc076fa6 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -126,7 +126,7 @@ apiKeys: # - below are examples of the all the settings a model can have models: # keys are the model names used in API requests - "llama": + "gpt-oss-120b": # macros: a dictionary of string substitutions specific to this model # - optional, default: empty dictionary # - macros defined here override macros defined in the global macros section @@ -143,7 +143,7 @@ models: cmd: | # ${latest-llama} is a macro that is defined above ${latest-llama} - --model path/to/llama-8B-Q4_K_M.gguf + --model path/to/gpt-oss-120B.gguf --ctx-size ${default_ctx} --temperature ${temp} @@ -151,13 +151,13 @@ models: # - optional, default: empty string # - if set, it will be used in the v1/models API response # - if not set, it will be omitted in the JSON model record - name: "llama 3.1 8B" + name: "gpt-oss 120B" # description: a description for the model # - optional, default: empty string # - if set, it will be used in the v1/models API response # - if not set, it will be omitted in the JSON model record - description: "A small but 
capable model used for quick testing" + description: "A thinking model from OpenAI" # env: define an array of environment variables to inject into cmd's environment # - optional, default: empty array @@ -172,14 +172,6 @@ models: # - if you use a custom port in cmd this *must* be set proxy: http://127.0.0.1:8999 - # aliases: alternative model names that this model configuration is used for - # - optional, default: empty array - # - aliases must be unique globally - # - useful for impersonating a specific model - aliases: - - "gpt-4o-mini" - - "gpt-3.5-turbo" - # checkEndpoint: URL path to check if the server is ready # - optional, default: /health # - endpoint is expected to return an HTTP 200 response @@ -197,7 +189,7 @@ models: # - optional, default: "" # - useful for when the upstream server expects a specific model name that # is different from the model's ID - useModelName: "qwen:qwq" + useModelName: "openai/gpt-oss-120B" # filters: a dictionary of filter settings # - optional, default: empty dictionary @@ -216,11 +208,38 @@ models: # - useful for enforcing specific parameter values # - protected params like "model" cannot be overridden # - values can be strings, numbers, booleans, arrays, or objects + # - always runs for the model setParams: # Example: enforce specific sampling parameters temperature: 0.7 top_p: 0.9 + # setParamsByID: a dictionary of parameters to set based on the model ID + # - optional, default: empty dictionary + # - combine with aliases to create variant behaviour without reloading the model + # - parameters are set in the request body JSON + # - runs after setParams, so it overrides any matching settings + # - protected params like "model" cannot be overridden + # - values can be strings, numbers, booleans, arrays, or objects + # - model aliases will be automatically created for each key + setParamsByID: + "${MODEL_ID}": + chat_template_kwargs: + reasoning_effort: medium + "${MODEL_ID}:high": + chat_template_kwargs: + reasoning_effort: high + 
"${MODEL_ID}:low": + chat_template_kwargs: + reasoning_effort: low + + # aliases: alternative model names that this model configuration is used for + # - optional, default: empty array + # - aliases must be unique globally + # - useful for impersonating a specific model + aliases: + - "gpt-4o-mini" + # metadata: a dictionary of arbitrary values that are included in /v1/models # - optional, default: empty dictionary # - while metadata can contains complex types it is recommended to keep it simple diff --git a/proxy/config/config.go b/proxy/config/config.go index ed74110c..4d1e6818 100644 --- a/proxy/config/config.go +++ b/proxy/config/config.go @@ -294,6 +294,24 @@ func LoadConfigFromReader(r io.Reader) (Config, error) { modelConfig.CheckEndpoint = strings.ReplaceAll(modelConfig.CheckEndpoint, macroSlug, macroStr) modelConfig.Filters.StripParams = strings.ReplaceAll(modelConfig.Filters.StripParams, macroSlug, macroStr) + // Substitute macros in SetParamsByID keys and values + if len(modelConfig.Filters.SetParamsByID) > 0 { + newSetParamsByID := make(map[string]map[string]any, len(modelConfig.Filters.SetParamsByID)) + for key, paramMap := range modelConfig.Filters.SetParamsByID { + newKey := strings.ReplaceAll(key, macroSlug, macroStr) + newValAny, err := substituteMacroInValue(any(paramMap), entry.Name, entry.Value) + if err != nil { + return Config{}, fmt.Errorf("model %s filters.setParamsByID: %s", modelId, err.Error()) + } + newParamMap, ok := newValAny.(map[string]any) + if !ok { + return Config{}, fmt.Errorf("model %s filters.setParamsByID: unexpected type after macro substitution", modelId) + } + newSetParamsByID[newKey] = newParamMap + } + modelConfig.Filters.SetParamsByID = newSetParamsByID + } + // Substitute in metadata (type-preserving) if len(modelConfig.Metadata) > 0 { result, err := substituteMacroInValue(modelConfig.Metadata, entry.Name, entry.Value) @@ -359,6 +377,34 @@ func LoadConfigFromReader(r io.Reader) (Config, error) { } } + // Validate 
SetParamsByID keys and values + for key, paramMap := range modelConfig.Filters.SetParamsByID { + if matches := macroPatternRegex.FindAllStringSubmatch(key, -1); len(matches) > 0 { + return Config{}, fmt.Errorf("unknown macro '${%s}' found in model %s filters.setParamsByID key", matches[0][1], modelId) + } + if err := validateNestedForUnknownMacros(any(paramMap), fmt.Sprintf("model %s filters.setParamsByID[%s]", modelId, key)); err != nil { + return Config{}, err + } + } + + // Auto-register setParamsByID keys as aliases (skip the model's own ID) + for key := range modelConfig.Filters.SetParamsByID { + if key == modelId { + continue + } + if _, exists := config.Models[key]; exists { + return Config{}, fmt.Errorf("model %s filters.setParamsByID: key '%s' conflicts with an existing model ID", modelId, key) + } + if existingModel, exists := config.aliases[key]; exists { + if existingModel != modelId { + return Config{}, fmt.Errorf("duplicate alias '%s' in model %s filters.setParamsByID, already used by model %s", key, modelId, existingModel) + } + continue // already registered as explicit alias for this model + } + config.aliases[key] = modelId + modelConfig.Aliases = append(modelConfig.Aliases, key) + } + if _, err := url.Parse(modelConfig.Proxy); err != nil { return Config{}, fmt.Errorf("model %s: invalid proxy URL: %w", modelId, err) } diff --git a/proxy/config/filters.go b/proxy/config/filters.go index 39900075..2591be21 100644 --- a/proxy/config/filters.go +++ b/proxy/config/filters.go @@ -20,6 +20,12 @@ type Filters struct { // SetParams is a dictionary of parameters to set/override in requests // Protected params (like "model") cannot be set SetParams map[string]any `yaml:"setParams"` + + // SetParamsByID maps requested model IDs to parameters to set/override in requests. + // Useful with aliases: a single loaded model can behave differently depending on + // which alias the client used. Applied after SetParams, so it can override those values. 
+ // Protected params (like "model") cannot be set. + SetParamsByID map[string]map[string]any `yaml:"setParamsByID"` } // SanitizedStripParams returns a sorted list of parameters to strip, @@ -51,6 +57,33 @@ func (f Filters) SanitizedStripParams() []string { return cleaned } +// SanitizedSetParamsByID returns the params to set for the given requestedModelID, +// with protected params removed and keys sorted for consistent iteration order. +// Returns nil if the ID has no entry or all its params are protected. +func (f Filters) SanitizedSetParamsByID(requestedModelID string) (map[string]any, []string) { + if len(f.SetParamsByID) == 0 { + return nil, nil + } + params, found := f.SetParamsByID[requestedModelID] + if !found || len(params) == 0 { + return nil, nil + } + result := make(map[string]any, len(params)) + keys := make([]string, 0, len(params)) + for key, value := range params { + if slices.Contains(ProtectedParams, key) { + continue + } + result[key] = value + keys = append(keys, key) + } + sort.Strings(keys) + if len(result) == 0 { + return nil, nil + } + return result, keys +} + // SanitizedSetParams returns a copy of SetParams with protected params removed // and keys sorted for consistent iteration order func (f Filters) SanitizedSetParams() (map[string]any, []string) { diff --git a/proxy/config/filters_test.go b/proxy/config/filters_test.go index d1f54dcd..83b5f46d 100644 --- a/proxy/config/filters_test.go +++ b/proxy/config/filters_test.go @@ -162,6 +162,123 @@ func TestFilters_SanitizedSetParams(t *testing.T) { } } +func TestFilters_SanitizedSetParamsByID(t *testing.T) { + tests := []struct { + name string + setParamsByID map[string]map[string]any + requestedModelID string + wantParams map[string]any + wantKeys []string + }{ + { + name: "empty SetParamsByID returns nil", + setParamsByID: nil, + requestedModelID: "model1", + wantParams: nil, + wantKeys: nil, + }, + { + name: "empty map returns nil", + setParamsByID: map[string]map[string]any{}, + 
requestedModelID: "model1", + wantParams: nil, + wantKeys: nil, + }, + { + name: "non-matching model ID returns nil", + setParamsByID: map[string]map[string]any{ + "model2": {"temperature": 0.9}, + }, + requestedModelID: "model1", + wantParams: nil, + wantKeys: nil, + }, + { + name: "matching model ID returns correct params", + setParamsByID: map[string]map[string]any{ + "model1": {"temperature": 0.7, "top_p": 0.9}, + "model2": {"temperature": 0.5}, + }, + requestedModelID: "model1", + wantParams: map[string]any{ + "temperature": 0.7, + "top_p": 0.9, + }, + wantKeys: []string{"temperature", "top_p"}, + }, + { + name: "protected param model is filtered out", + setParamsByID: map[string]map[string]any{ + "model1": { + "model": "should-be-filtered", + "temperature": 0.7, + }, + }, + requestedModelID: "model1", + wantParams: map[string]any{ + "temperature": 0.7, + }, + wantKeys: []string{"temperature"}, + }, + { + name: "only protected param returns nil", + setParamsByID: map[string]map[string]any{ + "model1": { + "model": "should-be-filtered", + }, + }, + requestedModelID: "model1", + wantParams: nil, + wantKeys: nil, + }, + { + name: "keys are sorted", + setParamsByID: map[string]map[string]any{ + "model1": { + "z_param": "z", + "a_param": "a", + "m_param": "m", + }, + }, + requestedModelID: "model1", + wantParams: map[string]any{ + "z_param": "z", + "a_param": "a", + "m_param": "m", + }, + wantKeys: []string{"a_param", "m_param", "z_param"}, + }, + { + name: "alias style key lookup", + setParamsByID: map[string]map[string]any{ + "model1:high": {"reasoning_effort": "high"}, + "model1:low": {"reasoning_effort": "low"}, + }, + requestedModelID: "model1:high", + wantParams: map[string]any{ + "reasoning_effort": "high", + }, + wantKeys: []string{"reasoning_effort"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + f := Filters{SetParamsByID: tt.setParamsByID} + gotParams, gotKeys := f.SanitizedSetParamsByID(tt.requestedModelID) + + if 
tt.wantParams == nil { + assert.Nil(t, gotParams) + assert.Nil(t, gotKeys) + return + } + + assert.Equal(t, tt.wantKeys, gotKeys) + assert.Equal(t, tt.wantParams, gotParams) + }) + } +} + func TestProtectedParams(t *testing.T) { // Verify that "model" is protected assert.Contains(t, ProtectedParams, "model") diff --git a/proxy/config/model_config_test.go b/proxy/config/model_config_test.go index 32392952..7c76421f 100644 --- a/proxy/config/model_config_test.go +++ b/proxy/config/model_config_test.go @@ -73,6 +73,72 @@ models: } } +func TestConfig_SetParamsByIDAutoAlias(t *testing.T) { + content := ` +models: + model1: + cmd: path/to/cmd --port ${PORT} + filters: + setParamsByID: + "${MODEL_ID}:high": + reasoning_effort: high + "${MODEL_ID}:low": + reasoning_effort: low +` + cfg, err := LoadConfigFromReader(strings.NewReader(content)) + assert.NoError(t, err) + + // Keys (other than the model's own ID) should be registered as aliases + realName, found := cfg.RealModelName("model1:high") + assert.True(t, found, "model1:high should be an auto-registered alias") + assert.Equal(t, "model1", realName) + + realName, found = cfg.RealModelName("model1:low") + assert.True(t, found, "model1:low should be an auto-registered alias") + assert.Equal(t, "model1", realName) + + // Auto-aliases should also appear in modelConfig.Aliases + aliases := cfg.Models["model1"].Aliases + assert.Contains(t, aliases, "model1:high") + assert.Contains(t, aliases, "model1:low") +} + +func TestConfig_SetParamsByIDAutoAliasConflictWithModelID(t *testing.T) { + content := ` +models: + model1: + cmd: path/to/cmd --port ${PORT} + filters: + setParamsByID: + model2: + reasoning_effort: high + model2: + cmd: path/to/cmd --port ${PORT} +` + _, err := LoadConfigFromReader(strings.NewReader(content)) + assert.ErrorContains(t, err, "conflicts with an existing model ID") +} + +func TestConfig_SetParamsByIDAutoAliasConflictWithOtherModel(t *testing.T) { + content := ` +models: + model1: + cmd: path/to/cmd 
--port ${PORT} + filters: + setParamsByID: + "shared-alias": + reasoning_effort: high + model2: + cmd: path/to/cmd --port ${PORT} + filters: + setParamsByID: + "shared-alias": + reasoning_effort: low +` + _, err := LoadConfigFromReader(strings.NewReader(content)) + assert.ErrorContains(t, err, "duplicate alias") +} + func TestConfig_ModelFiltersWithSetParams(t *testing.T) { content := ` models: diff --git a/proxy/proxymanager.go b/proxy/proxymanager.go index 358d23df..c5042bab 100644 --- a/proxy/proxymanager.go +++ b/proxy/proxymanager.go @@ -720,6 +720,17 @@ func (pm *ProxyManager) proxyInferenceHandler(c *gin.Context) { } } + // setParamsByID: set params based on the requested model ID (runs after setParams, can override it) + setParamsByIDParams, setParamsByIDKeys := pm.config.Models[modelID].Filters.SanitizedSetParamsByID(requestedModel) + for _, key := range setParamsByIDKeys { + pm.proxyLogger.Debugf("<%s> setting param by id: %s", requestedModel, key) + bodyBytes, err = sjson.SetBytes(bodyBytes, key, setParamsByIDParams[key]) + if err != nil { + pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error setting parameter %s in request", key)) + return + } + } + pm.proxyLogger.Debugf("ProxyManager using local Process for model: %s", requestedModel) nextHandler = processGroup.ProxyRequest } else if pm.peerProxy != nil && pm.peerProxy.HasPeerModel(requestedModel) { diff --git a/proxy/proxymanager_api.go b/proxy/proxymanager_api.go index 0e660d03..00897c65 100644 --- a/proxy/proxymanager_api.go +++ b/proxy/proxymanager_api.go @@ -14,12 +14,13 @@ import ( ) type Model struct { - Id string `json:"id"` - Name string `json:"name"` - Description string `json:"description"` - State string `json:"state"` - Unlisted bool `json:"unlisted"` - PeerID string `json:"peerID"` + Id string `json:"id"` + Name string `json:"name"` + Description string `json:"description"` + State string `json:"state"` + Unlisted bool `json:"unlisted"` + PeerID string 
`json:"peerID"` + Aliases []string `json:"aliases,omitempty"` } func addApiHandlers(pm *ProxyManager) { @@ -83,6 +84,7 @@ func (pm *ProxyManager) getModelStatus() []Model { Description: pm.config.Models[modelID].Description, State: state, Unlisted: pm.config.Models[modelID].Unlisted, + Aliases: pm.config.Models[modelID].Aliases, }) } diff --git a/proxy/proxymanager_test.go b/proxy/proxymanager_test.go index 652f2e16..a147e5ea 100644 --- a/proxy/proxymanager_test.go +++ b/proxy/proxymanager_test.go @@ -1046,6 +1046,61 @@ func TestProxyManager_FiltersStripParams(t *testing.T) { // t.Logf("%v", response) } +func TestProxyManager_FiltersSetParamsByID(t *testing.T) { + // no explicit aliases — setParamsByID keys are auto-registered as aliases + configStr := strings.Replace(` +logLevel: error +models: + model1: + cmd: 'SRPATH --port ${PORT} --silent --respond model1' + proxy: "http://127.0.0.1:${PORT}" + filters: + setParams: + reasoning_effort: medium + setParamsByID: + "${MODEL_ID}:high": + reasoning_effort: high + "${MODEL_ID}:low": + reasoning_effort: low +`, "SRPATH", simpleResponderPath, -1) + + cfg, err := config.LoadConfigFromReader(strings.NewReader(configStr)) + if !assert.NoError(t, err, "invalid test configuration") { + return + } + + proxy := New(cfg) + defer proxy.StopProcesses(StopWaitForInflightRequest) + + tests := []struct { + requestedModel string + wantEffort string + }{ + // setParams applies, no setParamsByID match + {requestedModel: "model1", wantEffort: "medium"}, + // setParamsByID overrides setParams + {requestedModel: "model1:high", wantEffort: "high"}, + {requestedModel: "model1:low", wantEffort: "low"}, + } + + for _, tt := range tests { + t.Run(tt.requestedModel, func(t *testing.T) { + reqBody := fmt.Sprintf(`{"model":%q}`, tt.requestedModel) + req := httptest.NewRequest("POST", "/v1/chat/completions", bytes.NewBufferString(reqBody)) + w := CreateTestResponseRecorder() + proxy.ServeHTTP(w, req) + assert.Equal(t, http.StatusOK, w.Code) + + 
var response map[string]interface{} + assert.NoError(t, json.Unmarshal(w.Body.Bytes(), &response)) + + requestBody, _ := response["request_body"].(string) + gotEffort := gjson.Get(requestBody, "reasoning_effort").String() + assert.Equal(t, tt.wantEffort, gotEffort, "reasoning_effort mismatch for model %s", tt.requestedModel) + }) + } +} + func TestProxyManager_HealthEndpoint(t *testing.T) { config := config.AddDefaultGroupToConfig(config.Config{ HealthCheckTimeout: 15, diff --git a/ui-svelte/src/components/ModelsPanel.svelte b/ui-svelte/src/components/ModelsPanel.svelte index b4115e47..090befe0 100644 --- a/ui-svelte/src/components/ModelsPanel.svelte +++ b/ui-svelte/src/components/ModelsPanel.svelte @@ -165,6 +165,9 @@ {#if model.description}

{model.description}

{/if} + {#if model.aliases && model.aliases.length > 0} +

Aliases: {model.aliases.join(", ")}

+ {/if} {#if model.state === "stopped"} diff --git a/ui-svelte/src/components/playground/ModelSelector.svelte b/ui-svelte/src/components/playground/ModelSelector.svelte index e8d9b51c..21172b24 100644 --- a/ui-svelte/src/components/playground/ModelSelector.svelte +++ b/ui-svelte/src/components/playground/ModelSelector.svelte @@ -25,6 +25,11 @@ {#each grouped.local as model (model.id)} + {#if model.aliases} + {#each model.aliases as alias (alias)} + + {/each} + {/if} {/each} {/if} diff --git a/ui-svelte/src/lib/types.ts b/ui-svelte/src/lib/types.ts index 84fc9f98..890f41b0 100644 --- a/ui-svelte/src/lib/types.ts +++ b/ui-svelte/src/lib/types.ts @@ -9,6 +9,7 @@ export interface Model { description: string; unlisted: boolean; peerID: string; + aliases?: string[]; } export interface Metrics {