proxy: implement setParamsByID filter (#535)
Add setParamsByID filter that applies different request parameters based
on the requested model ID, enabling per-alias behaviour for a single
loaded model.
- add SetParamsByID field to Filters struct and SanitizedSetParamsByID
method
- substitute ${MODEL_ID} and other macros in setParamsByID keys and
values
- validate no unknown macros remain in keys or values after substitution
- apply setParamsByID in proxyInferenceHandler after setParams (can
override it)
- update config-schema.json with setParamsByID definition
- update UI to show aliases and make them selectable in the Playground
closes #534
This commit is contained in:
+10
-1
@@ -200,11 +200,20 @@
|
|||||||
"additionalProperties": true,
|
"additionalProperties": true,
|
||||||
"default": {},
|
"default": {},
|
||||||
"description": "Dictionary of parameters to set/override in requests. Useful for enforcing specific parameter values. Protected params like 'model' cannot be overridden. Values can be strings, numbers, booleans, arrays, or objects."
|
"description": "Dictionary of parameters to set/override in requests. Useful for enforcing specific parameter values. Protected params like 'model' cannot be overridden. Values can be strings, numbers, booleans, arrays, or objects."
|
||||||
|
},
|
||||||
|
"setParamsByID": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": true
|
||||||
|
},
|
||||||
|
"default": {},
|
||||||
|
"description": "Dictionary mapping requested model IDs (or aliases) to parameters to set/override in requests. Applied after setParams and can override those values. Useful with aliases to vary behaviour depending on which alias the client used (e.g. different reasoning_effort per alias). Keys support ${MODEL_ID} macro substitution. Protected params like 'model' cannot be overridden."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
"default": {},
|
"default": {},
|
||||||
"description": "Dictionary of filter settings. Supports stripParams and setParams."
|
"description": "Dictionary of filter settings. Supports stripParams, setParams, and setParamsByID."
|
||||||
},
|
},
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
|||||||
+32
-13
@@ -126,7 +126,7 @@ apiKeys:
|
|||||||
# - below are examples of all the settings a model can have
|
# - below are examples of all the settings a model can have
|
||||||
models:
|
models:
|
||||||
# keys are the model names used in API requests
|
# keys are the model names used in API requests
|
||||||
"llama":
|
"gpt-oss-120b":
|
||||||
# macros: a dictionary of string substitutions specific to this model
|
# macros: a dictionary of string substitutions specific to this model
|
||||||
# - optional, default: empty dictionary
|
# - optional, default: empty dictionary
|
||||||
# - macros defined here override macros defined in the global macros section
|
# - macros defined here override macros defined in the global macros section
|
||||||
@@ -143,7 +143,7 @@ models:
|
|||||||
cmd: |
|
cmd: |
|
||||||
# ${latest-llama} is a macro that is defined above
|
# ${latest-llama} is a macro that is defined above
|
||||||
${latest-llama}
|
${latest-llama}
|
||||||
--model path/to/llama-8B-Q4_K_M.gguf
|
--model path/to/gpt-oss-120B.gguf
|
||||||
--ctx-size ${default_ctx}
|
--ctx-size ${default_ctx}
|
||||||
--temperature ${temp}
|
--temperature ${temp}
|
||||||
|
|
||||||
@@ -151,13 +151,13 @@ models:
|
|||||||
# - optional, default: empty string
|
# - optional, default: empty string
|
||||||
# - if set, it will be used in the v1/models API response
|
# - if set, it will be used in the v1/models API response
|
||||||
# - if not set, it will be omitted in the JSON model record
|
# - if not set, it will be omitted in the JSON model record
|
||||||
name: "llama 3.1 8B"
|
name: "gpt-oss 120B"
|
||||||
|
|
||||||
# description: a description for the model
|
# description: a description for the model
|
||||||
# - optional, default: empty string
|
# - optional, default: empty string
|
||||||
# - if set, it will be used in the v1/models API response
|
# - if set, it will be used in the v1/models API response
|
||||||
# - if not set, it will be omitted in the JSON model record
|
# - if not set, it will be omitted in the JSON model record
|
||||||
description: "A small but capable model used for quick testing"
|
description: "A thinking model from OpenAI"
|
||||||
|
|
||||||
# env: define an array of environment variables to inject into cmd's environment
|
# env: define an array of environment variables to inject into cmd's environment
|
||||||
# - optional, default: empty array
|
# - optional, default: empty array
|
||||||
@@ -172,14 +172,6 @@ models:
|
|||||||
# - if you use a custom port in cmd this *must* be set
|
# - if you use a custom port in cmd this *must* be set
|
||||||
proxy: http://127.0.0.1:8999
|
proxy: http://127.0.0.1:8999
|
||||||
|
|
||||||
# aliases: alternative model names that this model configuration is used for
|
|
||||||
# - optional, default: empty array
|
|
||||||
# - aliases must be unique globally
|
|
||||||
# - useful for impersonating a specific model
|
|
||||||
aliases:
|
|
||||||
- "gpt-4o-mini"
|
|
||||||
- "gpt-3.5-turbo"
|
|
||||||
|
|
||||||
# checkEndpoint: URL path to check if the server is ready
|
# checkEndpoint: URL path to check if the server is ready
|
||||||
# - optional, default: /health
|
# - optional, default: /health
|
||||||
# - endpoint is expected to return an HTTP 200 response
|
# - endpoint is expected to return an HTTP 200 response
|
||||||
@@ -197,7 +189,7 @@ models:
|
|||||||
# - optional, default: ""
|
# - optional, default: ""
|
||||||
# - useful for when the upstream server expects a specific model name that
|
# - useful for when the upstream server expects a specific model name that
|
||||||
# is different from the model's ID
|
# is different from the model's ID
|
||||||
useModelName: "qwen:qwq"
|
useModelName: "openai/gpt-oss-120B"
|
||||||
|
|
||||||
# filters: a dictionary of filter settings
|
# filters: a dictionary of filter settings
|
||||||
# - optional, default: empty dictionary
|
# - optional, default: empty dictionary
|
||||||
@@ -216,11 +208,38 @@ models:
|
|||||||
# - useful for enforcing specific parameter values
|
# - useful for enforcing specific parameter values
|
||||||
# - protected params like "model" cannot be overridden
|
# - protected params like "model" cannot be overridden
|
||||||
# - values can be strings, numbers, booleans, arrays, or objects
|
# - values can be strings, numbers, booleans, arrays, or objects
|
||||||
|
# - always runs for the model
|
||||||
setParams:
|
setParams:
|
||||||
# Example: enforce specific sampling parameters
|
# Example: enforce specific sampling parameters
|
||||||
temperature: 0.7
|
temperature: 0.7
|
||||||
top_p: 0.9
|
top_p: 0.9
|
||||||
|
|
||||||
|
# setParamsByID: a dictionary of parameters to set based on the requested model ID
|
||||||
|
# - optional, default: empty dictionary
|
||||||
|
# - combine with aliases to create variant behaviour without reloading the model
|
||||||
|
# - parameters are set in the request body JSON
|
||||||
|
# - runs after setParams, so it overrides any values that setParams set for the same parameter
|
||||||
|
# - protected params like "model" cannot be overridden
|
||||||
|
# - values can be strings, numbers, booleans, arrays, or objects
|
||||||
|
# - a model alias is automatically created for each key (except a key equal to the model's own ID)
|
||||||
|
setParamsByID:
|
||||||
|
"${MODEL_ID}":
|
||||||
|
chat_template_kwargs:
|
||||||
|
reasoning_effort: medium
|
||||||
|
"${MODEL_ID}:high":
|
||||||
|
chat_template_kwargs:
|
||||||
|
reasoning_effort: high
|
||||||
|
"${MODEL_ID}:low":
|
||||||
|
chat_template_kwargs:
|
||||||
|
reasoning_effort: low
|
||||||
|
|
||||||
|
# aliases: alternative model names that this model configuration is used for
|
||||||
|
# - optional, default: empty array
|
||||||
|
# - aliases must be unique globally
|
||||||
|
# - useful for impersonating a specific model
|
||||||
|
aliases:
|
||||||
|
- "gpt-4o-mini"
|
||||||
|
|
||||||
# metadata: a dictionary of arbitrary values that are included in /v1/models
|
# metadata: a dictionary of arbitrary values that are included in /v1/models
|
||||||
# - optional, default: empty dictionary
|
# - optional, default: empty dictionary
|
||||||
# - while metadata can contain complex types it is recommended to keep it simple
|
# - while metadata can contain complex types it is recommended to keep it simple
|
||||||
|
|||||||
@@ -294,6 +294,24 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
modelConfig.CheckEndpoint = strings.ReplaceAll(modelConfig.CheckEndpoint, macroSlug, macroStr)
|
modelConfig.CheckEndpoint = strings.ReplaceAll(modelConfig.CheckEndpoint, macroSlug, macroStr)
|
||||||
modelConfig.Filters.StripParams = strings.ReplaceAll(modelConfig.Filters.StripParams, macroSlug, macroStr)
|
modelConfig.Filters.StripParams = strings.ReplaceAll(modelConfig.Filters.StripParams, macroSlug, macroStr)
|
||||||
|
|
||||||
|
// Substitute macros in SetParamsByID keys and values
|
||||||
|
if len(modelConfig.Filters.SetParamsByID) > 0 {
|
||||||
|
newSetParamsByID := make(map[string]map[string]any, len(modelConfig.Filters.SetParamsByID))
|
||||||
|
for key, paramMap := range modelConfig.Filters.SetParamsByID {
|
||||||
|
newKey := strings.ReplaceAll(key, macroSlug, macroStr)
|
||||||
|
newValAny, err := substituteMacroInValue(any(paramMap), entry.Name, entry.Value)
|
||||||
|
if err != nil {
|
||||||
|
return Config{}, fmt.Errorf("model %s filters.setParamsByID: %s", modelId, err.Error())
|
||||||
|
}
|
||||||
|
newParamMap, ok := newValAny.(map[string]any)
|
||||||
|
if !ok {
|
||||||
|
return Config{}, fmt.Errorf("model %s filters.setParamsByID: unexpected type after macro substitution", modelId)
|
||||||
|
}
|
||||||
|
newSetParamsByID[newKey] = newParamMap
|
||||||
|
}
|
||||||
|
modelConfig.Filters.SetParamsByID = newSetParamsByID
|
||||||
|
}
|
||||||
|
|
||||||
// Substitute in metadata (type-preserving)
|
// Substitute in metadata (type-preserving)
|
||||||
if len(modelConfig.Metadata) > 0 {
|
if len(modelConfig.Metadata) > 0 {
|
||||||
result, err := substituteMacroInValue(modelConfig.Metadata, entry.Name, entry.Value)
|
result, err := substituteMacroInValue(modelConfig.Metadata, entry.Name, entry.Value)
|
||||||
@@ -359,6 +377,34 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Validate SetParamsByID keys and values
|
||||||
|
for key, paramMap := range modelConfig.Filters.SetParamsByID {
|
||||||
|
if matches := macroPatternRegex.FindAllStringSubmatch(key, -1); len(matches) > 0 {
|
||||||
|
return Config{}, fmt.Errorf("unknown macro '${%s}' found in model %s filters.setParamsByID key", matches[0][1], modelId)
|
||||||
|
}
|
||||||
|
if err := validateNestedForUnknownMacros(any(paramMap), fmt.Sprintf("model %s filters.setParamsByID[%s]", modelId, key)); err != nil {
|
||||||
|
return Config{}, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Auto-register setParamsByID keys as aliases (skip the model's own ID)
|
||||||
|
for key := range modelConfig.Filters.SetParamsByID {
|
||||||
|
if key == modelId {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if _, exists := config.Models[key]; exists {
|
||||||
|
return Config{}, fmt.Errorf("model %s filters.setParamsByID: key '%s' conflicts with an existing model ID", modelId, key)
|
||||||
|
}
|
||||||
|
if existingModel, exists := config.aliases[key]; exists {
|
||||||
|
if existingModel != modelId {
|
||||||
|
return Config{}, fmt.Errorf("duplicate alias '%s' in model %s filters.setParamsByID, already used by model %s", key, modelId, existingModel)
|
||||||
|
}
|
||||||
|
continue // already registered as explicit alias for this model
|
||||||
|
}
|
||||||
|
config.aliases[key] = modelId
|
||||||
|
modelConfig.Aliases = append(modelConfig.Aliases, key)
|
||||||
|
}
|
||||||
|
|
||||||
if _, err := url.Parse(modelConfig.Proxy); err != nil {
|
if _, err := url.Parse(modelConfig.Proxy); err != nil {
|
||||||
return Config{}, fmt.Errorf("model %s: invalid proxy URL: %w", modelId, err)
|
return Config{}, fmt.Errorf("model %s: invalid proxy URL: %w", modelId, err)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -20,6 +20,12 @@ type Filters struct {
|
|||||||
// SetParams is a dictionary of parameters to set/override in requests
|
// SetParams is a dictionary of parameters to set/override in requests
|
||||||
// Protected params (like "model") cannot be set
|
// Protected params (like "model") cannot be set
|
||||||
SetParams map[string]any `yaml:"setParams"`
|
SetParams map[string]any `yaml:"setParams"`
|
||||||
|
|
||||||
|
// SetParamsByID maps requested model IDs to parameters to set/override in requests.
|
||||||
|
// Useful with aliases: a single loaded model can behave differently depending on
|
||||||
|
// which alias the client used. Applied after SetParams, so it can override those values.
|
||||||
|
// Protected params (like "model") cannot be set.
|
||||||
|
SetParamsByID map[string]map[string]any `yaml:"setParamsByID"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// SanitizedStripParams returns a sorted list of parameters to strip,
|
// SanitizedStripParams returns a sorted list of parameters to strip,
|
||||||
@@ -51,6 +57,33 @@ func (f Filters) SanitizedStripParams() []string {
|
|||||||
return cleaned
|
return cleaned
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SanitizedSetParamsByID returns the params to set for the given requestedModelID,
|
||||||
|
// with protected params removed and keys sorted for consistent iteration order.
|
||||||
|
// Returns nil, nil if the ID has no entry, its entry is empty, or all its params are protected.
|
||||||
|
func (f Filters) SanitizedSetParamsByID(requestedModelID string) (map[string]any, []string) {
|
||||||
|
if len(f.SetParamsByID) == 0 {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
params, found := f.SetParamsByID[requestedModelID]
|
||||||
|
if !found || len(params) == 0 {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
result := make(map[string]any, len(params))
|
||||||
|
keys := make([]string, 0, len(params))
|
||||||
|
for key, value := range params {
|
||||||
|
if slices.Contains(ProtectedParams, key) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
result[key] = value
|
||||||
|
keys = append(keys, key)
|
||||||
|
}
|
||||||
|
sort.Strings(keys)
|
||||||
|
if len(result) == 0 {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
return result, keys
|
||||||
|
}
|
||||||
|
|
||||||
// SanitizedSetParams returns a copy of SetParams with protected params removed
|
// SanitizedSetParams returns a copy of SetParams with protected params removed
|
||||||
// and keys sorted for consistent iteration order
|
// and keys sorted for consistent iteration order
|
||||||
func (f Filters) SanitizedSetParams() (map[string]any, []string) {
|
func (f Filters) SanitizedSetParams() (map[string]any, []string) {
|
||||||
|
|||||||
@@ -162,6 +162,123 @@ func TestFilters_SanitizedSetParams(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestFilters_SanitizedSetParamsByID(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
setParamsByID map[string]map[string]any
|
||||||
|
requestedModelID string
|
||||||
|
wantParams map[string]any
|
||||||
|
wantKeys []string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "empty SetParamsByID returns nil",
|
||||||
|
setParamsByID: nil,
|
||||||
|
requestedModelID: "model1",
|
||||||
|
wantParams: nil,
|
||||||
|
wantKeys: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "empty map returns nil",
|
||||||
|
setParamsByID: map[string]map[string]any{},
|
||||||
|
requestedModelID: "model1",
|
||||||
|
wantParams: nil,
|
||||||
|
wantKeys: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "non-matching model ID returns nil",
|
||||||
|
setParamsByID: map[string]map[string]any{
|
||||||
|
"model2": {"temperature": 0.9},
|
||||||
|
},
|
||||||
|
requestedModelID: "model1",
|
||||||
|
wantParams: nil,
|
||||||
|
wantKeys: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "matching model ID returns correct params",
|
||||||
|
setParamsByID: map[string]map[string]any{
|
||||||
|
"model1": {"temperature": 0.7, "top_p": 0.9},
|
||||||
|
"model2": {"temperature": 0.5},
|
||||||
|
},
|
||||||
|
requestedModelID: "model1",
|
||||||
|
wantParams: map[string]any{
|
||||||
|
"temperature": 0.7,
|
||||||
|
"top_p": 0.9,
|
||||||
|
},
|
||||||
|
wantKeys: []string{"temperature", "top_p"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "protected param model is filtered out",
|
||||||
|
setParamsByID: map[string]map[string]any{
|
||||||
|
"model1": {
|
||||||
|
"model": "should-be-filtered",
|
||||||
|
"temperature": 0.7,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
requestedModelID: "model1",
|
||||||
|
wantParams: map[string]any{
|
||||||
|
"temperature": 0.7,
|
||||||
|
},
|
||||||
|
wantKeys: []string{"temperature"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "only protected param returns nil",
|
||||||
|
setParamsByID: map[string]map[string]any{
|
||||||
|
"model1": {
|
||||||
|
"model": "should-be-filtered",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
requestedModelID: "model1",
|
||||||
|
wantParams: nil,
|
||||||
|
wantKeys: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "keys are sorted",
|
||||||
|
setParamsByID: map[string]map[string]any{
|
||||||
|
"model1": {
|
||||||
|
"z_param": "z",
|
||||||
|
"a_param": "a",
|
||||||
|
"m_param": "m",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
requestedModelID: "model1",
|
||||||
|
wantParams: map[string]any{
|
||||||
|
"z_param": "z",
|
||||||
|
"a_param": "a",
|
||||||
|
"m_param": "m",
|
||||||
|
},
|
||||||
|
wantKeys: []string{"a_param", "m_param", "z_param"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "alias style key lookup",
|
||||||
|
setParamsByID: map[string]map[string]any{
|
||||||
|
"model1:high": {"reasoning_effort": "high"},
|
||||||
|
"model1:low": {"reasoning_effort": "low"},
|
||||||
|
},
|
||||||
|
requestedModelID: "model1:high",
|
||||||
|
wantParams: map[string]any{
|
||||||
|
"reasoning_effort": "high",
|
||||||
|
},
|
||||||
|
wantKeys: []string{"reasoning_effort"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
f := Filters{SetParamsByID: tt.setParamsByID}
|
||||||
|
gotParams, gotKeys := f.SanitizedSetParamsByID(tt.requestedModelID)
|
||||||
|
|
||||||
|
if tt.wantParams == nil {
|
||||||
|
assert.Nil(t, gotParams)
|
||||||
|
assert.Nil(t, gotKeys)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
assert.Equal(t, tt.wantKeys, gotKeys)
|
||||||
|
assert.Equal(t, tt.wantParams, gotParams)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestProtectedParams(t *testing.T) {
|
func TestProtectedParams(t *testing.T) {
|
||||||
// Verify that "model" is protected
|
// Verify that "model" is protected
|
||||||
assert.Contains(t, ProtectedParams, "model")
|
assert.Contains(t, ProtectedParams, "model")
|
||||||
|
|||||||
@@ -73,6 +73,72 @@ models:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestConfig_SetParamsByIDAutoAlias(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
models:
|
||||||
|
model1:
|
||||||
|
cmd: path/to/cmd --port ${PORT}
|
||||||
|
filters:
|
||||||
|
setParamsByID:
|
||||||
|
"${MODEL_ID}:high":
|
||||||
|
reasoning_effort: high
|
||||||
|
"${MODEL_ID}:low":
|
||||||
|
reasoning_effort: low
|
||||||
|
`
|
||||||
|
cfg, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
// Keys (other than the model's own ID) should be registered as aliases
|
||||||
|
realName, found := cfg.RealModelName("model1:high")
|
||||||
|
assert.True(t, found, "model1:high should be an auto-registered alias")
|
||||||
|
assert.Equal(t, "model1", realName)
|
||||||
|
|
||||||
|
realName, found = cfg.RealModelName("model1:low")
|
||||||
|
assert.True(t, found, "model1:low should be an auto-registered alias")
|
||||||
|
assert.Equal(t, "model1", realName)
|
||||||
|
|
||||||
|
// Auto-aliases should also appear in modelConfig.Aliases
|
||||||
|
aliases := cfg.Models["model1"].Aliases
|
||||||
|
assert.Contains(t, aliases, "model1:high")
|
||||||
|
assert.Contains(t, aliases, "model1:low")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestConfig_SetParamsByIDAutoAliasConflictWithModelID(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
models:
|
||||||
|
model1:
|
||||||
|
cmd: path/to/cmd --port ${PORT}
|
||||||
|
filters:
|
||||||
|
setParamsByID:
|
||||||
|
model2:
|
||||||
|
reasoning_effort: high
|
||||||
|
model2:
|
||||||
|
cmd: path/to/cmd --port ${PORT}
|
||||||
|
`
|
||||||
|
_, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.ErrorContains(t, err, "conflicts with an existing model ID")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestConfig_SetParamsByIDAutoAliasConflictWithOtherModel(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
models:
|
||||||
|
model1:
|
||||||
|
cmd: path/to/cmd --port ${PORT}
|
||||||
|
filters:
|
||||||
|
setParamsByID:
|
||||||
|
"shared-alias":
|
||||||
|
reasoning_effort: high
|
||||||
|
model2:
|
||||||
|
cmd: path/to/cmd --port ${PORT}
|
||||||
|
filters:
|
||||||
|
setParamsByID:
|
||||||
|
"shared-alias":
|
||||||
|
reasoning_effort: low
|
||||||
|
`
|
||||||
|
_, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.ErrorContains(t, err, "duplicate alias")
|
||||||
|
}
|
||||||
|
|
||||||
func TestConfig_ModelFiltersWithSetParams(t *testing.T) {
|
func TestConfig_ModelFiltersWithSetParams(t *testing.T) {
|
||||||
content := `
|
content := `
|
||||||
models:
|
models:
|
||||||
|
|||||||
@@ -720,6 +720,17 @@ func (pm *ProxyManager) proxyInferenceHandler(c *gin.Context) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// setParamsByID: set params based on the requested model ID (runs after setParams, can override it)
|
||||||
|
setParamsByIDParams, setParamsByIDKeys := pm.config.Models[modelID].Filters.SanitizedSetParamsByID(requestedModel)
|
||||||
|
for _, key := range setParamsByIDKeys {
|
||||||
|
pm.proxyLogger.Debugf("<%s> setting param by id: %s", requestedModel, key)
|
||||||
|
bodyBytes, err = sjson.SetBytes(bodyBytes, key, setParamsByIDParams[key])
|
||||||
|
if err != nil {
|
||||||
|
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error setting parameter %s in request", key))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pm.proxyLogger.Debugf("ProxyManager using local Process for model: %s", requestedModel)
|
pm.proxyLogger.Debugf("ProxyManager using local Process for model: %s", requestedModel)
|
||||||
nextHandler = processGroup.ProxyRequest
|
nextHandler = processGroup.ProxyRequest
|
||||||
} else if pm.peerProxy != nil && pm.peerProxy.HasPeerModel(requestedModel) {
|
} else if pm.peerProxy != nil && pm.peerProxy.HasPeerModel(requestedModel) {
|
||||||
|
|||||||
@@ -14,12 +14,13 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
type Model struct {
|
type Model struct {
|
||||||
Id string `json:"id"`
|
Id string `json:"id"`
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
Description string `json:"description"`
|
Description string `json:"description"`
|
||||||
State string `json:"state"`
|
State string `json:"state"`
|
||||||
Unlisted bool `json:"unlisted"`
|
Unlisted bool `json:"unlisted"`
|
||||||
PeerID string `json:"peerID"`
|
PeerID string `json:"peerID"`
|
||||||
|
Aliases []string `json:"aliases,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func addApiHandlers(pm *ProxyManager) {
|
func addApiHandlers(pm *ProxyManager) {
|
||||||
@@ -83,6 +84,7 @@ func (pm *ProxyManager) getModelStatus() []Model {
|
|||||||
Description: pm.config.Models[modelID].Description,
|
Description: pm.config.Models[modelID].Description,
|
||||||
State: state,
|
State: state,
|
||||||
Unlisted: pm.config.Models[modelID].Unlisted,
|
Unlisted: pm.config.Models[modelID].Unlisted,
|
||||||
|
Aliases: pm.config.Models[modelID].Aliases,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1046,6 +1046,61 @@ func TestProxyManager_FiltersStripParams(t *testing.T) {
|
|||||||
// t.Logf("%v", response)
|
// t.Logf("%v", response)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestProxyManager_FiltersSetParamsByID(t *testing.T) {
|
||||||
|
// no explicit aliases — setParamsByID keys are auto-registered as aliases
|
||||||
|
configStr := strings.Replace(`
|
||||||
|
logLevel: error
|
||||||
|
models:
|
||||||
|
model1:
|
||||||
|
cmd: 'SRPATH --port ${PORT} --silent --respond model1'
|
||||||
|
proxy: "http://127.0.0.1:${PORT}"
|
||||||
|
filters:
|
||||||
|
setParams:
|
||||||
|
reasoning_effort: medium
|
||||||
|
setParamsByID:
|
||||||
|
"${MODEL_ID}:high":
|
||||||
|
reasoning_effort: high
|
||||||
|
"${MODEL_ID}:low":
|
||||||
|
reasoning_effort: low
|
||||||
|
`, "SRPATH", simpleResponderPath, -1)
|
||||||
|
|
||||||
|
cfg, err := config.LoadConfigFromReader(strings.NewReader(configStr))
|
||||||
|
if !assert.NoError(t, err, "invalid test configuration") {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
proxy := New(cfg)
|
||||||
|
defer proxy.StopProcesses(StopWaitForInflightRequest)
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
requestedModel string
|
||||||
|
wantEffort string
|
||||||
|
}{
|
||||||
|
// setParams applies, no setParamsByID match
|
||||||
|
{requestedModel: "model1", wantEffort: "medium"},
|
||||||
|
// setParamsByID overrides setParams
|
||||||
|
{requestedModel: "model1:high", wantEffort: "high"},
|
||||||
|
{requestedModel: "model1:low", wantEffort: "low"},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.requestedModel, func(t *testing.T) {
|
||||||
|
reqBody := fmt.Sprintf(`{"model":%q}`, tt.requestedModel)
|
||||||
|
req := httptest.NewRequest("POST", "/v1/chat/completions", bytes.NewBufferString(reqBody))
|
||||||
|
w := CreateTestResponseRecorder()
|
||||||
|
proxy.ServeHTTP(w, req)
|
||||||
|
assert.Equal(t, http.StatusOK, w.Code)
|
||||||
|
|
||||||
|
var response map[string]interface{}
|
||||||
|
assert.NoError(t, json.Unmarshal(w.Body.Bytes(), &response))
|
||||||
|
|
||||||
|
requestBody, _ := response["request_body"].(string)
|
||||||
|
gotEffort := gjson.Get(requestBody, "reasoning_effort").String()
|
||||||
|
assert.Equal(t, tt.wantEffort, gotEffort, "reasoning_effort mismatch for model %s", tt.requestedModel)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestProxyManager_HealthEndpoint(t *testing.T) {
|
func TestProxyManager_HealthEndpoint(t *testing.T) {
|
||||||
config := config.AddDefaultGroupToConfig(config.Config{
|
config := config.AddDefaultGroupToConfig(config.Config{
|
||||||
HealthCheckTimeout: 15,
|
HealthCheckTimeout: 15,
|
||||||
|
|||||||
@@ -165,6 +165,9 @@
|
|||||||
{#if model.description}
|
{#if model.description}
|
||||||
<p class={model.unlisted ? "text-opacity-70" : ""}><em>{model.description}</em></p>
|
<p class={model.unlisted ? "text-opacity-70" : ""}><em>{model.description}</em></p>
|
||||||
{/if}
|
{/if}
|
||||||
|
{#if model.aliases && model.aliases.length > 0}
|
||||||
|
<p class="text-xs text-txtsecondary">Aliases: {model.aliases.join(", ")}</p>
|
||||||
|
{/if}
|
||||||
</td>
|
</td>
|
||||||
<td class="w-12">
|
<td class="w-12">
|
||||||
{#if model.state === "stopped"}
|
{#if model.state === "stopped"}
|
||||||
|
|||||||
@@ -25,6 +25,11 @@
|
|||||||
<optgroup label="Local">
|
<optgroup label="Local">
|
||||||
{#each grouped.local as model (model.id)}
|
{#each grouped.local as model (model.id)}
|
||||||
<option value={model.id}>{model.id}</option>
|
<option value={model.id}>{model.id}</option>
|
||||||
|
{#if model.aliases}
|
||||||
|
{#each model.aliases as alias (alias)}
|
||||||
|
<option value={alias}> ↳ {alias}</option>
|
||||||
|
{/each}
|
||||||
|
{/if}
|
||||||
{/each}
|
{/each}
|
||||||
</optgroup>
|
</optgroup>
|
||||||
{/if}
|
{/if}
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ export interface Model {
|
|||||||
description: string;
|
description: string;
|
||||||
unlisted: boolean;
|
unlisted: boolean;
|
||||||
peerID: string;
|
peerID: string;
|
||||||
|
aliases?: string[];
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface Metrics {
|
export interface Metrics {
|
||||||
|
|||||||
Reference in New Issue
Block a user