proxy: implement setParamsByID filter (#535)

Add setParamsByID filter that applies different request parameters based
on the requested model ID, enabling per-alias behaviour for a single
loaded model.

- add SetParamsByID field to Filters struct and SanitizedSetParamsByID
method
- substitute ${MODEL_ID} and other macros in setParamsByID keys and
values
- validate no unknown macros remain in keys or values after substitution
- apply setParamsByID in proxyInferenceHandler after setParams (can
override it)
- update config-schema.json with setParamsByID definition
- update UI to show aliases and make them selectable in the Playground

closes #534
This commit is contained in:
Benson Wong
2026-02-19 22:21:10 -08:00
committed by GitHub
parent b45102bde8
commit 19fb5f35e9
12 changed files with 387 additions and 20 deletions
+32 -13
View File
@@ -126,7 +126,7 @@ apiKeys:
# - below are examples of the all the settings a model can have
models:
# keys are the model names used in API requests
"llama":
"gpt-oss-120b":
# macros: a dictionary of string substitutions specific to this model
# - optional, default: empty dictionary
# - macros defined here override macros defined in the global macros section
@@ -143,7 +143,7 @@ models:
cmd: |
# ${latest-llama} is a macro that is defined above
${latest-llama}
--model path/to/llama-8B-Q4_K_M.gguf
--model path/to/gpt-oss-120B.gguf
--ctx-size ${default_ctx}
--temperature ${temp}
@@ -151,13 +151,13 @@ models:
# - optional, default: empty string
# - if set, it will be used in the v1/models API response
# - if not set, it will be omitted in the JSON model record
name: "llama 3.1 8B"
name: "gpt-oss 120B"
# description: a description for the model
# - optional, default: empty string
# - if set, it will be used in the v1/models API response
# - if not set, it will be omitted in the JSON model record
description: "A small but capable model used for quick testing"
description: "A thinking model from OpenAI"
# env: define an array of environment variables to inject into cmd's environment
# - optional, default: empty array
@@ -172,14 +172,6 @@ models:
# - if you use a custom port in cmd this *must* be set
proxy: http://127.0.0.1:8999
# aliases: alternative model names that this model configuration is used for
# - optional, default: empty array
# - aliases must be unique globally
# - useful for impersonating a specific model
aliases:
- "gpt-4o-mini"
- "gpt-3.5-turbo"
# checkEndpoint: URL path to check if the server is ready
# - optional, default: /health
# - endpoint is expected to return an HTTP 200 response
@@ -197,7 +189,7 @@ models:
# - optional, default: ""
# - useful for when the upstream server expects a specific model name that
# is different from the model's ID
useModelName: "qwen:qwq"
useModelName: "openai/gpt-oss-120B"
# filters: a dictionary of filter settings
# - optional, default: empty dictionary
@@ -216,11 +208,38 @@ models:
# - useful for enforcing specific parameter values
# - protected params like "model" cannot be overridden
# - values can be strings, numbers, booleans, arrays, or objects
# - always runs for the model
setParams:
# Example: enforce specific sampling parameters
temperature: 0.7
top_p: 0.9
# setParamsByID: a dictionary of parameters to set based the model ID
# - optional, default: empty dictionary
# - combine with aliases to create variant behaviour without reloading the model
# - parameters are set in the request body JSON
# - run after setParams so it will override any settings
# - protected params like "model" cannot be overridden
# - values can be strings, numbers, booleans, arrays, or objects
# - model aliases will be automatically created for each key
setParamsByID:
"${MODEL_ID}":
chat_template_kwargs:
reasoning_effort: medium
"${MODEL_ID}:high":
chat_template_kwargs:
reasoning_effort: high
"${MODEL_ID}:low":
chat_template_kwargs:
reasoning_effort: low
# aliases: alternative model names that this model configuration is used for
# - optional, default: empty array
# - aliases must be unique globally
# - useful for impersonating a specific model
aliases:
- "gpt-4o-mini"
# metadata: a dictionary of arbitrary values that are included in /v1/models
# - optional, default: empty dictionary
# - while metadata can contains complex types it is recommended to keep it simple