Compare commits
15 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| c3c258a55d | |||
| 29a38fde0d | |||
| d569681daa | |||
| 24efdb76b1 | |||
| cc77139ff8 | |||
| 390a35bf93 | |||
| 181f71ca11 | |||
| 49546e2cf2 | |||
| 2c078964f4 | |||
| 175bb36fb1 | |||
| aedb640471 | |||
| 2f377f6dc6 | |||
| 64e4c79fc3 | |||
| 19fb5f35e9 | |||
| b45102bde8 |
@@ -29,7 +29,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
platform: [intel, cuda, vulkan, cpu, musa, rocm]
|
platform: [intel, cuda, cuda13, vulkan, cpu, musa, rocm]
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
|
|
||||||
# llama-swap
|
# llama-swap
|
||||||
|
|
||||||
Run multiple LLM models on your machine and hot-swap between them as needed. llama-swap works with any OpenAI API-compatible server, giving you the flexibility to switch models without restarting your applications.
|
Run multiple generative AI models on your machine and hot-swap between them on demand. llama-swap works with any OpenAI and Anthropic API compatible server and is used by thousands of people to power their local AI workflows.
|
||||||
|
|
||||||
Built in Go for performance and simplicity, llama-swap has zero dependencies and is incredibly easy to set up. Get started in minutes - just one binary and one configuration file.
|
Built in Go for performance and simplicity, llama-swap has zero dependencies and is incredibly easy to set up. Get started in minutes - just one binary and one configuration file.
|
||||||
|
|
||||||
@@ -48,13 +48,27 @@ Built in Go for performance and simplicity, llama-swap has zero dependencies and
|
|||||||
|
|
||||||
### Web UI
|
### Web UI
|
||||||
|
|
||||||
llama-swap includes a real time web interface for monitoring logs and controlling models:
|
llama-swap includes a real time web interface with a playground for testing out all sorts of local models:
|
||||||
|
|
||||||
<img width="1164" height="745" alt="image" src="https://github.com/user-attachments/assets/bacf3f9d-819f-430b-9ed2-1bfaa8d54579" />
|
<img width="1125" height="876" alt="image" src="https://github.com/user-attachments/assets/8ee41947-97af-463d-b0f0-8e9c478fac07" />
|
||||||
|
|
||||||
The Activity Page shows recent requests:
|
View detailed token metrics:
|
||||||
|
|
||||||
|
<img width="1111" height="515" alt="image" src="https://github.com/user-attachments/assets/64bfb280-d7a3-4126-971a-a128fd40410c" />
|
||||||
|
|
||||||
|
Inspect request and responses:
|
||||||
|
|
||||||
|
<img width="1111" height="720" alt="image" src="https://github.com/user-attachments/assets/24fe4aca-1448-4d7c-b9e8-a967589bda6c" />
|
||||||
|
|
||||||
|
Manually load and unload models:
|
||||||
|
|
||||||
|
<img width="1109" height="719" alt="image" src="https://github.com/user-attachments/assets/02b1e1f2-abd0-4050-84ae-facd66ff01c4" />
|
||||||
|
|
||||||
|
|
||||||
|
Real time log streaming:
|
||||||
|
|
||||||
|
<img width="1107" height="559" alt="image" src="https://github.com/user-attachments/assets/39669a10-cff2-409e-836a-5bad8bd0140c" />
|
||||||
|
|
||||||
<img width="1360" height="963" alt="image" src="https://github.com/user-attachments/assets/5f3edee6-d03a-4ae5-ae06-b20ac1f135bd" />
|
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
|
|||||||
+20
-5
@@ -48,6 +48,12 @@
|
|||||||
"default": 120,
|
"default": 120,
|
||||||
"description": "Number of seconds to wait for a model to be ready to serve requests."
|
"description": "Number of seconds to wait for a model to be ready to serve requests."
|
||||||
},
|
},
|
||||||
|
"globalTTL": {
|
||||||
|
"type": "integer",
|
||||||
|
"minimum": 0,
|
||||||
|
"default": 0,
|
||||||
|
"description": "Default TTL for all models in seconds, 0 means no TTL and models will never be automatically unloaded"
|
||||||
|
},
|
||||||
"logLevel": {
|
"logLevel": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"enum": [
|
"enum": [
|
||||||
@@ -177,9 +183,9 @@
|
|||||||
},
|
},
|
||||||
"ttl": {
|
"ttl": {
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"minimum": 0,
|
"minimum": -1,
|
||||||
"default": 0,
|
"default": -1,
|
||||||
"description": "Automatically unload the model after ttl seconds. 0 disables unloading. Must be >0 to enable."
|
"description": "Automatically unload the model after ttl seconds. -1 uses the global TTL value, 0 disables unloading. Must be >0 to enable."
|
||||||
},
|
},
|
||||||
"useModelName": {
|
"useModelName": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
@@ -200,11 +206,20 @@
|
|||||||
"additionalProperties": true,
|
"additionalProperties": true,
|
||||||
"default": {},
|
"default": {},
|
||||||
"description": "Dictionary of parameters to set/override in requests. Useful for enforcing specific parameter values. Protected params like 'model' cannot be overridden. Values can be strings, numbers, booleans, arrays, or objects."
|
"description": "Dictionary of parameters to set/override in requests. Useful for enforcing specific parameter values. Protected params like 'model' cannot be overridden. Values can be strings, numbers, booleans, arrays, or objects."
|
||||||
|
},
|
||||||
|
"setParamsByID": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": true
|
||||||
|
},
|
||||||
|
"default": {},
|
||||||
|
"description": "Dictionary mapping requested model IDs (or aliases) to parameters to set/override in requests. Applied after setParams and can override those values. Useful with aliases to vary behaviour depending on which alias the client used (e.g. different reasoning_effort per alias). Keys support ${MODEL_ID} macro substitution. Protected params like 'model' cannot be overridden."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
"default": {},
|
"default": {},
|
||||||
"description": "Dictionary of filter settings. Supports stripParams and setParams."
|
"description": "Dictionary of filter settings. Supports stripParams, setParams, and setParamsByID."
|
||||||
},
|
},
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
@@ -359,4 +374,4 @@
|
|||||||
"description": "A dictionary of remote peers and models they provide. Peers can be another llama-swap or any server that provides the /v1/ generative API endpoints supported by llama-swap."
|
"description": "A dictionary of remote peers and models they provide. Peers can be another llama-swap or any server that provides the /v1/ generative API endpoints supported by llama-swap."
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
+41
-15
@@ -75,6 +75,11 @@ sendLoadingState: true
|
|||||||
# all fields except for Id so chat UIs can use the alias equivalent to the original.
|
# all fields except for Id so chat UIs can use the alias equivalent to the original.
|
||||||
includeAliasesInList: false
|
includeAliasesInList: false
|
||||||
|
|
||||||
|
# globalTTL: the default TTL in seconds before unloading a model
|
||||||
|
# - optional, default: 0 (never automatically unload)
|
||||||
|
# - must be >= 0
|
||||||
|
globalTTL: 0
|
||||||
|
|
||||||
# macros: a dictionary of string substitutions
|
# macros: a dictionary of string substitutions
|
||||||
# - optional, default: empty dictionary
|
# - optional, default: empty dictionary
|
||||||
# - macros are reusable snippets
|
# - macros are reusable snippets
|
||||||
@@ -126,7 +131,7 @@ apiKeys:
|
|||||||
# - below are examples of all the settings a model can have
|
# - below are examples of all the settings a model can have
|
||||||
models:
|
models:
|
||||||
# keys are the model names used in API requests
|
# keys are the model names used in API requests
|
||||||
"llama":
|
"gpt-oss-120b":
|
||||||
# macros: a dictionary of string substitutions specific to this model
|
# macros: a dictionary of string substitutions specific to this model
|
||||||
# - optional, default: empty dictionary
|
# - optional, default: empty dictionary
|
||||||
# - macros defined here override macros defined in the global macros section
|
# - macros defined here override macros defined in the global macros section
|
||||||
@@ -143,7 +148,7 @@ models:
|
|||||||
cmd: |
|
cmd: |
|
||||||
# ${latest-llama} is a macro that is defined above
|
# ${latest-llama} is a macro that is defined above
|
||||||
${latest-llama}
|
${latest-llama}
|
||||||
--model path/to/llama-8B-Q4_K_M.gguf
|
--model path/to/gpt-oss-120B.gguf
|
||||||
--ctx-size ${default_ctx}
|
--ctx-size ${default_ctx}
|
||||||
--temperature ${temp}
|
--temperature ${temp}
|
||||||
|
|
||||||
@@ -151,13 +156,13 @@ models:
|
|||||||
# - optional, default: empty string
|
# - optional, default: empty string
|
||||||
# - if set, it will be used in the v1/models API response
|
# - if set, it will be used in the v1/models API response
|
||||||
# - if not set, it will be omitted in the JSON model record
|
# - if not set, it will be omitted in the JSON model record
|
||||||
name: "llama 3.1 8B"
|
name: "gpt-oss 120B"
|
||||||
|
|
||||||
# description: a description for the model
|
# description: a description for the model
|
||||||
# - optional, default: empty string
|
# - optional, default: empty string
|
||||||
# - if set, it will be used in the v1/models API response
|
# - if set, it will be used in the v1/models API response
|
||||||
# - if not set, it will be omitted in the JSON model record
|
# - if not set, it will be omitted in the JSON model record
|
||||||
description: "A small but capable model used for quick testing"
|
description: "A thinking model from OpenAI"
|
||||||
|
|
||||||
# env: define an array of environment variables to inject into cmd's environment
|
# env: define an array of environment variables to inject into cmd's environment
|
||||||
# - optional, default: empty array
|
# - optional, default: empty array
|
||||||
@@ -172,14 +177,6 @@ models:
|
|||||||
# - if you use a custom port in cmd this *must* be set
|
# - if you use a custom port in cmd this *must* be set
|
||||||
proxy: http://127.0.0.1:8999
|
proxy: http://127.0.0.1:8999
|
||||||
|
|
||||||
# aliases: alternative model names that this model configuration is used for
|
|
||||||
# - optional, default: empty array
|
|
||||||
# - aliases must be unique globally
|
|
||||||
# - useful for impersonating a specific model
|
|
||||||
aliases:
|
|
||||||
- "gpt-4o-mini"
|
|
||||||
- "gpt-3.5-turbo"
|
|
||||||
|
|
||||||
# checkEndpoint: URL path to check if the server is ready
|
# checkEndpoint: URL path to check if the server is ready
|
||||||
# - optional, default: /health
|
# - optional, default: /health
|
||||||
# - endpoint is expected to return an HTTP 200 response
|
# - endpoint is expected to return an HTTP 200 response
|
||||||
@@ -188,8 +185,10 @@ models:
|
|||||||
checkEndpoint: /custom-endpoint
|
checkEndpoint: /custom-endpoint
|
||||||
|
|
||||||
# ttl: automatically unload the model after ttl seconds
|
# ttl: automatically unload the model after ttl seconds
|
||||||
# - optional, default: 0
|
# - optional, default: -1 (use global default)
|
||||||
# - ttl values must be a value greater than 0
|
# - ttl values must be -1 or a value greater than or equal to 0
|
||||||
|
# - a ttl of -1 will use the global TTL value as the default
|
||||||
|
# - a ttl of 0 will mean never unload
|
||||||
# - a value of 0 disables automatic unloading of the model
|
# - a value of 0 disables automatic unloading of the model
|
||||||
ttl: 60
|
ttl: 60
|
||||||
|
|
||||||
@@ -197,7 +196,7 @@ models:
|
|||||||
# - optional, default: ""
|
# - optional, default: ""
|
||||||
# - useful for when the upstream server expects a specific model name that
|
# - useful for when the upstream server expects a specific model name that
|
||||||
# is different from the model's ID
|
# is different from the model's ID
|
||||||
useModelName: "qwen:qwq"
|
useModelName: "openai/gpt-oss-120B"
|
||||||
|
|
||||||
# filters: a dictionary of filter settings
|
# filters: a dictionary of filter settings
|
||||||
# - optional, default: empty dictionary
|
# - optional, default: empty dictionary
|
||||||
@@ -216,11 +215,38 @@ models:
|
|||||||
# - useful for enforcing specific parameter values
|
# - useful for enforcing specific parameter values
|
||||||
# - protected params like "model" cannot be overridden
|
# - protected params like "model" cannot be overridden
|
||||||
# - values can be strings, numbers, booleans, arrays, or objects
|
# - values can be strings, numbers, booleans, arrays, or objects
|
||||||
|
# - always runs for the model
|
||||||
setParams:
|
setParams:
|
||||||
# Example: enforce specific sampling parameters
|
# Example: enforce specific sampling parameters
|
||||||
temperature: 0.7
|
temperature: 0.7
|
||||||
top_p: 0.9
|
top_p: 0.9
|
||||||
|
|
||||||
|
# setParamsByID: a dictionary of parameters to set based on the requested model ID
|
||||||
|
# - optional, default: empty dictionary
|
||||||
|
# - combine with aliases to create variant behaviour without reloading the model
|
||||||
|
# - parameters are set in the request body JSON
|
||||||
|
# - run after setParams so it will override any settings
|
||||||
|
# - protected params like "model" cannot be overridden
|
||||||
|
# - values can be strings, numbers, booleans, arrays, or objects
|
||||||
|
# - model aliases will be automatically created for each key
|
||||||
|
setParamsByID:
|
||||||
|
"${MODEL_ID}":
|
||||||
|
chat_template_kwargs:
|
||||||
|
reasoning_effort: medium
|
||||||
|
"${MODEL_ID}:high":
|
||||||
|
chat_template_kwargs:
|
||||||
|
reasoning_effort: high
|
||||||
|
"${MODEL_ID}:low":
|
||||||
|
chat_template_kwargs:
|
||||||
|
reasoning_effort: low
|
||||||
|
|
||||||
|
# aliases: alternative model names that this model configuration is used for
|
||||||
|
# - optional, default: empty array
|
||||||
|
# - aliases must be unique globally
|
||||||
|
# - useful for impersonating a specific model
|
||||||
|
aliases:
|
||||||
|
- "gpt-4o-mini"
|
||||||
|
|
||||||
# metadata: a dictionary of arbitrary values that are included in /v1/models
|
# metadata: a dictionary of arbitrary values that are included in /v1/models
|
||||||
# - optional, default: empty dictionary
|
# - optional, default: empty dictionary
|
||||||
# - while metadata can contain complex types it is recommended to keep it simple
|
# - while metadata can contain complex types it is recommended to keep it simple
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ ARCH=$1
|
|||||||
PUSH_IMAGES=${2:-false}
|
PUSH_IMAGES=${2:-false}
|
||||||
|
|
||||||
# List of allowed architectures
|
# List of allowed architectures
|
||||||
ALLOWED_ARCHS=("intel" "vulkan" "musa" "cuda" "cpu" "rocm")
|
ALLOWED_ARCHS=("intel" "vulkan" "musa" "cuda" "cuda13" "cpu" "rocm")
|
||||||
|
|
||||||
# Check if ARCH is in the allowed list
|
# Check if ARCH is in the allowed list
|
||||||
if [[ ! " ${ALLOWED_ARCHS[@]} " =~ " ${ARCH} " ]]; then
|
if [[ ! " ${ALLOWED_ARCHS[@]} " =~ " ${ARCH} " ]]; then
|
||||||
|
|||||||
@@ -124,6 +124,7 @@ type Config struct {
|
|||||||
LogToStdout string `yaml:"logToStdout"`
|
LogToStdout string `yaml:"logToStdout"`
|
||||||
MetricsMaxInMemory int `yaml:"metricsMaxInMemory"`
|
MetricsMaxInMemory int `yaml:"metricsMaxInMemory"`
|
||||||
CaptureBuffer int `yaml:"captureBuffer"`
|
CaptureBuffer int `yaml:"captureBuffer"`
|
||||||
|
GlobalTTL int `yaml:"globalTTL"`
|
||||||
Models map[string]ModelConfig `yaml:"models"` /* key is model ID */
|
Models map[string]ModelConfig `yaml:"models"` /* key is model ID */
|
||||||
Profiles map[string][]string `yaml:"profiles"`
|
Profiles map[string][]string `yaml:"profiles"`
|
||||||
Groups map[string]GroupConfig `yaml:"groups"` /* key is group ID */
|
Groups map[string]GroupConfig `yaml:"groups"` /* key is group ID */
|
||||||
@@ -203,6 +204,7 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
LogToStdout: LogToStdoutProxy,
|
LogToStdout: LogToStdoutProxy,
|
||||||
MetricsMaxInMemory: 1000,
|
MetricsMaxInMemory: 1000,
|
||||||
CaptureBuffer: 5,
|
CaptureBuffer: 5,
|
||||||
|
GlobalTTL: 0,
|
||||||
}
|
}
|
||||||
if err = yaml.Unmarshal([]byte(yamlStr), &config); err != nil {
|
if err = yaml.Unmarshal([]byte(yamlStr), &config); err != nil {
|
||||||
return Config{}, err
|
return Config{}, err
|
||||||
@@ -216,6 +218,10 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
return Config{}, fmt.Errorf("startPort must be greater than 1")
|
return Config{}, fmt.Errorf("startPort must be greater than 1")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if config.GlobalTTL < 0 {
|
||||||
|
return Config{}, fmt.Errorf("globalTTL must be >= 0")
|
||||||
|
}
|
||||||
|
|
||||||
switch config.LogToStdout {
|
switch config.LogToStdout {
|
||||||
case LogToStdoutProxy, LogToStdoutUpstream, LogToStdoutBoth, LogToStdoutNone:
|
case LogToStdoutProxy, LogToStdoutUpstream, LogToStdoutBoth, LogToStdoutNone:
|
||||||
default:
|
default:
|
||||||
@@ -255,6 +261,15 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
modelConfig.Cmd = StripComments(modelConfig.Cmd)
|
modelConfig.Cmd = StripComments(modelConfig.Cmd)
|
||||||
modelConfig.CmdStop = StripComments(modelConfig.CmdStop)
|
modelConfig.CmdStop = StripComments(modelConfig.CmdStop)
|
||||||
|
|
||||||
|
// set model TTL to globalTTL if it is still the default (unset) value
|
||||||
|
if modelConfig.UnloadAfter == MODEL_CONFIG_DEFAULT_TTL {
|
||||||
|
modelConfig.UnloadAfter = config.GlobalTTL
|
||||||
|
}
|
||||||
|
|
||||||
|
if modelConfig.UnloadAfter < 0 {
|
||||||
|
return Config{}, fmt.Errorf("model %s: invalid TTL value %d", modelId, modelConfig.UnloadAfter)
|
||||||
|
}
|
||||||
|
|
||||||
// Validate model macros
|
// Validate model macros
|
||||||
for _, macro := range modelConfig.Macros {
|
for _, macro := range modelConfig.Macros {
|
||||||
if err = validateMacro(macro.Name, macro.Value); err != nil {
|
if err = validateMacro(macro.Name, macro.Value); err != nil {
|
||||||
@@ -293,6 +308,26 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
modelConfig.Proxy = strings.ReplaceAll(modelConfig.Proxy, macroSlug, macroStr)
|
modelConfig.Proxy = strings.ReplaceAll(modelConfig.Proxy, macroSlug, macroStr)
|
||||||
modelConfig.CheckEndpoint = strings.ReplaceAll(modelConfig.CheckEndpoint, macroSlug, macroStr)
|
modelConfig.CheckEndpoint = strings.ReplaceAll(modelConfig.CheckEndpoint, macroSlug, macroStr)
|
||||||
modelConfig.Filters.StripParams = strings.ReplaceAll(modelConfig.Filters.StripParams, macroSlug, macroStr)
|
modelConfig.Filters.StripParams = strings.ReplaceAll(modelConfig.Filters.StripParams, macroSlug, macroStr)
|
||||||
|
modelConfig.Name = strings.ReplaceAll(modelConfig.Name, macroSlug, macroStr)
|
||||||
|
modelConfig.Description = strings.ReplaceAll(modelConfig.Description, macroSlug, macroStr)
|
||||||
|
|
||||||
|
// Substitute macros in SetParamsByID keys and values
|
||||||
|
if len(modelConfig.Filters.SetParamsByID) > 0 {
|
||||||
|
newSetParamsByID := make(map[string]map[string]any, len(modelConfig.Filters.SetParamsByID))
|
||||||
|
for key, paramMap := range modelConfig.Filters.SetParamsByID {
|
||||||
|
newKey := strings.ReplaceAll(key, macroSlug, macroStr)
|
||||||
|
newValAny, err := substituteMacroInValue(any(paramMap), entry.Name, entry.Value)
|
||||||
|
if err != nil {
|
||||||
|
return Config{}, fmt.Errorf("model %s filters.setParamsByID: %s", modelId, err.Error())
|
||||||
|
}
|
||||||
|
newParamMap, ok := newValAny.(map[string]any)
|
||||||
|
if !ok {
|
||||||
|
return Config{}, fmt.Errorf("model %s filters.setParamsByID: unexpected type after macro substitution", modelId)
|
||||||
|
}
|
||||||
|
newSetParamsByID[newKey] = newParamMap
|
||||||
|
}
|
||||||
|
modelConfig.Filters.SetParamsByID = newSetParamsByID
|
||||||
|
}
|
||||||
|
|
||||||
// Substitute in metadata (type-preserving)
|
// Substitute in metadata (type-preserving)
|
||||||
if len(modelConfig.Metadata) > 0 {
|
if len(modelConfig.Metadata) > 0 {
|
||||||
@@ -318,6 +353,8 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
modelConfig.Cmd = strings.ReplaceAll(modelConfig.Cmd, macroSlug, macroStr)
|
modelConfig.Cmd = strings.ReplaceAll(modelConfig.Cmd, macroSlug, macroStr)
|
||||||
modelConfig.CmdStop = strings.ReplaceAll(modelConfig.CmdStop, macroSlug, macroStr)
|
modelConfig.CmdStop = strings.ReplaceAll(modelConfig.CmdStop, macroSlug, macroStr)
|
||||||
modelConfig.Proxy = strings.ReplaceAll(modelConfig.Proxy, macroSlug, macroStr)
|
modelConfig.Proxy = strings.ReplaceAll(modelConfig.Proxy, macroSlug, macroStr)
|
||||||
|
modelConfig.Name = strings.ReplaceAll(modelConfig.Name, macroSlug, macroStr)
|
||||||
|
modelConfig.Description = strings.ReplaceAll(modelConfig.Description, macroSlug, macroStr)
|
||||||
|
|
||||||
if len(modelConfig.Metadata) > 0 {
|
if len(modelConfig.Metadata) > 0 {
|
||||||
result, err := substituteMacroInValue(modelConfig.Metadata, "PORT", nextPort)
|
result, err := substituteMacroInValue(modelConfig.Metadata, "PORT", nextPort)
|
||||||
@@ -337,6 +374,8 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
"proxy": modelConfig.Proxy,
|
"proxy": modelConfig.Proxy,
|
||||||
"checkEndpoint": modelConfig.CheckEndpoint,
|
"checkEndpoint": modelConfig.CheckEndpoint,
|
||||||
"filters.stripParams": modelConfig.Filters.StripParams,
|
"filters.stripParams": modelConfig.Filters.StripParams,
|
||||||
|
"name": modelConfig.Name,
|
||||||
|
"description": modelConfig.Description,
|
||||||
}
|
}
|
||||||
|
|
||||||
for fieldName, fieldValue := range fieldMap {
|
for fieldName, fieldValue := range fieldMap {
|
||||||
@@ -359,6 +398,34 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Validate SetParamsByID keys and values
|
||||||
|
for key, paramMap := range modelConfig.Filters.SetParamsByID {
|
||||||
|
if matches := macroPatternRegex.FindAllStringSubmatch(key, -1); len(matches) > 0 {
|
||||||
|
return Config{}, fmt.Errorf("unknown macro '${%s}' found in model %s filters.setParamsByID key", matches[0][1], modelId)
|
||||||
|
}
|
||||||
|
if err := validateNestedForUnknownMacros(any(paramMap), fmt.Sprintf("model %s filters.setParamsByID[%s]", modelId, key)); err != nil {
|
||||||
|
return Config{}, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Auto-register setParamsByID keys as aliases (skip the model's own ID)
|
||||||
|
for key := range modelConfig.Filters.SetParamsByID {
|
||||||
|
if key == modelId {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if _, exists := config.Models[key]; exists {
|
||||||
|
return Config{}, fmt.Errorf("model %s filters.setParamsByID: key '%s' conflicts with an existing model ID", modelId, key)
|
||||||
|
}
|
||||||
|
if existingModel, exists := config.aliases[key]; exists {
|
||||||
|
if existingModel != modelId {
|
||||||
|
return Config{}, fmt.Errorf("duplicate alias '%s' in model %s filters.setParamsByID, already used by model %s", key, modelId, existingModel)
|
||||||
|
}
|
||||||
|
continue // already registered as explicit alias for this model
|
||||||
|
}
|
||||||
|
config.aliases[key] = modelId
|
||||||
|
modelConfig.Aliases = append(modelConfig.Aliases, key)
|
||||||
|
}
|
||||||
|
|
||||||
if _, err := url.Parse(modelConfig.Proxy); err != nil {
|
if _, err := url.Parse(modelConfig.Proxy); err != nil {
|
||||||
return Config{}, fmt.Errorf("model %s: invalid proxy URL: %w", modelId, err)
|
return Config{}, fmt.Errorf("model %s: invalid proxy URL: %w", modelId, err)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -848,6 +848,71 @@ func TestConfig_APIKeys_EnvMacros(t *testing.T) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestConfig_GlobalTTL(t *testing.T) {
|
||||||
|
t.Run("globalTTL sets default for models", func(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
globalTTL: 300
|
||||||
|
models:
|
||||||
|
model1:
|
||||||
|
cmd: server --port ${PORT}
|
||||||
|
`
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, 300, config.GlobalTTL)
|
||||||
|
assert.Equal(t, 300, config.Models["model1"].UnloadAfter)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("model ttl=0 overrides globalTTL", func(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
globalTTL: 300
|
||||||
|
models:
|
||||||
|
model1:
|
||||||
|
cmd: server --port ${PORT}
|
||||||
|
ttl: 0
|
||||||
|
`
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, 0, config.Models["model1"].UnloadAfter)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("model explicit ttl overrides globalTTL", func(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
globalTTL: 300
|
||||||
|
models:
|
||||||
|
model1:
|
||||||
|
cmd: server --port ${PORT}
|
||||||
|
ttl: 600
|
||||||
|
`
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, 600, config.Models["model1"].UnloadAfter)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("globalTTL defaults to 0", func(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
models:
|
||||||
|
model1:
|
||||||
|
cmd: server --port ${PORT}
|
||||||
|
`
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, 0, config.GlobalTTL)
|
||||||
|
assert.Equal(t, 0, config.Models["model1"].UnloadAfter)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("negative globalTTL rejected", func(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
globalTTL: -1
|
||||||
|
models:
|
||||||
|
model1:
|
||||||
|
cmd: server --port ${PORT}
|
||||||
|
`
|
||||||
|
_, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.Error(t, err)
|
||||||
|
assert.Contains(t, err.Error(), "globalTTL must be >= 0")
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
func TestConfig_EnvMacros(t *testing.T) {
|
func TestConfig_EnvMacros(t *testing.T) {
|
||||||
t.Run("basic env substitution in cmd", func(t *testing.T) {
|
t.Run("basic env substitution in cmd", func(t *testing.T) {
|
||||||
t.Setenv("TEST_MODEL_PATH", "/opt/models")
|
t.Setenv("TEST_MODEL_PATH", "/opt/models")
|
||||||
|
|||||||
@@ -20,6 +20,12 @@ type Filters struct {
|
|||||||
// SetParams is a dictionary of parameters to set/override in requests
|
// SetParams is a dictionary of parameters to set/override in requests
|
||||||
// Protected params (like "model") cannot be set
|
// Protected params (like "model") cannot be set
|
||||||
SetParams map[string]any `yaml:"setParams"`
|
SetParams map[string]any `yaml:"setParams"`
|
||||||
|
|
||||||
|
// SetParamsByID maps requested model IDs to parameters to set/override in requests.
|
||||||
|
// Useful with aliases: a single loaded model can behave differently depending on
|
||||||
|
// which alias the client used. Applied after SetParams, so it can override those values.
|
||||||
|
// Protected params (like "model") cannot be set.
|
||||||
|
SetParamsByID map[string]map[string]any `yaml:"setParamsByID"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// SanitizedStripParams returns a sorted list of parameters to strip,
|
// SanitizedStripParams returns a sorted list of parameters to strip,
|
||||||
@@ -51,6 +57,33 @@ func (f Filters) SanitizedStripParams() []string {
|
|||||||
return cleaned
|
return cleaned
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SanitizedSetParamsByID returns the params to set for the given requestedModelID,
|
||||||
|
// with protected params removed and keys sorted for consistent iteration order.
|
||||||
|
// Returns nil if the ID has no entry or all its params are protected.
|
||||||
|
func (f Filters) SanitizedSetParamsByID(requestedModelID string) (map[string]any, []string) {
|
||||||
|
if len(f.SetParamsByID) == 0 {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
params, found := f.SetParamsByID[requestedModelID]
|
||||||
|
if !found || len(params) == 0 {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
result := make(map[string]any, len(params))
|
||||||
|
keys := make([]string, 0, len(params))
|
||||||
|
for key, value := range params {
|
||||||
|
if slices.Contains(ProtectedParams, key) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
result[key] = value
|
||||||
|
keys = append(keys, key)
|
||||||
|
}
|
||||||
|
sort.Strings(keys)
|
||||||
|
if len(result) == 0 {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
return result, keys
|
||||||
|
}
|
||||||
|
|
||||||
// SanitizedSetParams returns a copy of SetParams with protected params removed
|
// SanitizedSetParams returns a copy of SetParams with protected params removed
|
||||||
// and keys sorted for consistent iteration order
|
// and keys sorted for consistent iteration order
|
||||||
func (f Filters) SanitizedSetParams() (map[string]any, []string) {
|
func (f Filters) SanitizedSetParams() (map[string]any, []string) {
|
||||||
|
|||||||
@@ -162,6 +162,123 @@ func TestFilters_SanitizedSetParams(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestFilters_SanitizedSetParamsByID(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
setParamsByID map[string]map[string]any
|
||||||
|
requestedModelID string
|
||||||
|
wantParams map[string]any
|
||||||
|
wantKeys []string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "empty SetParamsByID returns nil",
|
||||||
|
setParamsByID: nil,
|
||||||
|
requestedModelID: "model1",
|
||||||
|
wantParams: nil,
|
||||||
|
wantKeys: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "empty map returns nil",
|
||||||
|
setParamsByID: map[string]map[string]any{},
|
||||||
|
requestedModelID: "model1",
|
||||||
|
wantParams: nil,
|
||||||
|
wantKeys: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "non-matching model ID returns nil",
|
||||||
|
setParamsByID: map[string]map[string]any{
|
||||||
|
"model2": {"temperature": 0.9},
|
||||||
|
},
|
||||||
|
requestedModelID: "model1",
|
||||||
|
wantParams: nil,
|
||||||
|
wantKeys: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "matching model ID returns correct params",
|
||||||
|
setParamsByID: map[string]map[string]any{
|
||||||
|
"model1": {"temperature": 0.7, "top_p": 0.9},
|
||||||
|
"model2": {"temperature": 0.5},
|
||||||
|
},
|
||||||
|
requestedModelID: "model1",
|
||||||
|
wantParams: map[string]any{
|
||||||
|
"temperature": 0.7,
|
||||||
|
"top_p": 0.9,
|
||||||
|
},
|
||||||
|
wantKeys: []string{"temperature", "top_p"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "protected param model is filtered out",
|
||||||
|
setParamsByID: map[string]map[string]any{
|
||||||
|
"model1": {
|
||||||
|
"model": "should-be-filtered",
|
||||||
|
"temperature": 0.7,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
requestedModelID: "model1",
|
||||||
|
wantParams: map[string]any{
|
||||||
|
"temperature": 0.7,
|
||||||
|
},
|
||||||
|
wantKeys: []string{"temperature"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "only protected param returns nil",
|
||||||
|
setParamsByID: map[string]map[string]any{
|
||||||
|
"model1": {
|
||||||
|
"model": "should-be-filtered",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
requestedModelID: "model1",
|
||||||
|
wantParams: nil,
|
||||||
|
wantKeys: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "keys are sorted",
|
||||||
|
setParamsByID: map[string]map[string]any{
|
||||||
|
"model1": {
|
||||||
|
"z_param": "z",
|
||||||
|
"a_param": "a",
|
||||||
|
"m_param": "m",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
requestedModelID: "model1",
|
||||||
|
wantParams: map[string]any{
|
||||||
|
"z_param": "z",
|
||||||
|
"a_param": "a",
|
||||||
|
"m_param": "m",
|
||||||
|
},
|
||||||
|
wantKeys: []string{"a_param", "m_param", "z_param"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "alias style key lookup",
|
||||||
|
setParamsByID: map[string]map[string]any{
|
||||||
|
"model1:high": {"reasoning_effort": "high"},
|
||||||
|
"model1:low": {"reasoning_effort": "low"},
|
||||||
|
},
|
||||||
|
requestedModelID: "model1:high",
|
||||||
|
wantParams: map[string]any{
|
||||||
|
"reasoning_effort": "high",
|
||||||
|
},
|
||||||
|
wantKeys: []string{"reasoning_effort"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
f := Filters{SetParamsByID: tt.setParamsByID}
|
||||||
|
gotParams, gotKeys := f.SanitizedSetParamsByID(tt.requestedModelID)
|
||||||
|
|
||||||
|
if tt.wantParams == nil {
|
||||||
|
assert.Nil(t, gotParams)
|
||||||
|
assert.Nil(t, gotKeys)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
assert.Equal(t, tt.wantKeys, gotKeys)
|
||||||
|
assert.Equal(t, tt.wantParams, gotParams)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestProtectedParams(t *testing.T) {
|
func TestProtectedParams(t *testing.T) {
|
||||||
// Verify that "model" is protected
|
// Verify that "model" is protected
|
||||||
assert.Contains(t, ProtectedParams, "model")
|
assert.Contains(t, ProtectedParams, "model")
|
||||||
|
|||||||
@@ -104,6 +104,62 @@ models:
|
|||||||
assert.Contains(t, err.Error(), "self-reference")
|
assert.Contains(t, err.Error(), "self-reference")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Test macro substitution in name and description fields
|
||||||
|
func TestConfig_MacroInNameAndDescription(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
startPort: 10000
|
||||||
|
macros:
|
||||||
|
"VARIANT": "Q4_K_M"
|
||||||
|
"FAMILY": "llama"
|
||||||
|
|
||||||
|
models:
|
||||||
|
my-model:
|
||||||
|
cmd: echo ok
|
||||||
|
proxy: http://localhost:8080
|
||||||
|
name: "${FAMILY} ${VARIANT}"
|
||||||
|
description: "A ${FAMILY} model in ${VARIANT} format"
|
||||||
|
`
|
||||||
|
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, "llama Q4_K_M", config.Models["my-model"].Name)
|
||||||
|
assert.Equal(t, "A llama model in Q4_K_M format", config.Models["my-model"].Description)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test MODEL_ID macro in name and description fields
|
||||||
|
func TestConfig_ModelIDInNameAndDescription(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
startPort: 10000
|
||||||
|
models:
|
||||||
|
llama-3b:
|
||||||
|
cmd: echo ok
|
||||||
|
proxy: http://localhost:8080
|
||||||
|
name: "Model: ${MODEL_ID}"
|
||||||
|
description: "Running ${MODEL_ID}"
|
||||||
|
`
|
||||||
|
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, "Model: llama-3b", config.Models["llama-3b"].Name)
|
||||||
|
assert.Equal(t, "Running llama-3b", config.Models["llama-3b"].Description)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test unknown macro in name or description returns an error
|
||||||
|
func TestConfig_UnknownMacroInNameDescription(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
startPort: 10000
|
||||||
|
models:
|
||||||
|
test:
|
||||||
|
cmd: echo ok
|
||||||
|
proxy: http://localhost:8080
|
||||||
|
name: "Model ${UNDEFINED}"
|
||||||
|
`
|
||||||
|
|
||||||
|
_, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.Error(t, err)
|
||||||
|
assert.Contains(t, err.Error(), "UNDEFINED")
|
||||||
|
}
|
||||||
|
|
||||||
// Test undefined macro reference error
|
// Test undefined macro reference error
|
||||||
func TestConfig_UndefinedMacroReference(t *testing.T) {
|
func TestConfig_UndefinedMacroReference(t *testing.T) {
|
||||||
content := `
|
content := `
|
||||||
|
|||||||
@@ -5,6 +5,10 @@ import (
|
|||||||
"runtime"
|
"runtime"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
MODEL_CONFIG_DEFAULT_TTL = -1
|
||||||
|
)
|
||||||
|
|
||||||
type ModelConfig struct {
|
type ModelConfig struct {
|
||||||
Cmd string `yaml:"cmd"`
|
Cmd string `yaml:"cmd"`
|
||||||
CmdStop string `yaml:"cmdStop"`
|
CmdStop string `yaml:"cmdStop"`
|
||||||
@@ -47,7 +51,7 @@ func (m *ModelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
|||||||
Aliases: []string{},
|
Aliases: []string{},
|
||||||
Env: []string{},
|
Env: []string{},
|
||||||
CheckEndpoint: "/health",
|
CheckEndpoint: "/health",
|
||||||
UnloadAfter: 0,
|
UnloadAfter: MODEL_CONFIG_DEFAULT_TTL, // use GlobalTTL
|
||||||
Unlisted: false,
|
Unlisted: false,
|
||||||
UseModelName: "",
|
UseModelName: "",
|
||||||
ConcurrencyLimit: 0,
|
ConcurrencyLimit: 0,
|
||||||
|
|||||||
@@ -73,6 +73,72 @@ models:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestConfig_SetParamsByIDAutoAlias(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
models:
|
||||||
|
model1:
|
||||||
|
cmd: path/to/cmd --port ${PORT}
|
||||||
|
filters:
|
||||||
|
setParamsByID:
|
||||||
|
"${MODEL_ID}:high":
|
||||||
|
reasoning_effort: high
|
||||||
|
"${MODEL_ID}:low":
|
||||||
|
reasoning_effort: low
|
||||||
|
`
|
||||||
|
cfg, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
// Keys (other than the model's own ID) should be registered as aliases
|
||||||
|
realName, found := cfg.RealModelName("model1:high")
|
||||||
|
assert.True(t, found, "model1:high should be an auto-registered alias")
|
||||||
|
assert.Equal(t, "model1", realName)
|
||||||
|
|
||||||
|
realName, found = cfg.RealModelName("model1:low")
|
||||||
|
assert.True(t, found, "model1:low should be an auto-registered alias")
|
||||||
|
assert.Equal(t, "model1", realName)
|
||||||
|
|
||||||
|
// Auto-aliases should also appear in modelConfig.Aliases
|
||||||
|
aliases := cfg.Models["model1"].Aliases
|
||||||
|
assert.Contains(t, aliases, "model1:high")
|
||||||
|
assert.Contains(t, aliases, "model1:low")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestConfig_SetParamsByIDAutoAliasConflictWithModelID(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
models:
|
||||||
|
model1:
|
||||||
|
cmd: path/to/cmd --port ${PORT}
|
||||||
|
filters:
|
||||||
|
setParamsByID:
|
||||||
|
model2:
|
||||||
|
reasoning_effort: high
|
||||||
|
model2:
|
||||||
|
cmd: path/to/cmd --port ${PORT}
|
||||||
|
`
|
||||||
|
_, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.ErrorContains(t, err, "conflicts with an existing model ID")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestConfig_SetParamsByIDAutoAliasConflictWithOtherModel(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
models:
|
||||||
|
model1:
|
||||||
|
cmd: path/to/cmd --port ${PORT}
|
||||||
|
filters:
|
||||||
|
setParamsByID:
|
||||||
|
"shared-alias":
|
||||||
|
reasoning_effort: high
|
||||||
|
model2:
|
||||||
|
cmd: path/to/cmd --port ${PORT}
|
||||||
|
filters:
|
||||||
|
setParamsByID:
|
||||||
|
"shared-alias":
|
||||||
|
reasoning_effort: low
|
||||||
|
`
|
||||||
|
_, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.ErrorContains(t, err, "duplicate alias")
|
||||||
|
}
|
||||||
|
|
||||||
func TestConfig_ModelFiltersWithSetParams(t *testing.T) {
|
func TestConfig_ModelFiltersWithSetParams(t *testing.T) {
|
||||||
content := `
|
content := `
|
||||||
models:
|
models:
|
||||||
|
|||||||
@@ -350,6 +350,11 @@ func processStreamingResponse(modelID string, start time.Time, body []byte) (Tok
|
|||||||
usage := parsed.Get("usage")
|
usage := parsed.Get("usage")
|
||||||
timings := parsed.Get("timings")
|
timings := parsed.Get("timings")
|
||||||
|
|
||||||
|
// v1/responses format nests usage under response.usage
|
||||||
|
if !usage.Exists() {
|
||||||
|
usage = parsed.Get("response.usage")
|
||||||
|
}
|
||||||
|
|
||||||
if usage.Exists() || timings.Exists() {
|
if usage.Exists() || timings.Exists() {
|
||||||
return parseMetrics(modelID, start, usage, timings)
|
return parseMetrics(modelID, start, usage, timings)
|
||||||
}
|
}
|
||||||
@@ -503,9 +508,9 @@ func filterAcceptEncoding(acceptEncoding string) string {
|
|||||||
supported := map[string]bool{"gzip": true, "deflate": true}
|
supported := map[string]bool{"gzip": true, "deflate": true}
|
||||||
var filtered []string
|
var filtered []string
|
||||||
|
|
||||||
for _, part := range strings.Split(acceptEncoding, ",") {
|
for part := range strings.SplitSeq(acceptEncoding, ",") {
|
||||||
// Parse encoding and optional quality value (e.g., "gzip;q=1.0")
|
// Parse encoding and optional quality value (e.g., "gzip;q=1.0")
|
||||||
encoding := strings.TrimSpace(strings.Split(part, ";")[0])
|
encoding, _, _ := strings.Cut(strings.TrimSpace(part), ";")
|
||||||
if supported[strings.ToLower(encoding)] {
|
if supported[strings.ToLower(encoding)] {
|
||||||
filtered = append(filtered, strings.TrimSpace(part))
|
filtered = append(filtered, strings.TrimSpace(part))
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -709,6 +709,35 @@ data: [DONE]
|
|||||||
assert.Equal(t, 0, metrics[0].OutputTokens)
|
assert.Equal(t, 0, metrics[0].OutputTokens)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
t.Run("v1/responses format with nested response.usage", func(t *testing.T) {
|
||||||
|
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||||
|
|
||||||
|
// v1/responses SSE format: usage is nested under response.usage
|
||||||
|
responseBody := "event: response.completed\n" +
|
||||||
|
`data: {"type":"response.completed","response":{"id":"resp_abc","object":"response","created_at":1773416985,"status":"completed","model":"test-model","output":[],"usage":{"input_tokens":17,"output_tokens":23,"total_tokens":40}}}` +
|
||||||
|
"\n\n"
|
||||||
|
|
||||||
|
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
||||||
|
w.Header().Set("Content-Type", "text/event-stream")
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
w.Write([]byte(responseBody))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
req := httptest.NewRequest("POST", "/v1/responses", nil)
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
ginCtx, _ := gin.CreateTestContext(rec)
|
||||||
|
|
||||||
|
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
metrics := mm.getMetrics()
|
||||||
|
assert.Equal(t, 1, len(metrics))
|
||||||
|
assert.Equal(t, "test-model", metrics[0].Model)
|
||||||
|
assert.Equal(t, 17, metrics[0].InputTokens)
|
||||||
|
assert.Equal(t, 23, metrics[0].OutputTokens)
|
||||||
|
})
|
||||||
|
|
||||||
t.Run("handles empty streaming response records minimal metrics", func(t *testing.T) {
|
t.Run("handles empty streaming response records minimal metrics", func(t *testing.T) {
|
||||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||||
|
|
||||||
|
|||||||
+10
-10
@@ -117,12 +117,12 @@ func TestProcess_UnloadAfterTTL(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
expectedMessage := "I_sense_imminent_danger"
|
expectedMessage := "I_sense_imminent_danger"
|
||||||
config := getTestSimpleResponderConfig(expectedMessage)
|
conf := getTestSimpleResponderConfig(expectedMessage)
|
||||||
assert.Equal(t, 0, config.UnloadAfter)
|
assert.Equal(t, config.MODEL_CONFIG_DEFAULT_TTL, conf.UnloadAfter)
|
||||||
config.UnloadAfter = 3 // seconds
|
conf.UnloadAfter = 3 // seconds
|
||||||
assert.Equal(t, 3, config.UnloadAfter)
|
assert.Equal(t, 3, conf.UnloadAfter)
|
||||||
|
|
||||||
process := NewProcess("ttl_test", 2, config, debugLogger, debugLogger)
|
process := NewProcess("ttl_test", 2, conf, debugLogger, debugLogger)
|
||||||
defer process.Stop()
|
defer process.Stop()
|
||||||
|
|
||||||
// this should take 4 seconds
|
// this should take 4 seconds
|
||||||
@@ -159,12 +159,12 @@ func TestProcess_LowTTLValue(t *testing.T) {
|
|||||||
t.Skip("skipping test, edit process_test.go to run it ")
|
t.Skip("skipping test, edit process_test.go to run it ")
|
||||||
}
|
}
|
||||||
|
|
||||||
config := getTestSimpleResponderConfig("fast_ttl")
|
conf := getTestSimpleResponderConfig("fast_ttl")
|
||||||
assert.Equal(t, 0, config.UnloadAfter)
|
assert.Equal(t, config.MODEL_CONFIG_DEFAULT_TTL, conf.UnloadAfter)
|
||||||
config.UnloadAfter = 1 // second
|
conf.UnloadAfter = 1 // second
|
||||||
assert.Equal(t, 1, config.UnloadAfter)
|
assert.Equal(t, 1, conf.UnloadAfter)
|
||||||
|
|
||||||
process := NewProcess("ttl", 2, config, debugLogger, debugLogger)
|
process := NewProcess("ttl", 2, conf, debugLogger, debugLogger)
|
||||||
defer process.Stop()
|
defer process.Stop()
|
||||||
|
|
||||||
for i := 0; i < 100; i++ {
|
for i := 0; i < 100; i++ {
|
||||||
|
|||||||
@@ -720,6 +720,17 @@ func (pm *ProxyManager) proxyInferenceHandler(c *gin.Context) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// setParamsByID: set params based on the requested model ID (runs after setParams, can override it)
|
||||||
|
setParamsByIDParams, setParamsByIDKeys := pm.config.Models[modelID].Filters.SanitizedSetParamsByID(requestedModel)
|
||||||
|
for _, key := range setParamsByIDKeys {
|
||||||
|
pm.proxyLogger.Debugf("<%s> setting param by id: %s", requestedModel, key)
|
||||||
|
bodyBytes, err = sjson.SetBytes(bodyBytes, key, setParamsByIDParams[key])
|
||||||
|
if err != nil {
|
||||||
|
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error setting parameter %s in request", key))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pm.proxyLogger.Debugf("ProxyManager using local Process for model: %s", requestedModel)
|
pm.proxyLogger.Debugf("ProxyManager using local Process for model: %s", requestedModel)
|
||||||
nextHandler = processGroup.ProxyRequest
|
nextHandler = processGroup.ProxyRequest
|
||||||
} else if pm.peerProxy != nil && pm.peerProxy.HasPeerModel(requestedModel) {
|
} else if pm.peerProxy != nil && pm.peerProxy.HasPeerModel(requestedModel) {
|
||||||
|
|||||||
@@ -14,12 +14,13 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
type Model struct {
|
type Model struct {
|
||||||
Id string `json:"id"`
|
Id string `json:"id"`
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
Description string `json:"description"`
|
Description string `json:"description"`
|
||||||
State string `json:"state"`
|
State string `json:"state"`
|
||||||
Unlisted bool `json:"unlisted"`
|
Unlisted bool `json:"unlisted"`
|
||||||
PeerID string `json:"peerID"`
|
PeerID string `json:"peerID"`
|
||||||
|
Aliases []string `json:"aliases,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func addApiHandlers(pm *ProxyManager) {
|
func addApiHandlers(pm *ProxyManager) {
|
||||||
@@ -83,6 +84,7 @@ func (pm *ProxyManager) getModelStatus() []Model {
|
|||||||
Description: pm.config.Models[modelID].Description,
|
Description: pm.config.Models[modelID].Description,
|
||||||
State: state,
|
State: state,
|
||||||
Unlisted: pm.config.Models[modelID].Unlisted,
|
Unlisted: pm.config.Models[modelID].Unlisted,
|
||||||
|
Aliases: pm.config.Models[modelID].Aliases,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -730,7 +730,7 @@ func TestProxyManager_RunningEndpoint(t *testing.T) {
|
|||||||
// Verify extended fields are present
|
// Verify extended fields are present
|
||||||
assert.NotEmpty(t, response.Running[0].Cmd, "cmd should be populated")
|
assert.NotEmpty(t, response.Running[0].Cmd, "cmd should be populated")
|
||||||
assert.NotEmpty(t, response.Running[0].Proxy, "proxy should be populated")
|
assert.NotEmpty(t, response.Running[0].Proxy, "proxy should be populated")
|
||||||
assert.Equal(t, 0, response.Running[0].TTL, "ttl should default to 0")
|
assert.Equal(t, -1, response.Running[0].TTL, "ttl should default to -1 (use globalTTL)")
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1046,6 +1046,61 @@ func TestProxyManager_FiltersStripParams(t *testing.T) {
|
|||||||
// t.Logf("%v", response)
|
// t.Logf("%v", response)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestProxyManager_FiltersSetParamsByID(t *testing.T) {
|
||||||
|
// no explicit aliases — setParamsByID keys are auto-registered as aliases
|
||||||
|
configStr := strings.Replace(`
|
||||||
|
logLevel: error
|
||||||
|
models:
|
||||||
|
model1:
|
||||||
|
cmd: 'SRPATH --port ${PORT} --silent --respond model1'
|
||||||
|
proxy: "http://127.0.0.1:${PORT}"
|
||||||
|
filters:
|
||||||
|
setParams:
|
||||||
|
reasoning_effort: medium
|
||||||
|
setParamsByID:
|
||||||
|
"${MODEL_ID}:high":
|
||||||
|
reasoning_effort: high
|
||||||
|
"${MODEL_ID}:low":
|
||||||
|
reasoning_effort: low
|
||||||
|
`, "SRPATH", simpleResponderPath, -1)
|
||||||
|
|
||||||
|
cfg, err := config.LoadConfigFromReader(strings.NewReader(configStr))
|
||||||
|
if !assert.NoError(t, err, "invalid test configuration") {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
proxy := New(cfg)
|
||||||
|
defer proxy.StopProcesses(StopWaitForInflightRequest)
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
requestedModel string
|
||||||
|
wantEffort string
|
||||||
|
}{
|
||||||
|
// setParams applies, no setParamsByID match
|
||||||
|
{requestedModel: "model1", wantEffort: "medium"},
|
||||||
|
// setParamsByID overrides setParams
|
||||||
|
{requestedModel: "model1:high", wantEffort: "high"},
|
||||||
|
{requestedModel: "model1:low", wantEffort: "low"},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.requestedModel, func(t *testing.T) {
|
||||||
|
reqBody := fmt.Sprintf(`{"model":%q}`, tt.requestedModel)
|
||||||
|
req := httptest.NewRequest("POST", "/v1/chat/completions", bytes.NewBufferString(reqBody))
|
||||||
|
w := CreateTestResponseRecorder()
|
||||||
|
proxy.ServeHTTP(w, req)
|
||||||
|
assert.Equal(t, http.StatusOK, w.Code)
|
||||||
|
|
||||||
|
var response map[string]interface{}
|
||||||
|
assert.NoError(t, json.Unmarshal(w.Body.Bytes(), &response))
|
||||||
|
|
||||||
|
requestBody, _ := response["request_body"].(string)
|
||||||
|
gotEffort := gjson.Get(requestBody, "reasoning_effort").String()
|
||||||
|
assert.Equal(t, tt.wantEffort, gotEffort, "reasoning_effort mismatch for model %s", tt.requestedModel)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestProxyManager_HealthEndpoint(t *testing.T) {
|
func TestProxyManager_HealthEndpoint(t *testing.T) {
|
||||||
config := config.AddDefaultGroupToConfig(config.Config{
|
config := config.AddDefaultGroupToConfig(config.Config{
|
||||||
HealthCheckTimeout: 15,
|
HealthCheckTimeout: 15,
|
||||||
|
|||||||
@@ -0,0 +1 @@
|
|||||||
|
legacy-peer-deps=true
|
||||||
Generated
+976
-1186
File diff suppressed because it is too large
Load Diff
@@ -12,18 +12,18 @@
|
|||||||
"test:watch": "vitest"
|
"test:watch": "vitest"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@sveltejs/vite-plugin-svelte": "^5.0.3",
|
"@sveltejs/vite-plugin-svelte": "^7.0.0",
|
||||||
"@tailwindcss/vite": "^4.1.8",
|
"@tailwindcss/vite": "^4.1.8",
|
||||||
"@tsconfig/svelte": "^5.0.4",
|
"@tsconfig/svelte": "^5.0.4",
|
||||||
"@types/hast": "^3.0.4",
|
"@types/hast": "^3.0.4",
|
||||||
"@types/node": "^25.1.0",
|
"@types/node": "^25.1.0",
|
||||||
"svelte": "^5.19.0",
|
"svelte": "^5.46.4",
|
||||||
"svelte-check": "^4.1.4",
|
"svelte-check": "^4.1.4",
|
||||||
"tailwindcss": "^4.1.8",
|
"tailwindcss": "^4.1.8",
|
||||||
"typescript": "~5.8.3",
|
"typescript": "~5.8.3",
|
||||||
"vite": "^6.3.5",
|
"vite": "^8.0.0",
|
||||||
"vite-plugin-compression2": "^2.4.0",
|
"vite-plugin-compression2": "^2.5.1",
|
||||||
"vitest": "^4.0.18"
|
"vitest": "^4.1.0"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"highlight.js": "^11.11.1",
|
"highlight.js": "^11.11.1",
|
||||||
|
|||||||
@@ -65,10 +65,17 @@
|
|||||||
});
|
});
|
||||||
|
|
||||||
let preElement: HTMLPreElement;
|
let preElement: HTMLPreElement;
|
||||||
|
let userScrolledUp = $state(false);
|
||||||
|
|
||||||
// Auto scroll to bottom when logs change
|
function handleScroll() {
|
||||||
|
if (!preElement) return;
|
||||||
|
const { scrollTop, scrollHeight, clientHeight } = preElement;
|
||||||
|
userScrolledUp = scrollHeight - scrollTop - clientHeight > 40;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Auto scroll to bottom when logs change, unless user has scrolled up
|
||||||
$effect(() => {
|
$effect(() => {
|
||||||
if (preElement && filteredLogs) {
|
if (preElement && filteredLogs && !userScrolledUp) {
|
||||||
preElement.scrollTop = preElement.scrollHeight;
|
preElement.scrollTop = preElement.scrollHeight;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
@@ -82,7 +89,7 @@
|
|||||||
<div class="flex gap-2 items-center">
|
<div class="flex gap-2 items-center">
|
||||||
<button class="btn border-0" onclick={toggleFontSize} title="Change font size">
|
<button class="btn border-0" onclick={toggleFontSize} title="Change font size">
|
||||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-4 h-4">
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-4 h-4">
|
||||||
<path fill-rule="evenodd" d="M10.5 3.75a6 6 0 0 0-5.98 6.496A5.25 5.25 0 0 0 6.75 20.25H18a4.5 4.5 0 0 0 2.206-8.423 3.75 3.75 0 0 0-4.133-4.303A6.001 6.001 0 0 0 10.5 3.75Zm2.25 6a.75.75 0 0 0-1.5 0v4.94l-1.72-1.72a.75.75 0 0 0-1.06 1.06l3 3a.75.75 0 0 0 1.06 0l3-3a.75.75 0 1 0-1.06-1.06l-1.72 1.72V9.75Z" clip-rule="evenodd" />
|
<path d="M2 4v3h5v12h3V7h5V4H2zm19 5h-9v3h3v7h3v-7h3V9z"/>
|
||||||
</svg>
|
</svg>
|
||||||
</button>
|
</button>
|
||||||
<button class="btn border-0" onclick={toggleWrapText} title="Toggle text wrap">
|
<button class="btn border-0" onclick={toggleWrapText} title="Toggle text wrap">
|
||||||
@@ -127,6 +134,6 @@
|
|||||||
{/if}
|
{/if}
|
||||||
</div>
|
</div>
|
||||||
<div class="rounded-lg bg-background font-mono text-sm flex-1 overflow-hidden">
|
<div class="rounded-lg bg-background font-mono text-sm flex-1 overflow-hidden">
|
||||||
<pre bind:this={preElement} class="{textWrapClass} {fontSizeClass} h-full overflow-auto p-4">{filteredLogs}</pre>
|
<pre bind:this={preElement} onscroll={handleScroll} class="{textWrapClass} {fontSizeClass} h-full overflow-auto p-4">{filteredLogs}</pre>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -165,6 +165,9 @@
|
|||||||
{#if model.description}
|
{#if model.description}
|
||||||
<p class={model.unlisted ? "text-opacity-70" : ""}><em>{model.description}</em></p>
|
<p class={model.unlisted ? "text-opacity-70" : ""}><em>{model.description}</em></p>
|
||||||
{/if}
|
{/if}
|
||||||
|
{#if model.aliases && model.aliases.length > 0}
|
||||||
|
<p class="text-xs text-txtsecondary">Aliases: {model.aliases.join(", ")}</p>
|
||||||
|
{/if}
|
||||||
</td>
|
</td>
|
||||||
<td class="w-12">
|
<td class="w-12">
|
||||||
{#if model.state === "stopped"}
|
{#if model.state === "stopped"}
|
||||||
|
|||||||
@@ -16,7 +16,7 @@
|
|||||||
let fileInput = $state<HTMLInputElement | null>(null);
|
let fileInput = $state<HTMLInputElement | null>(null);
|
||||||
let copied = $state(false);
|
let copied = $state(false);
|
||||||
|
|
||||||
const ACCEPTED_FORMATS = ['.mp3', '.wav'];
|
const ACCEPTED_FORMATS = ['.mp3', '.wav', '.ogg'];
|
||||||
const MAX_FILE_SIZE = 25 * 1024 * 1024; // 25MB
|
const MAX_FILE_SIZE = 25 * 1024 * 1024; // 25MB
|
||||||
|
|
||||||
let hasModels = $derived($models.some((m) => !m.unlisted));
|
let hasModels = $derived($models.some((m) => !m.unlisted));
|
||||||
@@ -31,7 +31,7 @@
|
|||||||
const ext = '.' + file.name.split('.').pop()?.toLowerCase();
|
const ext = '.' + file.name.split('.').pop()?.toLowerCase();
|
||||||
|
|
||||||
if (!ACCEPTED_FORMATS.includes(ext)) {
|
if (!ACCEPTED_FORMATS.includes(ext)) {
|
||||||
return { valid: false, error: 'Invalid file type. Accepted: MP3, WAV' };
|
return { valid: false, error: 'Invalid file type. Accepted: MP3, WAV, OGG' };
|
||||||
}
|
}
|
||||||
|
|
||||||
if (file.size > MAX_FILE_SIZE) {
|
if (file.size > MAX_FILE_SIZE) {
|
||||||
@@ -208,7 +208,7 @@
|
|||||||
<div>
|
<div>
|
||||||
<p class="mb-2">Drag and drop an audio file here</p>
|
<p class="mb-2">Drag and drop an audio file here</p>
|
||||||
<p class="text-sm">or use the Browse button below</p>
|
<p class="text-sm">or use the Browse button below</p>
|
||||||
<p class="text-xs mt-4">Accepted formats: MP3, WAV (max 25MB)</p>
|
<p class="text-xs mt-4">Accepted formats: MP3, WAV, OGG (max 25MB)</p>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
{/if}
|
{/if}
|
||||||
@@ -218,7 +218,7 @@
|
|||||||
<div class="shrink-0 flex gap-2">
|
<div class="shrink-0 flex gap-2">
|
||||||
<input
|
<input
|
||||||
type="file"
|
type="file"
|
||||||
accept=".mp3,.wav"
|
accept=".mp3,.wav,.ogg"
|
||||||
class="hidden"
|
class="hidden"
|
||||||
onchange={handleFileSelect}
|
onchange={handleFileSelect}
|
||||||
bind:this={fileInput}
|
bind:this={fileInput}
|
||||||
|
|||||||
@@ -116,6 +116,47 @@
|
|||||||
cancelEdit();
|
cancelEdit();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const COPY_SVG = `<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect width="14" height="14" x="8" y="8" rx="2" ry="2"/><path d="M4 16c-1.1 0-2-.9-2-2V4c0-1.1.9-2 2-2h10c1.1 0 2 .9 2 2"/></svg>`;
|
||||||
|
const CHECK_SVG = `<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M20 6 9 17l-5-5"/></svg>`;
|
||||||
|
|
||||||
|
function codeBlockCopy(node: HTMLElement) {
|
||||||
|
function attachButtons() {
|
||||||
|
node.querySelectorAll<HTMLPreElement>('pre:not([data-copy-btn])').forEach(pre => {
|
||||||
|
pre.setAttribute('data-copy-btn', 'true');
|
||||||
|
const btn = document.createElement('button');
|
||||||
|
btn.className = 'code-copy-btn';
|
||||||
|
btn.title = 'Copy code';
|
||||||
|
btn.innerHTML = COPY_SVG;
|
||||||
|
btn.addEventListener('click', async () => {
|
||||||
|
const text = pre.querySelector('code')?.textContent ?? pre.textContent ?? '';
|
||||||
|
try {
|
||||||
|
if (navigator.clipboard && window.isSecureContext) {
|
||||||
|
await navigator.clipboard.writeText(text);
|
||||||
|
} else {
|
||||||
|
const ta = document.createElement('textarea');
|
||||||
|
ta.value = text;
|
||||||
|
ta.style.cssText = 'position:fixed;left:-9999px';
|
||||||
|
document.body.appendChild(ta);
|
||||||
|
ta.select();
|
||||||
|
document.execCommand('copy');
|
||||||
|
document.body.removeChild(ta);
|
||||||
|
}
|
||||||
|
btn.innerHTML = CHECK_SVG;
|
||||||
|
btn.classList.add('copied');
|
||||||
|
setTimeout(() => { btn.innerHTML = COPY_SVG; btn.classList.remove('copied'); }, 2000);
|
||||||
|
} catch (e) {
|
||||||
|
console.error('copy failed', e);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
pre.appendChild(btn);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
attachButtons();
|
||||||
|
const mo = new MutationObserver(attachButtons);
|
||||||
|
mo.observe(node, { childList: true, subtree: true });
|
||||||
|
return { destroy: () => mo.disconnect() };
|
||||||
|
}
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<div class="flex {role === 'user' ? 'justify-end' : 'justify-start'} mb-4">
|
<div class="flex {role === 'user' ? 'justify-end' : 'justify-start'} mb-4">
|
||||||
@@ -174,7 +215,7 @@
|
|||||||
{#if showRaw}
|
{#if showRaw}
|
||||||
<div class="whitespace-pre-wrap font-mono text-sm">{textContent}</div>
|
<div class="whitespace-pre-wrap font-mono text-sm">{textContent}</div>
|
||||||
{:else}
|
{:else}
|
||||||
<div class="prose prose-sm dark:prose-invert max-w-none">
|
<div class="prose prose-sm dark:prose-invert max-w-none" use:codeBlockCopy>
|
||||||
{#each renderedParts.blocks as block (block.id)}
|
{#each renderedParts.blocks as block (block.id)}
|
||||||
{@html block.html}
|
{@html block.html}
|
||||||
{/each}
|
{/each}
|
||||||
@@ -299,14 +340,42 @@
|
|||||||
|
|
||||||
<style>
|
<style>
|
||||||
.prose :global(pre) {
|
.prose :global(pre) {
|
||||||
|
position: relative;
|
||||||
background-color: var(--color-surface);
|
background-color: var(--color-surface);
|
||||||
border: 1px solid var(--color-border, rgba(128, 128, 128, 0.2));
|
border: 1px solid var(--color-border, rgba(128, 128, 128, 0.2));
|
||||||
border-radius: 0.375rem;
|
border-radius: 0.375rem;
|
||||||
padding: 0.75rem;
|
padding: 0.75rem;
|
||||||
|
padding-right: 2.5rem;
|
||||||
overflow-x: auto;
|
overflow-x: auto;
|
||||||
margin: 0.5rem 0;
|
margin: 0.5rem 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.prose :global(.code-copy-btn) {
|
||||||
|
position: absolute;
|
||||||
|
top: 0.375rem;
|
||||||
|
right: 0.375rem;
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
padding: 0.25rem;
|
||||||
|
border-radius: 0.25rem;
|
||||||
|
border: 1px solid var(--color-border);
|
||||||
|
background: var(--color-surface);
|
||||||
|
color: var(--color-txtsecondary);
|
||||||
|
cursor: pointer;
|
||||||
|
transition: background-color 0.15s;
|
||||||
|
line-height: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.prose :global(.code-copy-btn:hover) {
|
||||||
|
background: var(--color-secondary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.prose :global(.code-copy-btn.copied) {
|
||||||
|
color: var(--color-success);
|
||||||
|
opacity: 1;
|
||||||
|
}
|
||||||
|
|
||||||
.prose :global(code) {
|
.prose :global(code) {
|
||||||
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace;
|
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace;
|
||||||
font-size: 0.875em;
|
font-size: 0.875em;
|
||||||
|
|||||||
@@ -25,6 +25,11 @@
|
|||||||
<optgroup label="Local">
|
<optgroup label="Local">
|
||||||
{#each grouped.local as model (model.id)}
|
{#each grouped.local as model (model.id)}
|
||||||
<option value={model.id}>{model.id}</option>
|
<option value={model.id}>{model.id}</option>
|
||||||
|
{#if model.aliases}
|
||||||
|
{#each model.aliases as alias (alias)}
|
||||||
|
<option value={alias}> ↳ {alias}</option>
|
||||||
|
{/each}
|
||||||
|
{/if}
|
||||||
{/each}
|
{/each}
|
||||||
</optgroup>
|
</optgroup>
|
||||||
{/if}
|
{/if}
|
||||||
|
|||||||
@@ -0,0 +1,406 @@
|
|||||||
|
<script lang="ts">
|
||||||
|
import { models } from "../../stores/api";
|
||||||
|
import { persistentStore } from "../../stores/persistent";
|
||||||
|
import { rerank } from "../../lib/rerankApi";
|
||||||
|
import { playgroundStores } from "../../stores/playgroundActivity";
|
||||||
|
import ModelSelector from "./ModelSelector.svelte";
|
||||||
|
|
||||||
|
type RerankRow = { doc: string; score: number | null };
|
||||||
|
type SortOrder = "none" | "asc" | "desc";
|
||||||
|
type EditorMode = "table" | "json";
|
||||||
|
|
||||||
|
const selectedModelStore = persistentStore<string>("playground-rerank-model", "");
|
||||||
|
|
||||||
|
const defaultQuery = "How do LLM's work?";
|
||||||
|
const defaultDocs = [
|
||||||
|
"Large language models (LLMs) use transformer architectures to predict the next token in a sequence based on massive amounts of text data.",
|
||||||
|
"LLMs are trained on diverse internet text, learning statistical patterns of language that allow them to generate coherent responses.",
|
||||||
|
"During training, LLMs minimize a loss function that measures the difference between predicted and actual tokens across billions of examples.",
|
||||||
|
"Attention mechanisms in transformers enable LLMs to weigh the importance of different words when generating output.",
|
||||||
|
"Fine\u2011tuning allows a pre\u2011trained LLM to adapt to a specific downstream task with a smaller dataset.",
|
||||||
|
"Neural networks consist of layers of interconnected neurons that adjust their weights during back\u2011propagation.",
|
||||||
|
"The history of the Roman Empire spanned over a thousand years.",
|
||||||
|
"Soccer is the most popular sport in many countries around the world.",
|
||||||
|
"Quantum computing uses qubits to perform calculations that are intractable for classical computers.",
|
||||||
|
];
|
||||||
|
|
||||||
|
let query = $state(defaultQuery);
|
||||||
|
let rows = $state<RerankRow[]>([
|
||||||
|
...defaultDocs.map((doc) => ({ doc, score: null })),
|
||||||
|
{ doc: "", score: null },
|
||||||
|
]);
|
||||||
|
let isLoading = $state(false);
|
||||||
|
let error = $state<string | null>(null);
|
||||||
|
let usage = $state<{ prompt_tokens: number; total_tokens: number } | null>(null);
|
||||||
|
let abortController: AbortController | null = null;
|
||||||
|
let sortOrder = $state<SortOrder>("desc");
|
||||||
|
let editorMode = $state<EditorMode>("table");
|
||||||
|
let jsonText = $state("");
|
||||||
|
let jsonError = $state<string | null>(null);
|
||||||
|
|
||||||
|
let hasModels = $derived($models.some((m) => !m.unlisted));
|
||||||
|
|
||||||
|
/**
 * Whether the Rerank button (and Enter-to-submit) should be enabled.
 *
 * Requires a selected model and no request in flight. In table mode the
 * query and at least one row must be non-blank. In JSON mode the editor text
 * must parse to an object with a non-blank string `query` and a `documents`
 * array holding at least one non-blank string.
 */
let canSubmit = $derived.by(() => {
  if (!$selectedModelStore || isLoading) return false;

  if (editorMode !== "json") {
    return query.trim() !== "" && rows.some((r) => r.doc.trim() !== "");
  }

  try {
    const parsed: unknown = JSON.parse(jsonText);
    // Primitives and null can never carry the required fields.
    if (typeof parsed !== "object" || parsed === null) return false;

    const { query: jsonQuery, documents } = parsed as Record<string, unknown>;
    return (
      typeof jsonQuery === "string" &&
      jsonQuery.trim() !== "" &&
      Array.isArray(documents) &&
      documents.some((d: unknown) => typeof d === "string" && d.trim() !== "")
    );
  } catch {
    // Text that is not valid JSON simply disables submission.
    return false;
  }
});
|
||||||
|
|
||||||
|
/**
 * Rows in display order, each paired with its index in `rows`.
 *
 * Sorting is a display-only transform: `rows` itself is never reordered, and
 * the carried index `i` lets handlers address the underlying row. Rows
 * without a score always sink to the bottom, whichever direction is active.
 */
let displayRows = $derived.by(() => {
  const indexed = rows.map((row, i) => ({ row, i }));
  if (sortOrder === "none") return indexed;

  const direction = sortOrder === "desc" ? -1 : 1;

  // `indexed` was just created by map(), so sorting it in place cannot
  // disturb `rows`.
  return indexed.sort((a, b) => {
    const left = a.row.score;
    const right = b.row.score;
    if (left === null || right === null) {
      if (left === right) return 0;
      return left === null ? 1 : -1;
    }
    return direction * (left - right);
  });
});
|
||||||
|
|
||||||
|
// Table mode only: keep a blank row at the end of the list. As soon as the
// last row holds text (or the list is empty), append a fresh blank row.
$effect(() => {
  if (editorMode !== "table") return;

  const lastDocTrimmed = rows[rows.length - 1]?.doc.trim();
  if (lastDocTrimmed !== "") {
    rows = [...rows, { doc: "", score: null }];
  }
});
|
||||||
|
|
||||||
|
// Mirror the local in-flight flag into the shared playground activity store.
$effect(() => {
  const loading = isLoading;
  playgroundStores.rerankLoading.set(loading);
});
|
||||||
|
|
||||||
|
/**
 * Switches to the JSON editor, seeding it with the current query and every
 * non-blank document from the table. No-op when already in JSON mode.
 */
function switchToJson() {
  if (editorMode === "json") return;

  const documents: string[] = [];
  for (const { doc } of rows) {
    if (doc.trim() !== "") documents.push(doc);
  }

  jsonText = JSON.stringify({ query, documents }, null, 2);
  jsonError = null;
  editorMode = "json";
}
|
||||||
|
|
||||||
|
/**
 * Switches to the table editor, loading its contents from the JSON text.
 *
 * - Blank JSON text resets the table to an empty query and one blank row.
 * - Otherwise the text must parse to an object with a string `query` and an
 *   array `documents`; on any problem the message is stored in `jsonError`
 *   and the editor stays in JSON mode.
 *
 * Non-string documents are kept as their JSON text (rather than `String(d)`,
 * which would collapse objects to "[object Object]") so no content is lost.
 * No-op when already in table mode.
 */
function switchToTable() {
  if (editorMode === "table") return;

  if (jsonText.trim() === "") {
    query = "";
    rows = [{ doc: "", score: null }];
    jsonError = null;
    editorMode = "table";
    return;
  }

  try {
    const parsed: unknown = JSON.parse(jsonText);
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
      throw new Error("Expected a JSON object");
    }

    const { query: parsedQuery, documents } = parsed as Record<string, unknown>;
    if (typeof parsedQuery !== "string") throw new Error('"query" must be a string');
    if (!Array.isArray(documents)) throw new Error('"documents" must be an array');

    const newRows: RerankRow[] = documents.map((d: unknown) => ({
      doc: typeof d === "string" ? d : JSON.stringify(d),
      score: null,
    }));

    // Keep the trailing blank "add" row the table expects.
    if (newRows.length === 0 || newRows[newRows.length - 1].doc.trim() !== "") {
      newRows.push({ doc: "", score: null });
    }

    query = parsedQuery;
    rows = newRows;
    jsonError = null;
    editorMode = "table";
  } catch (err) {
    jsonError = err instanceof Error ? err.message : "Invalid JSON";
  }
}
|
||||||
|
|
||||||
|
/** Advances the score sort through none -> desc -> asc -> none. */
function cycleSortOrder() {
  switch (sortOrder) {
    case "none":
      sortOrder = "desc";
      break;
    case "desc":
      sortOrder = "asc";
      break;
    default:
      sortOrder = "none";
  }
}
|
||||||
|
|
||||||
|
/** Arrow suffix for the Score header; empty when no sort is active. */
function sortIndicator(): string {
  switch (sortOrder) {
    case "desc":
      return " ↓";
    case "asc":
      return " ↑";
    default:
      return "";
  }
}
|
||||||
|
|
||||||
|
/**
 * Runs a rerank request for the current inputs and writes the returned
 * scores back onto `rows`.
 *
 * In JSON mode the editor text is parsed first, copied into the table state
 * (`query`, `rows`) and the view switches to the table so scores are visible.
 * Blank documents are never sent; each submitted document remembers its
 * position in `rows` so `result.index` can be mapped back to the right row.
 *
 * Failures are reported through `error`; a user-initiated abort is silent.
 */
async function submit() {
  if (!canSubmit) return;

  if (editorMode === "json") {
    let parsed: unknown;
    try {
      parsed = JSON.parse(jsonText);
    } catch {
      error = "Invalid JSON — fix before submitting";
      return;
    }

    const payload = (typeof parsed === "object" && parsed !== null ? parsed : {}) as Record<string, unknown>;
    const jsonQuery = payload.query;
    const jsonDocs = payload.documents;

    // canSubmit only guarantees that *some* document is a non-blank string,
    // so reject other shapes here with a message that says what is wrong.
    if (
      typeof jsonQuery !== "string" ||
      !Array.isArray(jsonDocs) ||
      !jsonDocs.every((d: unknown): d is string => typeof d === "string")
    ) {
      error = 'JSON must contain a string "query" and a "documents" array of strings';
      return;
    }

    const newRows: RerankRow[] = jsonDocs
      .filter((d) => d.trim() !== "")
      .map((d) => ({ doc: d, score: null }));
    newRows.push({ doc: "", score: null });

    rows = newRows;
    query = jsonQuery;
    editorMode = "table";
  }

  const submitQuery = query;
  const nonEmptyEntries = rows
    .map((r, i) => ({ originalIndex: i, doc: r.doc }))
    .filter((e) => e.doc.trim() !== "");

  isLoading = true;
  error = null;
  usage = null;

  // Clear previous scores
  rows = rows.map((r) => ({ ...r, score: null }));

  const controller = new AbortController();
  abortController = controller;

  try {
    const response = await rerank(
      $selectedModelStore,
      submitQuery,
      nonEmptyEntries.map((e) => e.doc),
      controller.signal
    );

    if (!Array.isArray(response.results)) {
      throw new Error("Rerank response did not include a results array");
    }

    usage = response.usage ?? null;

    // Map result.index (position in submitted docs array) back to original rows[] index
    const updated = rows.map((r) => ({ ...r }));
    for (const result of response.results) {
      const entry = nonEmptyEntries[result.index];
      if (entry !== undefined && typeof result.relevance_score === "number") {
        updated[entry.originalIndex].score = result.relevance_score;
      }
    }
    rows = updated;
  } catch (err) {
    const cancelledByUser = err instanceof Error && err.name === "AbortError";
    if (!cancelledByUser) {
      error = err instanceof Error ? err.message : "An error occurred";
    }
  } finally {
    isLoading = false;
    abortController = null;
  }
}
|
||||||
|
|
||||||
|
/** Aborts the in-flight rerank request, if there is one. */
function cancel() {
  if (abortController) {
    abortController.abort();
  }
}
|
||||||
|
|
||||||
|
/**
 * Restores the example query and documents and discards results, messages,
 * sort choice and JSON editor text. The current editor mode is left as is.
 */
function clear() {
  // Inputs back to the built-in example, with the trailing blank row.
  query = defaultQuery;
  rows = [...defaultDocs, ""].map((doc) => ({ doc, score: null }));

  // Drop the outcome of the previous request.
  error = null;
  usage = null;

  // Reset view state.
  sortOrder = "desc";
  jsonText = "";
  jsonError = null;
}
|
||||||
|
|
||||||
|
/**
 * Removes the row at `originalIndex` (an index into `rows`, not the sorted
 * view). The last remaining row is never removed.
 */
function deleteRow(originalIndex: number) {
  if (rows.length > 1) {
    rows = rows.filter((_row, position) => position !== originalIndex);
  }
}
|
||||||
|
|
||||||
|
/**
 * Sets the document text of the row at `originalIndex` (an index into
 * `rows`), replacing `rows` with a fresh array of copied row objects.
 */
function updateDoc(originalIndex: number, value: string) {
  const next: RerankRow[] = [];
  for (const current of rows) {
    next.push({ ...current });
  }
  next[originalIndex].doc = value;
  rows = next;
}
|
||||||
|
|
||||||
|
/**
 * CSS classes for a score cell: muted when there is no score, green for
 * scores above zero, red for everything else.
 */
function scoreColor(score: number | null): string {
  if (score === null) {
    return "text-txtsecondary";
  }
  return score > 0 ? "text-green-600 dark:text-green-400" : "text-red-500 dark:text-red-400";
}
|
||||||
|
|
||||||
|
/** Text for a score cell: an em dash when unscored, else three decimals. */
function formatScore(score: number | null): string {
  return score === null ? "—" : score.toFixed(3);
}
|
||||||
|
|
||||||
|
/**
 * Submits on plain Enter from the query or a document input.
 *
 * Shift+Enter is left alone, and so is an Enter pressed while an IME
 * composition is active: that keystroke confirms the composed text and must
 * not fire the request.
 */
function handleKeyDown(e: KeyboardEvent) {
  if (e.key !== "Enter" || e.shiftKey || e.isComposing) return;

  e.preventDefault();
  // submit() reports its own failures through `error`; nothing to await here.
  void submit();
}
|
||||||
|
|
||||||
|
// True when the panel already matches what clear() would produce: default
// query, default documents plus one blank row, no scores, no JSON text, and
// no error or usage to show. Used to disable the Clear button.
let isCleared = $derived(
  query === defaultQuery &&
    rows.every(({ doc, score }, idx) => score === null && doc === (defaultDocs[idx] ?? "")) &&
    rows.length === defaultDocs.length + 1 &&
    jsonText.trim() === "" &&
    !error &&
    !usage
);
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<div class="flex flex-col h-full">
|
||||||
|
<!-- Top bar: model selector + query input (table mode) + mode toggle -->
|
||||||
|
<div class="shrink-0 flex flex-wrap gap-2 mb-4">
|
||||||
|
<ModelSelector bind:value={$selectedModelStore} placeholder="Select a rerank model..." disabled={isLoading} />
|
||||||
|
{#if editorMode === "table"}
|
||||||
|
<input
|
||||||
|
type="text"
|
||||||
|
class="min-w-0 flex-1 basis-48 px-3 py-2 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
|
||||||
|
placeholder="Query..."
|
||||||
|
bind:value={query}
|
||||||
|
disabled={isLoading}
|
||||||
|
onkeydown={handleKeyDown}
|
||||||
|
/>
|
||||||
|
{/if}
|
||||||
|
<!-- Table / JSON toggle -->
|
||||||
|
<div class="flex rounded border border-gray-200 dark:border-white/10 overflow-hidden shrink-0">
|
||||||
|
<button
|
||||||
|
class="px-3 py-1.5 text-sm transition-colors {editorMode === 'table'
|
||||||
|
? 'bg-primary text-btn-primary-text'
|
||||||
|
: 'bg-surface hover:bg-secondary-hover'}"
|
||||||
|
onclick={switchToTable}
|
||||||
|
disabled={isLoading}
|
||||||
|
>
|
||||||
|
Table
|
||||||
|
</button>
|
||||||
|
<button
|
||||||
|
class="px-3 py-1.5 text-sm border-l border-gray-200 dark:border-white/10 transition-colors {editorMode === 'json'
|
||||||
|
? 'bg-primary text-btn-primary-text'
|
||||||
|
: 'bg-surface hover:bg-secondary-hover'}"
|
||||||
|
onclick={switchToJson}
|
||||||
|
disabled={isLoading}
|
||||||
|
>
|
||||||
|
JSON
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{#if !hasModels}
|
||||||
|
<div class="flex-1 flex items-center justify-center text-txtsecondary">
|
||||||
|
<p>No models configured. Add models to your configuration to use reranking.</p>
|
||||||
|
</div>
|
||||||
|
{:else if editorMode === "json"}
|
||||||
|
<!-- JSON editor -->
|
||||||
|
<div class="flex-1 flex flex-col min-h-0 mb-4">
|
||||||
|
<textarea
|
||||||
|
class="flex-1 w-full font-mono text-sm px-3 py-2 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary resize-none"
|
||||||
|
bind:value={jsonText}
|
||||||
|
disabled={isLoading}
|
||||||
|
placeholder={'{\n "query": "your search query",\n "documents": [\n "document one",\n "document two"\n ]\n}'}
|
||||||
|
spellcheck={false}
|
||||||
|
></textarea>
|
||||||
|
{#if jsonError}
|
||||||
|
<p class="mt-1 text-sm text-red-500">{jsonError}</p>
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
|
{:else}
|
||||||
|
<!-- Document table -->
|
||||||
|
<div class="flex-1 overflow-y-auto mb-4 border border-gray-200 dark:border-white/10 rounded">
|
||||||
|
<table class="w-full border-collapse table-fixed">
|
||||||
|
<colgroup>
|
||||||
|
<col class="w-auto" />
|
||||||
|
<col style="width: 120px" />
|
||||||
|
<col style="width: 40px" />
|
||||||
|
</colgroup>
|
||||||
|
<thead class="sticky top-0 bg-surface border-b border-gray-200 dark:border-white/10">
|
||||||
|
<tr>
|
||||||
|
<th class="px-3 py-2 text-left text-sm font-medium text-txtsecondary">Document</th>
|
||||||
|
<th
|
||||||
|
class="px-3 py-2 text-right text-sm font-medium text-txtsecondary cursor-pointer select-none hover:text-txtprimary transition-colors"
|
||||||
|
onclick={cycleSortOrder}
|
||||||
|
>
|
||||||
|
Score{sortIndicator()}
|
||||||
|
</th>
|
||||||
|
<th class="px-2 py-2"></th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{#each displayRows as { row, i } (i)}
|
||||||
|
<tr class="border-b border-gray-100 dark:border-white/5 last:border-0">
|
||||||
|
<td class="px-3 py-1.5">
|
||||||
|
<input
|
||||||
|
type="text"
|
||||||
|
class="w-full bg-transparent focus:outline-none focus:ring-1 focus:ring-primary rounded px-1 py-0.5"
|
||||||
|
placeholder={i === rows.length - 1 ? "Add document..." : "Document text..."}
|
||||||
|
value={row.doc}
|
||||||
|
oninput={(e) => updateDoc(i, (e.target as HTMLInputElement).value)}
|
||||||
|
disabled={isLoading}
|
||||||
|
onkeydown={handleKeyDown}
|
||||||
|
/>
|
||||||
|
</td>
|
||||||
|
<td class="px-3 py-1.5 text-right font-mono text-sm {scoreColor(row.score)}">
|
||||||
|
{#if isLoading && row.score === null && row.doc.trim() !== ""}
|
||||||
|
<span class="inline-block w-4 h-4 border-2 border-current border-t-transparent rounded-full animate-spin align-middle"></span>
|
||||||
|
{:else}
|
||||||
|
{formatScore(row.score)}
|
||||||
|
{/if}
|
||||||
|
</td>
|
||||||
|
<td class="px-2 py-1.5 text-center">
|
||||||
|
<button
|
||||||
|
class="w-7 h-7 flex items-center justify-center text-txtsecondary hover:text-red-500 transition-colors rounded disabled:opacity-30 disabled:cursor-not-allowed"
|
||||||
|
onclick={() => deleteRow(i)}
|
||||||
|
disabled={rows.length <= 1}
|
||||||
|
tabindex="-1"
|
||||||
|
aria-label="Remove row"
|
||||||
|
>
|
||||||
|
×
|
||||||
|
</button>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{/each}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
|
||||||
|
<!-- Bottom toolbar -->
|
||||||
|
{#if hasModels}
|
||||||
|
<div class="shrink-0 flex flex-wrap items-center gap-2">
|
||||||
|
{#if isLoading}
|
||||||
|
<button class="btn bg-red-500 hover:bg-red-600 text-white" onclick={cancel}>
|
||||||
|
Cancel
|
||||||
|
</button>
|
||||||
|
{:else}
|
||||||
|
<button
|
||||||
|
class="btn bg-primary text-btn-primary-text hover:opacity-90"
|
||||||
|
onclick={submit}
|
||||||
|
disabled={!canSubmit}
|
||||||
|
>
|
||||||
|
Rerank
|
||||||
|
</button>
|
||||||
|
<button class="btn" onclick={clear} disabled={isCleared}>
|
||||||
|
Clear
|
||||||
|
</button>
|
||||||
|
{/if}
|
||||||
|
|
||||||
|
{#if error}
|
||||||
|
<span class="text-sm text-red-500 ml-2">{error}</span>
|
||||||
|
{:else if usage}
|
||||||
|
<span class="text-sm text-txtsecondary ml-2">{usage.total_tokens} tokens</span>
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
@@ -0,0 +1,27 @@
|
|||||||
|
/** One scored document from a rerank response. */
export interface RerankResult {
  /** Position of the document in the submitted `documents` array. */
  index: number;
  /** Relevance score the model assigned to that document. */
  relevance_score: number;
}

/** Body of a successful `/v1/rerank` response. */
export interface RerankResponse {
  model: string;
  object: string;
  usage: { prompt_tokens: number; total_tokens: number };
  results: RerankResult[];
}

/** Upper bound on how much of an error body is copied into the message. */
const MAX_ERROR_DETAIL_LENGTH = 500;

/**
 * POSTs a rerank request to `/v1/rerank`.
 *
 * @param model - Model identifier to send.
 * @param query - Query the documents are ranked against.
 * @param documents - Document texts to score.
 * @param signal - Abort signal used to cancel the request.
 * @returns The parsed JSON response body.
 * @throws Error when the response status is not OK. The message starts with
 *   the status code and status text and, when the server sent a body, ends
 *   with that body (truncated). Abort errors from `fetch` propagate as is.
 */
export async function rerank(
  model: string,
  query: string,
  documents: string[],
  signal: AbortSignal
): Promise<RerankResponse> {
  const response = await fetch("/v1/rerank", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ model, query, documents }),
    signal,
  });

  if (!response.ok) {
    // statusText is empty over HTTP/2, so the body is often the only detail.
    let detail = "";
    try {
      detail = (await response.text()).trim().slice(0, MAX_ERROR_DETAIL_LENGTH);
    } catch (err) {
      // A cancellation while reading the body must still surface as an abort.
      if (signal.aborted) throw err;
    }

    const status = `${response.status} ${response.statusText}`.trim();
    throw new Error(detail === "" ? status : `${status}: ${detail}`);
  }

  return (await response.json()) as RerankResponse;
}
|
||||||
@@ -9,6 +9,7 @@ export interface Model {
|
|||||||
description: string;
|
description: string;
|
||||||
unlisted: boolean;
|
unlisted: boolean;
|
||||||
peerID: string;
|
peerID: string;
|
||||||
|
aliases?: string[];
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface Metrics {
|
export interface Metrics {
|
||||||
|
|||||||
@@ -4,8 +4,9 @@
|
|||||||
import ImageInterface from "../components/playground/ImageInterface.svelte";
|
import ImageInterface from "../components/playground/ImageInterface.svelte";
|
||||||
import AudioInterface from "../components/playground/AudioInterface.svelte";
|
import AudioInterface from "../components/playground/AudioInterface.svelte";
|
||||||
import SpeechInterface from "../components/playground/SpeechInterface.svelte";
|
import SpeechInterface from "../components/playground/SpeechInterface.svelte";
|
||||||
|
import RerankInterface from "../components/playground/RerankInterface.svelte";
|
||||||
|
|
||||||
type Tab = "chat" | "images" | "speech" | "audio";
|
type Tab = "chat" | "images" | "speech" | "audio" | "rerank";
|
||||||
|
|
||||||
const selectedTabStore = persistentStore<Tab>("playground-selected-tab", "chat");
|
const selectedTabStore = persistentStore<Tab>("playground-selected-tab", "chat");
|
||||||
let mobileMenuOpen = $state(false);
|
let mobileMenuOpen = $state(false);
|
||||||
@@ -15,6 +16,7 @@
|
|||||||
{ id: "images", label: "Images" },
|
{ id: "images", label: "Images" },
|
||||||
{ id: "speech", label: "Speech" },
|
{ id: "speech", label: "Speech" },
|
||||||
{ id: "audio", label: "Transcription" },
|
{ id: "audio", label: "Transcription" },
|
||||||
|
{ id: "rerank", label: "Rerank" },
|
||||||
];
|
];
|
||||||
|
|
||||||
function selectTab(tab: Tab) {
|
function selectTab(tab: Tab) {
|
||||||
@@ -89,6 +91,9 @@
|
|||||||
<div class="h-full" class:tab-hidden={$selectedTabStore !== "audio"}>
|
<div class="h-full" class:tab-hidden={$selectedTabStore !== "audio"}>
|
||||||
<AudioInterface />
|
<AudioInterface />
|
||||||
</div>
|
</div>
|
||||||
|
<div class="h-full" class:tab-hidden={$selectedTabStore !== "rerank"}>
|
||||||
|
<RerankInterface />
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|||||||
@@ -62,7 +62,7 @@ export function enableAPIEvents(enabled: boolean): void {
|
|||||||
const newModels = JSON.parse(message.data) as Model[];
|
const newModels = JSON.parse(message.data) as Model[];
|
||||||
// Sort models by name and id
|
// Sort models by name and id
|
||||||
newModels.sort((a, b) => {
|
newModels.sort((a, b) => {
|
||||||
return (a.name + a.id).localeCompare(b.name + b.id);
|
return (a.name + a.id).localeCompare(b.name + b.id, undefined, { numeric : true} );
|
||||||
});
|
});
|
||||||
models.set(newModels);
|
models.set(newModels);
|
||||||
break;
|
break;
|
||||||
|
|||||||
@@ -4,10 +4,11 @@ const chatStreaming = writable(false);
|
|||||||
const imageGenerating = writable(false);
|
const imageGenerating = writable(false);
|
||||||
const speechGenerating = writable(false);
|
const speechGenerating = writable(false);
|
||||||
const audioTranscribing = writable(false);
|
const audioTranscribing = writable(false);
|
||||||
|
const rerankLoading = writable(false);
|
||||||
|
|
||||||
export const playgroundActivity = derived(
|
export const playgroundActivity = derived(
|
||||||
[chatStreaming, imageGenerating, speechGenerating, audioTranscribing],
|
[chatStreaming, imageGenerating, speechGenerating, audioTranscribing, rerankLoading],
|
||||||
([$chat, $image, $speech, $audio]) => $chat || $image || $speech || $audio
|
([$chat, $image, $speech, $audio, $rerank]) => $chat || $image || $speech || $audio || $rerank
|
||||||
);
|
);
|
||||||
|
|
||||||
export const playgroundStores = {
|
export const playgroundStores = {
|
||||||
@@ -15,4 +16,5 @@ export const playgroundStores = {
|
|||||||
imageGenerating,
|
imageGenerating,
|
||||||
speechGenerating,
|
speechGenerating,
|
||||||
audioTranscribing,
|
audioTranscribing,
|
||||||
|
rerankLoading,
|
||||||
};
|
};
|
||||||
|
|||||||
Reference in New Issue
Block a user