Compare commits
24 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| c3c258a55d | |||
| 29a38fde0d | |||
| d569681daa | |||
| 24efdb76b1 | |||
| cc77139ff8 | |||
| 390a35bf93 | |||
| 181f71ca11 | |||
| 49546e2cf2 | |||
| 2c078964f4 | |||
| 175bb36fb1 | |||
| aedb640471 | |||
| 2f377f6dc6 | |||
| 64e4c79fc3 | |||
| 19fb5f35e9 | |||
| b45102bde8 | |||
| 1688bdd1e9 | |||
| d33d51fa75 | |||
| e3bf065574 | |||
| 3e52144058 | |||
| d5e52d7d00 | |||
| 17e5263a76 | |||
| 8d6d949ec3 | |||
| b5fde8eb6d | |||
| 7eef5defb8 |
+1
-1
@@ -4,7 +4,7 @@ early_access: false
|
||||
reviews:
|
||||
profile: "chill"
|
||||
request_changes_workflow: false
|
||||
high_level_summary: true
|
||||
high_level_summary: false
|
||||
poem: false
|
||||
review_status: true
|
||||
collapse_walkthrough: false
|
||||
|
||||
@@ -17,12 +17,19 @@ on:
|
||||
- 'docker/build-container.sh'
|
||||
- 'docker/*.Containerfile'
|
||||
|
||||
# grant permissions on GITHUB_TOKEN to publish packages
|
||||
# ref: https://docs.github.com/en/packages/managing-github-packages-using-github-actions-workflows/publishing-and-installing-a-package-with-github-actions#publishing-a-package-using-an-action
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
id-token: write
|
||||
|
||||
jobs:
|
||||
build-and-push:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
platform: [intel, cuda, vulkan, cpu, musa, rocm]
|
||||
platform: [intel, cuda, cuda13, vulkan, cpu, musa, rocm]
|
||||
fail-fast: false
|
||||
steps:
|
||||
- name: Checkout code
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
## Project Description:
|
||||
|
||||
llama-swap is a lightweight, transparent proxy server that provides automatic model swapping to llama.cpp's server.
|
||||
|
||||
## Tech stack
|
||||
|
||||
- golang
|
||||
- typescript, vite and svelte 5 for UI (located in ui/)
|
||||
|
||||
## Workflow Tasks
|
||||
|
||||
- when summarizing changes only include details that require further action
|
||||
- just say "Done." when there is no further action
|
||||
- use the github CLI `gh` to create pull requests and work with github
|
||||
- Rules for creating pull requests:
|
||||
- keep them short and focused on changes.
|
||||
- never include a test plan
|
||||
- write the summary using the same style rules as commit message
|
||||
|
||||
## Testing
|
||||
|
||||
- Follow test naming conventions like `TestProxyManager_<test name>`, `TestProcessGroup_<test name>`, etc.
|
||||
- Use `go test -v -run <name pattern for new tests>` to run any new tests you've written.
|
||||
- Use `make test-dev` after running new tests for a quick overall test run. This runs `go test` and `staticcheck`. Fix any static checking errors. Use this only when changes are made to any code under the `proxy/` directory
|
||||
- Use `make test-all` before completing work. This includes long running concurrency tests.
|
||||
|
||||
### Commit message example format:
|
||||
|
||||
```
|
||||
proxy: add new feature
|
||||
|
||||
Add new feature that implements functionality X and Y.
|
||||
|
||||
- key change 1
|
||||
- key change 2
|
||||
- key change 3
|
||||
|
||||
fixes #123
|
||||
```
|
||||
|
||||
## Code Reviews
|
||||
|
||||
- use three levels High, Medium, Low severity
|
||||
- label each discovered issue with a label like H1, M2, L3 respectively
|
||||
- High severity are must fix issues (security, race conditions, critical bugs)
|
||||
- Medium severity are recommended improvements (coding style, missing functionality, inconsistencies)
|
||||
- Low severity are nice to have changes and nits
|
||||
- Include a suggestion with each discovered item
|
||||
- Limit your code review to three items with the highest priority first
|
||||
- Double check your discovered items and recommended remediations
|
||||
@@ -1,49 +1 @@
|
||||
## Project Description:
|
||||
|
||||
llama-swap is a light weight, transparent proxy server that provides automatic model swapping to llama.cpp's server.
|
||||
|
||||
## Tech stack
|
||||
|
||||
- golang
|
||||
- typescript, vite and react for UI (located in ui/)
|
||||
|
||||
## Workflow Tasks
|
||||
|
||||
- when summarizing changes only include details that require further action
|
||||
- just say "Done." when there is no further action
|
||||
- use `gh` to create PRs and load issues
|
||||
- do include Co-Authored-By or created by when committing changes or creating PRs
|
||||
- keep PR descriptions short and focused on changes.
|
||||
- never include a test plan
|
||||
|
||||
## Testing
|
||||
|
||||
- Follow test naming conventions like `TestProxyManager_<test name>`, `TestProcessGroup_<test name>`, etc.
|
||||
- Use `go test -v -run <name pattern for new tests>` to run any new tests you've written.
|
||||
- Use `make test-dev` after running new tests for a quick over all test run. This runs `go test` and `staticcheck`. Fix any static checking errors. Use this only when changes are made to any code under the `proxy/` directory
|
||||
- Use `make test-all` before completing work. This includes long running concurrency tests.
|
||||
|
||||
### Commit message example format:
|
||||
|
||||
```
|
||||
proxy: add new feature
|
||||
|
||||
Add new feature that implements functionality X and Y.
|
||||
|
||||
- key change 1
|
||||
- key change 2
|
||||
- key change 3
|
||||
|
||||
fixes #123
|
||||
```
|
||||
|
||||
## Code Reviews
|
||||
|
||||
- use three levels High, Medium, Low severity
|
||||
- label each discovered issue with a label like H1, M2, L3 respectively
|
||||
- High severity are must fix issues (security, race conditions, critical bugs)
|
||||
- Medium severity are recommended improvements (coding style, missing functionality, inconsistencies)
|
||||
- Low severity are nice to have changes and nits
|
||||
- Include a suggestion with each discovered item
|
||||
- Limit your code review to three items with the highest priority first
|
||||
- Double check your discovered items and recommended remediations
|
||||
@AGENTS.md
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
|
||||
# llama-swap
|
||||
|
||||
Run multiple LLM models on your machine and hot-swap between them as needed. llama-swap works with any OpenAI API-compatible server, giving you the flexibility to switch models without restarting your applications.
|
||||
Run multiple generative AI models on your machine and hot-swap between them on demand. llama-swap works with any OpenAI and Anthropic API compatible server and is used by thousands of people to power their local AI workflows.
|
||||
|
||||
Built in Go for performance and simplicity, llama-swap has zero dependencies and is incredibly easy to set up. Get started in minutes - just one binary and one configuration file.
|
||||
|
||||
@@ -13,7 +13,7 @@ Built in Go for performance and simplicity, llama-swap has zero dependencies and
|
||||
|
||||
- ✅ Easy to deploy and configure: one binary, one configuration file. no external dependencies
|
||||
- ✅ On-demand model switching
|
||||
- ✅ Use any local OpenAI compatible server (llama.cpp, vllm, tabbyAPI, etc.)
|
||||
- ✅ Use any local OpenAI compatible server (llama.cpp, vllm, tabbyAPI, stable-diffusion.cpp, etc.)
|
||||
- future proof, upgrade your inference servers at any time.
|
||||
- ✅ OpenAI API supported endpoints:
|
||||
- `v1/completions`
|
||||
@@ -48,13 +48,27 @@ Built in Go for performance and simplicity, llama-swap has zero dependencies and
|
||||
|
||||
### Web UI
|
||||
|
||||
llama-swap includes a real time web interface for monitoring logs and controlling models:
|
||||
llama-swap includes a real time web interface with a playground for testing out all sorts of local models:
|
||||
|
||||
<img width="1164" height="745" alt="image" src="https://github.com/user-attachments/assets/bacf3f9d-819f-430b-9ed2-1bfaa8d54579" />
|
||||
<img width="1125" height="876" alt="image" src="https://github.com/user-attachments/assets/8ee41947-97af-463d-b0f0-8e9c478fac07" />
|
||||
|
||||
The Activity Page shows recent requests:
|
||||
View detailed token metrics:
|
||||
|
||||
<img width="1111" height="515" alt="image" src="https://github.com/user-attachments/assets/64bfb280-d7a3-4126-971a-a128fd40410c" />
|
||||
|
||||
Inspect request and responses:
|
||||
|
||||
<img width="1111" height="720" alt="image" src="https://github.com/user-attachments/assets/24fe4aca-1448-4d7c-b9e8-a967589bda6c" />
|
||||
|
||||
Manually load and unload models:
|
||||
|
||||
<img width="1109" height="719" alt="image" src="https://github.com/user-attachments/assets/02b1e1f2-abd0-4050-84ae-facd66ff01c4" />
|
||||
|
||||
|
||||
Real time log streaming:
|
||||
|
||||
<img width="1107" height="559" alt="image" src="https://github.com/user-attachments/assets/39669a10-cff2-409e-836a-5bad8bd0140c" />
|
||||
|
||||
<img width="1360" height="963" alt="image" src="https://github.com/user-attachments/assets/5f3edee6-d03a-4ae5-ae06-b20ac1f135bd" />
|
||||
|
||||
## Installation
|
||||
|
||||
@@ -69,6 +83,7 @@ llama-swap can be installed in multiple ways
|
||||
### Docker Install ([download images](https://github.com/mostlygeek/llama-swap/pkgs/container/llama-swap))
|
||||
|
||||
Nightly container images with llama-swap and llama-server are built for multiple platforms (cuda, vulkan, intel, etc.) including [non-root variants with improved security](docs/container-security.md).
|
||||
The stable-diffusion.cpp server is also included for the musa and vulkan platforms.
|
||||
|
||||
```shell
|
||||
$ docker pull ghcr.io/mostlygeek/llama-swap:cuda
|
||||
|
||||
+26
-5
@@ -48,6 +48,12 @@
|
||||
"default": 120,
|
||||
"description": "Number of seconds to wait for a model to be ready to serve requests."
|
||||
},
|
||||
"globalTTL": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"default": 0,
|
||||
"description": "Default TTL for all models in seconds, 0 means no TTL and models will never be automatically unloaded"
|
||||
},
|
||||
"logLevel": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
@@ -87,6 +93,12 @@
|
||||
"default": 1000,
|
||||
"description": "Maximum number of metrics to keep in memory. Controls how many metrics are stored before older ones are discarded."
|
||||
},
|
||||
"captureBuffer": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"default": 5,
|
||||
"description": "Size in megabytes of the buffer for storing request/response captures. Set to 0 to disable captures."
|
||||
},
|
||||
"startPort": {
|
||||
"type": "integer",
|
||||
"default": 5800,
|
||||
@@ -171,9 +183,9 @@
|
||||
},
|
||||
"ttl": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"default": 0,
|
||||
"description": "Automatically unload the model after ttl seconds. 0 disables unloading. Must be >0 to enable."
|
||||
"minimum": -1,
|
||||
"default": -1,
|
||||
"description": "Automatically unload the model after ttl seconds. -1 uses the global TTL value, 0 disables unloading. Must be >0 to enable."
|
||||
},
|
||||
"useModelName": {
|
||||
"type": "string",
|
||||
@@ -194,11 +206,20 @@
|
||||
"additionalProperties": true,
|
||||
"default": {},
|
||||
"description": "Dictionary of parameters to set/override in requests. Useful for enforcing specific parameter values. Protected params like 'model' cannot be overridden. Values can be strings, numbers, booleans, arrays, or objects."
|
||||
},
|
||||
"setParamsByID": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "object",
|
||||
"additionalProperties": true
|
||||
},
|
||||
"default": {},
|
||||
"description": "Dictionary mapping requested model IDs (or aliases) to parameters to set/override in requests. Applied after setParams and can override those values. Useful with aliases to vary behaviour depending on which alias the client used (e.g. different reasoning_effort per alias). Keys support ${MODEL_ID} macro substitution. Protected params like 'model' cannot be overridden."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"default": {},
|
||||
"description": "Dictionary of filter settings. Supports stripParams and setParams."
|
||||
"description": "Dictionary of filter settings. Supports stripParams, setParams, and setParamsByID."
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
@@ -353,4 +374,4 @@
|
||||
"description": "A dictionary of remote peers and models they provide. Peers can be another llama-swap or any server that provides the /v1/ generative API endpoints supported by llama-swap."
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
+46
-15
@@ -50,6 +50,11 @@ logToStdout: "proxy"
|
||||
# - useful for limiting memory usage when processing large volumes of metrics
|
||||
metricsMaxInMemory: 1000
|
||||
|
||||
# captureBuffer: how many MBs to allocate for storing request/response captures
|
||||
# - optional, default: 5
|
||||
# - set to 0 to disable
|
||||
captureBuffer: 15
|
||||
|
||||
# startPort: sets the starting port number for the automatic ${PORT} macro.
|
||||
# - optional, default: 5800
|
||||
# - the ${PORT} macro can be used in model.cmd and model.proxy settings
|
||||
@@ -70,6 +75,11 @@ sendLoadingState: true
|
||||
# all fields except for Id so chat UIs can use the alias equivalent to the original.
|
||||
includeAliasesInList: false
|
||||
|
||||
# globalTTL: the default TTL in seconds before unloading a model
|
||||
# - optional, default: 0 (never automatically unload)
|
||||
# - must be >= 0
|
||||
globalTTL: 0
|
||||
|
||||
# macros: a dictionary of string substitutions
|
||||
# - optional, default: empty dictionary
|
||||
# - macros are reusable snippets
|
||||
@@ -121,7 +131,7 @@ apiKeys:
|
||||
# - below are examples of all the settings a model can have
|
||||
models:
|
||||
# keys are the model names used in API requests
|
||||
"llama":
|
||||
"gpt-oss-120b":
|
||||
# macros: a dictionary of string substitutions specific to this model
|
||||
# - optional, default: empty dictionary
|
||||
# - macros defined here override macros defined in the global macros section
|
||||
@@ -138,7 +148,7 @@ models:
|
||||
cmd: |
|
||||
# ${latest-llama} is a macro that is defined above
|
||||
${latest-llama}
|
||||
--model path/to/llama-8B-Q4_K_M.gguf
|
||||
--model path/to/gpt-oss-120B.gguf
|
||||
--ctx-size ${default_ctx}
|
||||
--temperature ${temp}
|
||||
|
||||
@@ -146,13 +156,13 @@ models:
|
||||
# - optional, default: empty string
|
||||
# - if set, it will be used in the v1/models API response
|
||||
# - if not set, it will be omitted in the JSON model record
|
||||
name: "llama 3.1 8B"
|
||||
name: "gpt-oss 120B"
|
||||
|
||||
# description: a description for the model
|
||||
# - optional, default: empty string
|
||||
# - if set, it will be used in the v1/models API response
|
||||
# - if not set, it will be omitted in the JSON model record
|
||||
description: "A small but capable model used for quick testing"
|
||||
description: "A thinking model from OpenAI"
|
||||
|
||||
# env: define an array of environment variables to inject into cmd's environment
|
||||
# - optional, default: empty array
|
||||
@@ -167,14 +177,6 @@ models:
|
||||
# - if you use a custom port in cmd this *must* be set
|
||||
proxy: http://127.0.0.1:8999
|
||||
|
||||
# aliases: alternative model names that this model configuration is used for
|
||||
# - optional, default: empty array
|
||||
# - aliases must be unique globally
|
||||
# - useful for impersonating a specific model
|
||||
aliases:
|
||||
- "gpt-4o-mini"
|
||||
- "gpt-3.5-turbo"
|
||||
|
||||
# checkEndpoint: URL path to check if the server is ready
|
||||
# - optional, default: /health
|
||||
# - endpoint is expected to return an HTTP 200 response
|
||||
@@ -183,8 +185,10 @@ models:
|
||||
checkEndpoint: /custom-endpoint
|
||||
|
||||
# ttl: automatically unload the model after ttl seconds
|
||||
# - optional, default: 0
|
||||
# - ttl values must be a value greater than 0
|
||||
# - optional, default: -1 (use global default)
|
||||
# - ttl values must be -1 or a value greater than or equal to 0
|
||||
# - a ttl of -1 will use the global TTL value as the default
|
||||
# - a ttl of 0 will mean never unload
|
||||
# - a value of 0 disables automatic unloading of the model
|
||||
ttl: 60
|
||||
|
||||
@@ -192,7 +196,7 @@ models:
|
||||
# - optional, default: ""
|
||||
# - useful for when the upstream server expects a specific model name that
|
||||
# is different from the model's ID
|
||||
useModelName: "qwen:qwq"
|
||||
useModelName: "openai/gpt-oss-120B"
|
||||
|
||||
# filters: a dictionary of filter settings
|
||||
# - optional, default: empty dictionary
|
||||
@@ -211,11 +215,38 @@ models:
|
||||
# - useful for enforcing specific parameter values
|
||||
# - protected params like "model" cannot be overridden
|
||||
# - values can be strings, numbers, booleans, arrays, or objects
|
||||
# - always runs for the model
|
||||
setParams:
|
||||
# Example: enforce specific sampling parameters
|
||||
temperature: 0.7
|
||||
top_p: 0.9
|
||||
|
||||
# setParamsByID: a dictionary of parameters to set based on the model ID
|
||||
# - optional, default: empty dictionary
|
||||
# - combine with aliases to create variant behaviour without reloading the model
|
||||
# - parameters are set in the request body JSON
|
||||
# - run after setParams so it will override any settings
|
||||
# - protected params like "model" cannot be overridden
|
||||
# - values can be strings, numbers, booleans, arrays, or objects
|
||||
# - model aliases will be automatically created for each key
|
||||
setParamsByID:
|
||||
"${MODEL_ID}":
|
||||
chat_template_kwargs:
|
||||
reasoning_effort: medium
|
||||
"${MODEL_ID}:high":
|
||||
chat_template_kwargs:
|
||||
reasoning_effort: high
|
||||
"${MODEL_ID}:low":
|
||||
chat_template_kwargs:
|
||||
reasoning_effort: low
|
||||
|
||||
# aliases: alternative model names that this model configuration is used for
|
||||
# - optional, default: empty array
|
||||
# - aliases must be unique globally
|
||||
# - useful for impersonating a specific model
|
||||
aliases:
|
||||
- "gpt-4o-mini"
|
||||
|
||||
# metadata: a dictionary of arbitrary values that are included in /v1/models
|
||||
# - optional, default: empty dictionary
|
||||
# - while metadata can contain complex types it is recommended to keep it simple
|
||||
|
||||
@@ -27,7 +27,7 @@ ARCH=$1
|
||||
PUSH_IMAGES=${2:-false}
|
||||
|
||||
# List of allowed architectures
|
||||
ALLOWED_ARCHS=("intel" "vulkan" "musa" "cuda" "cpu" "rocm")
|
||||
ALLOWED_ARCHS=("intel" "vulkan" "musa" "cuda" "cuda13" "cpu" "rocm")
|
||||
|
||||
# Check if ARCH is in the allowed list
|
||||
if [[ ! " ${ALLOWED_ARCHS[@]} " =~ " ${ARCH} " ]]; then
|
||||
@@ -142,7 +142,7 @@ for CONTAINER_TYPE in non-root root; do
|
||||
fi
|
||||
|
||||
log_info "Building $CONTAINER_TYPE $CONTAINER_TAG $LS_VER"
|
||||
docker build -f llama-swap.Containerfile --build-arg BASE_TAG=${BASE_TAG} --build-arg LS_VER=${LS_VER} --build-arg UID=${USER_UID} \
|
||||
docker build --provenance=false -f llama-swap.Containerfile --build-arg BASE_TAG=${BASE_TAG} --build-arg LS_VER=${LS_VER} --build-arg UID=${USER_UID} \
|
||||
--build-arg LS_REPO=${LS_REPO} --build-arg GID=${USER_GID} --build-arg USER_HOME=${USER_HOME} -t ${CONTAINER_TAG} -t ${CONTAINER_LATEST} \
|
||||
--build-arg BASE_IMAGE=${BASE_IMAGE} .
|
||||
|
||||
@@ -150,7 +150,7 @@ for CONTAINER_TYPE in non-root root; do
|
||||
case "$ARCH" in
|
||||
"musa" | "vulkan")
|
||||
log_info "Adding sd-server to $CONTAINER_TAG"
|
||||
docker build -f llama-swap-sd.Containerfile \
|
||||
docker build --provenance=false -f llama-swap-sd.Containerfile \
|
||||
--build-arg BASE=${CONTAINER_TAG} \
|
||||
--build-arg SD_IMAGE=${SD_IMAGE} --build-arg SD_TAG=${SD_TAG} \
|
||||
--build-arg UID=${USER_UID} --build-arg GID=${USER_GID} \
|
||||
|
||||
@@ -123,6 +123,8 @@ type Config struct {
|
||||
LogTimeFormat string `yaml:"logTimeFormat"`
|
||||
LogToStdout string `yaml:"logToStdout"`
|
||||
MetricsMaxInMemory int `yaml:"metricsMaxInMemory"`
|
||||
CaptureBuffer int `yaml:"captureBuffer"`
|
||||
GlobalTTL int `yaml:"globalTTL"`
|
||||
Models map[string]ModelConfig `yaml:"models"` /* key is model ID */
|
||||
Profiles map[string][]string `yaml:"profiles"`
|
||||
Groups map[string]GroupConfig `yaml:"groups"` /* key is group ID */
|
||||
@@ -201,6 +203,8 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
||||
LogTimeFormat: "",
|
||||
LogToStdout: LogToStdoutProxy,
|
||||
MetricsMaxInMemory: 1000,
|
||||
CaptureBuffer: 5,
|
||||
GlobalTTL: 0,
|
||||
}
|
||||
if err = yaml.Unmarshal([]byte(yamlStr), &config); err != nil {
|
||||
return Config{}, err
|
||||
@@ -214,6 +218,10 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
||||
return Config{}, fmt.Errorf("startPort must be greater than 1")
|
||||
}
|
||||
|
||||
if config.GlobalTTL < 0 {
|
||||
return Config{}, fmt.Errorf("globalTTL must be >= 0")
|
||||
}
|
||||
|
||||
switch config.LogToStdout {
|
||||
case LogToStdoutProxy, LogToStdoutUpstream, LogToStdoutBoth, LogToStdoutNone:
|
||||
default:
|
||||
@@ -253,6 +261,15 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
||||
modelConfig.Cmd = StripComments(modelConfig.Cmd)
|
||||
modelConfig.CmdStop = StripComments(modelConfig.CmdStop)
|
||||
|
||||
// set model TTL to globalTTL if it is the default value
|
||||
if modelConfig.UnloadAfter == MODEL_CONFIG_DEFAULT_TTL {
|
||||
modelConfig.UnloadAfter = config.GlobalTTL
|
||||
}
|
||||
|
||||
if modelConfig.UnloadAfter < 0 {
|
||||
return Config{}, fmt.Errorf("model %s: invalid TTL value %d", modelId, modelConfig.UnloadAfter)
|
||||
}
|
||||
|
||||
// Validate model macros
|
||||
for _, macro := range modelConfig.Macros {
|
||||
if err = validateMacro(macro.Name, macro.Value); err != nil {
|
||||
@@ -291,6 +308,26 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
||||
modelConfig.Proxy = strings.ReplaceAll(modelConfig.Proxy, macroSlug, macroStr)
|
||||
modelConfig.CheckEndpoint = strings.ReplaceAll(modelConfig.CheckEndpoint, macroSlug, macroStr)
|
||||
modelConfig.Filters.StripParams = strings.ReplaceAll(modelConfig.Filters.StripParams, macroSlug, macroStr)
|
||||
modelConfig.Name = strings.ReplaceAll(modelConfig.Name, macroSlug, macroStr)
|
||||
modelConfig.Description = strings.ReplaceAll(modelConfig.Description, macroSlug, macroStr)
|
||||
|
||||
// Substitute macros in SetParamsByID keys and values
|
||||
if len(modelConfig.Filters.SetParamsByID) > 0 {
|
||||
newSetParamsByID := make(map[string]map[string]any, len(modelConfig.Filters.SetParamsByID))
|
||||
for key, paramMap := range modelConfig.Filters.SetParamsByID {
|
||||
newKey := strings.ReplaceAll(key, macroSlug, macroStr)
|
||||
newValAny, err := substituteMacroInValue(any(paramMap), entry.Name, entry.Value)
|
||||
if err != nil {
|
||||
return Config{}, fmt.Errorf("model %s filters.setParamsByID: %s", modelId, err.Error())
|
||||
}
|
||||
newParamMap, ok := newValAny.(map[string]any)
|
||||
if !ok {
|
||||
return Config{}, fmt.Errorf("model %s filters.setParamsByID: unexpected type after macro substitution", modelId)
|
||||
}
|
||||
newSetParamsByID[newKey] = newParamMap
|
||||
}
|
||||
modelConfig.Filters.SetParamsByID = newSetParamsByID
|
||||
}
|
||||
|
||||
// Substitute in metadata (type-preserving)
|
||||
if len(modelConfig.Metadata) > 0 {
|
||||
@@ -316,6 +353,8 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
||||
modelConfig.Cmd = strings.ReplaceAll(modelConfig.Cmd, macroSlug, macroStr)
|
||||
modelConfig.CmdStop = strings.ReplaceAll(modelConfig.CmdStop, macroSlug, macroStr)
|
||||
modelConfig.Proxy = strings.ReplaceAll(modelConfig.Proxy, macroSlug, macroStr)
|
||||
modelConfig.Name = strings.ReplaceAll(modelConfig.Name, macroSlug, macroStr)
|
||||
modelConfig.Description = strings.ReplaceAll(modelConfig.Description, macroSlug, macroStr)
|
||||
|
||||
if len(modelConfig.Metadata) > 0 {
|
||||
result, err := substituteMacroInValue(modelConfig.Metadata, "PORT", nextPort)
|
||||
@@ -335,6 +374,8 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
||||
"proxy": modelConfig.Proxy,
|
||||
"checkEndpoint": modelConfig.CheckEndpoint,
|
||||
"filters.stripParams": modelConfig.Filters.StripParams,
|
||||
"name": modelConfig.Name,
|
||||
"description": modelConfig.Description,
|
||||
}
|
||||
|
||||
for fieldName, fieldValue := range fieldMap {
|
||||
@@ -357,6 +398,34 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
||||
}
|
||||
}
|
||||
|
||||
// Validate SetParamsByID keys and values
|
||||
for key, paramMap := range modelConfig.Filters.SetParamsByID {
|
||||
if matches := macroPatternRegex.FindAllStringSubmatch(key, -1); len(matches) > 0 {
|
||||
return Config{}, fmt.Errorf("unknown macro '${%s}' found in model %s filters.setParamsByID key", matches[0][1], modelId)
|
||||
}
|
||||
if err := validateNestedForUnknownMacros(any(paramMap), fmt.Sprintf("model %s filters.setParamsByID[%s]", modelId, key)); err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
}
|
||||
|
||||
// Auto-register setParamsByID keys as aliases (skip the model's own ID)
|
||||
for key := range modelConfig.Filters.SetParamsByID {
|
||||
if key == modelId {
|
||||
continue
|
||||
}
|
||||
if _, exists := config.Models[key]; exists {
|
||||
return Config{}, fmt.Errorf("model %s filters.setParamsByID: key '%s' conflicts with an existing model ID", modelId, key)
|
||||
}
|
||||
if existingModel, exists := config.aliases[key]; exists {
|
||||
if existingModel != modelId {
|
||||
return Config{}, fmt.Errorf("duplicate alias '%s' in model %s filters.setParamsByID, already used by model %s", key, modelId, existingModel)
|
||||
}
|
||||
continue // already registered as explicit alias for this model
|
||||
}
|
||||
config.aliases[key] = modelId
|
||||
modelConfig.Aliases = append(modelConfig.Aliases, key)
|
||||
}
|
||||
|
||||
if _, err := url.Parse(modelConfig.Proxy); err != nil {
|
||||
return Config{}, fmt.Errorf("model %s: invalid proxy URL: %w", modelId, err)
|
||||
}
|
||||
|
||||
@@ -215,6 +215,7 @@ groups:
|
||||
},
|
||||
HealthCheckTimeout: 15,
|
||||
MetricsMaxInMemory: 1000,
|
||||
CaptureBuffer: 5,
|
||||
Profiles: map[string][]string{
|
||||
"test": {"model1", "model2"},
|
||||
},
|
||||
|
||||
@@ -848,6 +848,71 @@ func TestConfig_APIKeys_EnvMacros(t *testing.T) {
|
||||
})
|
||||
}
|
||||
|
||||
func TestConfig_GlobalTTL(t *testing.T) {
|
||||
t.Run("globalTTL sets default for models", func(t *testing.T) {
|
||||
content := `
|
||||
globalTTL: 300
|
||||
models:
|
||||
model1:
|
||||
cmd: server --port ${PORT}
|
||||
`
|
||||
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 300, config.GlobalTTL)
|
||||
assert.Equal(t, 300, config.Models["model1"].UnloadAfter)
|
||||
})
|
||||
|
||||
t.Run("model ttl=0 overrides globalTTL", func(t *testing.T) {
|
||||
content := `
|
||||
globalTTL: 300
|
||||
models:
|
||||
model1:
|
||||
cmd: server --port ${PORT}
|
||||
ttl: 0
|
||||
`
|
||||
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 0, config.Models["model1"].UnloadAfter)
|
||||
})
|
||||
|
||||
t.Run("model explicit ttl overrides globalTTL", func(t *testing.T) {
|
||||
content := `
|
||||
globalTTL: 300
|
||||
models:
|
||||
model1:
|
||||
cmd: server --port ${PORT}
|
||||
ttl: 600
|
||||
`
|
||||
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 600, config.Models["model1"].UnloadAfter)
|
||||
})
|
||||
|
||||
t.Run("globalTTL defaults to 0", func(t *testing.T) {
|
||||
content := `
|
||||
models:
|
||||
model1:
|
||||
cmd: server --port ${PORT}
|
||||
`
|
||||
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 0, config.GlobalTTL)
|
||||
assert.Equal(t, 0, config.Models["model1"].UnloadAfter)
|
||||
})
|
||||
|
||||
t.Run("negative globalTTL rejected", func(t *testing.T) {
|
||||
content := `
|
||||
globalTTL: -1
|
||||
models:
|
||||
model1:
|
||||
cmd: server --port ${PORT}
|
||||
`
|
||||
_, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
assert.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "globalTTL must be >= 0")
|
||||
})
|
||||
}
|
||||
|
||||
func TestConfig_EnvMacros(t *testing.T) {
|
||||
t.Run("basic env substitution in cmd", func(t *testing.T) {
|
||||
t.Setenv("TEST_MODEL_PATH", "/opt/models")
|
||||
|
||||
@@ -204,6 +204,7 @@ groups:
|
||||
},
|
||||
HealthCheckTimeout: 15,
|
||||
MetricsMaxInMemory: 1000,
|
||||
CaptureBuffer: 5,
|
||||
Profiles: map[string][]string{
|
||||
"test": {"model1", "model2"},
|
||||
},
|
||||
|
||||
@@ -20,6 +20,12 @@ type Filters struct {
|
||||
// SetParams is a dictionary of parameters to set/override in requests
|
||||
// Protected params (like "model") cannot be set
|
||||
SetParams map[string]any `yaml:"setParams"`
|
||||
|
||||
// SetParamsByID maps requested model IDs to parameters to set/override in requests.
|
||||
// Useful with aliases: a single loaded model can behave differently depending on
|
||||
// which alias the client used. Applied after SetParams, so it can override those values.
|
||||
// Protected params (like "model") cannot be set.
|
||||
SetParamsByID map[string]map[string]any `yaml:"setParamsByID"`
|
||||
}
|
||||
|
||||
// SanitizedStripParams returns a sorted list of parameters to strip,
|
||||
@@ -51,6 +57,33 @@ func (f Filters) SanitizedStripParams() []string {
|
||||
return cleaned
|
||||
}
|
||||
|
||||
// SanitizedSetParamsByID returns the params to set for the given requestedModelID,
|
||||
// with protected params removed and keys sorted for consistent iteration order.
|
||||
// Returns nil if the ID has no entry or all its params are protected.
|
||||
func (f Filters) SanitizedSetParamsByID(requestedModelID string) (map[string]any, []string) {
|
||||
if len(f.SetParamsByID) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
params, found := f.SetParamsByID[requestedModelID]
|
||||
if !found || len(params) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
result := make(map[string]any, len(params))
|
||||
keys := make([]string, 0, len(params))
|
||||
for key, value := range params {
|
||||
if slices.Contains(ProtectedParams, key) {
|
||||
continue
|
||||
}
|
||||
result[key] = value
|
||||
keys = append(keys, key)
|
||||
}
|
||||
sort.Strings(keys)
|
||||
if len(result) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
return result, keys
|
||||
}
|
||||
|
||||
// SanitizedSetParams returns a copy of SetParams with protected params removed
|
||||
// and keys sorted for consistent iteration order
|
||||
func (f Filters) SanitizedSetParams() (map[string]any, []string) {
|
||||
|
||||
@@ -162,6 +162,123 @@ func TestFilters_SanitizedSetParams(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestFilters_SanitizedSetParamsByID(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
setParamsByID map[string]map[string]any
|
||||
requestedModelID string
|
||||
wantParams map[string]any
|
||||
wantKeys []string
|
||||
}{
|
||||
{
|
||||
name: "empty SetParamsByID returns nil",
|
||||
setParamsByID: nil,
|
||||
requestedModelID: "model1",
|
||||
wantParams: nil,
|
||||
wantKeys: nil,
|
||||
},
|
||||
{
|
||||
name: "empty map returns nil",
|
||||
setParamsByID: map[string]map[string]any{},
|
||||
requestedModelID: "model1",
|
||||
wantParams: nil,
|
||||
wantKeys: nil,
|
||||
},
|
||||
{
|
||||
name: "non-matching model ID returns nil",
|
||||
setParamsByID: map[string]map[string]any{
|
||||
"model2": {"temperature": 0.9},
|
||||
},
|
||||
requestedModelID: "model1",
|
||||
wantParams: nil,
|
||||
wantKeys: nil,
|
||||
},
|
||||
{
|
||||
name: "matching model ID returns correct params",
|
||||
setParamsByID: map[string]map[string]any{
|
||||
"model1": {"temperature": 0.7, "top_p": 0.9},
|
||||
"model2": {"temperature": 0.5},
|
||||
},
|
||||
requestedModelID: "model1",
|
||||
wantParams: map[string]any{
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.9,
|
||||
},
|
||||
wantKeys: []string{"temperature", "top_p"},
|
||||
},
|
||||
{
|
||||
name: "protected param model is filtered out",
|
||||
setParamsByID: map[string]map[string]any{
|
||||
"model1": {
|
||||
"model": "should-be-filtered",
|
||||
"temperature": 0.7,
|
||||
},
|
||||
},
|
||||
requestedModelID: "model1",
|
||||
wantParams: map[string]any{
|
||||
"temperature": 0.7,
|
||||
},
|
||||
wantKeys: []string{"temperature"},
|
||||
},
|
||||
{
|
||||
name: "only protected param returns nil",
|
||||
setParamsByID: map[string]map[string]any{
|
||||
"model1": {
|
||||
"model": "should-be-filtered",
|
||||
},
|
||||
},
|
||||
requestedModelID: "model1",
|
||||
wantParams: nil,
|
||||
wantKeys: nil,
|
||||
},
|
||||
{
|
||||
name: "keys are sorted",
|
||||
setParamsByID: map[string]map[string]any{
|
||||
"model1": {
|
||||
"z_param": "z",
|
||||
"a_param": "a",
|
||||
"m_param": "m",
|
||||
},
|
||||
},
|
||||
requestedModelID: "model1",
|
||||
wantParams: map[string]any{
|
||||
"z_param": "z",
|
||||
"a_param": "a",
|
||||
"m_param": "m",
|
||||
},
|
||||
wantKeys: []string{"a_param", "m_param", "z_param"},
|
||||
},
|
||||
{
|
||||
name: "alias style key lookup",
|
||||
setParamsByID: map[string]map[string]any{
|
||||
"model1:high": {"reasoning_effort": "high"},
|
||||
"model1:low": {"reasoning_effort": "low"},
|
||||
},
|
||||
requestedModelID: "model1:high",
|
||||
wantParams: map[string]any{
|
||||
"reasoning_effort": "high",
|
||||
},
|
||||
wantKeys: []string{"reasoning_effort"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
f := Filters{SetParamsByID: tt.setParamsByID}
|
||||
gotParams, gotKeys := f.SanitizedSetParamsByID(tt.requestedModelID)
|
||||
|
||||
if tt.wantParams == nil {
|
||||
assert.Nil(t, gotParams)
|
||||
assert.Nil(t, gotKeys)
|
||||
return
|
||||
}
|
||||
|
||||
assert.Equal(t, tt.wantKeys, gotKeys)
|
||||
assert.Equal(t, tt.wantParams, gotParams)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestProtectedParams(t *testing.T) {
|
||||
// Verify that "model" is protected
|
||||
assert.Contains(t, ProtectedParams, "model")
|
||||
|
||||
@@ -104,6 +104,62 @@ models:
|
||||
assert.Contains(t, err.Error(), "self-reference")
|
||||
}
|
||||
|
||||
// Test macro substitution in name and description fields
|
||||
func TestConfig_MacroInNameAndDescription(t *testing.T) {
|
||||
content := `
|
||||
startPort: 10000
|
||||
macros:
|
||||
"VARIANT": "Q4_K_M"
|
||||
"FAMILY": "llama"
|
||||
|
||||
models:
|
||||
my-model:
|
||||
cmd: echo ok
|
||||
proxy: http://localhost:8080
|
||||
name: "${FAMILY} ${VARIANT}"
|
||||
description: "A ${FAMILY} model in ${VARIANT} format"
|
||||
`
|
||||
|
||||
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, "llama Q4_K_M", config.Models["my-model"].Name)
|
||||
assert.Equal(t, "A llama model in Q4_K_M format", config.Models["my-model"].Description)
|
||||
}
|
||||
|
||||
// Test MODEL_ID macro in name and description fields
|
||||
func TestConfig_ModelIDInNameAndDescription(t *testing.T) {
|
||||
content := `
|
||||
startPort: 10000
|
||||
models:
|
||||
llama-3b:
|
||||
cmd: echo ok
|
||||
proxy: http://localhost:8080
|
||||
name: "Model: ${MODEL_ID}"
|
||||
description: "Running ${MODEL_ID}"
|
||||
`
|
||||
|
||||
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, "Model: llama-3b", config.Models["llama-3b"].Name)
|
||||
assert.Equal(t, "Running llama-3b", config.Models["llama-3b"].Description)
|
||||
}
|
||||
|
||||
// Test unknown macro in name or description returns an error
|
||||
func TestConfig_UnknownMacroInNameDescription(t *testing.T) {
|
||||
content := `
|
||||
startPort: 10000
|
||||
models:
|
||||
test:
|
||||
cmd: echo ok
|
||||
proxy: http://localhost:8080
|
||||
name: "Model ${UNDEFINED}"
|
||||
`
|
||||
|
||||
_, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
assert.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "UNDEFINED")
|
||||
}
|
||||
|
||||
// Test undefined macro reference error
|
||||
func TestConfig_UndefinedMacroReference(t *testing.T) {
|
||||
content := `
|
||||
|
||||
@@ -5,6 +5,10 @@ import (
|
||||
"runtime"
|
||||
)
|
||||
|
||||
const (
	// MODEL_CONFIG_DEFAULT_TTL is the default assigned to
	// ModelConfig.UnloadAfter during YAML unmarshalling; the negative value
	// marks "not set by the user" so the global TTL is used instead.
	MODEL_CONFIG_DEFAULT_TTL = -1
)
|
||||
|
||||
type ModelConfig struct {
|
||||
Cmd string `yaml:"cmd"`
|
||||
CmdStop string `yaml:"cmdStop"`
|
||||
@@ -47,7 +51,7 @@ func (m *ModelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
Aliases: []string{},
|
||||
Env: []string{},
|
||||
CheckEndpoint: "/health",
|
||||
UnloadAfter: 0,
|
||||
UnloadAfter: MODEL_CONFIG_DEFAULT_TTL, // use GlobalTTL
|
||||
Unlisted: false,
|
||||
UseModelName: "",
|
||||
ConcurrencyLimit: 0,
|
||||
|
||||
@@ -73,6 +73,72 @@ models:
|
||||
}
|
||||
}
|
||||
|
||||
func TestConfig_SetParamsByIDAutoAlias(t *testing.T) {
|
||||
content := `
|
||||
models:
|
||||
model1:
|
||||
cmd: path/to/cmd --port ${PORT}
|
||||
filters:
|
||||
setParamsByID:
|
||||
"${MODEL_ID}:high":
|
||||
reasoning_effort: high
|
||||
"${MODEL_ID}:low":
|
||||
reasoning_effort: low
|
||||
`
|
||||
cfg, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Keys (other than the model's own ID) should be registered as aliases
|
||||
realName, found := cfg.RealModelName("model1:high")
|
||||
assert.True(t, found, "model1:high should be an auto-registered alias")
|
||||
assert.Equal(t, "model1", realName)
|
||||
|
||||
realName, found = cfg.RealModelName("model1:low")
|
||||
assert.True(t, found, "model1:low should be an auto-registered alias")
|
||||
assert.Equal(t, "model1", realName)
|
||||
|
||||
// Auto-aliases should also appear in modelConfig.Aliases
|
||||
aliases := cfg.Models["model1"].Aliases
|
||||
assert.Contains(t, aliases, "model1:high")
|
||||
assert.Contains(t, aliases, "model1:low")
|
||||
}
|
||||
|
||||
func TestConfig_SetParamsByIDAutoAliasConflictWithModelID(t *testing.T) {
|
||||
content := `
|
||||
models:
|
||||
model1:
|
||||
cmd: path/to/cmd --port ${PORT}
|
||||
filters:
|
||||
setParamsByID:
|
||||
model2:
|
||||
reasoning_effort: high
|
||||
model2:
|
||||
cmd: path/to/cmd --port ${PORT}
|
||||
`
|
||||
_, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
assert.ErrorContains(t, err, "conflicts with an existing model ID")
|
||||
}
|
||||
|
||||
func TestConfig_SetParamsByIDAutoAliasConflictWithOtherModel(t *testing.T) {
|
||||
content := `
|
||||
models:
|
||||
model1:
|
||||
cmd: path/to/cmd --port ${PORT}
|
||||
filters:
|
||||
setParamsByID:
|
||||
"shared-alias":
|
||||
reasoning_effort: high
|
||||
model2:
|
||||
cmd: path/to/cmd --port ${PORT}
|
||||
filters:
|
||||
setParamsByID:
|
||||
"shared-alias":
|
||||
reasoning_effort: low
|
||||
`
|
||||
_, err := LoadConfigFromReader(strings.NewReader(content))
|
||||
assert.ErrorContains(t, err, "duplicate alias")
|
||||
}
|
||||
|
||||
func TestConfig_ModelFiltersWithSetParams(t *testing.T) {
|
||||
content := `
|
||||
models:
|
||||
|
||||
@@ -8,6 +8,7 @@ const ConfigFileChangedEventID = 0x03
|
||||
const LogDataEventID = 0x04
|
||||
const TokenMetricsEventID = 0x05
|
||||
const ModelPreloadedEventID = 0x06
|
||||
const InFlightRequestsEventID = 0x07
|
||||
|
||||
type ProcessStateChangeEvent struct {
|
||||
ProcessName string
|
||||
@@ -58,3 +59,11 @@ type ModelPreloadedEvent struct {
|
||||
func (e ModelPreloadedEvent) Type() uint32 {
|
||||
return ModelPreloadedEventID
|
||||
}
|
||||
|
||||
type InFlightRequestsEvent struct {
|
||||
Total int
|
||||
}
|
||||
|
||||
func (e InFlightRequestsEvent) Type() uint32 {
|
||||
return InFlightRequestsEventID
|
||||
}
|
||||
|
||||
+173
-12
@@ -28,6 +28,28 @@ type TokenMetrics struct {
|
||||
PromptPerSecond float64 `json:"prompt_per_second"`
|
||||
TokensPerSecond float64 `json:"tokens_per_second"`
|
||||
DurationMs int `json:"duration_ms"`
|
||||
HasCapture bool `json:"has_capture"`
|
||||
}
|
||||
|
||||
// ReqRespCapture is a stored snapshot of one proxied request and its response.
type ReqRespCapture struct {
	ID          int               `json:"id"`
	ReqPath     string            `json:"req_path"`
	ReqHeaders  map[string]string `json:"req_headers"`
	ReqBody     []byte            `json:"req_body"`
	RespHeaders map[string]string `json:"resp_headers"`
	RespBody    []byte            `json:"resp_body"`
}

// Size estimates how many bytes this capture occupies: the length of the
// request path, both bodies, and every header name and value. The ID field
// and container overhead are not counted.
func (c *ReqRespCapture) Size() int {
	// headerBytes sums name and value lengths for one header map; ranging
	// over a nil map contributes zero.
	headerBytes := func(headers map[string]string) int {
		n := 0
		for name, value := range headers {
			n += len(name) + len(value)
		}
		return n
	}

	total := len(c.ReqPath)
	total += len(c.ReqBody)
	total += len(c.RespBody)
	total += headerBytes(c.ReqHeaders)
	total += headerBytes(c.RespHeaders)
	return total
}
|
||||
|
||||
// TokenMetricsEvent represents a token metrics event
|
||||
@@ -46,19 +68,32 @@ type metricsMonitor struct {
|
||||
maxMetrics int
|
||||
nextID int
|
||||
logger *LogMonitor
|
||||
|
||||
// capture fields
|
||||
enableCaptures bool
|
||||
captures map[int]ReqRespCapture // map for O(1) lookup by ID
|
||||
captureOrder []int // track insertion order for FIFO eviction
|
||||
captureSize int // current total size in bytes
|
||||
maxCaptureSize int // max bytes for captures
|
||||
}
|
||||
|
||||
func newMetricsMonitor(logger *LogMonitor, maxMetrics int) *metricsMonitor {
|
||||
mp := &metricsMonitor{
|
||||
logger: logger,
|
||||
maxMetrics: maxMetrics,
|
||||
// newMetricsMonitor creates a new metricsMonitor. captureBufferMB is the
|
||||
// capture buffer size in megabytes; 0 disables captures.
|
||||
func newMetricsMonitor(logger *LogMonitor, maxMetrics int, captureBufferMB int) *metricsMonitor {
|
||||
return &metricsMonitor{
|
||||
logger: logger,
|
||||
maxMetrics: maxMetrics,
|
||||
enableCaptures: captureBufferMB > 0,
|
||||
captures: make(map[int]ReqRespCapture),
|
||||
captureOrder: make([]int, 0),
|
||||
captureSize: 0,
|
||||
maxCaptureSize: captureBufferMB * 1024 * 1024,
|
||||
}
|
||||
|
||||
return mp
|
||||
}
|
||||
|
||||
// addMetrics adds a new metric to the collection and publishes an event
|
||||
func (mp *metricsMonitor) addMetrics(metric TokenMetrics) {
|
||||
// addMetrics adds a new metric to the collection and publishes an event.
|
||||
// Returns the assigned metric ID.
|
||||
func (mp *metricsMonitor) addMetrics(metric TokenMetrics) int {
|
||||
mp.mu.Lock()
|
||||
defer mp.mu.Unlock()
|
||||
|
||||
@@ -69,6 +104,49 @@ func (mp *metricsMonitor) addMetrics(metric TokenMetrics) {
|
||||
mp.metrics = mp.metrics[len(mp.metrics)-mp.maxMetrics:]
|
||||
}
|
||||
event.Emit(TokenMetricsEvent{Metrics: metric})
|
||||
return metric.ID
|
||||
}
|
||||
|
||||
// addCapture adds a new capture to the buffer with size-based eviction.
|
||||
// Captures are skipped if enableCaptures is false or if capture exceeds maxCaptureSize.
|
||||
func (mp *metricsMonitor) addCapture(capture ReqRespCapture) {
|
||||
if !mp.enableCaptures {
|
||||
return
|
||||
}
|
||||
|
||||
mp.mu.Lock()
|
||||
defer mp.mu.Unlock()
|
||||
|
||||
captureSize := capture.Size()
|
||||
if captureSize > mp.maxCaptureSize {
|
||||
mp.logger.Warnf("capture size %d exceeds max %d, skipping", captureSize, mp.maxCaptureSize)
|
||||
return
|
||||
}
|
||||
|
||||
// Evict oldest (FIFO) until room available
|
||||
for mp.captureSize+captureSize > mp.maxCaptureSize && len(mp.captureOrder) > 0 {
|
||||
oldestID := mp.captureOrder[0]
|
||||
mp.captureOrder = mp.captureOrder[1:]
|
||||
if evicted, exists := mp.captures[oldestID]; exists {
|
||||
mp.captureSize -= evicted.Size()
|
||||
delete(mp.captures, oldestID)
|
||||
}
|
||||
}
|
||||
|
||||
mp.captures[capture.ID] = capture
|
||||
mp.captureOrder = append(mp.captureOrder, capture.ID)
|
||||
mp.captureSize += captureSize
|
||||
}
|
||||
|
||||
// getCaptureByID returns a capture by its ID, or nil if not found.
|
||||
func (mp *metricsMonitor) getCaptureByID(id int) *ReqRespCapture {
|
||||
mp.mu.RLock()
|
||||
defer mp.mu.RUnlock()
|
||||
|
||||
if capture, exists := mp.captures[id]; exists {
|
||||
return &capture
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// getMetrics returns a copy of the current metrics
|
||||
@@ -97,6 +175,28 @@ func (mp *metricsMonitor) wrapHandler(
|
||||
request *http.Request,
|
||||
next func(modelID string, w http.ResponseWriter, r *http.Request) error,
|
||||
) error {
|
||||
// Capture request body and headers if captures enabled
|
||||
var reqBody []byte
|
||||
var reqHeaders map[string]string
|
||||
if mp.enableCaptures {
|
||||
if request.Body != nil {
|
||||
var err error
|
||||
reqBody, err = io.ReadAll(request.Body)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read request body for capture: %w", err)
|
||||
}
|
||||
request.Body.Close()
|
||||
request.Body = io.NopCloser(bytes.NewBuffer(reqBody))
|
||||
}
|
||||
reqHeaders = make(map[string]string)
|
||||
for key, values := range request.Header {
|
||||
if len(values) > 0 {
|
||||
reqHeaders[key] = values[0]
|
||||
}
|
||||
}
|
||||
redactHeaders(reqHeaders)
|
||||
}
|
||||
|
||||
recorder := newBodyCopier(writer)
|
||||
|
||||
// Filter Accept-Encoding to only include encodings we can decompress for metrics
|
||||
@@ -140,7 +240,6 @@ func (mp *metricsMonitor) wrapHandler(
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
if strings.Contains(recorder.Header().Get("Content-Type"), "text/event-stream") {
|
||||
if parsed, err := processStreamingResponse(modelID, recorder.StartTime(), body); err != nil {
|
||||
mp.logger.Warnf("error processing streaming response: %v, path=%s, recording minimal metrics", err, request.URL.Path)
|
||||
@@ -153,6 +252,14 @@ func (mp *metricsMonitor) wrapHandler(
|
||||
usage := parsed.Get("usage")
|
||||
timings := parsed.Get("timings")
|
||||
|
||||
// extract timings for infill - response is an array, timings are in the last element
|
||||
// see #463
|
||||
if strings.HasPrefix(request.URL.Path, "/infill") {
|
||||
if arr := parsed.Array(); len(arr) > 0 {
|
||||
timings = arr[len(arr)-1].Get("timings")
|
||||
}
|
||||
}
|
||||
|
||||
if usage.Exists() || timings.Exists() {
|
||||
if parsedMetrics, err := parseMetrics(modelID, recorder.StartTime(), usage, timings); err != nil {
|
||||
mp.logger.Warnf("error parsing metrics: %v, path=%s, recording minimal metrics", err, request.URL.Path)
|
||||
@@ -165,7 +272,38 @@ func (mp *metricsMonitor) wrapHandler(
|
||||
}
|
||||
}
|
||||
|
||||
mp.addMetrics(tm)
|
||||
// Build capture if enabled and determine if it will be stored
|
||||
var capture *ReqRespCapture
|
||||
if mp.enableCaptures {
|
||||
respHeaders := make(map[string]string)
|
||||
for key, values := range recorder.Header() {
|
||||
if len(values) > 0 {
|
||||
respHeaders[key] = values[0]
|
||||
}
|
||||
}
|
||||
redactHeaders(respHeaders)
|
||||
delete(respHeaders, "Content-Encoding")
|
||||
capture = &ReqRespCapture{
|
||||
ReqPath: request.URL.Path,
|
||||
ReqHeaders: reqHeaders,
|
||||
ReqBody: reqBody,
|
||||
RespHeaders: respHeaders,
|
||||
RespBody: body,
|
||||
}
|
||||
// Only set HasCapture if the capture will actually be stored (not too large)
|
||||
if capture.Size() <= mp.maxCaptureSize {
|
||||
tm.HasCapture = true
|
||||
}
|
||||
}
|
||||
|
||||
metricID := mp.addMetrics(tm)
|
||||
|
||||
// Store capture if enabled
|
||||
if capture != nil {
|
||||
capture.ID = metricID
|
||||
mp.addCapture(*capture)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -212,6 +350,11 @@ func processStreamingResponse(modelID string, start time.Time, body []byte) (Tok
|
||||
usage := parsed.Get("usage")
|
||||
timings := parsed.Get("timings")
|
||||
|
||||
// v1/responses format nests usage under response.usage
|
||||
if !usage.Exists() {
|
||||
usage = parsed.Get("response.usage")
|
||||
}
|
||||
|
||||
if usage.Exists() || timings.Exists() {
|
||||
return parseMetrics(modelID, start, usage, timings)
|
||||
}
|
||||
@@ -336,6 +479,24 @@ func (w *responseBodyCopier) StartTime() time.Time {
|
||||
return w.start
|
||||
}
|
||||
|
||||
// sensitiveHeaders is the set of header names, in lowercase, whose values must
// not be kept verbatim in captures.
var sensitiveHeaders = map[string]bool{
	"authorization":       true,
	"proxy-authorization": true,
	"cookie":              true,
	"set-cookie":          true,
	"x-api-key":           true,
}

// redactHeaders overwrites, in place, the value of every header whose
// lowercased name is flagged in sensitiveHeaders with "[REDACTED]". Header
// names themselves and all other entries are left untouched.
func redactHeaders(headers map[string]string) {
	for name := range headers {
		if !sensitiveHeaders[strings.ToLower(name)] {
			continue
		}
		// Assigning to an existing key while ranging does not add entries,
		// so the iteration stays well-defined.
		headers[name] = "[REDACTED]"
	}
}
|
||||
|
||||
// filterAcceptEncoding filters the Accept-Encoding header to only include
|
||||
// encodings we can decompress (gzip, deflate). This respects the client's
|
||||
// preferences while ensuring we can parse response bodies for metrics.
|
||||
@@ -347,9 +508,9 @@ func filterAcceptEncoding(acceptEncoding string) string {
|
||||
supported := map[string]bool{"gzip": true, "deflate": true}
|
||||
var filtered []string
|
||||
|
||||
for _, part := range strings.Split(acceptEncoding, ",") {
|
||||
for part := range strings.SplitSeq(acceptEncoding, ",") {
|
||||
// Parse encoding and optional quality value (e.g., "gzip;q=1.0")
|
||||
encoding := strings.TrimSpace(strings.Split(part, ";")[0])
|
||||
encoding, _, _ := strings.Cut(strings.TrimSpace(part), ";")
|
||||
if supported[strings.ToLower(encoding)] {
|
||||
filtered = append(filtered, strings.TrimSpace(part))
|
||||
}
|
||||
|
||||
+352
-29
@@ -18,7 +18,7 @@ import (
|
||||
|
||||
func TestMetricsMonitor_AddMetrics(t *testing.T) {
|
||||
t.Run("adds metrics and assigns ID", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10)
|
||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||
|
||||
metric := TokenMetrics{
|
||||
Model: "test-model",
|
||||
@@ -37,7 +37,7 @@ func TestMetricsMonitor_AddMetrics(t *testing.T) {
|
||||
})
|
||||
|
||||
t.Run("increments ID for each metric", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10)
|
||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||
|
||||
for i := 0; i < 5; i++ {
|
||||
mm.addMetrics(TokenMetrics{Model: "model"})
|
||||
@@ -51,7 +51,7 @@ func TestMetricsMonitor_AddMetrics(t *testing.T) {
|
||||
})
|
||||
|
||||
t.Run("respects max metrics limit", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 3)
|
||||
mm := newMetricsMonitor(testLogger, 3, 0)
|
||||
|
||||
// Add 5 metrics
|
||||
for i := 0; i < 5; i++ {
|
||||
@@ -71,7 +71,7 @@ func TestMetricsMonitor_AddMetrics(t *testing.T) {
|
||||
})
|
||||
|
||||
t.Run("emits TokenMetricsEvent", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10)
|
||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||
|
||||
receivedEvent := make(chan TokenMetricsEvent, 1)
|
||||
cancel := event.On(func(e TokenMetricsEvent) {
|
||||
@@ -101,14 +101,14 @@ func TestMetricsMonitor_AddMetrics(t *testing.T) {
|
||||
|
||||
func TestMetricsMonitor_GetMetrics(t *testing.T) {
|
||||
t.Run("returns empty slice when no metrics", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10)
|
||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||
metrics := mm.getMetrics()
|
||||
assert.NotNil(t, metrics)
|
||||
assert.Equal(t, 0, len(metrics))
|
||||
})
|
||||
|
||||
t.Run("returns copy of metrics", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10)
|
||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||
mm.addMetrics(TokenMetrics{Model: "model1"})
|
||||
mm.addMetrics(TokenMetrics{Model: "model2"})
|
||||
|
||||
@@ -128,7 +128,7 @@ func TestMetricsMonitor_GetMetrics(t *testing.T) {
|
||||
|
||||
func TestMetricsMonitor_GetMetricsJSON(t *testing.T) {
|
||||
t.Run("returns valid JSON for empty metrics", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10)
|
||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||
jsonData, err := mm.getMetricsJSON()
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, jsonData)
|
||||
@@ -140,7 +140,7 @@ func TestMetricsMonitor_GetMetricsJSON(t *testing.T) {
|
||||
})
|
||||
|
||||
t.Run("returns valid JSON with metrics", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10)
|
||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||
mm.addMetrics(TokenMetrics{
|
||||
Model: "model1",
|
||||
InputTokens: 100,
|
||||
@@ -168,7 +168,7 @@ func TestMetricsMonitor_GetMetricsJSON(t *testing.T) {
|
||||
|
||||
func TestMetricsMonitor_WrapHandler(t *testing.T) {
|
||||
t.Run("successful non-streaming request with usage data", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10)
|
||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||
|
||||
responseBody := `{
|
||||
"usage": {
|
||||
@@ -199,7 +199,7 @@ func TestMetricsMonitor_WrapHandler(t *testing.T) {
|
||||
})
|
||||
|
||||
t.Run("successful request with timings data", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10)
|
||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||
|
||||
responseBody := `{
|
||||
"timings": {
|
||||
@@ -239,7 +239,7 @@ func TestMetricsMonitor_WrapHandler(t *testing.T) {
|
||||
})
|
||||
|
||||
t.Run("streaming request with SSE format", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10)
|
||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||
|
||||
// Note: SSE format requires proper line breaks - each data line followed by blank line
|
||||
responseBody := `data: {"choices":[{"text":"Hello"}]}
|
||||
@@ -275,7 +275,7 @@ data: [DONE]
|
||||
})
|
||||
|
||||
t.Run("non-OK status code does not record metrics", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10)
|
||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||
|
||||
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
||||
w.WriteHeader(http.StatusBadRequest)
|
||||
@@ -295,7 +295,7 @@ data: [DONE]
|
||||
})
|
||||
|
||||
t.Run("empty response body records minimal metrics", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10)
|
||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||
|
||||
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
@@ -317,7 +317,7 @@ data: [DONE]
|
||||
})
|
||||
|
||||
t.Run("invalid JSON records minimal metrics", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10)
|
||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||
|
||||
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
@@ -341,7 +341,7 @@ data: [DONE]
|
||||
})
|
||||
|
||||
t.Run("next handler error is propagated", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10)
|
||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||
|
||||
expectedErr := assert.AnError
|
||||
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
||||
@@ -360,7 +360,7 @@ data: [DONE]
|
||||
})
|
||||
|
||||
t.Run("response without usage or timings records minimal metrics", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10)
|
||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||
|
||||
responseBody := `{"result": "ok"}`
|
||||
|
||||
@@ -384,6 +384,75 @@ data: [DONE]
|
||||
assert.Equal(t, 0, metrics[0].InputTokens)
|
||||
assert.Equal(t, 0, metrics[0].OutputTokens)
|
||||
})
|
||||
|
||||
t.Run("infill request extracts timings from last array element", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||
|
||||
// Infill response is an array with timings in the last element
|
||||
responseBody := `[
|
||||
{"content": "first chunk"},
|
||||
{"content": "second chunk"},
|
||||
{"content": "final", "timings": {
|
||||
"prompt_n": 150,
|
||||
"predicted_n": 75,
|
||||
"prompt_per_second": 200.5,
|
||||
"predicted_per_second": 35.5,
|
||||
"prompt_ms": 600.0,
|
||||
"predicted_ms": 1800.0,
|
||||
"cache_n": 30
|
||||
}}
|
||||
]`
|
||||
|
||||
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte(responseBody))
|
||||
return nil
|
||||
}
|
||||
|
||||
req := httptest.NewRequest("POST", "/infill", nil)
|
||||
rec := httptest.NewRecorder()
|
||||
ginCtx, _ := gin.CreateTestContext(rec)
|
||||
|
||||
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
||||
assert.NoError(t, err)
|
||||
|
||||
metrics := mm.getMetrics()
|
||||
assert.Equal(t, 1, len(metrics))
|
||||
assert.Equal(t, "test-model", metrics[0].Model)
|
||||
assert.Equal(t, 150, metrics[0].InputTokens)
|
||||
assert.Equal(t, 75, metrics[0].OutputTokens)
|
||||
assert.Equal(t, 30, metrics[0].CachedTokens)
|
||||
assert.Equal(t, 200.5, metrics[0].PromptPerSecond)
|
||||
assert.Equal(t, 35.5, metrics[0].TokensPerSecond)
|
||||
assert.Equal(t, 2400, metrics[0].DurationMs) // 600 + 1800
|
||||
})
|
||||
|
||||
t.Run("infill request with empty array records minimal metrics", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||
|
||||
responseBody := `[]`
|
||||
|
||||
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte(responseBody))
|
||||
return nil
|
||||
}
|
||||
|
||||
req := httptest.NewRequest("POST", "/infill", nil)
|
||||
rec := httptest.NewRecorder()
|
||||
ginCtx, _ := gin.CreateTestContext(rec)
|
||||
|
||||
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
||||
assert.NoError(t, err)
|
||||
|
||||
metrics := mm.getMetrics()
|
||||
assert.Equal(t, 1, len(metrics))
|
||||
assert.Equal(t, "test-model", metrics[0].Model)
|
||||
assert.Equal(t, 0, metrics[0].InputTokens)
|
||||
assert.Equal(t, 0, metrics[0].OutputTokens)
|
||||
})
|
||||
}
|
||||
|
||||
func TestMetricsMonitor_ResponseBodyCopier(t *testing.T) {
|
||||
@@ -437,7 +506,7 @@ func TestMetricsMonitor_ResponseBodyCopier(t *testing.T) {
|
||||
|
||||
func TestMetricsMonitor_Concurrent(t *testing.T) {
|
||||
t.Run("concurrent addMetrics is safe", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 1000)
|
||||
mm := newMetricsMonitor(testLogger, 1000, 0)
|
||||
|
||||
var wg sync.WaitGroup
|
||||
numGoroutines := 10
|
||||
@@ -464,7 +533,7 @@ func TestMetricsMonitor_Concurrent(t *testing.T) {
|
||||
})
|
||||
|
||||
t.Run("concurrent reads and writes are safe", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 100)
|
||||
mm := newMetricsMonitor(testLogger, 100, 0)
|
||||
|
||||
done := make(chan bool)
|
||||
|
||||
@@ -502,7 +571,7 @@ func TestMetricsMonitor_Concurrent(t *testing.T) {
|
||||
|
||||
func TestMetricsMonitor_ParseMetrics(t *testing.T) {
|
||||
t.Run("prefers timings over usage data", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10)
|
||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||
|
||||
// Timings should take precedence over usage
|
||||
responseBody := `{
|
||||
@@ -542,7 +611,7 @@ func TestMetricsMonitor_ParseMetrics(t *testing.T) {
|
||||
})
|
||||
|
||||
t.Run("handles missing cache_n in timings", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10)
|
||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||
|
||||
responseBody := `{
|
||||
"timings": {
|
||||
@@ -577,7 +646,7 @@ func TestMetricsMonitor_ParseMetrics(t *testing.T) {
|
||||
|
||||
func TestMetricsMonitor_StreamingResponse(t *testing.T) {
|
||||
t.Run("finds metrics in last valid SSE data", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10)
|
||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||
|
||||
// Metrics should be found in the last data line before [DONE]
|
||||
responseBody := `data: {"choices":[{"text":"First"}]}
|
||||
@@ -611,7 +680,7 @@ data: [DONE]
|
||||
})
|
||||
|
||||
t.Run("handles streaming with no valid JSON records minimal metrics", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10)
|
||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||
|
||||
responseBody := `data: not json
|
||||
|
||||
@@ -640,8 +709,37 @@ data: [DONE]
|
||||
assert.Equal(t, 0, metrics[0].OutputTokens)
|
||||
})
|
||||
|
||||
t.Run("v1/responses format with nested response.usage", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||
|
||||
// v1/responses SSE format: usage is nested under response.usage
|
||||
responseBody := "event: response.completed\n" +
|
||||
`data: {"type":"response.completed","response":{"id":"resp_abc","object":"response","created_at":1773416985,"status":"completed","model":"test-model","output":[],"usage":{"input_tokens":17,"output_tokens":23,"total_tokens":40}}}` +
|
||||
"\n\n"
|
||||
|
||||
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
||||
w.Header().Set("Content-Type", "text/event-stream")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte(responseBody))
|
||||
return nil
|
||||
}
|
||||
|
||||
req := httptest.NewRequest("POST", "/v1/responses", nil)
|
||||
rec := httptest.NewRecorder()
|
||||
ginCtx, _ := gin.CreateTestContext(rec)
|
||||
|
||||
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
||||
assert.NoError(t, err)
|
||||
|
||||
metrics := mm.getMetrics()
|
||||
assert.Equal(t, 1, len(metrics))
|
||||
assert.Equal(t, "test-model", metrics[0].Model)
|
||||
assert.Equal(t, 17, metrics[0].InputTokens)
|
||||
assert.Equal(t, 23, metrics[0].OutputTokens)
|
||||
})
|
||||
|
||||
t.Run("handles empty streaming response records minimal metrics", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10)
|
||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||
|
||||
responseBody := ``
|
||||
|
||||
@@ -669,7 +767,7 @@ data: [DONE]
|
||||
|
||||
// Benchmark tests
|
||||
func BenchmarkMetricsMonitor_AddMetrics(b *testing.B) {
|
||||
mm := newMetricsMonitor(testLogger, 1000)
|
||||
mm := newMetricsMonitor(testLogger, 1000, 0)
|
||||
|
||||
metric := TokenMetrics{
|
||||
Model: "test-model",
|
||||
@@ -690,7 +788,7 @@ func BenchmarkMetricsMonitor_AddMetrics(b *testing.B) {
|
||||
|
||||
func BenchmarkMetricsMonitor_AddMetrics_SmallBuffer(b *testing.B) {
|
||||
// Test performance with a smaller buffer where wrapping occurs more frequently
|
||||
mm := newMetricsMonitor(testLogger, 100)
|
||||
mm := newMetricsMonitor(testLogger, 100, 0)
|
||||
|
||||
metric := TokenMetrics{
|
||||
Model: "test-model",
|
||||
@@ -711,7 +809,7 @@ func BenchmarkMetricsMonitor_AddMetrics_SmallBuffer(b *testing.B) {
|
||||
|
||||
func TestMetricsMonitor_WrapHandler_Compression(t *testing.T) {
|
||||
t.Run("gzip encoded response", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10)
|
||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||
|
||||
responseBody := `{"usage": {"prompt_tokens": 100, "completion_tokens": 50}}`
|
||||
|
||||
@@ -745,7 +843,7 @@ func TestMetricsMonitor_WrapHandler_Compression(t *testing.T) {
|
||||
})
|
||||
|
||||
t.Run("deflate encoded response", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10)
|
||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||
|
||||
responseBody := `{"usage": {"prompt_tokens": 200, "completion_tokens": 75}}`
|
||||
|
||||
@@ -779,7 +877,7 @@ func TestMetricsMonitor_WrapHandler_Compression(t *testing.T) {
|
||||
})
|
||||
|
||||
t.Run("invalid gzip data records minimal metrics", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10)
|
||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||
|
||||
// Invalid compressed data
|
||||
invalidData := []byte("this is not gzip data")
|
||||
@@ -807,7 +905,7 @@ func TestMetricsMonitor_WrapHandler_Compression(t *testing.T) {
|
||||
})
|
||||
|
||||
t.Run("unknown encoding treated as uncompressed", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10)
|
||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||
|
||||
responseBody := `{"usage": {"prompt_tokens": 300, "completion_tokens": 100}}`
|
||||
|
||||
@@ -832,3 +930,228 @@ func TestMetricsMonitor_WrapHandler_Compression(t *testing.T) {
|
||||
assert.Equal(t, 100, metrics[0].OutputTokens)
|
||||
})
|
||||
}
|
||||
|
||||
func TestReqRespCapture_Size(t *testing.T) {
|
||||
t.Run("calculates size correctly", func(t *testing.T) {
|
||||
capture := ReqRespCapture{
|
||||
ID: 1,
|
||||
ReqPath: "/v1/chat/completions", // 20 bytes
|
||||
ReqHeaders: map[string]string{
|
||||
"Content-Type": "application/json", // 12 + 16 = 28
|
||||
},
|
||||
ReqBody: []byte("request body"), // 12 bytes
|
||||
RespHeaders: map[string]string{
|
||||
"X-Test": "value", // 6 + 5 = 11
|
||||
},
|
||||
RespBody: []byte("response body"), // 13 bytes
|
||||
}
|
||||
|
||||
// Expected: 20 + 12 + 13 + 28 + 11 = 84
|
||||
assert.Equal(t, 84, capture.Size())
|
||||
})
|
||||
|
||||
t.Run("handles empty capture", func(t *testing.T) {
|
||||
capture := ReqRespCapture{}
|
||||
assert.Equal(t, 0, capture.Size())
|
||||
})
|
||||
}
|
||||
|
||||
func TestMetricsMonitor_AddCapture(t *testing.T) {
|
||||
t.Run("does nothing when captures disabled", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||
|
||||
capture := ReqRespCapture{
|
||||
ID: 0,
|
||||
ReqBody: []byte("test"),
|
||||
}
|
||||
mm.addCapture(capture)
|
||||
|
||||
// Should not store capture
|
||||
assert.Nil(t, mm.getCaptureByID(0))
|
||||
})
|
||||
|
||||
t.Run("adds capture when enabled", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10, 5)
|
||||
|
||||
capture := ReqRespCapture{
|
||||
ID: 0,
|
||||
ReqBody: []byte("test request"),
|
||||
RespBody: []byte("test response"),
|
||||
}
|
||||
mm.addCapture(capture)
|
||||
|
||||
retrieved := mm.getCaptureByID(0)
|
||||
assert.NotNil(t, retrieved)
|
||||
assert.Equal(t, 0, retrieved.ID)
|
||||
assert.Equal(t, []byte("test request"), retrieved.ReqBody)
|
||||
assert.Equal(t, []byte("test response"), retrieved.RespBody)
|
||||
})
|
||||
|
||||
t.Run("evicts oldest when exceeding max size", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10, 5)
|
||||
mm.maxCaptureSize = 100 // Set small limit for test
|
||||
|
||||
// Add captures that will exceed the limit
|
||||
capture1 := ReqRespCapture{ID: 0, ReqBody: make([]byte, 40)}
|
||||
capture2 := ReqRespCapture{ID: 1, ReqBody: make([]byte, 40)}
|
||||
capture3 := ReqRespCapture{ID: 2, ReqBody: make([]byte, 40)}
|
||||
|
||||
mm.addCapture(capture1)
|
||||
mm.addCapture(capture2)
|
||||
// Adding capture3 should evict capture1
|
||||
mm.addCapture(capture3)
|
||||
|
||||
assert.Nil(t, mm.getCaptureByID(0), "capture 0 should be evicted")
|
||||
assert.NotNil(t, mm.getCaptureByID(1), "capture 1 should exist")
|
||||
assert.NotNil(t, mm.getCaptureByID(2), "capture 2 should exist")
|
||||
})
|
||||
|
||||
t.Run("skips capture larger than max size", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10, 5)
|
||||
mm.maxCaptureSize = 100
|
||||
|
||||
// Add a capture larger than max
|
||||
largeCapture := ReqRespCapture{ID: 0, ReqBody: make([]byte, 200)}
|
||||
mm.addCapture(largeCapture)
|
||||
|
||||
assert.Nil(t, mm.getCaptureByID(0), "oversized capture should not be stored")
|
||||
})
|
||||
}
|
||||
|
||||
func TestMetricsMonitor_GetCaptureByID(t *testing.T) {
|
||||
t.Run("returns nil for non-existent ID", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10, 5)
|
||||
|
||||
assert.Nil(t, mm.getCaptureByID(999))
|
||||
})
|
||||
|
||||
t.Run("returns capture by ID", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10, 5)
|
||||
|
||||
capture := ReqRespCapture{
|
||||
ID: 42,
|
||||
ReqBody: []byte("test"),
|
||||
}
|
||||
mm.addCapture(capture)
|
||||
|
||||
retrieved := mm.getCaptureByID(42)
|
||||
assert.NotNil(t, retrieved)
|
||||
assert.Equal(t, 42, retrieved.ID)
|
||||
})
|
||||
}
|
||||
|
||||
func TestRedactHeaders(t *testing.T) {
|
||||
t.Run("redacts sensitive headers", func(t *testing.T) {
|
||||
headers := map[string]string{
|
||||
"Authorization": "Bearer secret-token",
|
||||
"Proxy-Authorization": "Basic creds",
|
||||
"Cookie": "session=abc123",
|
||||
"Set-Cookie": "session=xyz789",
|
||||
"X-Api-Key": "sk-12345",
|
||||
"Content-Type": "application/json",
|
||||
"X-Custom": "safe-value",
|
||||
}
|
||||
|
||||
redactHeaders(headers)
|
||||
|
||||
assert.Equal(t, "[REDACTED]", headers["Authorization"])
|
||||
assert.Equal(t, "[REDACTED]", headers["Proxy-Authorization"])
|
||||
assert.Equal(t, "[REDACTED]", headers["Cookie"])
|
||||
assert.Equal(t, "[REDACTED]", headers["Set-Cookie"])
|
||||
assert.Equal(t, "[REDACTED]", headers["X-Api-Key"])
|
||||
assert.Equal(t, "application/json", headers["Content-Type"])
|
||||
assert.Equal(t, "safe-value", headers["X-Custom"])
|
||||
})
|
||||
|
||||
t.Run("handles mixed case header names", func(t *testing.T) {
|
||||
headers := map[string]string{
|
||||
"authorization": "Bearer token",
|
||||
"COOKIE": "session=abc",
|
||||
"x-api-key": "key123",
|
||||
}
|
||||
|
||||
redactHeaders(headers)
|
||||
|
||||
assert.Equal(t, "[REDACTED]", headers["authorization"])
|
||||
assert.Equal(t, "[REDACTED]", headers["COOKIE"])
|
||||
assert.Equal(t, "[REDACTED]", headers["x-api-key"])
|
||||
})
|
||||
|
||||
t.Run("handles empty headers", func(t *testing.T) {
|
||||
headers := map[string]string{}
|
||||
redactHeaders(headers)
|
||||
assert.Empty(t, headers)
|
||||
})
|
||||
}
|
||||
|
||||
func TestMetricsMonitor_WrapHandler_Capture(t *testing.T) {
|
||||
t.Run("captures request and response when enabled", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10, 5)
|
||||
|
||||
requestBody := `{"model": "test", "prompt": "hello"}`
|
||||
responseBody := `{"usage": {"prompt_tokens": 100, "completion_tokens": 50}}`
|
||||
|
||||
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.Header().Set("X-Custom", "header-value")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte(responseBody))
|
||||
return nil
|
||||
}
|
||||
|
||||
req := httptest.NewRequest("POST", "/test", bytes.NewBufferString(requestBody))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("Authorization", "Bearer secret")
|
||||
rec := httptest.NewRecorder()
|
||||
ginCtx, _ := gin.CreateTestContext(rec)
|
||||
|
||||
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Check metric was recorded
|
||||
metrics := mm.getMetrics()
|
||||
assert.Equal(t, 1, len(metrics))
|
||||
metricID := metrics[0].ID
|
||||
|
||||
// Check capture was stored with same ID
|
||||
capture := mm.getCaptureByID(metricID)
|
||||
assert.NotNil(t, capture)
|
||||
assert.Equal(t, metricID, capture.ID)
|
||||
assert.Equal(t, []byte(requestBody), capture.ReqBody)
|
||||
assert.Equal(t, []byte(responseBody), capture.RespBody)
|
||||
assert.Equal(t, "/test", capture.ReqPath)
|
||||
assert.Equal(t, "application/json", capture.ReqHeaders["Content-Type"])
|
||||
assert.Equal(t, "[REDACTED]", capture.ReqHeaders["Authorization"])
|
||||
assert.Equal(t, "application/json", capture.RespHeaders["Content-Type"])
|
||||
assert.Equal(t, "header-value", capture.RespHeaders["X-Custom"])
|
||||
})
|
||||
|
||||
t.Run("does not capture when disabled", func(t *testing.T) {
|
||||
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||
|
||||
requestBody := `{"model": "test"}`
|
||||
responseBody := `{"usage": {"prompt_tokens": 100, "completion_tokens": 50}}`
|
||||
|
||||
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte(responseBody))
|
||||
return nil
|
||||
}
|
||||
|
||||
req := httptest.NewRequest("POST", "/test", bytes.NewBufferString(requestBody))
|
||||
rec := httptest.NewRecorder()
|
||||
ginCtx, _ := gin.CreateTestContext(rec)
|
||||
|
||||
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Metrics should still be recorded
|
||||
metrics := mm.getMetrics()
|
||||
assert.Equal(t, 1, len(metrics))
|
||||
|
||||
// But no capture
|
||||
capture := mm.getCaptureByID(metrics[0].ID)
|
||||
assert.Nil(t, capture)
|
||||
})
|
||||
}
|
||||
|
||||
+10
-10
@@ -117,12 +117,12 @@ func TestProcess_UnloadAfterTTL(t *testing.T) {
|
||||
}
|
||||
|
||||
expectedMessage := "I_sense_imminent_danger"
|
||||
config := getTestSimpleResponderConfig(expectedMessage)
|
||||
assert.Equal(t, 0, config.UnloadAfter)
|
||||
config.UnloadAfter = 3 // seconds
|
||||
assert.Equal(t, 3, config.UnloadAfter)
|
||||
conf := getTestSimpleResponderConfig(expectedMessage)
|
||||
assert.Equal(t, config.MODEL_CONFIG_DEFAULT_TTL, conf.UnloadAfter)
|
||||
conf.UnloadAfter = 3 // seconds
|
||||
assert.Equal(t, 3, conf.UnloadAfter)
|
||||
|
||||
process := NewProcess("ttl_test", 2, config, debugLogger, debugLogger)
|
||||
process := NewProcess("ttl_test", 2, conf, debugLogger, debugLogger)
|
||||
defer process.Stop()
|
||||
|
||||
// this should take 4 seconds
|
||||
@@ -159,12 +159,12 @@ func TestProcess_LowTTLValue(t *testing.T) {
|
||||
t.Skip("skipping test, edit process_test.go to run it ")
|
||||
}
|
||||
|
||||
config := getTestSimpleResponderConfig("fast_ttl")
|
||||
assert.Equal(t, 0, config.UnloadAfter)
|
||||
config.UnloadAfter = 1 // second
|
||||
assert.Equal(t, 1, config.UnloadAfter)
|
||||
conf := getTestSimpleResponderConfig("fast_ttl")
|
||||
assert.Equal(t, config.MODEL_CONFIG_DEFAULT_TTL, conf.UnloadAfter)
|
||||
conf.UnloadAfter = 1 // second
|
||||
assert.Equal(t, 1, conf.UnloadAfter)
|
||||
|
||||
process := NewProcess("ttl", 2, config, debugLogger, debugLogger)
|
||||
process := NewProcess("ttl", 2, conf, debugLogger, debugLogger)
|
||||
defer process.Stop()
|
||||
|
||||
for i := 0; i < 100; i++ {
|
||||
|
||||
+77
-20
@@ -28,6 +28,40 @@ const (
|
||||
|
||||
type proxyCtxKey string
|
||||
|
||||
// InflightCounter is a mutex-guarded integer counter. Every method takes the
// lock, so the value can be read and updated from concurrent goroutines.
type InflightCounter struct {
	mu    sync.Mutex
	total int
}

// newInflightCounter returns a counter starting at zero.
func newInflightCounter() *InflightCounter {
	return new(InflightCounter)
}

// Current returns the present value of the counter.
func (ic *InflightCounter) Current() int {
	ic.mu.Lock()
	defer ic.mu.Unlock()

	return ic.total
}

// Increment adds one to the counter and returns the updated value.
func (ic *InflightCounter) Increment() int {
	ic.mu.Lock()
	defer ic.mu.Unlock()

	ic.total++
	return ic.total
}

// Decrement subtracts one from the counter, never going below zero, and
// returns the updated value.
func (ic *InflightCounter) Decrement() int {
	ic.mu.Lock()
	defer ic.mu.Unlock()

	if ic.total <= 0 {
		return ic.total
	}
	ic.total--
	return ic.total
}
|
||||
|
||||
type ProxyManager struct {
|
||||
sync.Mutex
|
||||
|
||||
@@ -43,6 +77,8 @@ type ProxyManager struct {
|
||||
|
||||
processGroups map[string]*ProcessGroup
|
||||
|
||||
inFlightCounter *InflightCounter
|
||||
|
||||
// shutdown signaling
|
||||
shutdownCtx context.Context
|
||||
shutdownCancel context.CancelFunc
|
||||
@@ -151,10 +187,12 @@ func New(proxyConfig config.Config) *ProxyManager {
|
||||
muxLogger: muxLogger,
|
||||
upstreamLogger: upstreamLogger,
|
||||
|
||||
metricsMonitor: newMetricsMonitor(proxyLogger, maxMetrics),
|
||||
metricsMonitor: newMetricsMonitor(proxyLogger, maxMetrics, proxyConfig.CaptureBuffer),
|
||||
|
||||
processGroups: make(map[string]*ProcessGroup),
|
||||
|
||||
inFlightCounter: newInflightCounter(),
|
||||
|
||||
shutdownCtx: shutdownCtx,
|
||||
shutdownCancel: shutdownCancel,
|
||||
|
||||
@@ -276,37 +314,37 @@ func (pm *ProxyManager) setupGinEngine() {
|
||||
|
||||
// Set up routes using the Gin engine
|
||||
// Protected routes use pm.apiKeyAuth() middleware
|
||||
pm.ginEngine.POST("/v1/chat/completions", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||
pm.ginEngine.POST("/v1/responses", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||
pm.ginEngine.POST("/v1/chat/completions", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
||||
pm.ginEngine.POST("/v1/responses", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
||||
// Support legacy /v1/completions api, see issue #12
|
||||
pm.ginEngine.POST("/v1/completions", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||
pm.ginEngine.POST("/v1/completions", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
||||
// Support anthropic /v1/messages (added https://github.com/ggml-org/llama.cpp/pull/17570)
|
||||
pm.ginEngine.POST("/v1/messages", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||
pm.ginEngine.POST("/v1/messages", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
||||
// Support anthropic count_tokens API (Also added in the above PR)
|
||||
pm.ginEngine.POST("/v1/messages/count_tokens", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||
pm.ginEngine.POST("/v1/messages/count_tokens", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
||||
|
||||
// Support embeddings and reranking
|
||||
pm.ginEngine.POST("/v1/embeddings", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||
pm.ginEngine.POST("/v1/embeddings", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
||||
|
||||
// llama-server's /reranking endpoint + aliases
|
||||
pm.ginEngine.POST("/reranking", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||
pm.ginEngine.POST("/rerank", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||
pm.ginEngine.POST("/v1/rerank", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||
pm.ginEngine.POST("/v1/reranking", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||
pm.ginEngine.POST("/reranking", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
||||
pm.ginEngine.POST("/rerank", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
||||
pm.ginEngine.POST("/v1/rerank", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
||||
pm.ginEngine.POST("/v1/reranking", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
||||
|
||||
// llama-server's /infill endpoint for code infilling
|
||||
pm.ginEngine.POST("/infill", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||
pm.ginEngine.POST("/infill", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
||||
|
||||
// llama-server's /completion endpoint
|
||||
pm.ginEngine.POST("/completion", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||
pm.ginEngine.POST("/completion", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
||||
|
||||
// Support audio/speech endpoint
|
||||
pm.ginEngine.POST("/v1/audio/speech", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||
pm.ginEngine.POST("/v1/audio/voices", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||
pm.ginEngine.GET("/v1/audio/voices", pm.apiKeyAuth(), pm.proxyGETModelHandler)
|
||||
pm.ginEngine.POST("/v1/audio/transcriptions", pm.apiKeyAuth(), pm.proxyOAIPostFormHandler)
|
||||
pm.ginEngine.POST("/v1/images/generations", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||
pm.ginEngine.POST("/v1/images/edits", pm.apiKeyAuth(), pm.proxyOAIPostFormHandler)
|
||||
pm.ginEngine.POST("/v1/audio/speech", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
||||
pm.ginEngine.POST("/v1/audio/voices", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
||||
pm.ginEngine.GET("/v1/audio/voices", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyGETModelHandler)
|
||||
pm.ginEngine.POST("/v1/audio/transcriptions", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyOAIPostFormHandler)
|
||||
pm.ginEngine.POST("/v1/images/generations", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
||||
pm.ginEngine.POST("/v1/images/edits", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyOAIPostFormHandler)
|
||||
|
||||
pm.ginEngine.GET("/v1/models", pm.apiKeyAuth(), pm.listModelsHandler)
|
||||
|
||||
@@ -325,7 +363,7 @@ func (pm *ProxyManager) setupGinEngine() {
|
||||
pm.ginEngine.GET("/upstream", func(c *gin.Context) {
|
||||
c.Redirect(http.StatusFound, "/ui/models")
|
||||
})
|
||||
pm.ginEngine.Any("/upstream/*upstreamPath", pm.apiKeyAuth(), pm.proxyToUpstream)
|
||||
pm.ginEngine.Any("/upstream/*upstreamPath", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyToUpstream)
|
||||
pm.ginEngine.GET("/unload", pm.apiKeyAuth(), pm.unloadAllModelsHandler)
|
||||
pm.ginEngine.GET("/running", pm.apiKeyAuth(), pm.listRunningProcessesHandler)
|
||||
pm.ginEngine.GET("/health", func(c *gin.Context) {
|
||||
@@ -389,6 +427,14 @@ func (pm *ProxyManager) setupGinEngine() {
|
||||
gin.DisableConsoleColor()
|
||||
}
|
||||
|
||||
func (pm *ProxyManager) trackInflight() gin.HandlerFunc {
|
||||
return func(c *gin.Context) {
|
||||
event.Emit(InFlightRequestsEvent{Total: pm.inFlightCounter.Increment()})
|
||||
defer event.Emit(InFlightRequestsEvent{Total: pm.inFlightCounter.Decrement()})
|
||||
c.Next()
|
||||
}
|
||||
}
|
||||
|
||||
// ServeHTTP implements http.Handler interface
|
||||
func (pm *ProxyManager) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||
pm.ginEngine.ServeHTTP(w, r)
|
||||
@@ -674,6 +720,17 @@ func (pm *ProxyManager) proxyInferenceHandler(c *gin.Context) {
|
||||
}
|
||||
}
|
||||
|
||||
// setParamsByID: set params based on the requested model ID (runs after setParams, can override it)
|
||||
setParamsByIDParams, setParamsByIDKeys := pm.config.Models[modelID].Filters.SanitizedSetParamsByID(requestedModel)
|
||||
for _, key := range setParamsByIDKeys {
|
||||
pm.proxyLogger.Debugf("<%s> setting param by id: %s", requestedModel, key)
|
||||
bodyBytes, err = sjson.SetBytes(bodyBytes, key, setParamsByIDParams[key])
|
||||
if err != nil {
|
||||
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error setting parameter %s in request", key))
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
pm.proxyLogger.Debugf("ProxyManager using local Process for model: %s", requestedModel)
|
||||
nextHandler = processGroup.ProxyRequest
|
||||
} else if pm.peerProxy != nil && pm.peerProxy.HasPeerModel(requestedModel) {
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
@@ -13,12 +14,13 @@ import (
|
||||
)
|
||||
|
||||
type Model struct {
|
||||
Id string `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Description string `json:"description"`
|
||||
State string `json:"state"`
|
||||
Unlisted bool `json:"unlisted"`
|
||||
PeerID string `json:"peerID"`
|
||||
Id string `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Description string `json:"description"`
|
||||
State string `json:"state"`
|
||||
Unlisted bool `json:"unlisted"`
|
||||
PeerID string `json:"peerID"`
|
||||
Aliases []string `json:"aliases,omitempty"`
|
||||
}
|
||||
|
||||
func addApiHandlers(pm *ProxyManager) {
|
||||
@@ -31,6 +33,7 @@ func addApiHandlers(pm *ProxyManager) {
|
||||
apiGroup.GET("/events", pm.apiSendEvents)
|
||||
apiGroup.GET("/metrics", pm.apiGetMetrics)
|
||||
apiGroup.GET("/version", pm.apiGetVersion)
|
||||
apiGroup.GET("/captures/:id", pm.apiGetCapture)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -81,6 +84,7 @@ func (pm *ProxyManager) getModelStatus() []Model {
|
||||
Description: pm.config.Models[modelID].Description,
|
||||
State: state,
|
||||
Unlisted: pm.config.Models[modelID].Unlisted,
|
||||
Aliases: pm.config.Models[modelID].Aliases,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -105,6 +109,7 @@ const (
|
||||
msgTypeModelStatus messageType = "modelStatus"
|
||||
msgTypeLogData messageType = "logData"
|
||||
msgTypeMetrics messageType = "metrics"
|
||||
msgTypeInFlight messageType = "inflight"
|
||||
)
|
||||
|
||||
type messageEnvelope struct {
|
||||
@@ -164,6 +169,18 @@ func (pm *ProxyManager) apiSendEvents(c *gin.Context) {
|
||||
}
|
||||
}
|
||||
|
||||
sendInFlight := func(total int) {
|
||||
jsonData, err := json.Marshal(gin.H{"total": total})
|
||||
if err == nil {
|
||||
select {
|
||||
case sendBuffer <- messageEnvelope{Type: msgTypeInFlight, Data: string(jsonData)}:
|
||||
case <-ctx.Done():
|
||||
return
|
||||
default:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Send updated models list
|
||||
*/
|
||||
@@ -191,11 +208,19 @@ func (pm *ProxyManager) apiSendEvents(c *gin.Context) {
|
||||
sendMetrics([]TokenMetrics{e.Metrics})
|
||||
})()
|
||||
|
||||
/**
|
||||
* Send in-flight request stats related to token stats "Waiting: N" count.
|
||||
*/
|
||||
defer event.On(func(e InFlightRequestsEvent) {
|
||||
sendInFlight(e.Total)
|
||||
})()
|
||||
|
||||
// send initial batch of data
|
||||
sendLogData("proxy", pm.proxyLogger.GetHistory())
|
||||
sendLogData("upstream", pm.upstreamLogger.GetHistory())
|
||||
sendModels()
|
||||
sendMetrics(pm.metricsMonitor.getMetrics())
|
||||
sendInFlight(pm.inFlightCounter.Current())
|
||||
|
||||
for {
|
||||
select {
|
||||
@@ -250,3 +275,20 @@ func (pm *ProxyManager) apiGetVersion(c *gin.Context) {
|
||||
"build_date": pm.buildDate,
|
||||
})
|
||||
}
|
||||
|
||||
func (pm *ProxyManager) apiGetCapture(c *gin.Context) {
|
||||
idStr := c.Param("id")
|
||||
id, err := strconv.Atoi(idStr)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid capture ID"})
|
||||
return
|
||||
}
|
||||
|
||||
capture := pm.metricsMonitor.getCaptureByID(id)
|
||||
if capture == nil {
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": "capture not found"})
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, capture)
|
||||
}
|
||||
|
||||
@@ -730,7 +730,7 @@ func TestProxyManager_RunningEndpoint(t *testing.T) {
|
||||
// Verify extended fields are present
|
||||
assert.NotEmpty(t, response.Running[0].Cmd, "cmd should be populated")
|
||||
assert.NotEmpty(t, response.Running[0].Proxy, "proxy should be populated")
|
||||
assert.Equal(t, 0, response.Running[0].TTL, "ttl should default to 0")
|
||||
assert.Equal(t, -1, response.Running[0].TTL, "ttl should default to -1 (use globalTTL)")
|
||||
})
|
||||
}
|
||||
|
||||
@@ -1046,6 +1046,61 @@ func TestProxyManager_FiltersStripParams(t *testing.T) {
|
||||
// t.Logf("%v", response)
|
||||
}
|
||||
|
||||
func TestProxyManager_FiltersSetParamsByID(t *testing.T) {
|
||||
// no explicit aliases — setParamsByID keys are auto-registered as aliases
|
||||
configStr := strings.Replace(`
|
||||
logLevel: error
|
||||
models:
|
||||
model1:
|
||||
cmd: 'SRPATH --port ${PORT} --silent --respond model1'
|
||||
proxy: "http://127.0.0.1:${PORT}"
|
||||
filters:
|
||||
setParams:
|
||||
reasoning_effort: medium
|
||||
setParamsByID:
|
||||
"${MODEL_ID}:high":
|
||||
reasoning_effort: high
|
||||
"${MODEL_ID}:low":
|
||||
reasoning_effort: low
|
||||
`, "SRPATH", simpleResponderPath, -1)
|
||||
|
||||
cfg, err := config.LoadConfigFromReader(strings.NewReader(configStr))
|
||||
if !assert.NoError(t, err, "invalid test configuration") {
|
||||
return
|
||||
}
|
||||
|
||||
proxy := New(cfg)
|
||||
defer proxy.StopProcesses(StopWaitForInflightRequest)
|
||||
|
||||
tests := []struct {
|
||||
requestedModel string
|
||||
wantEffort string
|
||||
}{
|
||||
// setParams applies, no setParamsByID match
|
||||
{requestedModel: "model1", wantEffort: "medium"},
|
||||
// setParamsByID overrides setParams
|
||||
{requestedModel: "model1:high", wantEffort: "high"},
|
||||
{requestedModel: "model1:low", wantEffort: "low"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.requestedModel, func(t *testing.T) {
|
||||
reqBody := fmt.Sprintf(`{"model":%q}`, tt.requestedModel)
|
||||
req := httptest.NewRequest("POST", "/v1/chat/completions", bytes.NewBufferString(reqBody))
|
||||
w := CreateTestResponseRecorder()
|
||||
proxy.ServeHTTP(w, req)
|
||||
assert.Equal(t, http.StatusOK, w.Code)
|
||||
|
||||
var response map[string]interface{}
|
||||
assert.NoError(t, json.Unmarshal(w.Body.Bytes(), &response))
|
||||
|
||||
requestBody, _ := response["request_body"].(string)
|
||||
gotEffort := gjson.Get(requestBody, "reasoning_effort").String()
|
||||
assert.Equal(t, tt.wantEffort, gotEffort, "reasoning_effort mismatch for model %s", tt.requestedModel)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestProxyManager_HealthEndpoint(t *testing.T) {
|
||||
config := config.AddDefaultGroupToConfig(config.Config{
|
||||
HealthCheckTimeout: 15,
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
legacy-peer-deps=true
|
||||
Generated
+976
-1179
File diff suppressed because it is too large
Load Diff
@@ -12,18 +12,18 @@
|
||||
"test:watch": "vitest"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@sveltejs/vite-plugin-svelte": "^5.0.3",
|
||||
"@sveltejs/vite-plugin-svelte": "^7.0.0",
|
||||
"@tailwindcss/vite": "^4.1.8",
|
||||
"@tsconfig/svelte": "^5.0.4",
|
||||
"@types/hast": "^3.0.4",
|
||||
"@types/node": "^25.1.0",
|
||||
"svelte": "^5.19.0",
|
||||
"svelte": "^5.46.4",
|
||||
"svelte-check": "^4.1.4",
|
||||
"tailwindcss": "^4.1.8",
|
||||
"typescript": "~5.8.3",
|
||||
"vite": "^6.3.5",
|
||||
"vite-plugin-compression2": "^2.4.0",
|
||||
"vitest": "^4.0.18"
|
||||
"vite": "^8.0.0",
|
||||
"vite-plugin-compression2": "^2.5.1",
|
||||
"vitest": "^4.1.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"highlight.js": "^11.11.1",
|
||||
|
||||
@@ -6,23 +6,28 @@
|
||||
import Models from "./routes/Models.svelte";
|
||||
import Activity from "./routes/Activity.svelte";
|
||||
import Playground from "./routes/Playground.svelte";
|
||||
import PlaygroundStub from "./routes/PlaygroundStub.svelte";
|
||||
import { enableAPIEvents } from "./stores/api";
|
||||
import { initScreenWidth, isDarkMode, appTitle, connectionState } from "./stores/theme";
|
||||
import { currentRoute } from "./stores/route";
|
||||
|
||||
const routes = {
|
||||
"/": Playground,
|
||||
"/": PlaygroundStub,
|
||||
"/models": Models,
|
||||
"/logs": LogViewer,
|
||||
"/activity": Activity,
|
||||
"*": Playground,
|
||||
"*": PlaygroundStub,
|
||||
};
|
||||
|
||||
// Sync theme to document attribute
|
||||
function handleRouteLoaded(event: { detail: { route: string | RegExp } }) {
|
||||
const route = event.detail.route;
|
||||
currentRoute.set(typeof route === "string" ? route : "/");
|
||||
}
|
||||
|
||||
$effect(() => {
|
||||
document.documentElement.setAttribute("data-theme", $isDarkMode ? "dark" : "light");
|
||||
});
|
||||
|
||||
// Sync title to document
|
||||
$effect(() => {
|
||||
const icon = $connectionState === "connecting" ? "\u{1F7E1}" : $connectionState === "connected" ? "\u{1F7E2}" : "\u{1F534}";
|
||||
document.title = `${icon} ${$appTitle}`;
|
||||
@@ -43,6 +48,11 @@
|
||||
<Header />
|
||||
|
||||
<main class="flex-1 overflow-auto p-4">
|
||||
<Router {routes} />
|
||||
<div class="h-full" class:hidden={$currentRoute !== "/"}>
|
||||
<Playground />
|
||||
</div>
|
||||
<div class="h-full" class:hidden={$currentRoute === "/"}>
|
||||
<Router {routes} on:routeLoaded={handleRouteLoaded} />
|
||||
</div>
|
||||
</main>
|
||||
</div>
|
||||
|
||||
@@ -0,0 +1,452 @@
|
||||
<script lang="ts">
|
||||
import type { ReqRespCapture } from "../lib/types";
|
||||
|
||||
interface Props {
|
||||
capture: ReqRespCapture | null;
|
||||
open: boolean;
|
||||
onclose: () => void;
|
||||
}
|
||||
|
||||
let { capture, open, onclose }: Props = $props();
|
||||
|
||||
let dialogEl: HTMLDialogElement | undefined = $state();
|
||||
|
||||
type BodyTab = "raw" | "pretty" | "chat";
|
||||
let reqBodyTab: BodyTab = $state("pretty");
|
||||
let respBodyTab: BodyTab = $state("pretty");
|
||||
let copiedReq = $state(false);
|
||||
let copiedResp = $state(false);
|
||||
|
||||
$effect(() => {
|
||||
if (open && dialogEl) {
|
||||
dialogEl.showModal();
|
||||
} else if (!open && dialogEl) {
|
||||
dialogEl.close();
|
||||
}
|
||||
});
|
||||
|
||||
// Reset tabs when capture changes
|
||||
$effect(() => {
|
||||
if (capture) {
|
||||
const reqCt = getContentType(capture.req_headers);
|
||||
const respCt = getContentType(capture.resp_headers);
|
||||
reqBodyTab = reqCt.includes("json") ? "pretty" : "raw";
|
||||
respBodyTab = respCt.includes("text/event-stream")
|
||||
? "chat"
|
||||
: respCt.includes("json")
|
||||
? "pretty"
|
||||
: "raw";
|
||||
}
|
||||
});
|
||||
|
||||
/** Forwards a dialog close to the `onclose` callback supplied via props. */
function handleDialogClose(): void {
  onclose();
}
|
||||
|
||||
/**
 * Decodes a base64-encoded body into text.
 *
 * @param body base64 string, or null/undefined/empty when there is no body
 * @returns the decoded text; "" for a missing body; the input unchanged when
 *          it cannot be decoded
 */
function decodeBody(body: string | null | undefined): string {
  if (!body) {
    return "";
  }

  try {
    // atob yields one character per byte; copy those byte values out so the
    // TextDecoder can interpret multi-byte sequences.
    const binary = atob(body);
    const bytes = new Uint8Array(binary.length);
    for (let i = 0; i < binary.length; i++) {
      bytes[i] = binary.charCodeAt(i);
    }
    return new TextDecoder().decode(bytes);
  } catch {
    return body;
  }
}
|
||||
|
||||
/**
 * Pretty-prints a JSON document with two-space indentation.
 *
 * @param str text that may or may not be valid JSON
 * @returns the re-serialised JSON, or `str` unchanged when it does not parse
 */
function formatJson(str: string): string {
  try {
    return JSON.stringify(JSON.parse(str), null, 2);
  } catch {
    return str;
  }
}
|
||||
|
||||
/**
 * Returns the lower-cased Content-Type value from a header map.
 *
 * The two common spellings ("Content-Type", then "content-type") are tried
 * first. If neither is present, the map is scanned case-insensitively, because
 * HTTP header names are not case-sensitive and a capture may carry any casing.
 *
 * @param headers header name/value map, or null/undefined when absent
 * @returns the lower-cased content type, or "" when none is set
 */
function getContentType(
  headers: Record<string, string> | null | undefined,
): string {
  if (!headers) return "";

  const exact = headers["Content-Type"] || headers["content-type"];
  if (exact) return exact.toLowerCase();

  for (const [name, value] of Object.entries(headers)) {
    if (value && name.toLowerCase() === "content-type") {
      return value.toLowerCase();
    }
  }
  return "";
}
|
||||
|
||||
/** Reports whether a content type begins with the "image/" prefix. */
function isImageContentType(contentType: string): boolean {
  const imagePrefix = "image/";
  return contentType.slice(0, imagePrefix.length) === imagePrefix;
}
|
||||
|
||||
/**
 * Reports whether a content type is textual: anything starting with "text/",
 * or containing one of the JSON, XML, or JavaScript application types.
 */
function isTextContentType(contentType: string): boolean {
  if (contentType.startsWith("text/")) {
    return true;
  }

  const textualAppTypes = [
    "application/json",
    "application/xml",
    "application/javascript",
  ];
  return textualAppTypes.some((appType) => contentType.includes(appType));
}
|
||||
|
||||
/**
 * Builds a base64 `data:` URL for an image body.
 *
 * @param body base64-encoded image data, inserted verbatim
 * @param contentType content type; anything after the first ";" is dropped
 * @returns the `data:<mime>;base64,<body>` URL
 */
function getImageDataUrl(body: string, contentType: string): string {
  const semicolonAt = contentType.indexOf(";");
  const bareType =
    semicolonAt === -1 ? contentType : contentType.slice(0, semicolonAt);
  return "data:" + bareType.trim() + ";base64," + body;
}
|
||||
|
||||
/** Text accumulated from a streamed chat completion. */
interface SSEChat {
  /** Concatenated `delta.reasoning_content` fragments. */
  reasoning: string;
  /** Concatenated `delta.content` fragments. */
  content: string;
}

/**
 * Reassembles the text of a streamed chat completion from its raw
 * server-sent-events body.
 *
 * Each `data:` line is parsed as JSON and the string fields
 * `choices[0].delta.content` and `choices[0].delta.reasoning_content` are
 * appended in order. The space after `data:` is optional, as the SSE format
 * allows. The `[DONE]` sentinel, non-data lines, unparseable payloads, and
 * non-string delta fields are skipped.
 *
 * @param text the full SSE response body
 * @returns the accumulated reasoning and content text
 */
function parseSSEChat(text: string): SSEChat {
  const result: SSEChat = { reasoning: "", content: "" };
  const dataPrefix = "data:";

  for (const line of text.split("\n")) {
    const trimmed = line.trim();
    if (!trimmed.startsWith(dataPrefix)) continue;

    const data = trimmed.slice(dataPrefix.length).trimStart();
    if (!data || data === "[DONE]") continue;

    try {
      const parsed: unknown = JSON.parse(data);
      const delta = (
        parsed as {
          choices?: Array<{ delta?: Record<string, unknown> }>;
        } | null
      )?.choices?.[0]?.delta;

      const content = delta?.content;
      if (typeof content === "string") result.content += content;

      const reasoning = delta?.reasoning_content;
      if (typeof reasoning === "string") result.reasoning += reasoning;
    } catch {
      // skip unparseable lines
    }
  }
  return result;
}
|
||||
|
||||
/**
 * Copies text to the clipboard and briefly raises the matching "copied" flag.
 *
 * On success `copiedReq` (for "req") or `copiedResp` (for "resp") is set to
 * true and reset to false after 1500 ms. If the clipboard write fails, nothing
 * happens.
 *
 * @param text - Text to place on the clipboard.
 * @param type - Which copy button was used: request ("req") or response ("resp").
 */
async function copyToClipboard(text: string, type: "req" | "resp") {
  const setCopied = (flag: boolean) => {
    if (type === "req") {
      copiedReq = flag;
    } else {
      copiedResp = flag;
    }
  };

  try {
    await navigator.clipboard.writeText(text);
  } catch {
    // ignore
    return;
  }

  setCopied(true);
  setTimeout(() => setCopied(false), 1500);
}
|
||||
|
||||
/**
 * Returns the text the response "Copy" button should copy.
 *
 * On the "chat" tab this is the reassembled SSE text: the reasoning (when
 * present) followed by a blank line and the content. On any other tab it is
 * the response body currently displayed.
 */
function getCopyText(): string {
  if (respBodyTab !== "chat") return displayedResponseBody;

  const { reasoning, content } = sseChat;
  return reasoning ? `${reasoning}\n\n${content}` : content;
}
|
||||
|
||||
// Request body derivations: content type, decoded body, and the text for the active tab

// Lower-cased request Content-Type, or "" while no capture is loaded.
let requestContentType = $derived(capture ? getContentType(capture.req_headers) : "");
// Controls whether the Pretty/Raw tabs are offered for the request body.
let isRequestJson = $derived(requestContentType.includes("json"));

// Request body passed through decodeBody; "" while no capture is loaded.
let requestBodyRaw = $derived(capture ? decodeBody(capture.req_body) : "");

// Indented JSON for JSON requests, otherwise the raw text.
let requestBodyPretty = $derived(isRequestJson ? formatJson(requestBodyRaw) : requestBodyRaw);

// Text rendered (and copied) for the currently selected request tab.
let displayedRequestBody = $derived.by(() => {
  if (reqBodyTab === "pretty") return requestBodyPretty;
  return requestBodyRaw;
});
|
||||
|
||||
// Response body derivations: content-type flags, decoded body, SSE chat text, displayed text

// Lower-cased response Content-Type, or "" while no capture is loaded.
let responseContentType = $derived(capture ? getContentType(capture.resp_headers) : "");
let isResponseImage = $derived(isImageContentType(responseContentType));
let isResponseText = $derived(isTextContentType(responseContentType));
let isResponseJson = $derived(responseContentType.includes("json"));
let isSSE = $derived(responseContentType.includes("text/event-stream"));

// Response body passed through decodeBody; "" while no capture is loaded.
let responseBodyRaw = $derived(capture ? decodeBody(capture.resp_body) : "");

// Indented JSON for JSON responses, otherwise the raw text.
let responseBodyPretty = $derived(isResponseJson ? formatJson(responseBodyRaw) : responseBodyRaw);

// Chat text rebuilt from the event stream; empty for non-SSE or empty bodies.
let sseChat = $derived.by((): SSEChat => {
  if (isSSE && responseBodyRaw) return parseSSEChat(responseBodyRaw);
  return { reasoning: "", content: "" };
});

// Text rendered for the non-chat tabs: pretty when selected, raw otherwise.
let displayedResponseBody = $derived(
  respBodyTab === "pretty" ? responseBodyPretty : responseBodyRaw,
);
|
||||
</script>
|
||||
|
||||
<dialog
|
||||
bind:this={dialogEl}
|
||||
onclose={handleDialogClose}
|
||||
class="bg-surface text-txtmain rounded-lg shadow-xl max-w-4xl w-full max-h-[90vh] p-0 backdrop:bg-black/50 m-auto"
|
||||
>
|
||||
{#if capture}
|
||||
<div class="flex flex-col max-h-[90vh]">
|
||||
<div
|
||||
class="flex justify-between items-center p-4 border-b border-card-border"
|
||||
>
|
||||
<h2 class="text-xl font-bold pb-0">Capture #{capture.id + 1}{#if capture.req_path} <span class="text-base font-mono font-normal text-txtsecondary">{capture.req_path}</span>{/if}</h2>
|
||||
<button
|
||||
onclick={() => dialogEl?.close()}
|
||||
class="text-txtsecondary hover:text-txtmain text-2xl leading-none"
|
||||
>
|
||||
×
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<div class="overflow-y-auto flex-1 p-4 space-y-4">
|
||||
<!-- Request Headers -->
|
||||
<details class="group" open>
|
||||
<summary
|
||||
class="cursor-pointer font-semibold text-sm uppercase tracking-wider text-txtsecondary hover:text-txtmain"
|
||||
>
|
||||
Request Headers
|
||||
</summary>
|
||||
<div
|
||||
class="mt-2 bg-background rounded border border-card-border overflow-auto max-h-48"
|
||||
>
|
||||
<table class="w-full text-sm">
|
||||
<tbody>
|
||||
{#each Object.entries(capture.req_headers || {}) as [key, value]}
|
||||
<tr class="border-b border-card-border-inner last:border-0">
|
||||
<td class="px-3 py-1 font-mono text-primary whitespace-nowrap"
|
||||
>{key}</td
|
||||
>
|
||||
<td class="px-3 py-1 font-mono break-all">{value}</td>
|
||||
</tr>
|
||||
{/each}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</details>
|
||||
|
||||
<!-- Request Body -->
|
||||
<details class="group" open>
|
||||
<summary
|
||||
class="cursor-pointer font-semibold text-sm uppercase tracking-wider text-txtsecondary hover:text-txtmain"
|
||||
>
|
||||
Request Body
|
||||
</summary>
|
||||
{#if requestBodyRaw}
|
||||
<div class="mt-2 flex items-center justify-between">
|
||||
<div class="flex gap-1">
|
||||
{#if isRequestJson}
|
||||
<button
|
||||
class="tab-btn"
|
||||
class:tab-btn-active={reqBodyTab === "pretty"}
|
||||
onclick={() => (reqBodyTab = "pretty")}>Pretty</button
|
||||
>
|
||||
<button
|
||||
class="tab-btn"
|
||||
class:tab-btn-active={reqBodyTab === "raw"}
|
||||
onclick={() => (reqBodyTab = "raw")}>Raw</button
|
||||
>
|
||||
{/if}
|
||||
</div>
|
||||
<button
|
||||
class="tab-btn"
|
||||
onclick={() =>
|
||||
copyToClipboard(displayedRequestBody, "req")}
|
||||
>
|
||||
{#if copiedReq}
|
||||
Copied!
|
||||
{:else}
|
||||
Copy
|
||||
{/if}
|
||||
</button>
|
||||
</div>
|
||||
<div
|
||||
class="mt-1 bg-background rounded border border-card-border overflow-auto max-h-96"
|
||||
>
|
||||
<pre
|
||||
class="p-3 text-sm font-mono whitespace-pre-wrap break-all">{displayedRequestBody}</pre>
|
||||
</div>
|
||||
{:else}
|
||||
<div
|
||||
class="mt-2 bg-background rounded border border-card-border overflow-auto max-h-96"
|
||||
>
|
||||
<pre class="p-3 text-sm font-mono whitespace-pre-wrap break-all"
|
||||
>(empty)</pre
|
||||
>
|
||||
</div>
|
||||
{/if}
|
||||
</details>
|
||||
|
||||
<!-- Response Headers -->
|
||||
<details class="group" open>
|
||||
<summary
|
||||
class="cursor-pointer font-semibold text-sm uppercase tracking-wider text-txtsecondary hover:text-txtmain"
|
||||
>
|
||||
Response Headers
|
||||
</summary>
|
||||
<div
|
||||
class="mt-2 bg-background rounded border border-card-border overflow-auto max-h-48"
|
||||
>
|
||||
<table class="w-full text-sm">
|
||||
<tbody>
|
||||
{#each Object.entries(capture.resp_headers || {}) as [key, value]}
|
||||
<tr class="border-b border-card-border-inner last:border-0">
|
||||
<td class="px-3 py-1 font-mono text-primary whitespace-nowrap"
|
||||
>{key}</td
|
||||
>
|
||||
<td class="px-3 py-1 font-mono break-all">{value}</td>
|
||||
</tr>
|
||||
{/each}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</details>
|
||||
|
||||
<!-- Response Body -->
|
||||
<details class="group" open>
|
||||
<summary
|
||||
class="cursor-pointer font-semibold text-sm uppercase tracking-wider text-txtsecondary hover:text-txtmain"
|
||||
>
|
||||
Response Body
|
||||
</summary>
|
||||
{#if isResponseImage && capture.resp_body}
|
||||
<div
|
||||
class="mt-2 bg-background rounded border border-card-border overflow-auto max-h-96"
|
||||
>
|
||||
<div class="p-3 flex justify-center">
|
||||
<img
|
||||
src={getImageDataUrl(capture.resp_body, responseContentType)}
|
||||
alt="Response"
|
||||
class="max-w-full h-auto"
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
{:else if isSSE || isResponseText}
|
||||
<div class="mt-2 flex items-center justify-between">
|
||||
<div class="flex gap-1">
|
||||
{#if isSSE}
|
||||
<button
|
||||
class="tab-btn"
|
||||
class:tab-btn-active={respBodyTab === "chat"}
|
||||
onclick={() => (respBodyTab = "chat")}>Chat</button
|
||||
>
|
||||
{/if}
|
||||
{#if isResponseJson}
|
||||
<button
|
||||
class="tab-btn"
|
||||
class:tab-btn-active={respBodyTab === "pretty"}
|
||||
onclick={() => (respBodyTab = "pretty")}>Pretty</button
|
||||
>
|
||||
{/if}
|
||||
{#if isSSE || isResponseJson}
|
||||
<button
|
||||
class="tab-btn"
|
||||
class:tab-btn-active={respBodyTab === "raw"}
|
||||
onclick={() => (respBodyTab = "raw")}>Raw</button
|
||||
>
|
||||
{/if}
|
||||
</div>
|
||||
<button
|
||||
class="tab-btn"
|
||||
onclick={() => copyToClipboard(getCopyText(), "resp")}
|
||||
>
|
||||
{#if copiedResp}
|
||||
Copied!
|
||||
{:else}
|
||||
Copy
|
||||
{/if}
|
||||
</button>
|
||||
</div>
|
||||
<div
|
||||
class="mt-1 bg-background rounded border border-card-border overflow-auto max-h-96"
|
||||
>
|
||||
{#if respBodyTab === "chat"}
|
||||
<div class="p-3 text-sm space-y-3">
|
||||
{#if sseChat.reasoning}
|
||||
<div>
|
||||
<div
|
||||
class="text-xs font-semibold uppercase tracking-wider text-txtsecondary mb-1"
|
||||
>
|
||||
Reasoning
|
||||
</div>
|
||||
<pre
|
||||
class="font-mono whitespace-pre-wrap break-all text-txtsecondary">{sseChat.reasoning}</pre>
|
||||
</div>
|
||||
{/if}
|
||||
{#if sseChat.content}
|
||||
<div>
|
||||
{#if sseChat.reasoning}
|
||||
<div
|
||||
class="text-xs font-semibold uppercase tracking-wider text-txtsecondary mb-1"
|
||||
>
|
||||
Response
|
||||
</div>
|
||||
{/if}
|
||||
<pre
|
||||
class="font-mono whitespace-pre-wrap break-all">{sseChat.content}</pre>
|
||||
</div>
|
||||
{/if}
|
||||
{#if !sseChat.reasoning && !sseChat.content}
|
||||
<pre class="font-mono">(empty)</pre>
|
||||
{/if}
|
||||
</div>
|
||||
{:else}
|
||||
<pre
|
||||
class="p-3 text-sm font-mono whitespace-pre-wrap break-all">{displayedResponseBody || "(empty)"}</pre>
|
||||
{/if}
|
||||
</div>
|
||||
{:else if responseBodyRaw}
|
||||
<div
|
||||
class="mt-2 bg-background rounded border border-card-border overflow-auto max-h-96"
|
||||
>
|
||||
<div class="p-3 text-sm text-txtsecondary italic">
|
||||
(binary data - {responseContentType || "unknown content type"})
|
||||
</div>
|
||||
</div>
|
||||
{:else}
|
||||
<div
|
||||
class="mt-2 bg-background rounded border border-card-border overflow-auto max-h-96"
|
||||
>
|
||||
<pre class="p-3 text-sm font-mono">(empty)</pre>
|
||||
</div>
|
||||
{/if}
|
||||
</details>
|
||||
</div>
|
||||
|
||||
<div class="p-4 border-t border-card-border flex justify-end">
|
||||
<button onclick={() => dialogEl?.close()} class="btn"> Close </button>
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
</dialog>
|
||||
|
||||
<style>
|
||||
.tab-btn {
|
||||
padding: 2px 10px;
|
||||
font-size: 0.75rem;
|
||||
border-radius: 4px;
|
||||
color: var(--color-txtsecondary);
|
||||
cursor: pointer;
|
||||
border: 1px solid transparent;
|
||||
background: transparent;
|
||||
transition: all 0.15s;
|
||||
}
|
||||
.tab-btn:hover {
|
||||
color: var(--color-txtmain);
|
||||
background: var(--color-secondary);
|
||||
}
|
||||
.tab-btn-active {
|
||||
color: var(--color-primary);
|
||||
background: color-mix(in srgb, var(--color-primary) 12%, transparent);
|
||||
border-color: color-mix(in srgb, var(--color-primary) 25%, transparent);
|
||||
}
|
||||
</style>
|
||||
@@ -1,6 +1,8 @@
|
||||
<script lang="ts">
|
||||
import { link, location } from "svelte-spa-router";
|
||||
import { link } from "svelte-spa-router";
|
||||
import { screenWidth, toggleTheme, isDarkMode, appTitle, isNarrow } from "../stores/theme";
|
||||
import { currentRoute } from "../stores/route";
|
||||
import { playgroundActivity } from "../stores/playgroundActivity";
|
||||
import ConnectionStatus from "./ConnectionStatus.svelte";
|
||||
|
||||
function handleTitleChange(newTitle: string): void {
|
||||
@@ -22,9 +24,10 @@
|
||||
handleTitleChange(target.textContent || "(set title)");
|
||||
}
|
||||
|
||||
function isActive(path: string, currentLocation: string): boolean {
|
||||
return path === "/" ? currentLocation === "/" : currentLocation.startsWith(path);
|
||||
function isActive(path: string, current: string): boolean {
|
||||
return path === "/" ? current === "/" : current.startsWith(path);
|
||||
}
|
||||
|
||||
</script>
|
||||
|
||||
<header
|
||||
@@ -47,8 +50,7 @@
|
||||
<a
|
||||
href="/"
|
||||
use:link
|
||||
class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap"
|
||||
class:font-semibold={isActive("/", $location)}
|
||||
class="p-1 whitespace-nowrap {isActive('/', $currentRoute) ? 'font-semibold' : ''} {$playgroundActivity ? 'activity-link' : 'text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100'}"
|
||||
>
|
||||
Playground
|
||||
</a>
|
||||
@@ -56,7 +58,7 @@
|
||||
href="/models"
|
||||
use:link
|
||||
class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap"
|
||||
class:font-semibold={isActive("/models", $location)}
|
||||
class:font-semibold={isActive("/models", $currentRoute)}
|
||||
>
|
||||
Models
|
||||
</a>
|
||||
@@ -64,7 +66,7 @@
|
||||
href="/activity"
|
||||
use:link
|
||||
class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap"
|
||||
class:font-semibold={isActive("/activity", $location)}
|
||||
class:font-semibold={isActive("/activity", $currentRoute)}
|
||||
>
|
||||
Activity
|
||||
</a>
|
||||
@@ -72,7 +74,7 @@
|
||||
href="/logs"
|
||||
use:link
|
||||
class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap"
|
||||
class:font-semibold={isActive("/logs", $location)}
|
||||
class:font-semibold={isActive("/logs", $currentRoute)}
|
||||
>
|
||||
Logs
|
||||
</a>
|
||||
@@ -96,3 +98,23 @@
|
||||
<ConnectionStatus />
|
||||
</menu>
|
||||
</header>
|
||||
|
||||
<style>
|
||||
.activity-link {
|
||||
background: linear-gradient(90deg, #6366f1, #8b5cf6, #a855f7, #8b5cf6, #6366f1);
|
||||
background-size: 200% 100%;
|
||||
-webkit-background-clip: text;
|
||||
background-clip: text;
|
||||
-webkit-text-fill-color: transparent;
|
||||
animation: gradient-shift 2s linear infinite;
|
||||
}
|
||||
|
||||
@keyframes gradient-shift {
|
||||
0% {
|
||||
background-position: 0% 50%;
|
||||
}
|
||||
100% {
|
||||
background-position: 200% 50%;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
|
||||
@@ -65,10 +65,17 @@
|
||||
});
|
||||
|
||||
let preElement: HTMLPreElement;
|
||||
let userScrolledUp = $state(false);
|
||||
|
||||
// Auto scroll to bottom when logs change
|
||||
/**
 * Scroll handler for the log view: records whether the user has moved away
 * from the bottom. More than 40px from the bottom counts as "scrolled up".
 */
function handleScroll() {
  if (preElement) {
    const distanceFromBottom =
      preElement.scrollHeight - preElement.scrollTop - preElement.clientHeight;
    userScrolledUp = distanceFromBottom > 40;
  }
}
|
||||
|
||||
// Auto scroll to bottom when logs change, unless user has scrolled up
|
||||
$effect(() => {
|
||||
if (preElement && filteredLogs) {
|
||||
if (preElement && filteredLogs && !userScrolledUp) {
|
||||
preElement.scrollTop = preElement.scrollHeight;
|
||||
}
|
||||
});
|
||||
@@ -82,7 +89,7 @@
|
||||
<div class="flex gap-2 items-center">
|
||||
<button class="btn border-0" onclick={toggleFontSize} title="Change font size">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-4 h-4">
|
||||
<path fill-rule="evenodd" d="M10.5 3.75a6 6 0 0 0-5.98 6.496A5.25 5.25 0 0 0 6.75 20.25H18a4.5 4.5 0 0 0 2.206-8.423 3.75 3.75 0 0 0-4.133-4.303A6.001 6.001 0 0 0 10.5 3.75Zm2.25 6a.75.75 0 0 0-1.5 0v4.94l-1.72-1.72a.75.75 0 0 0-1.06 1.06l3 3a.75.75 0 0 0 1.06 0l3-3a.75.75 0 1 0-1.06-1.06l-1.72 1.72V9.75Z" clip-rule="evenodd" />
|
||||
<path d="M2 4v3h5v12h3V7h5V4H2zm19 5h-9v3h3v7h3v-7h3V9z"/>
|
||||
</svg>
|
||||
</button>
|
||||
<button class="btn border-0" onclick={toggleWrapText} title="Toggle text wrap">
|
||||
@@ -127,6 +134,6 @@
|
||||
{/if}
|
||||
</div>
|
||||
<div class="rounded-lg bg-background font-mono text-sm flex-1 overflow-hidden">
|
||||
<pre bind:this={preElement} class="{textWrapClass} {fontSizeClass} h-full overflow-auto p-4">{filteredLogs}</pre>
|
||||
<pre bind:this={preElement} onscroll={handleScroll} class="{textWrapClass} {fontSizeClass} h-full overflow-auto p-4">{filteredLogs}</pre>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -165,6 +165,9 @@
|
||||
{#if model.description}
|
||||
<p class={model.unlisted ? "text-opacity-70" : ""}><em>{model.description}</em></p>
|
||||
{/if}
|
||||
{#if model.aliases && model.aliases.length > 0}
|
||||
<p class="text-xs text-txtsecondary">Aliases: {model.aliases.join(", ")}</p>
|
||||
{/if}
|
||||
</td>
|
||||
<td class="w-12">
|
||||
{#if model.state === "stopped"}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
<script lang="ts">
|
||||
import { metrics } from "../stores/api";
|
||||
import { inFlightRequests, metrics } from "../stores/api";
|
||||
import TokenHistogram from "./TokenHistogram.svelte";
|
||||
|
||||
interface HistogramData {
|
||||
@@ -15,7 +15,14 @@
|
||||
let stats = $derived.by(() => {
|
||||
const totalRequests = $metrics.length;
|
||||
if (totalRequests === 0) {
|
||||
return { totalRequests: 0, totalInputTokens: 0, totalOutputTokens: 0, tokenStats: { p99: "0", p95: "0", p50: "0" }, histogramData: null };
|
||||
return {
|
||||
totalRequests: 0,
|
||||
totalInputTokens: 0,
|
||||
totalOutputTokens: 0,
|
||||
inFlightRequests: $inFlightRequests,
|
||||
tokenStats: { p99: "0", p95: "0", p50: "0" },
|
||||
histogramData: null,
|
||||
};
|
||||
}
|
||||
|
||||
const totalInputTokens = $metrics.reduce((sum, m) => sum + m.input_tokens, 0);
|
||||
@@ -24,7 +31,14 @@
|
||||
// Calculate token statistics using output_tokens and duration_ms
|
||||
const validMetrics = $metrics.filter((m) => m.duration_ms > 0 && m.output_tokens > 0);
|
||||
if (validMetrics.length === 0) {
|
||||
return { totalRequests, totalInputTokens, totalOutputTokens, tokenStats: { p99: "0", p95: "0", p50: "0" }, histogramData: null };
|
||||
return {
|
||||
totalRequests,
|
||||
totalInputTokens,
|
||||
totalOutputTokens,
|
||||
inFlightRequests: $inFlightRequests,
|
||||
tokenStats: { p99: "0", p95: "0", p50: "0" },
|
||||
histogramData: null,
|
||||
};
|
||||
}
|
||||
|
||||
// Calculate tokens/second for each valid metric
|
||||
@@ -63,6 +77,7 @@
|
||||
totalRequests,
|
||||
totalInputTokens,
|
||||
totalOutputTokens,
|
||||
inFlightRequests: $inFlightRequests,
|
||||
tokenStats: {
|
||||
p99: p99.toFixed(2),
|
||||
p95: p95.toFixed(2),
|
||||
@@ -95,7 +110,12 @@
|
||||
|
||||
<tbody class="bg-surface divide-y divide-card-border-inner">
|
||||
<tr class="hover:bg-secondary">
|
||||
<td class="px-4 py-4 text-sm font-semibold text-gray-900 dark:text-white">{stats.totalRequests}</td>
|
||||
<td class="px-4 py-4 text-sm font-semibold text-gray-900 dark:text-white">
|
||||
<div class="flex flex-col gap-1">
|
||||
<span class="text-xs font-medium text-gray-500 dark:text-gray-400">Completed: {nf.format(stats.totalRequests)}</span>
|
||||
<span class="text-xs font-medium text-gray-500 dark:text-gray-400">Waiting: {nf.format(stats.inFlightRequests)}</span>
|
||||
</div>
|
||||
</td>
|
||||
|
||||
<td class="px-4 py-4 text-sm text-gray-700 dark:text-gray-300 border-l border-gray-200 dark:border-white/10">
|
||||
<div class="flex items-center gap-2">
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
import { models } from "../../stores/api";
|
||||
import { persistentStore } from "../../stores/persistent";
|
||||
import { transcribeAudio } from "../../lib/audioApi";
|
||||
import { playgroundStores } from "../../stores/playgroundActivity";
|
||||
import ModelSelector from "./ModelSelector.svelte";
|
||||
|
||||
const selectedModelStore = persistentStore<string>("playground-audio-model", "");
|
||||
@@ -15,18 +16,22 @@
|
||||
let fileInput = $state<HTMLInputElement | null>(null);
|
||||
let copied = $state(false);
|
||||
|
||||
const ACCEPTED_FORMATS = ['.mp3', '.wav'];
|
||||
const ACCEPTED_FORMATS = ['.mp3', '.wav', '.ogg'];
|
||||
const MAX_FILE_SIZE = 25 * 1024 * 1024; // 25MB
|
||||
|
||||
let hasModels = $derived($models.some((m) => !m.unlisted));
|
||||
|
||||
let canTranscribe = $derived(selectedFile !== null && $selectedModelStore !== "" && !isTranscribing);
|
||||
|
||||
$effect(() => {
|
||||
playgroundStores.audioTranscribing.set(isTranscribing);
|
||||
});
|
||||
|
||||
function validateFile(file: File): { valid: boolean; error?: string } {
|
||||
const ext = '.' + file.name.split('.').pop()?.toLowerCase();
|
||||
|
||||
if (!ACCEPTED_FORMATS.includes(ext)) {
|
||||
return { valid: false, error: 'Invalid file type. Accepted: MP3, WAV' };
|
||||
return { valid: false, error: 'Invalid file type. Accepted: MP3, WAV, OGG' };
|
||||
}
|
||||
|
||||
if (file.size > MAX_FILE_SIZE) {
|
||||
@@ -203,7 +208,7 @@
|
||||
<div>
|
||||
<p class="mb-2">Drag and drop an audio file here</p>
|
||||
<p class="text-sm">or use the Browse button below</p>
|
||||
<p class="text-xs mt-4">Accepted formats: MP3, WAV (max 25MB)</p>
|
||||
<p class="text-xs mt-4">Accepted formats: MP3, WAV, OGG (max 25MB)</p>
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
@@ -213,7 +218,7 @@
|
||||
<div class="shrink-0 flex gap-2">
|
||||
<input
|
||||
type="file"
|
||||
accept=".mp3,.wav"
|
||||
accept=".mp3,.wav,.ogg"
|
||||
class="hidden"
|
||||
onchange={handleFileSelect}
|
||||
bind:this={fileInput}
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
import { models } from "../../stores/api";
|
||||
import { persistentStore } from "../../stores/persistent";
|
||||
import { streamChatCompletion } from "../../lib/chatApi";
|
||||
import { playgroundStores } from "../../stores/playgroundActivity";
|
||||
import type { ChatMessage, ContentPart } from "../../lib/types";
|
||||
import ChatMessageComponent from "./ChatMessage.svelte";
|
||||
import ModelSelector from "./ModelSelector.svelte";
|
||||
@@ -11,7 +12,16 @@
|
||||
const systemPromptStore = persistentStore<string>("playground-system-prompt", "");
|
||||
const temperatureStore = persistentStore<number>("playground-temperature", 0.7);
|
||||
|
||||
let messages = $state<ChatMessage[]>([]);
|
||||
/**
 * Restores the chat history saved under the "playground-messages"
 * localStorage key.
 *
 * The stored value is only trusted when it parses to an array. Any other JSON
 * value (for example `null` or an object), as well as missing, unreadable or
 * malformed data, yields an empty history, so callers can always rely on array
 * operations such as `messages.length`.
 *
 * @returns The saved messages, or `[]` when nothing usable is stored.
 */
function loadMessages(): ChatMessage[] {
  try {
    const saved = localStorage.getItem("playground-messages");
    if (!saved) return [];

    const parsed: unknown = JSON.parse(saved);
    // Element shape is not validated here; only the container type is checked.
    return Array.isArray(parsed) ? (parsed as ChatMessage[]) : [];
  } catch {
    // localStorage unavailable or the stored text is not valid JSON.
    return [];
  }
}
|
||||
|
||||
let messages = $state<ChatMessage[]>(loadMessages());
|
||||
let userInput = $state("");
|
||||
let isStreaming = $state(false);
|
||||
let isReasoning = $state(false);
|
||||
@@ -24,21 +34,52 @@
|
||||
let imageError = $state<string | null>(null);
|
||||
|
||||
let hasModels = $derived($models.some((m) => !m.unlisted));
|
||||
let userScrolledUp = $state(false);
|
||||
|
||||
// Auto-scroll when messages change
|
||||
$effect(() => {
|
||||
if (messages.length > 0 && messagesContainer) {
|
||||
playgroundStores.chatStreaming.set(isStreaming);
|
||||
});
|
||||
|
||||
/**
 * Scroll handler for the message list: records whether the user has moved away
 * from the bottom of the conversation.
 */
function handleMessagesScroll() {
  if (messagesContainer) {
    const distanceFromBottom =
      messagesContainer.scrollHeight - messagesContainer.scrollTop - messagesContainer.clientHeight;
    // Consider "at bottom" if within 40px of the bottom
    userScrolledUp = distanceFromBottom > 40;
  }
}
|
||||
|
||||
// Auto-scroll when messages change — skip if user scrolled up
|
||||
$effect(() => {
|
||||
if (messages.length > 0 && messagesContainer && !userScrolledUp) {
|
||||
messagesContainer.scrollTo({
|
||||
top: messagesContainer.scrollHeight,
|
||||
behavior: "smooth",
|
||||
behavior: isStreaming ? "instant" : "smooth",
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// Persist messages to localStorage (throttled to once per 2s)
|
||||
let lastSaveTime = 0;
|
||||
$effect(() => {
|
||||
const json = JSON.stringify(messages);
|
||||
const elapsed = Date.now() - lastSaveTime;
|
||||
const save = () => {
|
||||
try { localStorage.setItem("playground-messages", json); } catch {}
|
||||
lastSaveTime = Date.now();
|
||||
};
|
||||
if (elapsed >= 2000) {
|
||||
save();
|
||||
return;
|
||||
}
|
||||
const timer = setTimeout(save, 2000 - elapsed);
|
||||
return () => clearTimeout(timer);
|
||||
});
|
||||
|
||||
async function sendMessage() {
|
||||
const trimmedInput = userInput.trim();
|
||||
if ((!trimmedInput && attachedImages.length === 0) || !$selectedModelStore || isStreaming) return;
|
||||
|
||||
userScrolledUp = false;
|
||||
|
||||
// Build message content (multimodal if images attached)
|
||||
let content: string | ContentPart[];
|
||||
if (attachedImages.length > 0) {
|
||||
@@ -321,6 +362,7 @@
|
||||
<div
|
||||
class="flex-1 overflow-y-auto mb-4 px-2"
|
||||
bind:this={messagesContainer}
|
||||
onscroll={handleMessagesScroll}
|
||||
>
|
||||
{#if messages.length === 0}
|
||||
<div class="h-full flex items-center justify-center text-txtsecondary">
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
<script lang="ts">
|
||||
import { renderMarkdown, escapeHtml } from "../../lib/markdown";
|
||||
import { renderMarkdown, escapeHtml, renderStreamingMarkdown, createStreamingCache } from "../../lib/markdown";
|
||||
import type { RenderedBlock } from "../../lib/markdown";
|
||||
import { Copy, Check, Pencil, X, Save, RefreshCw, ChevronDown, ChevronRight, Brain, Code } from "lucide-svelte";
|
||||
import { getTextContent, getImageUrls } from "../../lib/types";
|
||||
import type { ContentPart } from "../../lib/types";
|
||||
@@ -22,11 +23,17 @@
|
||||
let hasImages = $derived(imageUrls.length > 0);
|
||||
let canEdit = $derived(onEdit !== undefined && !hasImages);
|
||||
|
||||
let renderedContent = $derived(
|
||||
role === "assistant" && !isStreaming
|
||||
? renderMarkdown(textContent)
|
||||
: escapeHtml(textContent).replace(/\n/g, '<br>')
|
||||
);
|
||||
let streamingCache = createStreamingCache();
|
||||
let renderedParts = $derived.by(() => {
|
||||
if (role !== "assistant") {
|
||||
return { blocks: [{ id: -1, html: escapeHtml(textContent).replace(/\n/g, '<br>') }] as RenderedBlock[], pendingHtml: "" };
|
||||
}
|
||||
if (!isStreaming) {
|
||||
streamingCache = createStreamingCache();
|
||||
return { blocks: [{ id: -1, html: renderMarkdown(textContent) }] as RenderedBlock[], pendingHtml: "" };
|
||||
}
|
||||
return renderStreamingMarkdown(textContent, streamingCache);
|
||||
});
|
||||
let copied = $state(false);
|
||||
let showRaw = $state(false);
|
||||
let isEditing = $state(false);
|
||||
@@ -109,13 +116,54 @@
|
||||
cancelEdit();
|
||||
}
|
||||
}
|
||||
|
||||
const COPY_SVG = `<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect width="14" height="14" x="8" y="8" rx="2" ry="2"/><path d="M4 16c-1.1 0-2-.9-2-2V4c0-1.1.9-2 2-2h10c1.1 0 2 .9 2 2"/></svg>`;
|
||||
const CHECK_SVG = `<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M20 6 9 17l-5-5"/></svg>`;
|
||||
|
||||
/**
 * Svelte action that adds a "Copy code" button to every `<pre>` inside `node`.
 *
 * Buttons are attached once per `<pre>` (tracked with the `data-copy-btn`
 * attribute). A MutationObserver re-runs the attachment whenever descendants
 * of `node` change, so code blocks rendered later also get a button.
 *
 * @param node - Element whose descendant `<pre>` blocks receive copy buttons.
 * @returns An action object whose `destroy` disconnects the observer.
 */
function codeBlockCopy(node: HTMLElement) {
  /** Writes text to the clipboard, falling back to a hidden textarea + execCommand. */
  async function writeToClipboard(text: string): Promise<void> {
    if (navigator.clipboard && window.isSecureContext) {
      await navigator.clipboard.writeText(text);
      return;
    }

    const ta = document.createElement('textarea');
    ta.value = text;
    ta.style.cssText = 'position:fixed;left:-9999px';
    document.body.appendChild(ta);
    try {
      ta.select();
      // execCommand reports failure through its return value rather than throwing.
      if (!document.execCommand('copy')) {
        throw new Error('execCommand copy was rejected');
      }
    } finally {
      // Always remove the helper element, even when copying failed.
      document.body.removeChild(ta);
    }
  }

  function attachButtons() {
    node.querySelectorAll<HTMLPreElement>('pre:not([data-copy-btn])').forEach((pre) => {
      pre.setAttribute('data-copy-btn', 'true');

      const btn = document.createElement('button');
      // Explicit type so the button can never act as a form submit control.
      btn.type = 'button';
      btn.className = 'code-copy-btn';
      btn.title = 'Copy code';
      btn.innerHTML = COPY_SVG;

      let resetTimer: ReturnType<typeof setTimeout> | undefined;

      btn.addEventListener('click', async () => {
        const text = pre.querySelector('code')?.textContent ?? pre.textContent ?? '';
        try {
          await writeToClipboard(text);
          btn.innerHTML = CHECK_SVG;
          btn.classList.add('copied');
          // Restart the countdown so an earlier click cannot revert the icon early.
          if (resetTimer !== undefined) clearTimeout(resetTimer);
          resetTimer = setTimeout(() => {
            btn.innerHTML = COPY_SVG;
            btn.classList.remove('copied');
            resetTimer = undefined;
          }, 2000);
        } catch (e) {
          console.error('copy failed', e);
        }
      });

      pre.appendChild(btn);
    });
  }

  attachButtons();
  const mo = new MutationObserver(attachButtons);
  mo.observe(node, { childList: true, subtree: true });
  return { destroy: () => mo.disconnect() };
}
|
||||
</script>
|
||||
|
||||
<div class="flex {role === 'user' ? 'justify-end' : 'justify-start'} mb-4">
|
||||
<div
|
||||
class="relative group max-w-[85%] rounded-lg px-4 py-2 {role === 'user'
|
||||
? 'bg-primary text-btn-primary-text'
|
||||
: 'bg-surface border border-gray-200 dark:border-white/10'}"
|
||||
class="relative group rounded-lg px-4 py-2 {role === 'user'
|
||||
? 'max-w-[85%] bg-primary text-btn-primary-text'
|
||||
: 'w-full sm:w-4/5 bg-surface border border-gray-200 dark:border-white/10'}"
|
||||
>
|
||||
{#if role === "assistant"}
|
||||
{#if reasoning_content || isReasoning}
|
||||
@@ -167,8 +215,11 @@
|
||||
{#if showRaw}
|
||||
<div class="whitespace-pre-wrap font-mono text-sm">{textContent}</div>
|
||||
{:else}
|
||||
<div class="prose prose-sm dark:prose-invert max-w-none">
|
||||
{@html renderedContent}
|
||||
<div class="prose prose-sm dark:prose-invert max-w-none" use:codeBlockCopy>
|
||||
{#each renderedParts.blocks as block (block.id)}
|
||||
{@html block.html}
|
||||
{/each}
|
||||
{@html renderedParts.pendingHtml}
|
||||
{#if isStreaming && !isReasoning}
|
||||
<span class="inline-block w-2 h-4 bg-current animate-pulse ml-0.5"></span>
|
||||
{/if}
|
||||
@@ -289,14 +340,42 @@
|
||||
|
||||
<style>
|
||||
.prose :global(pre) {
|
||||
position: relative;
|
||||
background-color: var(--color-surface);
|
||||
border: 1px solid var(--color-border, rgba(128, 128, 128, 0.2));
|
||||
border-radius: 0.375rem;
|
||||
padding: 0.75rem;
|
||||
padding-right: 2.5rem;
|
||||
overflow-x: auto;
|
||||
margin: 0.5rem 0;
|
||||
}
|
||||
|
||||
.prose :global(.code-copy-btn) {
|
||||
position: absolute;
|
||||
top: 0.375rem;
|
||||
right: 0.375rem;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
padding: 0.25rem;
|
||||
border-radius: 0.25rem;
|
||||
border: 1px solid var(--color-border);
|
||||
background: var(--color-surface);
|
||||
color: var(--color-txtsecondary);
|
||||
cursor: pointer;
|
||||
transition: background-color 0.15s;
|
||||
line-height: 0;
|
||||
}
|
||||
|
||||
.prose :global(.code-copy-btn:hover) {
|
||||
background: var(--color-secondary);
|
||||
}
|
||||
|
||||
.prose :global(.code-copy-btn.copied) {
|
||||
color: var(--color-success);
|
||||
opacity: 1;
|
||||
}
|
||||
|
||||
.prose :global(code) {
|
||||
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace;
|
||||
font-size: 0.875em;
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
import { models } from "../../stores/api";
|
||||
import { persistentStore } from "../../stores/persistent";
|
||||
import { generateImage } from "../../lib/imageApi";
|
||||
import { playgroundStores } from "../../stores/playgroundActivity";
|
||||
import ModelSelector from "./ModelSelector.svelte";
|
||||
import ExpandableTextarea from "./ExpandableTextarea.svelte";
|
||||
|
||||
@@ -17,6 +18,10 @@
|
||||
|
||||
let hasModels = $derived($models.some((m) => !m.unlisted));
|
||||
|
||||
$effect(() => {
|
||||
playgroundStores.imageGenerating.set(isGenerating);
|
||||
});
|
||||
|
||||
async function generate() {
|
||||
const trimmedPrompt = prompt.trim();
|
||||
if (!trimmedPrompt || !$selectedModelStore || isGenerating) return;
|
||||
|
||||
@@ -25,6 +25,11 @@
|
||||
<optgroup label="Local">
|
||||
{#each grouped.local as model (model.id)}
|
||||
<option value={model.id}>{model.id}</option>
|
||||
{#if model.aliases}
|
||||
{#each model.aliases as alias (alias)}
|
||||
<option value={alias}> ↳ {alias}</option>
|
||||
{/each}
|
||||
{/if}
|
||||
{/each}
|
||||
</optgroup>
|
||||
{/if}
|
||||
|
||||
@@ -0,0 +1,406 @@
|
||||
<script lang="ts">
|
||||
import { models } from "../../stores/api";
|
||||
import { persistentStore } from "../../stores/persistent";
|
||||
import { rerank } from "../../lib/rerankApi";
|
||||
import { playgroundStores } from "../../stores/playgroundActivity";
|
||||
import ModelSelector from "./ModelSelector.svelte";
|
||||
|
||||
type RerankRow = { doc: string; score: number | null };
|
||||
type SortOrder = "none" | "asc" | "desc";
|
||||
type EditorMode = "table" | "json";
|
||||
|
||||
const selectedModelStore = persistentStore<string>("playground-rerank-model", "");
|
||||
|
||||
const defaultQuery = "How do LLM's work?";
|
||||
const defaultDocs = [
|
||||
"Large language models (LLMs) use transformer architectures to predict the next token in a sequence based on massive amounts of text data.",
|
||||
"LLMs are trained on diverse internet text, learning statistical patterns of language that allow them to generate coherent responses.",
|
||||
"During training, LLMs minimize a loss function that measures the difference between predicted and actual tokens across billions of examples.",
|
||||
"Attention mechanisms in transformers enable LLMs to weigh the importance of different words when generating output.",
|
||||
"Fine\u2011tuning allows a pre\u2011trained LLM to adapt to a specific downstream task with a smaller dataset.",
|
||||
"Neural networks consist of layers of interconnected neurons that adjust their weights during back\u2011propagation.",
|
||||
"The history of the Roman Empire spanned over a thousand years.",
|
||||
"Soccer is the most popular sport in many countries around the world.",
|
||||
"Quantum computing uses qubits to perform calculations that are intractable for classical computers.",
|
||||
];
|
||||
|
||||
let query = $state(defaultQuery);
|
||||
let rows = $state<RerankRow[]>([
|
||||
...defaultDocs.map((doc) => ({ doc, score: null })),
|
||||
{ doc: "", score: null },
|
||||
]);
|
||||
let isLoading = $state(false);
|
||||
let error = $state<string | null>(null);
|
||||
let usage = $state<{ prompt_tokens: number; total_tokens: number } | null>(null);
|
||||
let abortController: AbortController | null = null;
|
||||
let sortOrder = $state<SortOrder>("desc");
|
||||
let editorMode = $state<EditorMode>("table");
|
||||
let jsonText = $state("");
|
||||
let jsonError = $state<string | null>(null);
|
||||
|
||||
let hasModels = $derived($models.some((m) => !m.unlisted));
|
||||
|
||||
let canSubmit = $derived((() => {
|
||||
if (!$selectedModelStore || isLoading) return false;
|
||||
if (editorMode === "json") {
|
||||
try {
|
||||
const parsed = JSON.parse(jsonText) as Record<string, unknown>;
|
||||
return (
|
||||
typeof parsed.query === "string" &&
|
||||
parsed.query.trim() !== "" &&
|
||||
Array.isArray(parsed.documents) &&
|
||||
(parsed.documents as unknown[]).some(
|
||||
(d) => typeof d === "string" && (d as string).trim() !== ""
|
||||
)
|
||||
);
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return query.trim() !== "" && rows.some((r) => r.doc.trim() !== "");
|
||||
})());
|
||||
|
||||
// Display rows with sort applied (display-only transform, rows[] is never mutated by sorting)
|
||||
let displayRows = $derived((() => {
|
||||
const indexed = rows.map((row, i) => ({ row, i }));
|
||||
if (sortOrder === "none") return indexed;
|
||||
return [...indexed].sort((a, b) => {
|
||||
if (a.row.score === null && b.row.score === null) return 0;
|
||||
if (a.row.score === null) return 1;
|
||||
if (b.row.score === null) return -1;
|
||||
return sortOrder === "desc"
|
||||
? b.row.score - a.row.score
|
||||
: a.row.score - b.row.score;
|
||||
});
|
||||
})());
|
||||
|
||||
// Auto-add a new empty row when the last row gets content (table mode only)
|
||||
$effect(() => {
|
||||
if (editorMode === "table" && rows[rows.length - 1]?.doc.trim() !== "") {
|
||||
rows = [...rows, { doc: "", score: null }];
|
||||
}
|
||||
});
|
||||
|
||||
// Sync loading state to activity store
|
||||
$effect(() => {
|
||||
playgroundStores.rerankLoading.set(isLoading);
|
||||
});
|
||||
|
||||
/**
 * Switch the editor into JSON mode.
 *
 * The textarea is seeded with the current query and every table document that
 * is not blank, pretty-printed with two-space indentation. Any stale JSON
 * error is cleared. Does nothing when JSON mode is already active.
 */
function switchToJson() {
  if (editorMode !== "json") {
    const documents: string[] = [];
    for (const { doc } of rows) {
      if (doc.trim() !== "") {
        documents.push(doc);
      }
    }

    jsonText = JSON.stringify({ query, documents }, null, 2);
    jsonError = null;
    editorMode = "json";
  }
}
|
||||
|
||||
/**
 * Switch the editor into table mode, loading the table from the JSON text.
 *
 * - Empty JSON text resets the table to an empty query and one blank row.
 * - Otherwise the text must parse to an object with a string `query` and an
 *   array `documents`; non-string documents are converted with `String()`.
 * - A trailing blank row is guaranteed so there is always a row to type into.
 *
 * On a parse/shape failure the mode is left unchanged and the message is
 * stored in `jsonError`.
 */
function switchToTable() {
  if (editorMode === "table") return;

  const blankRow = (): RerankRow => ({ doc: "", score: null });

  if (jsonText.trim() === "") {
    query = "";
    rows = [blankRow()];
    jsonError = null;
    editorMode = "table";
    return;
  }

  try {
    const parsed: unknown = JSON.parse(jsonText);
    const isPlainObject = typeof parsed === "object" && parsed !== null && !Array.isArray(parsed);
    if (!isPlainObject) {
      throw new Error("Expected a JSON object");
    }

    const { query: parsedQuery, documents } = parsed as Record<string, unknown>;
    if (typeof parsedQuery !== "string") throw new Error('"query" must be a string');
    if (!Array.isArray(documents)) throw new Error('"documents" must be an array');

    query = parsedQuery;

    const nextRows: RerankRow[] = [];
    for (const entry of documents as unknown[]) {
      nextRows.push({ doc: typeof entry === "string" ? entry : String(entry), score: null });
    }

    // Keep one empty row at the end for adding further documents.
    const tail = nextRows[nextRows.length - 1];
    if (tail === undefined || tail.doc.trim() !== "") {
      nextRows.push(blankRow());
    }

    rows = nextRows;
    jsonError = null;
    editorMode = "table";
  } catch (err) {
    jsonError = err instanceof Error ? err.message : "Invalid JSON";
  }
}
|
||||
|
||||
/** Advance the score sort order through the cycle none -> desc -> asc -> none. */
function cycleSortOrder() {
  switch (sortOrder) {
    case "none":
      sortOrder = "desc";
      break;
    case "desc":
      sortOrder = "asc";
      break;
    default:
      sortOrder = "none";
  }
}
|
||||
|
||||
/** Arrow suffix for the Score column header; empty when no sort is applied. */
function sortIndicator(): string {
  switch (sortOrder) {
    case "desc":
      return " ↓";
    case "asc":
      return " ↑";
    default:
      return "";
  }
}
|
||||
|
||||
/**
 * Rerank the current documents against the query and store the returned
 * relevance scores on `rows`.
 *
 * In JSON mode the editor text is parsed first and copied into the table state
 * (`query`, `rows`, `editorMode`) so the table shows exactly what was sent.
 * Blank documents are never submitted; each result's `index` is a position in
 * the submitted list and is mapped back to the row it originated from.
 *
 * A request aborted through `cancel` is ignored silently; any other failure is
 * surfaced through `error`. `isLoading` and `abortController` are always reset.
 */
async function submit() {
  if (!canSubmit) return;

  if (editorMode === "json") {
    // Parse JSON, sync state to table, then submit
    try {
      const parsed = JSON.parse(jsonText) as Record<string, unknown>;
      if (typeof parsed.query !== "string" || !Array.isArray(parsed.documents)) {
        throw new Error("Unexpected request shape");
      }

      // canSubmit only requires ONE non-empty string document, so the array may
      // still hold non-string entries. Coerce them the same way switchToTable
      // does instead of calling .trim() on them, which used to throw and be
      // misreported as invalid JSON.
      const docs = (parsed.documents as unknown[])
        .map((d) => (typeof d === "string" ? d : String(d)))
        .filter((d) => d.trim() !== "");

      rows = [...docs.map((d): RerankRow => ({ doc: d, score: null })), { doc: "", score: null }];
      query = parsed.query;
      editorMode = "table";
    } catch {
      error = "Invalid JSON — fix before submitting";
      return;
    }
  }

  // From here on both editor modes share the table state.
  const submitQuery = query;
  const nonEmptyEntries = rows
    .map((r, i) => ({ originalIndex: i, doc: r.doc }))
    .filter((e) => e.doc.trim() !== "");

  isLoading = true;
  error = null;
  usage = null;

  // Clear previous scores
  rows = rows.map((r) => ({ ...r, score: null }));

  abortController = new AbortController();

  try {
    const response = await rerank(
      $selectedModelStore,
      submitQuery,
      nonEmptyEntries.map((e) => e.doc),
      abortController.signal
    );

    usage = response.usage;

    // Map result.index (position in submitted docs array) back to original rows[] index
    const updated = rows.map((r) => ({ ...r }));
    for (const result of response.results) {
      const entry = nonEmptyEntries[result.index];
      if (entry !== undefined) {
        updated[entry.originalIndex].score = result.relevance_score;
      }
    }
    rows = updated;
  } catch (err) {
    // A user-initiated cancel is not an error worth displaying.
    const wasCancelled = err instanceof Error && err.name === "AbortError";
    if (!wasCancelled) {
      error = err instanceof Error ? err.message : "An error occurred";
    }
  } finally {
    isLoading = false;
    abortController = null;
  }
}
|
||||
|
||||
/** Abort the in-flight rerank request, if there is one. */
function cancel() {
  if (abortController) {
    abortController.abort();
  }
}
|
||||
|
||||
/**
 * Restore the playground to its initial example: default query and documents
 * (plus one trailing blank row), no scores, no error/usage, descending sort,
 * and an empty JSON editor.
 */
function clear() {
  const freshRows: RerankRow[] = defaultDocs.map((doc) => ({ doc, score: null }));
  freshRows.push({ doc: "", score: null });

  query = defaultQuery;
  rows = freshRows;

  error = null;
  usage = null;
  sortOrder = "desc";

  jsonText = "";
  jsonError = null;
}
|
||||
|
||||
/**
 * Remove the row at `originalIndex` (an index into `rows`, not into the sorted
 * display order). The last remaining row is never removed.
 */
function deleteRow(originalIndex: number) {
  if (rows.length > 1) {
    rows = rows.filter((_row, position) => position !== originalIndex);
  }
}
|
||||
|
||||
/**
 * Replace the document text of the row at `originalIndex` (an index into
 * `rows`), reassigning `rows` with shallow copies of every row.
 */
function updateDoc(originalIndex: number, value: string) {
  const next = Array.from(rows, (entry) => ({ ...entry }));
  const target = next[originalIndex];
  target.doc = value;
  rows = next;
}
|
||||
|
||||
/**
 * CSS classes for a score cell: secondary text when there is no score yet,
 * green for a strictly positive score, red otherwise.
 */
function scoreColor(score: number | null): string {
  if (score !== null) {
    return score > 0 ? "text-green-600 dark:text-green-400" : "text-red-500 dark:text-red-400";
  }
  return "text-txtsecondary";
}
|
||||
|
||||
/** Render a score with three decimals, or an em dash when no score is available. */
function formatScore(score: number | null): string {
  return score === null ? "—" : score.toFixed(3);
}
|
||||
|
||||
/** Submit on a plain Enter key press; Shift+Enter and all other keys are left alone. */
function handleKeyDown(e: KeyboardEvent) {
  const isPlainEnter = e.key === "Enter" && !e.shiftKey;
  if (!isPlainEnter) return;

  e.preventDefault();
  submit();
}
|
||||
|
||||
let isCleared = $derived(
|
||||
query === defaultQuery &&
|
||||
rows.every((r, i) => r.score === null && r.doc === (defaultDocs[i] ?? "")) &&
|
||||
rows.length === defaultDocs.length + 1 &&
|
||||
!jsonText.trim() &&
|
||||
!error &&
|
||||
!usage
|
||||
);
|
||||
</script>
|
||||
|
||||
<div class="flex flex-col h-full">
|
||||
<!-- Top bar: model selector + query input (table mode) + mode toggle -->
|
||||
<div class="shrink-0 flex flex-wrap gap-2 mb-4">
|
||||
<ModelSelector bind:value={$selectedModelStore} placeholder="Select a rerank model..." disabled={isLoading} />
|
||||
{#if editorMode === "table"}
|
||||
<input
|
||||
type="text"
|
||||
class="min-w-0 flex-1 basis-48 px-3 py-2 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary"
|
||||
placeholder="Query..."
|
||||
bind:value={query}
|
||||
disabled={isLoading}
|
||||
onkeydown={handleKeyDown}
|
||||
/>
|
||||
{/if}
|
||||
<!-- Table / JSON toggle -->
|
||||
<div class="flex rounded border border-gray-200 dark:border-white/10 overflow-hidden shrink-0">
|
||||
<button
|
||||
class="px-3 py-1.5 text-sm transition-colors {editorMode === 'table'
|
||||
? 'bg-primary text-btn-primary-text'
|
||||
: 'bg-surface hover:bg-secondary-hover'}"
|
||||
onclick={switchToTable}
|
||||
disabled={isLoading}
|
||||
>
|
||||
Table
|
||||
</button>
|
||||
<button
|
||||
class="px-3 py-1.5 text-sm border-l border-gray-200 dark:border-white/10 transition-colors {editorMode === 'json'
|
||||
? 'bg-primary text-btn-primary-text'
|
||||
: 'bg-surface hover:bg-secondary-hover'}"
|
||||
onclick={switchToJson}
|
||||
disabled={isLoading}
|
||||
>
|
||||
JSON
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{#if !hasModels}
|
||||
<div class="flex-1 flex items-center justify-center text-txtsecondary">
|
||||
<p>No models configured. Add models to your configuration to use reranking.</p>
|
||||
</div>
|
||||
{:else if editorMode === "json"}
|
||||
<!-- JSON editor -->
|
||||
<div class="flex-1 flex flex-col min-h-0 mb-4">
|
||||
<textarea
|
||||
class="flex-1 w-full font-mono text-sm px-3 py-2 rounded border border-gray-200 dark:border-white/10 bg-surface focus:outline-none focus:ring-2 focus:ring-primary resize-none"
|
||||
bind:value={jsonText}
|
||||
disabled={isLoading}
|
||||
placeholder={'{\n "query": "your search query",\n "documents": [\n "document one",\n "document two"\n ]\n}'}
|
||||
spellcheck={false}
|
||||
></textarea>
|
||||
{#if jsonError}
|
||||
<p class="mt-1 text-sm text-red-500">{jsonError}</p>
|
||||
{/if}
|
||||
</div>
|
||||
{:else}
|
||||
<!-- Document table -->
|
||||
<div class="flex-1 overflow-y-auto mb-4 border border-gray-200 dark:border-white/10 rounded">
|
||||
<table class="w-full border-collapse table-fixed">
|
||||
<colgroup>
|
||||
<col class="w-auto" />
|
||||
<col style="width: 120px" />
|
||||
<col style="width: 40px" />
|
||||
</colgroup>
|
||||
<thead class="sticky top-0 bg-surface border-b border-gray-200 dark:border-white/10">
|
||||
<tr>
|
||||
<th class="px-3 py-2 text-left text-sm font-medium text-txtsecondary">Document</th>
|
||||
<th
|
||||
class="px-3 py-2 text-right text-sm font-medium text-txtsecondary cursor-pointer select-none hover:text-txtprimary transition-colors"
|
||||
onclick={cycleSortOrder}
|
||||
>
|
||||
Score{sortIndicator()}
|
||||
</th>
|
||||
<th class="px-2 py-2"></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{#each displayRows as { row, i } (i)}
|
||||
<tr class="border-b border-gray-100 dark:border-white/5 last:border-0">
|
||||
<td class="px-3 py-1.5">
|
||||
<input
|
||||
type="text"
|
||||
class="w-full bg-transparent focus:outline-none focus:ring-1 focus:ring-primary rounded px-1 py-0.5"
|
||||
placeholder={i === rows.length - 1 ? "Add document..." : "Document text..."}
|
||||
value={row.doc}
|
||||
oninput={(e) => updateDoc(i, (e.target as HTMLInputElement).value)}
|
||||
disabled={isLoading}
|
||||
onkeydown={handleKeyDown}
|
||||
/>
|
||||
</td>
|
||||
<td class="px-3 py-1.5 text-right font-mono text-sm {scoreColor(row.score)}">
|
||||
{#if isLoading && row.score === null && row.doc.trim() !== ""}
|
||||
<span class="inline-block w-4 h-4 border-2 border-current border-t-transparent rounded-full animate-spin align-middle"></span>
|
||||
{:else}
|
||||
{formatScore(row.score)}
|
||||
{/if}
|
||||
</td>
|
||||
<td class="px-2 py-1.5 text-center">
|
||||
<button
|
||||
class="w-7 h-7 flex items-center justify-center text-txtsecondary hover:text-red-500 transition-colors rounded disabled:opacity-30 disabled:cursor-not-allowed"
|
||||
onclick={() => deleteRow(i)}
|
||||
disabled={rows.length <= 1}
|
||||
tabindex="-1"
|
||||
aria-label="Remove row"
|
||||
>
|
||||
×
|
||||
</button>
|
||||
</td>
|
||||
</tr>
|
||||
{/each}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
<!-- Bottom toolbar -->
|
||||
{#if hasModels}
|
||||
<div class="shrink-0 flex flex-wrap items-center gap-2">
|
||||
{#if isLoading}
|
||||
<button class="btn bg-red-500 hover:bg-red-600 text-white" onclick={cancel}>
|
||||
Cancel
|
||||
</button>
|
||||
{:else}
|
||||
<button
|
||||
class="btn bg-primary text-btn-primary-text hover:opacity-90"
|
||||
onclick={submit}
|
||||
disabled={!canSubmit}
|
||||
>
|
||||
Rerank
|
||||
</button>
|
||||
<button class="btn" onclick={clear} disabled={isCleared}>
|
||||
Clear
|
||||
</button>
|
||||
{/if}
|
||||
|
||||
{#if error}
|
||||
<span class="text-sm text-red-500 ml-2">{error}</span>
|
||||
{:else if usage}
|
||||
<span class="text-sm text-txtsecondary ml-2">{usage.total_tokens} tokens</span>
|
||||
{/if}
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
@@ -2,6 +2,7 @@
|
||||
import { models } from "../../stores/api";
|
||||
import { persistentStore } from "../../stores/persistent";
|
||||
import { generateSpeech } from "../../lib/speechApi";
|
||||
import { playgroundStores } from "../../stores/playgroundActivity";
|
||||
import ModelSelector from "./ModelSelector.svelte";
|
||||
import ExpandableTextarea from "./ExpandableTextarea.svelte";
|
||||
|
||||
@@ -20,11 +21,9 @@
|
||||
let availableVoices = $state<string[]>(["coral", "alloy", "echo", "fable", "onyx", "nova", "shimmer"]);
|
||||
let isLoadingVoices = $state(false);
|
||||
|
||||
// Default voices to fall back to if API call fails
|
||||
const defaultVoices = ["coral", "alloy", "echo", "fable", "onyx", "nova", "shimmer"];
|
||||
const CACHE_KEY = "playground-speech-voices-cache";
|
||||
|
||||
// Load voices cache from localStorage
|
||||
function getVoicesCache(): Record<string, string[]> {
|
||||
if (typeof window === "undefined") return {};
|
||||
try {
|
||||
@@ -35,7 +34,6 @@
|
||||
}
|
||||
}
|
||||
|
||||
// Save voices cache to localStorage
|
||||
function saveVoicesCache(cache: Record<string, string[]>) {
|
||||
if (typeof window === "undefined") return;
|
||||
try {
|
||||
@@ -47,9 +45,12 @@
|
||||
|
||||
let hasModels = $derived($models.some((m) => !m.unlisted));
|
||||
|
||||
// Track if this is the initial page load to avoid fetching on refresh
|
||||
let isInitialLoad = $state(true);
|
||||
|
||||
$effect(() => {
|
||||
playgroundStores.speechGenerating.set(isGenerating);
|
||||
});
|
||||
|
||||
// On page load, restore cached voices for the selected model if available
|
||||
$effect(() => {
|
||||
const model = $selectedModelStore;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { renderMarkdown, escapeHtml } from "./markdown";
|
||||
import { renderMarkdown, escapeHtml, splitCompleteBlocks, closePendingBlock, normalizeLatexDelimiters, renderStreamingMarkdown, createStreamingCache } from "./markdown";
|
||||
|
||||
describe("renderMarkdown", () => {
|
||||
describe("basic markdown", () => {
|
||||
@@ -130,6 +130,35 @@ More text here.
|
||||
expect(result).toContain("katex");
|
||||
expect(result).toContain("sqrt");
|
||||
});
|
||||
|
||||
it("renders \\[...\\] display math", () => {
|
||||
const result = renderMarkdown("\\[\nx^2 + y^2 = z^2\n\\]");
|
||||
expect(result).toContain("katex");
|
||||
});
|
||||
|
||||
it("renders \\(...\\) inline math", () => {
|
||||
const result = renderMarkdown("The equation \\(E = mc^2\\) is famous.");
|
||||
expect(result).toContain("katex");
|
||||
});
|
||||
});
|
||||
|
||||
describe("normalizeLatexDelimiters", () => {
|
||||
it("converts \\[...\\] to $$...$$", () => {
|
||||
expect(normalizeLatexDelimiters("\\[\nx^2\n\\]")).toBe("$$\nx^2\n$$");
|
||||
});
|
||||
|
||||
it("converts \\(...\\) to $...$", () => {
|
||||
expect(normalizeLatexDelimiters("\\(x^2\\)")).toBe("$x^2$");
|
||||
});
|
||||
|
||||
it("leaves $$ and $ delimiters unchanged", () => {
|
||||
const text = "$$x^2$$ and $y$";
|
||||
expect(normalizeLatexDelimiters(text)).toBe(text);
|
||||
});
|
||||
|
||||
it("handles multiple occurrences", () => {
|
||||
expect(normalizeLatexDelimiters("\\(a\\) and \\(b\\)")).toBe("$a$ and $b$");
|
||||
});
|
||||
});
|
||||
|
||||
describe("escapeHtml", () => {
|
||||
@@ -158,3 +187,237 @@ More text here.
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("splitCompleteBlocks", () => {
|
||||
it("returns everything as pending when no blank line", () => {
|
||||
const result = splitCompleteBlocks("Hello world");
|
||||
expect(result.complete).toBe("");
|
||||
expect(result.pending).toBe("Hello world");
|
||||
});
|
||||
|
||||
it("returns empty for empty input", () => {
|
||||
const result = splitCompleteBlocks("");
|
||||
expect(result.complete).toBe("");
|
||||
expect(result.pending).toBe("");
|
||||
});
|
||||
|
||||
it("splits on blank line between paragraphs", () => {
|
||||
const result = splitCompleteBlocks("First paragraph.\n\nSecond paragraph");
|
||||
expect(result.complete).toBe("First paragraph.\n");
|
||||
expect(result.pending).toBe("Second paragraph");
|
||||
});
|
||||
|
||||
it("splits multiple paragraphs at last blank line", () => {
|
||||
const result = splitCompleteBlocks("Para 1.\n\nPara 2.\n\nPara 3");
|
||||
expect(result.complete).toBe("Para 1.\n\nPara 2.\n");
|
||||
expect(result.pending).toBe("Para 3");
|
||||
});
|
||||
|
||||
it("treats closed code fence as complete boundary", () => {
|
||||
const text = "```js\nconst x = 1;\n```\nMore text";
|
||||
const result = splitCompleteBlocks(text);
|
||||
expect(result.complete).toBe("```js\nconst x = 1;\n```");
|
||||
expect(result.pending).toBe("More text");
|
||||
});
|
||||
|
||||
it("treats unclosed code fence as pending", () => {
|
||||
const text = "Done paragraph.\n\n```js\nconst x = 1;";
|
||||
const result = splitCompleteBlocks(text);
|
||||
expect(result.complete).toBe("Done paragraph.\n");
|
||||
expect(result.pending).toBe("```js\nconst x = 1;");
|
||||
});
|
||||
|
||||
it("does not split on blank lines inside code fences", () => {
|
||||
const text = "```\nline1\n\nline2\n```";
|
||||
const result = splitCompleteBlocks(text);
|
||||
expect(result.complete).toBe("```\nline1\n\nline2\n```");
|
||||
expect(result.pending).toBe("");
|
||||
});
|
||||
|
||||
it("handles tilde fences", () => {
|
||||
const text = "~~~py\nprint('hi')\n~~~\nAfter";
|
||||
const result = splitCompleteBlocks(text);
|
||||
expect(result.complete).toBe("~~~py\nprint('hi')\n~~~");
|
||||
expect(result.pending).toBe("After");
|
||||
});
|
||||
|
||||
it("does not close backtick fence with tilde fence", () => {
|
||||
const text = "```\ncode\n~~~\nstill code";
|
||||
const result = splitCompleteBlocks(text);
|
||||
// The ~~~ should not close a backtick fence, so everything from ``` onward is pending
|
||||
expect(result.complete).toBe("");
|
||||
expect(result.pending).toBe("```\ncode\n~~~\nstill code");
|
||||
});
|
||||
|
||||
it("treats closed math block as complete boundary", () => {
|
||||
const text = "$$\nx^2\n$$\nAfter";
|
||||
const result = splitCompleteBlocks(text);
|
||||
expect(result.complete).toBe("$$\nx^2\n$$");
|
||||
expect(result.pending).toBe("After");
|
||||
});
|
||||
|
||||
it("treats unclosed math block as pending", () => {
|
||||
const text = "Before.\n\n$$\nx^2";
|
||||
const result = splitCompleteBlocks(text);
|
||||
expect(result.complete).toBe("Before.\n");
|
||||
expect(result.pending).toBe("$$\nx^2");
|
||||
});
|
||||
|
||||
it("treats closed \\[...\\] math block as complete boundary", () => {
|
||||
const text = "\\[\nx^2\n\\]\nAfter";
|
||||
const result = splitCompleteBlocks(text);
|
||||
expect(result.complete).toBe("\\[\nx^2\n\\]");
|
||||
expect(result.pending).toBe("After");
|
||||
});
|
||||
|
||||
it("treats unclosed \\[ math block as pending", () => {
|
||||
const text = "Before.\n\n\\[\nx^2";
|
||||
const result = splitCompleteBlocks(text);
|
||||
expect(result.complete).toBe("Before.\n");
|
||||
expect(result.pending).toBe("\\[\nx^2");
|
||||
});
|
||||
|
||||
it("handles trailing blank line making everything complete", () => {
|
||||
const text = "Hello world.\n";
|
||||
const result = splitCompleteBlocks(text);
|
||||
// Last line is empty string after split, which is a blank line
|
||||
expect(result.complete).toBe("Hello world.\n");
|
||||
expect(result.pending).toBe("");
|
||||
});
|
||||
});
|
||||
|
||||
describe("closePendingBlock", () => {
|
||||
it("returns empty string for empty input", () => {
|
||||
expect(closePendingBlock("")).toBe("");
|
||||
});
|
||||
|
||||
it("returns plain text unchanged", () => {
|
||||
expect(closePendingBlock("Hello world")).toBe("Hello world");
|
||||
});
|
||||
|
||||
it("closes an open backtick code fence", () => {
|
||||
const result = closePendingBlock("```python\nprint('hi')");
|
||||
expect(result).toBe("```python\nprint('hi')\n```");
|
||||
});
|
||||
|
||||
it("closes an open tilde code fence", () => {
|
||||
const result = closePendingBlock("~~~js\nconst x = 1;");
|
||||
expect(result).toBe("~~~js\nconst x = 1;\n~~~");
|
||||
});
|
||||
|
||||
it("does not modify already-closed code fence", () => {
|
||||
const text = "```py\ncode\n```";
|
||||
expect(closePendingBlock(text)).toBe(text);
|
||||
});
|
||||
|
||||
it("closes an open math block", () => {
|
||||
const result = closePendingBlock("$$\nx^2 + y^2");
|
||||
expect(result).toBe("$$\nx^2 + y^2\n$$");
|
||||
});
|
||||
|
||||
it("does not modify already-closed math block", () => {
|
||||
const text = "$$\nx^2\n$$";
|
||||
expect(closePendingBlock(text)).toBe(text);
|
||||
});
|
||||
|
||||
it("closes an open \\[ math block with \\]", () => {
|
||||
const result = closePendingBlock("\\[\nx^2 + y^2");
|
||||
expect(result).toBe("\\[\nx^2 + y^2\n\\]");
|
||||
});
|
||||
|
||||
it("does not modify already-closed \\[...\\] math block", () => {
|
||||
const text = "\\[\nx^2\n\\]";
|
||||
expect(closePendingBlock(text)).toBe(text);
|
||||
});
|
||||
|
||||
it("closes code fence when preceded by regular text", () => {
|
||||
const result = closePendingBlock("Some text\n```\ncode");
|
||||
expect(result).toBe("Some text\n```\ncode\n```");
|
||||
});
|
||||
|
||||
it("leaves headers unchanged", () => {
|
||||
expect(closePendingBlock("## Hello")).toBe("## Hello");
|
||||
});
|
||||
|
||||
it("leaves tables unchanged", () => {
|
||||
const table = "| a | b |\n| --- | --- |\n| 1 | 2 |";
|
||||
expect(closePendingBlock(table)).toBe(table);
|
||||
});
|
||||
|
||||
it("leaves lists unchanged", () => {
|
||||
expect(closePendingBlock("- item 1\n- item 2")).toBe("- item 1\n- item 2");
|
||||
});
|
||||
});
|
||||
|
||||
describe("renderStreamingMarkdown", () => {
|
||||
it("renders complete blocks and pending as markdown", () => {
|
||||
const cache = createStreamingCache();
|
||||
const text = "# Hello\n\nWorld";
|
||||
const { blocks, pendingHtml } = renderStreamingMarkdown(text, cache);
|
||||
expect(blocks).toHaveLength(1);
|
||||
expect(blocks[0].html).toContain("<h1>Hello</h1>");
|
||||
expect(pendingHtml).toContain("World");
|
||||
expect(pendingHtml).toContain("<p>");
|
||||
});
|
||||
|
||||
it("preserves existing blocks when complete portion is unchanged", () => {
|
||||
const cache = createStreamingCache();
|
||||
renderStreamingMarkdown("# Hello\n\nWor", cache);
|
||||
const firstBlocks = cache.blocks;
|
||||
|
||||
const { blocks } = renderStreamingMarkdown("# Hello\n\nWorld", cache);
|
||||
// Same block array reference — nothing changed in the complete section
|
||||
expect(blocks).toBe(firstBlocks);
|
||||
expect(cache.completeKey).toBe("# Hello\n");
|
||||
});
|
||||
|
||||
it("appends a new block when a new section completes", () => {
|
||||
const cache = createStreamingCache();
|
||||
renderStreamingMarkdown("# Hello\n\nParagraph", cache);
|
||||
expect(cache.blocks).toHaveLength(1);
|
||||
const firstBlock = cache.blocks[0];
|
||||
|
||||
renderStreamingMarkdown("# Hello\n\nParagraph.\n\nMore", cache);
|
||||
expect(cache.blocks).toHaveLength(2);
|
||||
// First block is preserved with the same id and html
|
||||
expect(cache.blocks[0].id).toBe(firstBlock.id);
|
||||
expect(cache.blocks[0].html).toBe(firstBlock.html);
|
||||
// Second block contains the new paragraph
|
||||
expect(cache.blocks[1].html).toContain("Paragraph.");
|
||||
});
|
||||
|
||||
it("assigns unique stable ids to each block", () => {
|
||||
const cache = createStreamingCache();
|
||||
renderStreamingMarkdown("A.\n\nB.\n\nC", cache);
|
||||
expect(cache.blocks).toHaveLength(1);
|
||||
const id0 = cache.blocks[0].id;
|
||||
|
||||
renderStreamingMarkdown("A.\n\nB.\n\nC.\n\nD", cache);
|
||||
expect(cache.blocks).toHaveLength(2);
|
||||
expect(cache.blocks[0].id).toBe(id0);
|
||||
expect(cache.blocks[1].id).toBe(id0 + 1);
|
||||
});
|
||||
|
||||
it("renders pending code block with syntax highlighting", () => {
|
||||
const cache = createStreamingCache();
|
||||
const text = "Done.\n\n```python\nprint('hello')";
|
||||
const { pendingHtml } = renderStreamingMarkdown(text, cache);
|
||||
expect(pendingHtml).toContain("<code");
|
||||
expect(pendingHtml).toContain("hljs");
|
||||
});
|
||||
|
||||
it("renders pending table as markdown", () => {
|
||||
const cache = createStreamingCache();
|
||||
const text = "Done.\n\n| a | b |\n| --- | --- |\n| 1 | 2 |";
|
||||
const { pendingHtml } = renderStreamingMarkdown(text, cache);
|
||||
expect(pendingHtml).toContain("<table>");
|
||||
expect(pendingHtml).toContain("<td>");
|
||||
});
|
||||
|
||||
it("renders pending portion through markdown pipeline", () => {
|
||||
const cache = createStreamingCache();
|
||||
const text = "Done.\n\nSome **bold** text";
|
||||
const { pendingHtml } = renderStreamingMarkdown(text, cache);
|
||||
expect(pendingHtml).toContain("<strong>bold</strong>");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -69,13 +69,189 @@ const processor = unified()
|
||||
.use(rehypeHighlight)
|
||||
.use(rehypeStringify, { allowDangerousHtml: true });
|
||||
|
||||
/**
 * Split streamed markdown into a stable prefix and a still-growing tail.
 *
 * The text is scanned line by line. A "complete boundary" is either a blank
 * line outside any fenced code / display-math block, or the closing line of
 * such a block. Everything up to and including the last boundary line is
 * returned as `complete`; the remaining lines are `pending`.
 *
 * Fences follow the CommonMark rule that a closing fence must use the same
 * character as the opening fence and be at least as long, so a four-backtick
 * fence that wraps a three-backtick example is treated as one block.
 *
 * @param text Markdown received so far.
 * @returns `complete` and `pending`, each joined with "\n". Both are empty for
 *          empty input; `complete` is empty when no boundary was found.
 */
export function splitCompleteBlocks(text: string): { complete: string; pending: string } {
  if (!text) {
    return { complete: "", pending: "" };
  }

  // Built once per call instead of once per line.
  const openFenceRe = /^\s*(`{3,}|~{3,})/;
  const closeFenceRe = /^\s*(`{3,}|~{3,})\s*$/;

  const lines = text.split("\n");
  let lastCompleteBoundary = -1; // index of last line that ends a complete block
  let fenceChar = ""; // '`' or '~' while inside a fence
  let fenceLen = 0; // length of the opening fence run; 0 means "not in a fence"
  let inMathBlock = false;

  for (let i = 0; i < lines.length; i++) {
    const trimmed = lines[i].trimEnd();

    if (fenceLen > 0) {
      // Closing fence: same character, at least as long as the opener, nothing else on the line.
      const run = closeFenceRe.exec(trimmed)?.[1] ?? "";
      if (run !== "" && run[0] === fenceChar && run.length >= fenceLen) {
        fenceLen = 0;
        fenceChar = "";
        lastCompleteBoundary = i;
      }
      continue;
    }

    if (inMathBlock) {
      if (trimmed === "$$" || trimmed === "\\]") {
        inMathBlock = false;
        lastCompleteBoundary = i;
      }
      continue;
    }

    // Not inside anything: a fence line here can only be an opener (info string allowed).
    const opened = openFenceRe.exec(trimmed)?.[1];
    if (opened !== undefined) {
      fenceChar = opened[0];
      fenceLen = opened.length;
      continue;
    }

    // Opening display-math delimiter on its own line.
    if (trimmed === "$$" || trimmed === "\\[") {
      inMathBlock = true;
      continue;
    }

    // Outside fences/math: blank line marks a complete boundary
    if (trimmed === "") {
      lastCompleteBoundary = i;
    }
  }

  if (lastCompleteBoundary < 0) {
    return { complete: "", pending: text };
  }

  return {
    complete: lines.slice(0, lastCompleteBoundary + 1).join("\n"),
    pending: lines.slice(lastCompleteBoundary + 1).join("\n"),
  };
}
|
||||
|
||||
/**
 * Make a partially streamed markdown fragment self-contained by appending the
 * closing delimiter of a block that is still open at its end.
 *
 * - An open fenced code block is closed with a fence identical to its opening
 *   run (same character, same length), so a four-backtick fence is closed with
 *   four backticks. A line closes a fence when it uses the same character, is
 *   at least as long as the opener, and carries nothing else.
 * - An open display-math block is closed with `$$` or `\]`, matching whichever
 *   delimiter opened it.
 * - Anything else (plain text, headers, lists, tables) is returned unchanged.
 *
 * @param pending The unfinished tail of the streamed markdown.
 * @returns `pending`, possibly followed by "\n" and the closing delimiter;
 *          the empty string for empty input.
 */
export function closePendingBlock(pending: string): string {
  if (!pending) return "";

  const openFenceRe = /^\s*(`{3,}|~{3,})/;
  const closeFenceRe = /^\s*(`{3,}|~{3,})\s*$/;

  let openFence = ""; // full opening fence run; "" means "not in a fence"
  let inMathBlock = false;
  let mathClose = "";

  for (const line of pending.split("\n")) {
    const trimmed = line.trimEnd();

    if (openFence !== "") {
      const run = closeFenceRe.exec(trimmed)?.[1] ?? "";
      if (run !== "" && run[0] === openFence[0] && run.length >= openFence.length) {
        openFence = "";
      }
      continue;
    }

    if (inMathBlock) {
      if (trimmed === "$$" || trimmed === "\\]") {
        inMathBlock = false;
        mathClose = "";
      }
      continue;
    }

    const opened = openFenceRe.exec(trimmed)?.[1];
    if (opened !== undefined) {
      openFence = opened;
      continue;
    }

    if (trimmed === "$$") {
      inMathBlock = true;
      mathClose = "$$";
      continue;
    }

    if (trimmed === "\\[") {
      inMathBlock = true;
      mathClose = "\\]";
      continue;
    }
  }

  if (openFence !== "") return pending + "\n" + openFence;
  if (inMathBlock) return pending + "\n" + mathClose;
  return pending;
}
|
||||
|
||||
/** A completed chunk of streamed markdown together with its rendered HTML. */
export interface RenderedBlock {
|
||||
// Sequence number taken from StreamingCache.nextId when the block is created.
id: number;
|
||||
// HTML produced by renderMarkdown for this chunk.
html: string;
|
||||
}
|
||||
|
||||
/** Mutable state that renderStreamingMarkdown carries from one call to the next. */
export interface StreamingCache {
|
||||
// Blocks rendered so far for the complete portion of the text.
blocks: RenderedBlock[];
|
||||
// Id that will be given to the next block; incremented on every use.
nextId: number;
|
||||
// The complete text that `blocks` was rendered from; "" when nothing is cached.
completeKey: string;
|
||||
}
|
||||
|
||||
/** Create an empty cache: no blocks, ids starting at 0, and an empty complete key. */
export function createStreamingCache(): StreamingCache {
  const emptyCache: StreamingCache = {
    completeKey: "",
    nextId: 0,
    blocks: [],
  };
  return emptyCache;
}
|
||||
|
||||
/**
 * Render streamed markdown incrementally, reusing previously rendered blocks.
 *
 * The text is split with splitCompleteBlocks. For the complete portion:
 * - unchanged since the last call: the cached blocks are returned as-is
 *   (same array reference);
 * - extends the previously cached text: only the newly added part is rendered
 *   and appended as a new block;
 * - anything else: the whole complete portion is re-rendered as a single block;
 * - empty: any cached blocks are dropped.
 *
 * The pending portion is closed with closePendingBlock and rendered on every call.
 *
 * @param text  Markdown received so far.
 * @param cache State from previous calls for the same stream; mutated in place.
 * @returns The cached blocks and the HTML for the pending tail ("" when there is none).
 */
export function renderStreamingMarkdown(
  text: string,
  cache: StreamingCache,
): { blocks: RenderedBlock[]; pendingHtml: string } {
  const { complete, pending } = splitCompleteBlocks(text);

  if (!complete) {
    if (cache.blocks.length > 0) {
      cache.blocks = [];
      cache.completeKey = "";
    }
  } else if (complete !== cache.completeKey) {
    const previous = cache.completeKey;
    const grewFromPrevious = previous.length > 0 && complete.startsWith(previous);

    // Render just the delta when the text grew, otherwise everything.
    const source = grewFromPrevious ? complete.slice(previous.length) : complete;
    const block: RenderedBlock = { id: cache.nextId++, html: renderMarkdown(source) };

    cache.blocks = grewFromPrevious ? [...cache.blocks, block] : [block];
    cache.completeKey = complete;
  }

  const pendingHtml = pending ? renderMarkdown(closePendingBlock(pending)) : "";

  return { blocks: cache.blocks, pendingHtml };
}
|
||||
|
||||
/** Applies the two delimiter rewrites to text known to contain no code. */
function rewriteMathDelimiters(segment: string): string {
  return (
    segment
      // Display math: \[...\] → $$...$$ (may span multiple lines)
      .replace(/\\\[([\s\S]*?)\\\]/g, (_match, inner: string) => `$$${inner}$$`)
      // Inline math: \(...\) → $...$
      .replace(/\\\(([\s\S]*?)\\\)/g, (_match, inner: string) => `$${inner}$`)
  );
}

/** Rewrites math delimiters in non-fenced text while copying inline code spans through verbatim. */
function rewriteOutsideInlineCode(prose: string): string {
  // A code span is a backtick run closed by an equal run; it may wrap over a
  // single newline but is not allowed to cross a blank line.
  const inlineCode = /(`+)(?:(?!\n[ \t\r]*\n)[\s\S])*?\1/g;
  let out = "";
  let cursor = 0;
  for (const span of prose.matchAll(inlineCode)) {
    const start = span.index ?? 0;
    out += rewriteMathDelimiters(prose.slice(cursor, start)) + span[0];
    cursor = start + span[0].length;
  }
  return out + rewriteMathDelimiters(prose.slice(cursor));
}

/**
 * Convert \[...\] to $$...$$ and \(...\) to $...$.
 *
 * Fenced code blocks (``` or ~~~, including one that is still unclosed) and
 * inline code spans are left untouched, so backslash-bracket sequences inside
 * code samples (regexes, shell snippets, LaTeX source) are not turned into math.
 *
 * @param text Markdown source.
 * @returns The same markdown with LaTeX-style math delimiters rewritten outside of code.
 */
export function normalizeLatexDelimiters(text: string): string {
  const fenceOpener = /^\s*(`{3,}|~{3,})([\s\S]*)$/;
  const pieces: string[] = [];
  let prose: string[] = [];
  let fenceChar = ""; // "`" or "~" while inside a fenced block, "" otherwise
  let fenceLength = 0;

  const flushProse = (): void => {
    if (prose.length > 0) {
      // Consecutive prose lines are rewritten together so multi-line \[...\] still matches.
      pieces.push(rewriteOutsideInlineCode(prose.join("\n")));
      prose = [];
    }
  };

  for (const line of text.split("\n")) {
    if (fenceChar) {
      pieces.push(line);
      const trimmed = line.trim();
      // The closing fence is a line made only of the fence character, at least as long as the opener.
      if (trimmed.length >= fenceLength && trimmed === fenceChar.repeat(trimmed.length)) {
        fenceChar = "";
      }
      continue;
    }

    const opener = fenceOpener.exec(line);
    const marker = opener?.[1] ?? "";
    const info = opener?.[2] ?? "";
    // A backtick fence cannot have backticks in its info string; such a line is inline code.
    if (marker && !(marker.startsWith("`") && info.includes("`"))) {
      flushProse();
      fenceChar = marker.charAt(0);
      fenceLength = marker.length;
      pieces.push(line);
      continue;
    }

    prose.push(line);
  }

  flushProse();
  return pieces.join("\n");
}
|
||||
|
||||
export function renderMarkdown(content: string): string {
|
||||
if (!content) {
|
||||
return "";
|
||||
}
|
||||
|
||||
try {
|
||||
const result = processor.processSync(content);
|
||||
const result = processor.processSync(normalizeLatexDelimiters(content));
|
||||
return String(result);
|
||||
} catch {
|
||||
// Fallback to escaped plain text if markdown parsing fails
|
||||
|
||||
@@ -0,0 +1,27 @@
|
||||
/** A single entry of the `results` array in a rerank response. */
export interface RerankResult {
  /** Index reported by the server; presumably the position in the submitted `documents` array (confirm against the backend). */
  index: number;
  /** Relevance score reported by the server for that entry. */
  relevance_score: number;
}
|
||||
|
||||
/** Shape the UI expects from the JSON body returned by `POST /v1/rerank`. */
export interface RerankResponse {
  /** Model name echoed in the response. */
  model: string;
  /** Object-type tag of the response. */
  object: string;
  /** Token accounting for the request. */
  usage: {
    prompt_tokens: number;
    total_tokens: number;
  };
  /** One entry per scored document. */
  results: Array<RerankResult>;
}
|
||||
|
||||
/**
 * Sends a rerank request to `/v1/rerank` and resolves with the parsed JSON body.
 *
 * @param model     Sent as the `model` field of the JSON payload.
 * @param query     Sent as the `query` field.
 * @param documents Sent as the `documents` field.
 * @param signal    Abort signal forwarded to `fetch`.
 * @throws Error with the message `"<status> <statusText>"` when the response is not ok.
 */
export async function rerank(
  model: string,
  query: string,
  documents: string[],
  signal: AbortSignal
): Promise<RerankResponse> {
  const payload = JSON.stringify({ model, query, documents });
  const request: RequestInit = {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: payload,
    signal,
  };

  const response = await fetch("/v1/rerank", request);
  if (response.ok) {
    return response.json();
  }
  throw new Error(`${response.status} ${response.statusText}`);
}
|
||||
@@ -9,6 +9,7 @@ export interface Model {
|
||||
description: string;
|
||||
unlisted: boolean;
|
||||
peerID: string;
|
||||
aliases?: string[];
|
||||
}
|
||||
|
||||
export interface Metrics {
|
||||
@@ -21,6 +22,16 @@ export interface Metrics {
|
||||
prompt_per_second: number;
|
||||
tokens_per_second: number;
|
||||
duration_ms: number;
|
||||
has_capture: boolean;
|
||||
}
|
||||
|
||||
/** A captured request/response pair as returned by `/api/captures/<id>`. */
export interface ReqRespCapture {
  /** Capture identifier. */
  id: number;
  /** Path of the captured request. */
  req_path: string;
  /** Request headers, name → value. */
  req_headers: Record<string, string>;
  /** Request body: base64 encoded bytes. */
  req_body: string;
  /** Response headers, name → value. */
  resp_headers: Record<string, string>;
  /** Response body: base64 encoded bytes. */
  resp_body: string;
}
|
||||
|
||||
export interface LogData {
|
||||
@@ -28,8 +39,12 @@ export interface LogData {
|
||||
data: string;
|
||||
}
|
||||
|
||||
/** Payload of an `"inflight"` API event. */
export interface InFlightStats {
  /** Number of in-flight requests reported by the server. */
  total: number;
}
|
||||
|
||||
/**
 * Envelope for every message received on the API event stream.
 *
 * `type` selects how `data` is interpreted; `"inflight"` is included because
 * the event handler switches on it to update the in-flight request counter.
 */
export interface APIEventEnvelope {
  /** Discriminator for the payload carried in `data`. */
  type: "modelStatus" | "logData" | "metrics" | "inflight";
  /** JSON-encoded payload; consumers `JSON.parse` it according to `type`. */
  data: string;
}
|
||||
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
<script lang="ts">
|
||||
import { metrics } from "../stores/api";
|
||||
import { metrics, getCapture } from "../stores/api";
|
||||
import Tooltip from "../components/Tooltip.svelte";
|
||||
import CaptureDialog from "../components/CaptureDialog.svelte";
|
||||
import type { ReqRespCapture } from "../lib/types";
|
||||
|
||||
function formatSpeed(speed: number): string {
|
||||
return speed < 0 ? "unknown" : speed.toFixed(2) + " t/s";
|
||||
@@ -38,6 +40,25 @@
|
||||
}
|
||||
|
||||
let sortedMetrics = $derived([...$metrics].sort((a, b) => b.id - a.id));
|
||||
|
||||
// Capture currently handed to the dialog; null while nothing is selected.
let selectedCapture = $state<ReqRespCapture | null>(null);
// Whether the capture dialog is shown.
let dialogOpen = $state(false);
// Id of the capture whose fetch is in flight; drives the per-row button state.
let loadingCaptureId = $state<number | null>(null);

/**
 * Fetches the capture with the given id and, if one is returned, opens the
 * dialog with it. Nothing is opened when `getCapture` resolves to null.
 *
 * Only the most recent request may touch the UI state: if another row was
 * clicked while this fetch was pending, its result is discarded so it cannot
 * clear the newer request's loading marker or open the dialog with a
 * superseded capture.
 */
async function viewCapture(id: number) {
  loadingCaptureId = id;
  const capture = await getCapture(id);

  if (loadingCaptureId !== id) {
    // A newer click took over while we were waiting; leave the state to it.
    return;
  }

  loadingCaptureId = null;
  if (capture) {
    selectedCapture = capture;
    dialogOpen = true;
  }
}

/** Hides the dialog and drops the selected capture. */
function closeDialog() {
  dialogOpen = false;
  selectedCapture = null;
}
|
||||
</script>
|
||||
|
||||
<div class="p-2">
|
||||
@@ -65,6 +86,7 @@
|
||||
<th class="px-6 py-3">Prompt Processing</th>
|
||||
<th class="px-6 py-3">Generation Speed</th>
|
||||
<th class="px-6 py-3">Duration</th>
|
||||
<th class="px-6 py-3">Capture</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody class="divide-y">
|
||||
@@ -79,6 +101,19 @@
|
||||
<td class="px-6 py-4">{formatSpeed(metric.prompt_per_second)}</td>
|
||||
<td class="px-6 py-4">{formatSpeed(metric.tokens_per_second)}</td>
|
||||
<td class="px-6 py-4">{formatDuration(metric.duration_ms)}</td>
|
||||
<td class="px-6 py-4">
|
||||
{#if metric.has_capture}
|
||||
<button
|
||||
onclick={() => viewCapture(metric.id)}
|
||||
disabled={loadingCaptureId === metric.id}
|
||||
class="btn btn--sm"
|
||||
>
|
||||
{loadingCaptureId === metric.id ? "..." : "View"}
|
||||
</button>
|
||||
{:else}
|
||||
<span class="text-txtsecondary">-</span>
|
||||
{/if}
|
||||
</td>
|
||||
</tr>
|
||||
{/each}
|
||||
</tbody>
|
||||
@@ -86,3 +121,5 @@
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
<CaptureDialog capture={selectedCapture} open={dialogOpen} onclose={closeDialog} />
|
||||
|
||||
@@ -4,8 +4,9 @@
|
||||
import ImageInterface from "../components/playground/ImageInterface.svelte";
|
||||
import AudioInterface from "../components/playground/AudioInterface.svelte";
|
||||
import SpeechInterface from "../components/playground/SpeechInterface.svelte";
|
||||
import RerankInterface from "../components/playground/RerankInterface.svelte";
|
||||
|
||||
type Tab = "chat" | "images" | "speech" | "audio";
|
||||
type Tab = "chat" | "images" | "speech" | "audio" | "rerank";
|
||||
|
||||
const selectedTabStore = persistentStore<Tab>("playground-selected-tab", "chat");
|
||||
let mobileMenuOpen = $state(false);
|
||||
@@ -15,6 +16,7 @@
|
||||
{ id: "images", label: "Images" },
|
||||
{ id: "speech", label: "Speech" },
|
||||
{ id: "audio", label: "Transcription" },
|
||||
{ id: "rerank", label: "Rerank" },
|
||||
];
|
||||
|
||||
function selectTab(tab: Tab) {
|
||||
@@ -89,6 +91,9 @@
|
||||
<div class="h-full" class:tab-hidden={$selectedTabStore !== "audio"}>
|
||||
<AudioInterface />
|
||||
</div>
|
||||
<div class="h-full" class:tab-hidden={$selectedTabStore !== "rerank"}>
|
||||
<RerankInterface />
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
<!-- empty: real Playground is always mounted in App.svelte -->
|
||||
@@ -1,5 +1,5 @@
|
||||
import { writable } from "svelte/store";
|
||||
import type { Model, Metrics, VersionInfo, LogData, APIEventEnvelope } from "../lib/types";
|
||||
import type { Model, Metrics, VersionInfo, LogData, APIEventEnvelope, ReqRespCapture, InFlightStats } from "../lib/types";
|
||||
import { connectionState } from "./theme";
|
||||
|
||||
const LOG_LENGTH_LIMIT = 1024 * 100; /* 100KB of log data */
|
||||
@@ -9,6 +9,7 @@ export const models = writable<Model[]>([]);
|
||||
export const proxyLogs = writable<string>("");
|
||||
export const upstreamLogs = writable<string>("");
|
||||
export const metrics = writable<Metrics[]>([]);
|
||||
export const inFlightRequests = writable<number>(0);
|
||||
export const versionInfo = writable<VersionInfo>({
|
||||
build_date: "unknown",
|
||||
commit: "unknown",
|
||||
@@ -29,6 +30,7 @@ export function enableAPIEvents(enabled: boolean): void {
|
||||
apiEventSource?.close();
|
||||
apiEventSource = null;
|
||||
metrics.set([]);
|
||||
inFlightRequests.set(0);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -46,6 +48,7 @@ export function enableAPIEvents(enabled: boolean): void {
|
||||
proxyLogs.set("");
|
||||
upstreamLogs.set("");
|
||||
metrics.set([]);
|
||||
inFlightRequests.set(0);
|
||||
models.set([]);
|
||||
retryCount = 0;
|
||||
connectionState.set("connected");
|
||||
@@ -59,7 +62,7 @@ export function enableAPIEvents(enabled: boolean): void {
|
||||
const newModels = JSON.parse(message.data) as Model[];
|
||||
// Sort models by name and id
|
||||
newModels.sort((a, b) => {
|
||||
return (a.name + a.id).localeCompare(b.name + b.id);
|
||||
return (a.name + a.id).localeCompare(b.name + b.id, undefined, { numeric : true} );
|
||||
});
|
||||
models.set(newModels);
|
||||
break;
|
||||
@@ -83,6 +86,11 @@ export function enableAPIEvents(enabled: boolean): void {
|
||||
metrics.update((prevMetrics) => [...newMetrics, ...prevMetrics]);
|
||||
break;
|
||||
}
|
||||
case "inflight": {
|
||||
const stats = JSON.parse(message.data) as InFlightStats;
|
||||
inFlightRequests.set(stats.total ?? 0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
console.error(e.data, err);
|
||||
@@ -172,3 +180,19 @@ export async function loadModel(model: string): Promise<void> {
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Loads a request/response capture from `/api/captures/<id>`.
 *
 * Never rejects: a 404 resolves to null, and any other failure (non-ok
 * status, network error, unparsable body) is logged to the console and also
 * resolves to null.
 *
 * @param id Capture id placed in the request path.
 * @returns The parsed JSON body, or null when the capture is unavailable.
 */
export async function getCapture(id: number): Promise<ReqRespCapture | null> {
  try {
    const response = await fetch(`/api/captures/${id}`);
    const notFound = response.status === 404;
    if (notFound) {
      return null;
    }
    if (response.ok) {
      return await response.json();
    }
    // Funnel unexpected statuses through the catch below so they are logged.
    throw new Error(`Failed to fetch capture: ${response.status}`);
  } catch (error) {
    console.error("Failed to fetch capture:", error);
    return null;
  }
}
|
||||
|
||||
@@ -0,0 +1,20 @@
|
||||
import { writable, derived } from "svelte/store";
|
||||
|
||||
// One busy flag per playground tab; each starts out false.
const chatStreaming = writable(false);
const imageGenerating = writable(false);
const speechGenerating = writable(false);
const audioTranscribing = writable(false);
const rerankLoading = writable(false);

/** Derived store that is true while at least one of the busy flags is true. */
export const playgroundActivity = derived(
  [chatStreaming, imageGenerating, speechGenerating, audioTranscribing, rerankLoading],
  (busyFlags) => busyFlags.some(Boolean)
);

/** The individual writable flags, exported together as one object. */
export const playgroundStores = {
  chatStreaming: chatStreaming,
  imageGenerating: imageGenerating,
  speechGenerating: speechGenerating,
  audioTranscribing: audioTranscribing,
  rerankLoading: rerankLoading,
};
|
||||
@@ -0,0 +1,3 @@
|
||||
import { writable } from "svelte/store";
|
||||
|
||||
/** Writable store holding a route string; initialised to "/". */
export const currentRoute = writable<string>("/");
|
||||
Reference in New Issue
Block a user