Compare commits
11 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| d33d51fa75 | |||
| e3bf065574 | |||
| 3e52144058 | |||
| d5e52d7d00 | |||
| 17e5263a76 | |||
| 8d6d949ec3 | |||
| b5fde8eb6d | |||
| 7eef5defb8 | |||
| bc01e6f539 | |||
| 0462e3dc3f | |||
| 7b20fc011b |
+1
-1
@@ -4,7 +4,7 @@ early_access: false
|
|||||||
reviews:
|
reviews:
|
||||||
profile: "chill"
|
profile: "chill"
|
||||||
request_changes_workflow: false
|
request_changes_workflow: false
|
||||||
high_level_summary: true
|
high_level_summary: false
|
||||||
poem: false
|
poem: false
|
||||||
review_status: true
|
review_status: true
|
||||||
collapse_walkthrough: false
|
collapse_walkthrough: false
|
||||||
|
|||||||
@@ -15,6 +15,14 @@ on:
|
|||||||
paths:
|
paths:
|
||||||
- '.github/workflows/containers.yml'
|
- '.github/workflows/containers.yml'
|
||||||
- 'docker/build-container.sh'
|
- 'docker/build-container.sh'
|
||||||
|
- 'docker/*.Containerfile'
|
||||||
|
|
||||||
|
# grant permissions on GITHUB_TOKEN to publish packages
|
||||||
|
# ref: https://docs.github.com/en/packages/managing-github-packages-using-github-actions-workflows/publishing-and-installing-a-package-with-github-actions#publishing-a-package-using-an-action
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
packages: write
|
||||||
|
id-token: write
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build-and-push:
|
build-and-push:
|
||||||
|
|||||||
@@ -3,9 +3,25 @@ name: Windows CI
|
|||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
branches: [ "main" ]
|
branches: [ "main" ]
|
||||||
|
# only run when backend source changes
|
||||||
|
# cmd/ is excluded because it contains utilities without tests
|
||||||
|
paths:
|
||||||
|
- '**/*.go'
|
||||||
|
- '!cmd/**'
|
||||||
|
- 'go.mod'
|
||||||
|
- 'go.sum'
|
||||||
|
- 'Makefile'
|
||||||
|
- '.github/workflows/go-ci-windows.yml'
|
||||||
|
|
||||||
pull_request:
|
pull_request:
|
||||||
branches: [ "main" ]
|
branches: [ "main" ]
|
||||||
|
paths:
|
||||||
|
- '**/*.go'
|
||||||
|
- '!cmd/**'
|
||||||
|
- 'go.mod'
|
||||||
|
- 'go.sum'
|
||||||
|
- 'Makefile'
|
||||||
|
- '.github/workflows/go-ci-windows.yml'
|
||||||
|
|
||||||
# Allows manual triggering of the workflow
|
# Allows manual triggering of the workflow
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
|||||||
@@ -3,9 +3,25 @@ name: Linux CI
|
|||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
branches: [ "main" ]
|
branches: [ "main" ]
|
||||||
|
# only run when backend source changes
|
||||||
|
# cmd/ is excluded because it contains utilities without tests
|
||||||
|
paths:
|
||||||
|
- '**/*.go'
|
||||||
|
- '!cmd/**'
|
||||||
|
- 'go.mod'
|
||||||
|
- 'go.sum'
|
||||||
|
- 'Makefile'
|
||||||
|
- '.github/workflows/go-ci.yml'
|
||||||
|
|
||||||
pull_request:
|
pull_request:
|
||||||
branches: [ "main" ]
|
branches: [ "main" ]
|
||||||
|
paths:
|
||||||
|
- '**/*.go'
|
||||||
|
- '!cmd/**'
|
||||||
|
- 'go.mod'
|
||||||
|
- 'go.sum'
|
||||||
|
- 'Makefile'
|
||||||
|
- '.github/workflows/go-ci.yml'
|
||||||
|
|
||||||
# Allows manual triggering of the workflow
|
# Allows manual triggering of the workflow
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
|||||||
@@ -29,7 +29,7 @@ jobs:
|
|||||||
- name: Set up Node.js
|
- name: Set up Node.js
|
||||||
uses: actions/setup-node@v4
|
uses: actions/setup-node@v4
|
||||||
with:
|
with:
|
||||||
node-version: "23"
|
node-version: "24"
|
||||||
- name: Install dependencies and build UI
|
- name: Install dependencies and build UI
|
||||||
run: |
|
run: |
|
||||||
cd ui-svelte
|
cd ui-svelte
|
||||||
|
|||||||
@@ -0,0 +1,42 @@
|
|||||||
|
name: UI Tests
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [ "main" ]
|
||||||
|
paths:
|
||||||
|
- 'ui-svelte/**'
|
||||||
|
- '.github/workflows/ui-tests.yml'
|
||||||
|
|
||||||
|
pull_request:
|
||||||
|
branches: [ "main" ]
|
||||||
|
paths:
|
||||||
|
- 'ui-svelte/**'
|
||||||
|
- '.github/workflows/ui-tests.yml'
|
||||||
|
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
|
||||||
|
run-tests:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
defaults:
|
||||||
|
run:
|
||||||
|
working-directory: ui-svelte
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Set up Node.js
|
||||||
|
uses: actions/setup-node@v4
|
||||||
|
with:
|
||||||
|
node-version: '24'
|
||||||
|
cache: 'npm'
|
||||||
|
cache-dependency-path: ui-svelte/package-lock.json
|
||||||
|
|
||||||
|
- name: Install dependencies
|
||||||
|
run: npm ci
|
||||||
|
|
||||||
|
- name: Type check
|
||||||
|
run: npm run check
|
||||||
|
|
||||||
|
- name: Run tests
|
||||||
|
run: npm test
|
||||||
@@ -0,0 +1,50 @@
|
|||||||
|
## Project Description:
|
||||||
|
|
||||||
|
llama-swap is a lightweight, transparent proxy server that provides automatic model swapping to llama.cpp's server.
|
||||||
|
|
||||||
|
## Tech stack
|
||||||
|
|
||||||
|
- golang
|
||||||
|
- typescript, vite and svelte 5 for UI (located in ui-svelte/)
|
||||||
|
|
||||||
|
## Workflow Tasks
|
||||||
|
|
||||||
|
- when summarizing changes only include details that require further action
|
||||||
|
- just say "Done." when there is no further action
|
||||||
|
- use the github CLI `gh` to create pull requests and work with github
|
||||||
|
- Rules for creating pull requests:
|
||||||
|
- keep them short and focused on changes.
|
||||||
|
- never include a test plan
|
||||||
|
- write the summary using the same style rules as commit messages
|
||||||
|
|
||||||
|
## Testing
|
||||||
|
|
||||||
|
- Follow test naming conventions like `TestProxyManager_<test name>`, `TestProcessGroup_<test name>`, etc.
|
||||||
|
- Use `go test -v -run <name pattern for new tests>` to run any new tests you've written.
|
||||||
|
- Use `make test-dev` after running new tests for a quick overall test run. This runs `go test` and `staticcheck`. Fix any static checking errors. Use this only when changes are made to any code under the `proxy/` directory.
|
||||||
|
- Use `make test-all` before completing work. This includes long running concurrency tests.
|
||||||
|
|
||||||
|
### Commit message example format:
|
||||||
|
|
||||||
|
```
|
||||||
|
proxy: add new feature
|
||||||
|
|
||||||
|
Add new feature that implements functionality X and Y.
|
||||||
|
|
||||||
|
- key change 1
|
||||||
|
- key change 2
|
||||||
|
- key change 3
|
||||||
|
|
||||||
|
fixes #123
|
||||||
|
```
|
||||||
|
|
||||||
|
## Code Reviews
|
||||||
|
|
||||||
|
- use three levels High, Medium, Low severity
|
||||||
|
- label each discovered issue with a label like H1, M2, L3 respectively
|
||||||
|
- High severity are must fix issues (security, race conditions, critical bugs)
|
||||||
|
- Medium severity are recommended improvements (coding style, missing functionality, inconsistencies)
|
||||||
|
- Low severity are nice to have changes and nits
|
||||||
|
- Include a suggestion with each discovered item
|
||||||
|
- Limit your code review to three items with the highest priority first
|
||||||
|
- Double check your discovered items and recommended remediations
|
||||||
@@ -1,49 +1 @@
|
|||||||
## Project Description:
|
@AGENTS.md
|
||||||
|
|
||||||
llama-swap is a light weight, transparent proxy server that provides automatic model swapping to llama.cpp's server.
|
|
||||||
|
|
||||||
## Tech stack
|
|
||||||
|
|
||||||
- golang
|
|
||||||
- typescript, vite and react for UI (located in ui/)
|
|
||||||
|
|
||||||
## Workflow Tasks
|
|
||||||
|
|
||||||
- when summarizing changes only include details that require further action
|
|
||||||
- just say "Done." when there is no further action
|
|
||||||
- use `gh` to create PRs and load issues
|
|
||||||
- do include Co-Authored-By or created by when committing changes or creating PRs
|
|
||||||
- keep PR descriptions short and focused on changes.
|
|
||||||
- never include a test plan
|
|
||||||
|
|
||||||
## Testing
|
|
||||||
|
|
||||||
- Follow test naming conventions like `TestProxyManager_<test name>`, `TestProcessGroup_<test name>`, etc.
|
|
||||||
- Use `go test -v -run <name pattern for new tests>` to run any new tests you've written.
|
|
||||||
- Use `make test-dev` after running new tests for a quick over all test run. This runs `go test` and `staticcheck`. Fix any static checking errors. Use this only when changes are made to any code under the `proxy/` directory
|
|
||||||
- Use `make test-all` before completing work. This includes long running concurrency tests.
|
|
||||||
|
|
||||||
### Commit message example format:
|
|
||||||
|
|
||||||
```
|
|
||||||
proxy: add new feature
|
|
||||||
|
|
||||||
Add new feature that implements functionality X and Y.
|
|
||||||
|
|
||||||
- key change 1
|
|
||||||
- key change 2
|
|
||||||
- key change 3
|
|
||||||
|
|
||||||
fixes #123
|
|
||||||
```
|
|
||||||
|
|
||||||
## Code Reviews
|
|
||||||
|
|
||||||
- use three levels High, Medium, Low severity
|
|
||||||
- label each discovered issue with a label like H1, M2, L3 respectively
|
|
||||||
- High severity are must fix issues (security, race conditions, critical bugs)
|
|
||||||
- Medium severity are recommended improvements (coding style, missing functionality, inconsistencies)
|
|
||||||
- Low severity are nice to have changes and nits
|
|
||||||
- Include a suggestion with each discovered item
|
|
||||||
- Limit your code review to three items with the highest priority first
|
|
||||||
- Double check your discovered items and recommended remediations
|
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ Built in Go for performance and simplicity, llama-swap has zero dependencies and
|
|||||||
|
|
||||||
- ✅ Easy to deploy and configure: one binary, one configuration file. no external dependencies
|
- ✅ Easy to deploy and configure: one binary, one configuration file. no external dependencies
|
||||||
- ✅ On-demand model switching
|
- ✅ On-demand model switching
|
||||||
- ✅ Use any local OpenAI compatible server (llama.cpp, vllm, tabbyAPI, etc.)
|
- ✅ Use any local OpenAI compatible server (llama.cpp, vllm, tabbyAPI, stable-diffusion.cpp, etc.)
|
||||||
- future proof, upgrade your inference servers at any time.
|
- future proof, upgrade your inference servers at any time.
|
||||||
- ✅ OpenAI API supported endpoints:
|
- ✅ OpenAI API supported endpoints:
|
||||||
- `v1/completions`
|
- `v1/completions`
|
||||||
@@ -69,6 +69,7 @@ llama-swap can be installed in multiple ways
|
|||||||
### Docker Install ([download images](https://github.com/mostlygeek/llama-swap/pkgs/container/llama-swap))
|
### Docker Install ([download images](https://github.com/mostlygeek/llama-swap/pkgs/container/llama-swap))
|
||||||
|
|
||||||
Nightly container images with llama-swap and llama-server are built for multiple platforms (cuda, vulkan, intel, etc.) including [non-root variants with improved security](docs/container-security.md).
|
Nightly container images with llama-swap and llama-server are built for multiple platforms (cuda, vulkan, intel, etc.) including [non-root variants with improved security](docs/container-security.md).
|
||||||
|
The stable-diffusion.cpp server is also included for the musa and vulkan platforms.
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
$ docker pull ghcr.io/mostlygeek/llama-swap:cuda
|
$ docker pull ghcr.io/mostlygeek/llama-swap:cuda
|
||||||
|
|||||||
@@ -87,6 +87,12 @@
|
|||||||
"default": 1000,
|
"default": 1000,
|
||||||
"description": "Maximum number of metrics to keep in memory. Controls how many metrics are stored before older ones are discarded."
|
"description": "Maximum number of metrics to keep in memory. Controls how many metrics are stored before older ones are discarded."
|
||||||
},
|
},
|
||||||
|
"captureBuffer": {
|
||||||
|
"type": "integer",
|
||||||
|
"minimum": 0,
|
||||||
|
"default": 5,
|
||||||
|
"description": "Size in megabytes of the buffer for storing request/response captures. Set to 0 to disable captures."
|
||||||
|
},
|
||||||
"startPort": {
|
"startPort": {
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"default": 5800,
|
"default": 5800,
|
||||||
|
|||||||
@@ -50,6 +50,11 @@ logToStdout: "proxy"
|
|||||||
# - useful for limiting memory usage when processing large volumes of metrics
|
# - useful for limiting memory usage when processing large volumes of metrics
|
||||||
metricsMaxInMemory: 1000
|
metricsMaxInMemory: 1000
|
||||||
|
|
||||||
|
# captureBuffer: how many MBs to allocate for storing request/response captures
|
||||||
|
# - optional, default: 5
|
||||||
|
# - set to 0 to disable
|
||||||
|
captureBuffer: 15
|
||||||
|
|
||||||
# startPort: sets the starting port number for the automatic ${PORT} macro.
|
# startPort: sets the starting port number for the automatic ${PORT} macro.
|
||||||
# - optional, default: 5800
|
# - optional, default: 5800
|
||||||
# - the ${PORT} macro can be used in model.cmd and model.proxy settings
|
# - the ${PORT} macro can be used in model.cmd and model.proxy settings
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
cd $(dirname "$0")
|
cd $(dirname "$0")
|
||||||
|
|
||||||
# use this to test locally, example:
|
# use this to test locally, example:
|
||||||
@@ -8,6 +10,9 @@ cd $(dirname "$0")
|
|||||||
# the scopes: gist, read:org, repo, write:packages
|
# the scopes: gist, read:org, repo, write:packages
|
||||||
# then: gh auth login (and copy/paste the new token)
|
# then: gh auth login (and copy/paste the new token)
|
||||||
|
|
||||||
|
LOG_DEBUG=${LOG_DEBUG:-0}
|
||||||
|
DEBUG_ABORT_BUILD=${DEBUG_ABORT_BUILD:-}
|
||||||
|
|
||||||
log_debug() {
|
log_debug() {
|
||||||
if [ "$LOG_DEBUG" = "1" ]; then
|
if [ "$LOG_DEBUG" = "1" ]; then
|
||||||
echo "[DEBUG] $*"
|
echo "[DEBUG] $*"
|
||||||
@@ -31,7 +36,7 @@ if [[ ! " ${ALLOWED_ARCHS[@]} " =~ " ${ARCH} " ]]; then
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
# Check if GITHUB_TOKEN is set and not empty
|
# Check if GITHUB_TOKEN is set and not empty
|
||||||
if [[ -z "$GITHUB_TOKEN" ]]; then
|
if [[ -z "${GITHUB_TOKEN:-}" ]]; then
|
||||||
log_info "Error: GITHUB_TOKEN is not set or is empty."
|
log_info "Error: GITHUB_TOKEN is not set or is empty."
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
@@ -39,6 +44,7 @@ fi
|
|||||||
# Set llama.cpp base image, customizable using the BASE_LLAMACPP_IMAGE environment
|
# Set llama.cpp base image, customizable using the BASE_LLAMACPP_IMAGE environment
|
||||||
# variable, this permits testing with forked llama.cpp repositories
|
# variable, this permits testing with forked llama.cpp repositories
|
||||||
BASE_IMAGE=${BASE_LLAMACPP_IMAGE:-ghcr.io/ggml-org/llama.cpp}
|
BASE_IMAGE=${BASE_LLAMACPP_IMAGE:-ghcr.io/ggml-org/llama.cpp}
|
||||||
|
SD_IMAGE=${BASE_SDCPP_IMAGE:-ghcr.io/leejet/stable-diffusion.cpp}
|
||||||
|
|
||||||
# Set llama-swap repository, automatically uses GITHUB_REPOSITORY variable
|
# Set llama-swap repository, automatically uses GITHUB_REPOSITORY variable
|
||||||
# to enable easy container builds on forked repos
|
# to enable easy container builds on forked repos
|
||||||
@@ -105,6 +111,8 @@ else
|
|||||||
BASE_TAG=server-${ARCH}-${LCPP_TAG}
|
BASE_TAG=server-${ARCH}-${LCPP_TAG}
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
SD_TAG=master-${ARCH}
|
||||||
|
|
||||||
# Abort if LCPP_TAG is empty.
|
# Abort if LCPP_TAG is empty.
|
||||||
if [[ -z "$LCPP_TAG" ]]; then
|
if [[ -z "$LCPP_TAG" ]]; then
|
||||||
log_info "Abort: Could not find llama-server container for arch: $ARCH"
|
log_info "Abort: Could not find llama-server container for arch: $ARCH"
|
||||||
@@ -134,9 +142,21 @@ for CONTAINER_TYPE in non-root root; do
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
log_info "Building $CONTAINER_TYPE $CONTAINER_TAG $LS_VER"
|
log_info "Building $CONTAINER_TYPE $CONTAINER_TAG $LS_VER"
|
||||||
docker build -f llama-swap.Containerfile --build-arg BASE_TAG=${BASE_TAG} --build-arg LS_VER=${LS_VER} --build-arg UID=${USER_UID} \
|
docker build --provenance=false -f llama-swap.Containerfile --build-arg BASE_TAG=${BASE_TAG} --build-arg LS_VER=${LS_VER} --build-arg UID=${USER_UID} \
|
||||||
--build-arg LS_REPO=${LS_REPO} --build-arg GID=${USER_GID} --build-arg USER_HOME=${USER_HOME} -t ${CONTAINER_TAG} -t ${CONTAINER_LATEST} \
|
--build-arg LS_REPO=${LS_REPO} --build-arg GID=${USER_GID} --build-arg USER_HOME=${USER_HOME} -t ${CONTAINER_TAG} -t ${CONTAINER_LATEST} \
|
||||||
--build-arg BASE_IMAGE=${BASE_IMAGE} .
|
--build-arg BASE_IMAGE=${BASE_IMAGE} .
|
||||||
|
|
||||||
|
# For architectures with stable-diffusion.cpp support, layer sd-server on top
|
||||||
|
case "$ARCH" in
|
||||||
|
"musa" | "vulkan")
|
||||||
|
log_info "Adding sd-server to $CONTAINER_TAG"
|
||||||
|
docker build --provenance=false -f llama-swap-sd.Containerfile \
|
||||||
|
--build-arg BASE=${CONTAINER_TAG} \
|
||||||
|
--build-arg SD_IMAGE=${SD_IMAGE} --build-arg SD_TAG=${SD_TAG} \
|
||||||
|
--build-arg UID=${USER_UID} --build-arg GID=${USER_GID} \
|
||||||
|
-t ${CONTAINER_TAG} -t ${CONTAINER_LATEST} . ;;
|
||||||
|
esac
|
||||||
|
|
||||||
if [ "$PUSH_IMAGES" == "true" ]; then
|
if [ "$PUSH_IMAGES" == "true" ]; then
|
||||||
docker push ${CONTAINER_TAG}
|
docker push ${CONTAINER_TAG}
|
||||||
docker push ${CONTAINER_LATEST}
|
docker push ${CONTAINER_LATEST}
|
||||||
|
|||||||
@@ -16,3 +16,18 @@ models:
|
|||||||
/app/llama-server
|
/app/llama-server
|
||||||
-hf bartowski/SmolLM2-135M-Instruct-GGUF:Q4_K_M
|
-hf bartowski/SmolLM2-135M-Instruct-GGUF:Q4_K_M
|
||||||
--port 9999
|
--port 9999
|
||||||
|
|
||||||
|
z-image:
|
||||||
|
checkEndpoint: /
|
||||||
|
cmd: |
|
||||||
|
/app/sd-server
|
||||||
|
--listen-port 9999
|
||||||
|
--diffusion-fa
|
||||||
|
--diffusion-model /models/z_image_turbo-Q8_0.gguf
|
||||||
|
--vae /models/ae.safetensors
|
||||||
|
--llm /models/qwen3-4b-instruct-2507-q8_0.gguf
|
||||||
|
--offload-to-cpu
|
||||||
|
--cfg-scale 1.0
|
||||||
|
--height 512 --width 512
|
||||||
|
--steps 8
|
||||||
|
aliases: [gpt-image-1,dall-e-2,dall-e-3,gpt-image-1-mini,gpt-image-1.5]
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
ARG SD_IMAGE=ghcr.io/leejet/stable-diffusion.cpp
|
||||||
|
ARG SD_TAG=master-vulkan
|
||||||
|
ARG BASE=llama-swap:latest
|
||||||
|
|
||||||
|
FROM ${SD_IMAGE}:${SD_TAG} AS sd-source
|
||||||
|
FROM ${BASE}
|
||||||
|
|
||||||
|
ARG UID=10001
|
||||||
|
ARG GID=10001
|
||||||
|
|
||||||
|
COPY --from=sd-source --chown=${UID}:${GID} /sd-server /app/sd-server
|
||||||
@@ -123,6 +123,7 @@ type Config struct {
|
|||||||
LogTimeFormat string `yaml:"logTimeFormat"`
|
LogTimeFormat string `yaml:"logTimeFormat"`
|
||||||
LogToStdout string `yaml:"logToStdout"`
|
LogToStdout string `yaml:"logToStdout"`
|
||||||
MetricsMaxInMemory int `yaml:"metricsMaxInMemory"`
|
MetricsMaxInMemory int `yaml:"metricsMaxInMemory"`
|
||||||
|
CaptureBuffer int `yaml:"captureBuffer"`
|
||||||
Models map[string]ModelConfig `yaml:"models"` /* key is model ID */
|
Models map[string]ModelConfig `yaml:"models"` /* key is model ID */
|
||||||
Profiles map[string][]string `yaml:"profiles"`
|
Profiles map[string][]string `yaml:"profiles"`
|
||||||
Groups map[string]GroupConfig `yaml:"groups"` /* key is group ID */
|
Groups map[string]GroupConfig `yaml:"groups"` /* key is group ID */
|
||||||
@@ -201,6 +202,7 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
LogTimeFormat: "",
|
LogTimeFormat: "",
|
||||||
LogToStdout: LogToStdoutProxy,
|
LogToStdout: LogToStdoutProxy,
|
||||||
MetricsMaxInMemory: 1000,
|
MetricsMaxInMemory: 1000,
|
||||||
|
CaptureBuffer: 5,
|
||||||
}
|
}
|
||||||
if err = yaml.Unmarshal([]byte(yamlStr), &config); err != nil {
|
if err = yaml.Unmarshal([]byte(yamlStr), &config); err != nil {
|
||||||
return Config{}, err
|
return Config{}, err
|
||||||
|
|||||||
@@ -215,6 +215,7 @@ groups:
|
|||||||
},
|
},
|
||||||
HealthCheckTimeout: 15,
|
HealthCheckTimeout: 15,
|
||||||
MetricsMaxInMemory: 1000,
|
MetricsMaxInMemory: 1000,
|
||||||
|
CaptureBuffer: 5,
|
||||||
Profiles: map[string][]string{
|
Profiles: map[string][]string{
|
||||||
"test": {"model1", "model2"},
|
"test": {"model1", "model2"},
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -204,6 +204,7 @@ groups:
|
|||||||
},
|
},
|
||||||
HealthCheckTimeout: 15,
|
HealthCheckTimeout: 15,
|
||||||
MetricsMaxInMemory: 1000,
|
MetricsMaxInMemory: 1000,
|
||||||
|
CaptureBuffer: 5,
|
||||||
Profiles: map[string][]string{
|
Profiles: map[string][]string{
|
||||||
"test": {"model1", "model2"},
|
"test": {"model1", "model2"},
|
||||||
},
|
},
|
||||||
|
|||||||
+164
-8
@@ -28,6 +28,28 @@ type TokenMetrics struct {
|
|||||||
PromptPerSecond float64 `json:"prompt_per_second"`
|
PromptPerSecond float64 `json:"prompt_per_second"`
|
||||||
TokensPerSecond float64 `json:"tokens_per_second"`
|
TokensPerSecond float64 `json:"tokens_per_second"`
|
||||||
DurationMs int `json:"duration_ms"`
|
DurationMs int `json:"duration_ms"`
|
||||||
|
HasCapture bool `json:"has_capture"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type ReqRespCapture struct {
|
||||||
|
ID int `json:"id"`
|
||||||
|
ReqPath string `json:"req_path"`
|
||||||
|
ReqHeaders map[string]string `json:"req_headers"`
|
||||||
|
ReqBody []byte `json:"req_body"`
|
||||||
|
RespHeaders map[string]string `json:"resp_headers"`
|
||||||
|
RespBody []byte `json:"resp_body"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// Size returns the approximate memory usage of this capture in bytes
|
||||||
|
func (c *ReqRespCapture) Size() int {
|
||||||
|
size := len(c.ReqPath) + len(c.ReqBody) + len(c.RespBody)
|
||||||
|
for k, v := range c.ReqHeaders {
|
||||||
|
size += len(k) + len(v)
|
||||||
|
}
|
||||||
|
for k, v := range c.RespHeaders {
|
||||||
|
size += len(k) + len(v)
|
||||||
|
}
|
||||||
|
return size
|
||||||
}
|
}
|
||||||
|
|
||||||
// TokenMetricsEvent represents a token metrics event
|
// TokenMetricsEvent represents a token metrics event
|
||||||
@@ -46,19 +68,32 @@ type metricsMonitor struct {
|
|||||||
maxMetrics int
|
maxMetrics int
|
||||||
nextID int
|
nextID int
|
||||||
logger *LogMonitor
|
logger *LogMonitor
|
||||||
|
|
||||||
|
// capture fields
|
||||||
|
enableCaptures bool
|
||||||
|
captures map[int]ReqRespCapture // map for O(1) lookup by ID
|
||||||
|
captureOrder []int // track insertion order for FIFO eviction
|
||||||
|
captureSize int // current total size in bytes
|
||||||
|
maxCaptureSize int // max bytes for captures
|
||||||
}
|
}
|
||||||
|
|
||||||
func newMetricsMonitor(logger *LogMonitor, maxMetrics int) *metricsMonitor {
|
// newMetricsMonitor creates a new metricsMonitor. captureBufferMB is the
|
||||||
mp := &metricsMonitor{
|
// capture buffer size in megabytes; 0 disables captures.
|
||||||
|
func newMetricsMonitor(logger *LogMonitor, maxMetrics int, captureBufferMB int) *metricsMonitor {
|
||||||
|
return &metricsMonitor{
|
||||||
logger: logger,
|
logger: logger,
|
||||||
maxMetrics: maxMetrics,
|
maxMetrics: maxMetrics,
|
||||||
|
enableCaptures: captureBufferMB > 0,
|
||||||
|
captures: make(map[int]ReqRespCapture),
|
||||||
|
captureOrder: make([]int, 0),
|
||||||
|
captureSize: 0,
|
||||||
|
maxCaptureSize: captureBufferMB * 1024 * 1024,
|
||||||
}
|
}
|
||||||
|
|
||||||
return mp
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// addMetrics adds a new metric to the collection and publishes an event
|
// addMetrics adds a new metric to the collection and publishes an event.
|
||||||
func (mp *metricsMonitor) addMetrics(metric TokenMetrics) {
|
// Returns the assigned metric ID.
|
||||||
|
func (mp *metricsMonitor) addMetrics(metric TokenMetrics) int {
|
||||||
mp.mu.Lock()
|
mp.mu.Lock()
|
||||||
defer mp.mu.Unlock()
|
defer mp.mu.Unlock()
|
||||||
|
|
||||||
@@ -69,6 +104,49 @@ func (mp *metricsMonitor) addMetrics(metric TokenMetrics) {
|
|||||||
mp.metrics = mp.metrics[len(mp.metrics)-mp.maxMetrics:]
|
mp.metrics = mp.metrics[len(mp.metrics)-mp.maxMetrics:]
|
||||||
}
|
}
|
||||||
event.Emit(TokenMetricsEvent{Metrics: metric})
|
event.Emit(TokenMetricsEvent{Metrics: metric})
|
||||||
|
return metric.ID
|
||||||
|
}
|
||||||
|
|
||||||
|
// addCapture adds a new capture to the buffer with size-based eviction.
|
||||||
|
// Captures are skipped if enableCaptures is false or if capture exceeds maxCaptureSize.
|
||||||
|
func (mp *metricsMonitor) addCapture(capture ReqRespCapture) {
|
||||||
|
if !mp.enableCaptures {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
mp.mu.Lock()
|
||||||
|
defer mp.mu.Unlock()
|
||||||
|
|
||||||
|
captureSize := capture.Size()
|
||||||
|
if captureSize > mp.maxCaptureSize {
|
||||||
|
mp.logger.Warnf("capture size %d exceeds max %d, skipping", captureSize, mp.maxCaptureSize)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Evict oldest (FIFO) until room available
|
||||||
|
for mp.captureSize+captureSize > mp.maxCaptureSize && len(mp.captureOrder) > 0 {
|
||||||
|
oldestID := mp.captureOrder[0]
|
||||||
|
mp.captureOrder = mp.captureOrder[1:]
|
||||||
|
if evicted, exists := mp.captures[oldestID]; exists {
|
||||||
|
mp.captureSize -= evicted.Size()
|
||||||
|
delete(mp.captures, oldestID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
mp.captures[capture.ID] = capture
|
||||||
|
mp.captureOrder = append(mp.captureOrder, capture.ID)
|
||||||
|
mp.captureSize += captureSize
|
||||||
|
}
|
||||||
|
|
||||||
|
// getCaptureByID returns a capture by its ID, or nil if not found.
|
||||||
|
func (mp *metricsMonitor) getCaptureByID(id int) *ReqRespCapture {
|
||||||
|
mp.mu.RLock()
|
||||||
|
defer mp.mu.RUnlock()
|
||||||
|
|
||||||
|
if capture, exists := mp.captures[id]; exists {
|
||||||
|
return &capture
|
||||||
|
}
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// getMetrics returns a copy of the current metrics
|
// getMetrics returns a copy of the current metrics
|
||||||
@@ -97,6 +175,28 @@ func (mp *metricsMonitor) wrapHandler(
|
|||||||
request *http.Request,
|
request *http.Request,
|
||||||
next func(modelID string, w http.ResponseWriter, r *http.Request) error,
|
next func(modelID string, w http.ResponseWriter, r *http.Request) error,
|
||||||
) error {
|
) error {
|
||||||
|
// Capture request body and headers if captures enabled
|
||||||
|
var reqBody []byte
|
||||||
|
var reqHeaders map[string]string
|
||||||
|
if mp.enableCaptures {
|
||||||
|
if request.Body != nil {
|
||||||
|
var err error
|
||||||
|
reqBody, err = io.ReadAll(request.Body)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to read request body for capture: %w", err)
|
||||||
|
}
|
||||||
|
request.Body.Close()
|
||||||
|
request.Body = io.NopCloser(bytes.NewBuffer(reqBody))
|
||||||
|
}
|
||||||
|
reqHeaders = make(map[string]string)
|
||||||
|
for key, values := range request.Header {
|
||||||
|
if len(values) > 0 {
|
||||||
|
reqHeaders[key] = values[0]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
redactHeaders(reqHeaders)
|
||||||
|
}
|
||||||
|
|
||||||
recorder := newBodyCopier(writer)
|
recorder := newBodyCopier(writer)
|
||||||
|
|
||||||
// Filter Accept-Encoding to only include encodings we can decompress for metrics
|
// Filter Accept-Encoding to only include encodings we can decompress for metrics
|
||||||
@@ -140,7 +240,6 @@ func (mp *metricsMonitor) wrapHandler(
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if strings.Contains(recorder.Header().Get("Content-Type"), "text/event-stream") {
|
if strings.Contains(recorder.Header().Get("Content-Type"), "text/event-stream") {
|
||||||
if parsed, err := processStreamingResponse(modelID, recorder.StartTime(), body); err != nil {
|
if parsed, err := processStreamingResponse(modelID, recorder.StartTime(), body); err != nil {
|
||||||
mp.logger.Warnf("error processing streaming response: %v, path=%s, recording minimal metrics", err, request.URL.Path)
|
mp.logger.Warnf("error processing streaming response: %v, path=%s, recording minimal metrics", err, request.URL.Path)
|
||||||
@@ -153,6 +252,14 @@ func (mp *metricsMonitor) wrapHandler(
|
|||||||
usage := parsed.Get("usage")
|
usage := parsed.Get("usage")
|
||||||
timings := parsed.Get("timings")
|
timings := parsed.Get("timings")
|
||||||
|
|
||||||
|
// extract timings for infill - response is an array, timings are in the last element
|
||||||
|
// see #463
|
||||||
|
if strings.HasPrefix(request.URL.Path, "/infill") {
|
||||||
|
if arr := parsed.Array(); len(arr) > 0 {
|
||||||
|
timings = arr[len(arr)-1].Get("timings")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if usage.Exists() || timings.Exists() {
|
if usage.Exists() || timings.Exists() {
|
||||||
if parsedMetrics, err := parseMetrics(modelID, recorder.StartTime(), usage, timings); err != nil {
|
if parsedMetrics, err := parseMetrics(modelID, recorder.StartTime(), usage, timings); err != nil {
|
||||||
mp.logger.Warnf("error parsing metrics: %v, path=%s, recording minimal metrics", err, request.URL.Path)
|
mp.logger.Warnf("error parsing metrics: %v, path=%s, recording minimal metrics", err, request.URL.Path)
|
||||||
@@ -165,7 +272,38 @@ func (mp *metricsMonitor) wrapHandler(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
mp.addMetrics(tm)
|
// Build capture if enabled and determine if it will be stored
|
||||||
|
var capture *ReqRespCapture
|
||||||
|
if mp.enableCaptures {
|
||||||
|
respHeaders := make(map[string]string)
|
||||||
|
for key, values := range recorder.Header() {
|
||||||
|
if len(values) > 0 {
|
||||||
|
respHeaders[key] = values[0]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
redactHeaders(respHeaders)
|
||||||
|
delete(respHeaders, "Content-Encoding")
|
||||||
|
capture = &ReqRespCapture{
|
||||||
|
ReqPath: request.URL.Path,
|
||||||
|
ReqHeaders: reqHeaders,
|
||||||
|
ReqBody: reqBody,
|
||||||
|
RespHeaders: respHeaders,
|
||||||
|
RespBody: body,
|
||||||
|
}
|
||||||
|
// Only set HasCapture if the capture will actually be stored (not too large)
|
||||||
|
if capture.Size() <= mp.maxCaptureSize {
|
||||||
|
tm.HasCapture = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
metricID := mp.addMetrics(tm)
|
||||||
|
|
||||||
|
// Store capture if enabled
|
||||||
|
if capture != nil {
|
||||||
|
capture.ID = metricID
|
||||||
|
mp.addCapture(*capture)
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -336,6 +474,24 @@ func (w *responseBodyCopier) StartTime() time.Time {
|
|||||||
return w.start
|
return w.start
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// sensitiveHeaders lists headers that should be redacted in captures
|
||||||
|
var sensitiveHeaders = map[string]bool{
|
||||||
|
"authorization": true,
|
||||||
|
"proxy-authorization": true,
|
||||||
|
"cookie": true,
|
||||||
|
"set-cookie": true,
|
||||||
|
"x-api-key": true,
|
||||||
|
}
|
||||||
|
|
||||||
|
// redactHeaders replaces sensitive header values in-place with "[REDACTED]"
|
||||||
|
func redactHeaders(headers map[string]string) {
|
||||||
|
for key := range headers {
|
||||||
|
if sensitiveHeaders[strings.ToLower(key)] {
|
||||||
|
headers[key] = "[REDACTED]"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// filterAcceptEncoding filters the Accept-Encoding header to only include
|
// filterAcceptEncoding filters the Accept-Encoding header to only include
|
||||||
// encodings we can decompress (gzip, deflate). This respects the client's
|
// encodings we can decompress (gzip, deflate). This respects the client's
|
||||||
// preferences while ensuring we can parse response bodies for metrics.
|
// preferences while ensuring we can parse response bodies for metrics.
|
||||||
|
|||||||
+323
-29
@@ -18,7 +18,7 @@ import (
|
|||||||
|
|
||||||
func TestMetricsMonitor_AddMetrics(t *testing.T) {
|
func TestMetricsMonitor_AddMetrics(t *testing.T) {
|
||||||
t.Run("adds metrics and assigns ID", func(t *testing.T) {
|
t.Run("adds metrics and assigns ID", func(t *testing.T) {
|
||||||
mm := newMetricsMonitor(testLogger, 10)
|
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||||
|
|
||||||
metric := TokenMetrics{
|
metric := TokenMetrics{
|
||||||
Model: "test-model",
|
Model: "test-model",
|
||||||
@@ -37,7 +37,7 @@ func TestMetricsMonitor_AddMetrics(t *testing.T) {
|
|||||||
})
|
})
|
||||||
|
|
||||||
t.Run("increments ID for each metric", func(t *testing.T) {
|
t.Run("increments ID for each metric", func(t *testing.T) {
|
||||||
mm := newMetricsMonitor(testLogger, 10)
|
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||||
|
|
||||||
for i := 0; i < 5; i++ {
|
for i := 0; i < 5; i++ {
|
||||||
mm.addMetrics(TokenMetrics{Model: "model"})
|
mm.addMetrics(TokenMetrics{Model: "model"})
|
||||||
@@ -51,7 +51,7 @@ func TestMetricsMonitor_AddMetrics(t *testing.T) {
|
|||||||
})
|
})
|
||||||
|
|
||||||
t.Run("respects max metrics limit", func(t *testing.T) {
|
t.Run("respects max metrics limit", func(t *testing.T) {
|
||||||
mm := newMetricsMonitor(testLogger, 3)
|
mm := newMetricsMonitor(testLogger, 3, 0)
|
||||||
|
|
||||||
// Add 5 metrics
|
// Add 5 metrics
|
||||||
for i := 0; i < 5; i++ {
|
for i := 0; i < 5; i++ {
|
||||||
@@ -71,7 +71,7 @@ func TestMetricsMonitor_AddMetrics(t *testing.T) {
|
|||||||
})
|
})
|
||||||
|
|
||||||
t.Run("emits TokenMetricsEvent", func(t *testing.T) {
|
t.Run("emits TokenMetricsEvent", func(t *testing.T) {
|
||||||
mm := newMetricsMonitor(testLogger, 10)
|
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||||
|
|
||||||
receivedEvent := make(chan TokenMetricsEvent, 1)
|
receivedEvent := make(chan TokenMetricsEvent, 1)
|
||||||
cancel := event.On(func(e TokenMetricsEvent) {
|
cancel := event.On(func(e TokenMetricsEvent) {
|
||||||
@@ -101,14 +101,14 @@ func TestMetricsMonitor_AddMetrics(t *testing.T) {
|
|||||||
|
|
||||||
func TestMetricsMonitor_GetMetrics(t *testing.T) {
|
func TestMetricsMonitor_GetMetrics(t *testing.T) {
|
||||||
t.Run("returns empty slice when no metrics", func(t *testing.T) {
|
t.Run("returns empty slice when no metrics", func(t *testing.T) {
|
||||||
mm := newMetricsMonitor(testLogger, 10)
|
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||||
metrics := mm.getMetrics()
|
metrics := mm.getMetrics()
|
||||||
assert.NotNil(t, metrics)
|
assert.NotNil(t, metrics)
|
||||||
assert.Equal(t, 0, len(metrics))
|
assert.Equal(t, 0, len(metrics))
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("returns copy of metrics", func(t *testing.T) {
|
t.Run("returns copy of metrics", func(t *testing.T) {
|
||||||
mm := newMetricsMonitor(testLogger, 10)
|
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||||
mm.addMetrics(TokenMetrics{Model: "model1"})
|
mm.addMetrics(TokenMetrics{Model: "model1"})
|
||||||
mm.addMetrics(TokenMetrics{Model: "model2"})
|
mm.addMetrics(TokenMetrics{Model: "model2"})
|
||||||
|
|
||||||
@@ -128,7 +128,7 @@ func TestMetricsMonitor_GetMetrics(t *testing.T) {
|
|||||||
|
|
||||||
func TestMetricsMonitor_GetMetricsJSON(t *testing.T) {
|
func TestMetricsMonitor_GetMetricsJSON(t *testing.T) {
|
||||||
t.Run("returns valid JSON for empty metrics", func(t *testing.T) {
|
t.Run("returns valid JSON for empty metrics", func(t *testing.T) {
|
||||||
mm := newMetricsMonitor(testLogger, 10)
|
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||||
jsonData, err := mm.getMetricsJSON()
|
jsonData, err := mm.getMetricsJSON()
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
assert.NotNil(t, jsonData)
|
assert.NotNil(t, jsonData)
|
||||||
@@ -140,7 +140,7 @@ func TestMetricsMonitor_GetMetricsJSON(t *testing.T) {
|
|||||||
})
|
})
|
||||||
|
|
||||||
t.Run("returns valid JSON with metrics", func(t *testing.T) {
|
t.Run("returns valid JSON with metrics", func(t *testing.T) {
|
||||||
mm := newMetricsMonitor(testLogger, 10)
|
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||||
mm.addMetrics(TokenMetrics{
|
mm.addMetrics(TokenMetrics{
|
||||||
Model: "model1",
|
Model: "model1",
|
||||||
InputTokens: 100,
|
InputTokens: 100,
|
||||||
@@ -168,7 +168,7 @@ func TestMetricsMonitor_GetMetricsJSON(t *testing.T) {
|
|||||||
|
|
||||||
func TestMetricsMonitor_WrapHandler(t *testing.T) {
|
func TestMetricsMonitor_WrapHandler(t *testing.T) {
|
||||||
t.Run("successful non-streaming request with usage data", func(t *testing.T) {
|
t.Run("successful non-streaming request with usage data", func(t *testing.T) {
|
||||||
mm := newMetricsMonitor(testLogger, 10)
|
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||||
|
|
||||||
responseBody := `{
|
responseBody := `{
|
||||||
"usage": {
|
"usage": {
|
||||||
@@ -199,7 +199,7 @@ func TestMetricsMonitor_WrapHandler(t *testing.T) {
|
|||||||
})
|
})
|
||||||
|
|
||||||
t.Run("successful request with timings data", func(t *testing.T) {
|
t.Run("successful request with timings data", func(t *testing.T) {
|
||||||
mm := newMetricsMonitor(testLogger, 10)
|
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||||
|
|
||||||
responseBody := `{
|
responseBody := `{
|
||||||
"timings": {
|
"timings": {
|
||||||
@@ -239,7 +239,7 @@ func TestMetricsMonitor_WrapHandler(t *testing.T) {
|
|||||||
})
|
})
|
||||||
|
|
||||||
t.Run("streaming request with SSE format", func(t *testing.T) {
|
t.Run("streaming request with SSE format", func(t *testing.T) {
|
||||||
mm := newMetricsMonitor(testLogger, 10)
|
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||||
|
|
||||||
// Note: SSE format requires proper line breaks - each data line followed by blank line
|
// Note: SSE format requires proper line breaks - each data line followed by blank line
|
||||||
responseBody := `data: {"choices":[{"text":"Hello"}]}
|
responseBody := `data: {"choices":[{"text":"Hello"}]}
|
||||||
@@ -275,7 +275,7 @@ data: [DONE]
|
|||||||
})
|
})
|
||||||
|
|
||||||
t.Run("non-OK status code does not record metrics", func(t *testing.T) {
|
t.Run("non-OK status code does not record metrics", func(t *testing.T) {
|
||||||
mm := newMetricsMonitor(testLogger, 10)
|
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||||
|
|
||||||
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
||||||
w.WriteHeader(http.StatusBadRequest)
|
w.WriteHeader(http.StatusBadRequest)
|
||||||
@@ -295,7 +295,7 @@ data: [DONE]
|
|||||||
})
|
})
|
||||||
|
|
||||||
t.Run("empty response body records minimal metrics", func(t *testing.T) {
|
t.Run("empty response body records minimal metrics", func(t *testing.T) {
|
||||||
mm := newMetricsMonitor(testLogger, 10)
|
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||||
|
|
||||||
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
||||||
w.WriteHeader(http.StatusOK)
|
w.WriteHeader(http.StatusOK)
|
||||||
@@ -317,7 +317,7 @@ data: [DONE]
|
|||||||
})
|
})
|
||||||
|
|
||||||
t.Run("invalid JSON records minimal metrics", func(t *testing.T) {
|
t.Run("invalid JSON records minimal metrics", func(t *testing.T) {
|
||||||
mm := newMetricsMonitor(testLogger, 10)
|
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||||
|
|
||||||
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
||||||
w.Header().Set("Content-Type", "application/json")
|
w.Header().Set("Content-Type", "application/json")
|
||||||
@@ -341,7 +341,7 @@ data: [DONE]
|
|||||||
})
|
})
|
||||||
|
|
||||||
t.Run("next handler error is propagated", func(t *testing.T) {
|
t.Run("next handler error is propagated", func(t *testing.T) {
|
||||||
mm := newMetricsMonitor(testLogger, 10)
|
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||||
|
|
||||||
expectedErr := assert.AnError
|
expectedErr := assert.AnError
|
||||||
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
||||||
@@ -360,7 +360,7 @@ data: [DONE]
|
|||||||
})
|
})
|
||||||
|
|
||||||
t.Run("response without usage or timings records minimal metrics", func(t *testing.T) {
|
t.Run("response without usage or timings records minimal metrics", func(t *testing.T) {
|
||||||
mm := newMetricsMonitor(testLogger, 10)
|
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||||
|
|
||||||
responseBody := `{"result": "ok"}`
|
responseBody := `{"result": "ok"}`
|
||||||
|
|
||||||
@@ -384,6 +384,75 @@ data: [DONE]
|
|||||||
assert.Equal(t, 0, metrics[0].InputTokens)
|
assert.Equal(t, 0, metrics[0].InputTokens)
|
||||||
assert.Equal(t, 0, metrics[0].OutputTokens)
|
assert.Equal(t, 0, metrics[0].OutputTokens)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
t.Run("infill request extracts timings from last array element", func(t *testing.T) {
|
||||||
|
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||||
|
|
||||||
|
// Infill response is an array with timings in the last element
|
||||||
|
responseBody := `[
|
||||||
|
{"content": "first chunk"},
|
||||||
|
{"content": "second chunk"},
|
||||||
|
{"content": "final", "timings": {
|
||||||
|
"prompt_n": 150,
|
||||||
|
"predicted_n": 75,
|
||||||
|
"prompt_per_second": 200.5,
|
||||||
|
"predicted_per_second": 35.5,
|
||||||
|
"prompt_ms": 600.0,
|
||||||
|
"predicted_ms": 1800.0,
|
||||||
|
"cache_n": 30
|
||||||
|
}}
|
||||||
|
]`
|
||||||
|
|
||||||
|
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
w.Write([]byte(responseBody))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
req := httptest.NewRequest("POST", "/infill", nil)
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
ginCtx, _ := gin.CreateTestContext(rec)
|
||||||
|
|
||||||
|
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
metrics := mm.getMetrics()
|
||||||
|
assert.Equal(t, 1, len(metrics))
|
||||||
|
assert.Equal(t, "test-model", metrics[0].Model)
|
||||||
|
assert.Equal(t, 150, metrics[0].InputTokens)
|
||||||
|
assert.Equal(t, 75, metrics[0].OutputTokens)
|
||||||
|
assert.Equal(t, 30, metrics[0].CachedTokens)
|
||||||
|
assert.Equal(t, 200.5, metrics[0].PromptPerSecond)
|
||||||
|
assert.Equal(t, 35.5, metrics[0].TokensPerSecond)
|
||||||
|
assert.Equal(t, 2400, metrics[0].DurationMs) // 600 + 1800
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("infill request with empty array records minimal metrics", func(t *testing.T) {
|
||||||
|
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||||
|
|
||||||
|
responseBody := `[]`
|
||||||
|
|
||||||
|
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
w.Write([]byte(responseBody))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
req := httptest.NewRequest("POST", "/infill", nil)
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
ginCtx, _ := gin.CreateTestContext(rec)
|
||||||
|
|
||||||
|
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
metrics := mm.getMetrics()
|
||||||
|
assert.Equal(t, 1, len(metrics))
|
||||||
|
assert.Equal(t, "test-model", metrics[0].Model)
|
||||||
|
assert.Equal(t, 0, metrics[0].InputTokens)
|
||||||
|
assert.Equal(t, 0, metrics[0].OutputTokens)
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestMetricsMonitor_ResponseBodyCopier(t *testing.T) {
|
func TestMetricsMonitor_ResponseBodyCopier(t *testing.T) {
|
||||||
@@ -437,7 +506,7 @@ func TestMetricsMonitor_ResponseBodyCopier(t *testing.T) {
|
|||||||
|
|
||||||
func TestMetricsMonitor_Concurrent(t *testing.T) {
|
func TestMetricsMonitor_Concurrent(t *testing.T) {
|
||||||
t.Run("concurrent addMetrics is safe", func(t *testing.T) {
|
t.Run("concurrent addMetrics is safe", func(t *testing.T) {
|
||||||
mm := newMetricsMonitor(testLogger, 1000)
|
mm := newMetricsMonitor(testLogger, 1000, 0)
|
||||||
|
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
numGoroutines := 10
|
numGoroutines := 10
|
||||||
@@ -464,7 +533,7 @@ func TestMetricsMonitor_Concurrent(t *testing.T) {
|
|||||||
})
|
})
|
||||||
|
|
||||||
t.Run("concurrent reads and writes are safe", func(t *testing.T) {
|
t.Run("concurrent reads and writes are safe", func(t *testing.T) {
|
||||||
mm := newMetricsMonitor(testLogger, 100)
|
mm := newMetricsMonitor(testLogger, 100, 0)
|
||||||
|
|
||||||
done := make(chan bool)
|
done := make(chan bool)
|
||||||
|
|
||||||
@@ -502,7 +571,7 @@ func TestMetricsMonitor_Concurrent(t *testing.T) {
|
|||||||
|
|
||||||
func TestMetricsMonitor_ParseMetrics(t *testing.T) {
|
func TestMetricsMonitor_ParseMetrics(t *testing.T) {
|
||||||
t.Run("prefers timings over usage data", func(t *testing.T) {
|
t.Run("prefers timings over usage data", func(t *testing.T) {
|
||||||
mm := newMetricsMonitor(testLogger, 10)
|
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||||
|
|
||||||
// Timings should take precedence over usage
|
// Timings should take precedence over usage
|
||||||
responseBody := `{
|
responseBody := `{
|
||||||
@@ -542,7 +611,7 @@ func TestMetricsMonitor_ParseMetrics(t *testing.T) {
|
|||||||
})
|
})
|
||||||
|
|
||||||
t.Run("handles missing cache_n in timings", func(t *testing.T) {
|
t.Run("handles missing cache_n in timings", func(t *testing.T) {
|
||||||
mm := newMetricsMonitor(testLogger, 10)
|
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||||
|
|
||||||
responseBody := `{
|
responseBody := `{
|
||||||
"timings": {
|
"timings": {
|
||||||
@@ -577,7 +646,7 @@ func TestMetricsMonitor_ParseMetrics(t *testing.T) {
|
|||||||
|
|
||||||
func TestMetricsMonitor_StreamingResponse(t *testing.T) {
|
func TestMetricsMonitor_StreamingResponse(t *testing.T) {
|
||||||
t.Run("finds metrics in last valid SSE data", func(t *testing.T) {
|
t.Run("finds metrics in last valid SSE data", func(t *testing.T) {
|
||||||
mm := newMetricsMonitor(testLogger, 10)
|
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||||
|
|
||||||
// Metrics should be found in the last data line before [DONE]
|
// Metrics should be found in the last data line before [DONE]
|
||||||
responseBody := `data: {"choices":[{"text":"First"}]}
|
responseBody := `data: {"choices":[{"text":"First"}]}
|
||||||
@@ -611,7 +680,7 @@ data: [DONE]
|
|||||||
})
|
})
|
||||||
|
|
||||||
t.Run("handles streaming with no valid JSON records minimal metrics", func(t *testing.T) {
|
t.Run("handles streaming with no valid JSON records minimal metrics", func(t *testing.T) {
|
||||||
mm := newMetricsMonitor(testLogger, 10)
|
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||||
|
|
||||||
responseBody := `data: not json
|
responseBody := `data: not json
|
||||||
|
|
||||||
@@ -641,7 +710,7 @@ data: [DONE]
|
|||||||
})
|
})
|
||||||
|
|
||||||
t.Run("handles empty streaming response records minimal metrics", func(t *testing.T) {
|
t.Run("handles empty streaming response records minimal metrics", func(t *testing.T) {
|
||||||
mm := newMetricsMonitor(testLogger, 10)
|
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||||
|
|
||||||
responseBody := ``
|
responseBody := ``
|
||||||
|
|
||||||
@@ -669,7 +738,7 @@ data: [DONE]
|
|||||||
|
|
||||||
// Benchmark tests
|
// Benchmark tests
|
||||||
func BenchmarkMetricsMonitor_AddMetrics(b *testing.B) {
|
func BenchmarkMetricsMonitor_AddMetrics(b *testing.B) {
|
||||||
mm := newMetricsMonitor(testLogger, 1000)
|
mm := newMetricsMonitor(testLogger, 1000, 0)
|
||||||
|
|
||||||
metric := TokenMetrics{
|
metric := TokenMetrics{
|
||||||
Model: "test-model",
|
Model: "test-model",
|
||||||
@@ -690,7 +759,7 @@ func BenchmarkMetricsMonitor_AddMetrics(b *testing.B) {
|
|||||||
|
|
||||||
func BenchmarkMetricsMonitor_AddMetrics_SmallBuffer(b *testing.B) {
|
func BenchmarkMetricsMonitor_AddMetrics_SmallBuffer(b *testing.B) {
|
||||||
// Test performance with a smaller buffer where wrapping occurs more frequently
|
// Test performance with a smaller buffer where wrapping occurs more frequently
|
||||||
mm := newMetricsMonitor(testLogger, 100)
|
mm := newMetricsMonitor(testLogger, 100, 0)
|
||||||
|
|
||||||
metric := TokenMetrics{
|
metric := TokenMetrics{
|
||||||
Model: "test-model",
|
Model: "test-model",
|
||||||
@@ -711,7 +780,7 @@ func BenchmarkMetricsMonitor_AddMetrics_SmallBuffer(b *testing.B) {
|
|||||||
|
|
||||||
func TestMetricsMonitor_WrapHandler_Compression(t *testing.T) {
|
func TestMetricsMonitor_WrapHandler_Compression(t *testing.T) {
|
||||||
t.Run("gzip encoded response", func(t *testing.T) {
|
t.Run("gzip encoded response", func(t *testing.T) {
|
||||||
mm := newMetricsMonitor(testLogger, 10)
|
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||||
|
|
||||||
responseBody := `{"usage": {"prompt_tokens": 100, "completion_tokens": 50}}`
|
responseBody := `{"usage": {"prompt_tokens": 100, "completion_tokens": 50}}`
|
||||||
|
|
||||||
@@ -745,7 +814,7 @@ func TestMetricsMonitor_WrapHandler_Compression(t *testing.T) {
|
|||||||
})
|
})
|
||||||
|
|
||||||
t.Run("deflate encoded response", func(t *testing.T) {
|
t.Run("deflate encoded response", func(t *testing.T) {
|
||||||
mm := newMetricsMonitor(testLogger, 10)
|
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||||
|
|
||||||
responseBody := `{"usage": {"prompt_tokens": 200, "completion_tokens": 75}}`
|
responseBody := `{"usage": {"prompt_tokens": 200, "completion_tokens": 75}}`
|
||||||
|
|
||||||
@@ -779,7 +848,7 @@ func TestMetricsMonitor_WrapHandler_Compression(t *testing.T) {
|
|||||||
})
|
})
|
||||||
|
|
||||||
t.Run("invalid gzip data records minimal metrics", func(t *testing.T) {
|
t.Run("invalid gzip data records minimal metrics", func(t *testing.T) {
|
||||||
mm := newMetricsMonitor(testLogger, 10)
|
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||||
|
|
||||||
// Invalid compressed data
|
// Invalid compressed data
|
||||||
invalidData := []byte("this is not gzip data")
|
invalidData := []byte("this is not gzip data")
|
||||||
@@ -807,7 +876,7 @@ func TestMetricsMonitor_WrapHandler_Compression(t *testing.T) {
|
|||||||
})
|
})
|
||||||
|
|
||||||
t.Run("unknown encoding treated as uncompressed", func(t *testing.T) {
|
t.Run("unknown encoding treated as uncompressed", func(t *testing.T) {
|
||||||
mm := newMetricsMonitor(testLogger, 10)
|
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||||
|
|
||||||
responseBody := `{"usage": {"prompt_tokens": 300, "completion_tokens": 100}}`
|
responseBody := `{"usage": {"prompt_tokens": 300, "completion_tokens": 100}}`
|
||||||
|
|
||||||
@@ -832,3 +901,228 @@ func TestMetricsMonitor_WrapHandler_Compression(t *testing.T) {
|
|||||||
assert.Equal(t, 100, metrics[0].OutputTokens)
|
assert.Equal(t, 100, metrics[0].OutputTokens)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestReqRespCapture_Size(t *testing.T) {
|
||||||
|
t.Run("calculates size correctly", func(t *testing.T) {
|
||||||
|
capture := ReqRespCapture{
|
||||||
|
ID: 1,
|
||||||
|
ReqPath: "/v1/chat/completions", // 20 bytes
|
||||||
|
ReqHeaders: map[string]string{
|
||||||
|
"Content-Type": "application/json", // 12 + 16 = 28
|
||||||
|
},
|
||||||
|
ReqBody: []byte("request body"), // 12 bytes
|
||||||
|
RespHeaders: map[string]string{
|
||||||
|
"X-Test": "value", // 6 + 5 = 11
|
||||||
|
},
|
||||||
|
RespBody: []byte("response body"), // 13 bytes
|
||||||
|
}
|
||||||
|
|
||||||
|
// Expected: 20 + 12 + 13 + 28 + 11 = 84
|
||||||
|
assert.Equal(t, 84, capture.Size())
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("handles empty capture", func(t *testing.T) {
|
||||||
|
capture := ReqRespCapture{}
|
||||||
|
assert.Equal(t, 0, capture.Size())
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMetricsMonitor_AddCapture(t *testing.T) {
|
||||||
|
t.Run("does nothing when captures disabled", func(t *testing.T) {
|
||||||
|
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||||
|
|
||||||
|
capture := ReqRespCapture{
|
||||||
|
ID: 0,
|
||||||
|
ReqBody: []byte("test"),
|
||||||
|
}
|
||||||
|
mm.addCapture(capture)
|
||||||
|
|
||||||
|
// Should not store capture
|
||||||
|
assert.Nil(t, mm.getCaptureByID(0))
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("adds capture when enabled", func(t *testing.T) {
|
||||||
|
mm := newMetricsMonitor(testLogger, 10, 5)
|
||||||
|
|
||||||
|
capture := ReqRespCapture{
|
||||||
|
ID: 0,
|
||||||
|
ReqBody: []byte("test request"),
|
||||||
|
RespBody: []byte("test response"),
|
||||||
|
}
|
||||||
|
mm.addCapture(capture)
|
||||||
|
|
||||||
|
retrieved := mm.getCaptureByID(0)
|
||||||
|
assert.NotNil(t, retrieved)
|
||||||
|
assert.Equal(t, 0, retrieved.ID)
|
||||||
|
assert.Equal(t, []byte("test request"), retrieved.ReqBody)
|
||||||
|
assert.Equal(t, []byte("test response"), retrieved.RespBody)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("evicts oldest when exceeding max size", func(t *testing.T) {
|
||||||
|
mm := newMetricsMonitor(testLogger, 10, 5)
|
||||||
|
mm.maxCaptureSize = 100 // Set small limit for test
|
||||||
|
|
||||||
|
// Add captures that will exceed the limit
|
||||||
|
capture1 := ReqRespCapture{ID: 0, ReqBody: make([]byte, 40)}
|
||||||
|
capture2 := ReqRespCapture{ID: 1, ReqBody: make([]byte, 40)}
|
||||||
|
capture3 := ReqRespCapture{ID: 2, ReqBody: make([]byte, 40)}
|
||||||
|
|
||||||
|
mm.addCapture(capture1)
|
||||||
|
mm.addCapture(capture2)
|
||||||
|
// Adding capture3 should evict capture1
|
||||||
|
mm.addCapture(capture3)
|
||||||
|
|
||||||
|
assert.Nil(t, mm.getCaptureByID(0), "capture 0 should be evicted")
|
||||||
|
assert.NotNil(t, mm.getCaptureByID(1), "capture 1 should exist")
|
||||||
|
assert.NotNil(t, mm.getCaptureByID(2), "capture 2 should exist")
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("skips capture larger than max size", func(t *testing.T) {
|
||||||
|
mm := newMetricsMonitor(testLogger, 10, 5)
|
||||||
|
mm.maxCaptureSize = 100
|
||||||
|
|
||||||
|
// Add a capture larger than max
|
||||||
|
largeCapture := ReqRespCapture{ID: 0, ReqBody: make([]byte, 200)}
|
||||||
|
mm.addCapture(largeCapture)
|
||||||
|
|
||||||
|
assert.Nil(t, mm.getCaptureByID(0), "oversized capture should not be stored")
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMetricsMonitor_GetCaptureByID(t *testing.T) {
|
||||||
|
t.Run("returns nil for non-existent ID", func(t *testing.T) {
|
||||||
|
mm := newMetricsMonitor(testLogger, 10, 5)
|
||||||
|
|
||||||
|
assert.Nil(t, mm.getCaptureByID(999))
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("returns capture by ID", func(t *testing.T) {
|
||||||
|
mm := newMetricsMonitor(testLogger, 10, 5)
|
||||||
|
|
||||||
|
capture := ReqRespCapture{
|
||||||
|
ID: 42,
|
||||||
|
ReqBody: []byte("test"),
|
||||||
|
}
|
||||||
|
mm.addCapture(capture)
|
||||||
|
|
||||||
|
retrieved := mm.getCaptureByID(42)
|
||||||
|
assert.NotNil(t, retrieved)
|
||||||
|
assert.Equal(t, 42, retrieved.ID)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRedactHeaders(t *testing.T) {
|
||||||
|
t.Run("redacts sensitive headers", func(t *testing.T) {
|
||||||
|
headers := map[string]string{
|
||||||
|
"Authorization": "Bearer secret-token",
|
||||||
|
"Proxy-Authorization": "Basic creds",
|
||||||
|
"Cookie": "session=abc123",
|
||||||
|
"Set-Cookie": "session=xyz789",
|
||||||
|
"X-Api-Key": "sk-12345",
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
"X-Custom": "safe-value",
|
||||||
|
}
|
||||||
|
|
||||||
|
redactHeaders(headers)
|
||||||
|
|
||||||
|
assert.Equal(t, "[REDACTED]", headers["Authorization"])
|
||||||
|
assert.Equal(t, "[REDACTED]", headers["Proxy-Authorization"])
|
||||||
|
assert.Equal(t, "[REDACTED]", headers["Cookie"])
|
||||||
|
assert.Equal(t, "[REDACTED]", headers["Set-Cookie"])
|
||||||
|
assert.Equal(t, "[REDACTED]", headers["X-Api-Key"])
|
||||||
|
assert.Equal(t, "application/json", headers["Content-Type"])
|
||||||
|
assert.Equal(t, "safe-value", headers["X-Custom"])
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("handles mixed case header names", func(t *testing.T) {
|
||||||
|
headers := map[string]string{
|
||||||
|
"authorization": "Bearer token",
|
||||||
|
"COOKIE": "session=abc",
|
||||||
|
"x-api-key": "key123",
|
||||||
|
}
|
||||||
|
|
||||||
|
redactHeaders(headers)
|
||||||
|
|
||||||
|
assert.Equal(t, "[REDACTED]", headers["authorization"])
|
||||||
|
assert.Equal(t, "[REDACTED]", headers["COOKIE"])
|
||||||
|
assert.Equal(t, "[REDACTED]", headers["x-api-key"])
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("handles empty headers", func(t *testing.T) {
|
||||||
|
headers := map[string]string{}
|
||||||
|
redactHeaders(headers)
|
||||||
|
assert.Empty(t, headers)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMetricsMonitor_WrapHandler_Capture(t *testing.T) {
|
||||||
|
t.Run("captures request and response when enabled", func(t *testing.T) {
|
||||||
|
mm := newMetricsMonitor(testLogger, 10, 5)
|
||||||
|
|
||||||
|
requestBody := `{"model": "test", "prompt": "hello"}`
|
||||||
|
responseBody := `{"usage": {"prompt_tokens": 100, "completion_tokens": 50}}`
|
||||||
|
|
||||||
|
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.Header().Set("X-Custom", "header-value")
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
w.Write([]byte(responseBody))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
req := httptest.NewRequest("POST", "/test", bytes.NewBufferString(requestBody))
|
||||||
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
req.Header.Set("Authorization", "Bearer secret")
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
ginCtx, _ := gin.CreateTestContext(rec)
|
||||||
|
|
||||||
|
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
// Check metric was recorded
|
||||||
|
metrics := mm.getMetrics()
|
||||||
|
assert.Equal(t, 1, len(metrics))
|
||||||
|
metricID := metrics[0].ID
|
||||||
|
|
||||||
|
// Check capture was stored with same ID
|
||||||
|
capture := mm.getCaptureByID(metricID)
|
||||||
|
assert.NotNil(t, capture)
|
||||||
|
assert.Equal(t, metricID, capture.ID)
|
||||||
|
assert.Equal(t, []byte(requestBody), capture.ReqBody)
|
||||||
|
assert.Equal(t, []byte(responseBody), capture.RespBody)
|
||||||
|
assert.Equal(t, "/test", capture.ReqPath)
|
||||||
|
assert.Equal(t, "application/json", capture.ReqHeaders["Content-Type"])
|
||||||
|
assert.Equal(t, "[REDACTED]", capture.ReqHeaders["Authorization"])
|
||||||
|
assert.Equal(t, "application/json", capture.RespHeaders["Content-Type"])
|
||||||
|
assert.Equal(t, "header-value", capture.RespHeaders["X-Custom"])
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("does not capture when disabled", func(t *testing.T) {
|
||||||
|
mm := newMetricsMonitor(testLogger, 10, 0)
|
||||||
|
|
||||||
|
requestBody := `{"model": "test"}`
|
||||||
|
responseBody := `{"usage": {"prompt_tokens": 100, "completion_tokens": 50}}`
|
||||||
|
|
||||||
|
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
w.Write([]byte(responseBody))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
req := httptest.NewRequest("POST", "/test", bytes.NewBufferString(requestBody))
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
ginCtx, _ := gin.CreateTestContext(rec)
|
||||||
|
|
||||||
|
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
// Metrics should still be recorded
|
||||||
|
metrics := mm.getMetrics()
|
||||||
|
assert.Equal(t, 1, len(metrics))
|
||||||
|
|
||||||
|
// But no capture
|
||||||
|
capture := mm.getCaptureByID(metrics[0].ID)
|
||||||
|
assert.Nil(t, capture)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|||||||
@@ -151,7 +151,7 @@ func New(proxyConfig config.Config) *ProxyManager {
|
|||||||
muxLogger: muxLogger,
|
muxLogger: muxLogger,
|
||||||
upstreamLogger: upstreamLogger,
|
upstreamLogger: upstreamLogger,
|
||||||
|
|
||||||
metricsMonitor: newMetricsMonitor(proxyLogger, maxMetrics),
|
metricsMonitor: newMetricsMonitor(proxyLogger, maxMetrics, proxyConfig.CaptureBuffer),
|
||||||
|
|
||||||
processGroups: make(map[string]*ProcessGroup),
|
processGroups: make(map[string]*ProcessGroup),
|
||||||
|
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
"sort"
|
"sort"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/gin-gonic/gin"
|
"github.com/gin-gonic/gin"
|
||||||
@@ -31,6 +32,7 @@ func addApiHandlers(pm *ProxyManager) {
|
|||||||
apiGroup.GET("/events", pm.apiSendEvents)
|
apiGroup.GET("/events", pm.apiSendEvents)
|
||||||
apiGroup.GET("/metrics", pm.apiGetMetrics)
|
apiGroup.GET("/metrics", pm.apiGetMetrics)
|
||||||
apiGroup.GET("/version", pm.apiGetVersion)
|
apiGroup.GET("/version", pm.apiGetVersion)
|
||||||
|
apiGroup.GET("/captures/:id", pm.apiGetCapture)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -250,3 +252,20 @@ func (pm *ProxyManager) apiGetVersion(c *gin.Context) {
|
|||||||
"build_date": pm.buildDate,
|
"build_date": pm.buildDate,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (pm *ProxyManager) apiGetCapture(c *gin.Context) {
|
||||||
|
idStr := c.Param("id")
|
||||||
|
id, err := strconv.Atoi(idStr)
|
||||||
|
if err != nil {
|
||||||
|
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid capture ID"})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
capture := pm.metricsMonitor.getCaptureByID(id)
|
||||||
|
if capture == nil {
|
||||||
|
c.JSON(http.StatusNotFound, gin.H{"error": "capture not found"})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
c.JSON(http.StatusOK, capture)
|
||||||
|
}
|
||||||
|
|||||||
@@ -6,23 +6,28 @@
|
|||||||
import Models from "./routes/Models.svelte";
|
import Models from "./routes/Models.svelte";
|
||||||
import Activity from "./routes/Activity.svelte";
|
import Activity from "./routes/Activity.svelte";
|
||||||
import Playground from "./routes/Playground.svelte";
|
import Playground from "./routes/Playground.svelte";
|
||||||
|
import PlaygroundStub from "./routes/PlaygroundStub.svelte";
|
||||||
import { enableAPIEvents } from "./stores/api";
|
import { enableAPIEvents } from "./stores/api";
|
||||||
import { initScreenWidth, isDarkMode, appTitle, connectionState } from "./stores/theme";
|
import { initScreenWidth, isDarkMode, appTitle, connectionState } from "./stores/theme";
|
||||||
|
import { currentRoute } from "./stores/route";
|
||||||
|
|
||||||
const routes = {
|
const routes = {
|
||||||
"/": Playground,
|
"/": PlaygroundStub,
|
||||||
"/models": Models,
|
"/models": Models,
|
||||||
"/logs": LogViewer,
|
"/logs": LogViewer,
|
||||||
"/activity": Activity,
|
"/activity": Activity,
|
||||||
"*": Playground,
|
"*": PlaygroundStub,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Sync theme to document attribute
|
function handleRouteLoaded(event: { detail: { route: string | RegExp } }) {
|
||||||
|
const route = event.detail.route;
|
||||||
|
currentRoute.set(typeof route === "string" ? route : "/");
|
||||||
|
}
|
||||||
|
|
||||||
$effect(() => {
|
$effect(() => {
|
||||||
document.documentElement.setAttribute("data-theme", $isDarkMode ? "dark" : "light");
|
document.documentElement.setAttribute("data-theme", $isDarkMode ? "dark" : "light");
|
||||||
});
|
});
|
||||||
|
|
||||||
// Sync title to document
|
|
||||||
$effect(() => {
|
$effect(() => {
|
||||||
const icon = $connectionState === "connecting" ? "\u{1F7E1}" : $connectionState === "connected" ? "\u{1F7E2}" : "\u{1F534}";
|
const icon = $connectionState === "connecting" ? "\u{1F7E1}" : $connectionState === "connected" ? "\u{1F7E2}" : "\u{1F534}";
|
||||||
document.title = `${icon} ${$appTitle}`;
|
document.title = `${icon} ${$appTitle}`;
|
||||||
@@ -43,6 +48,11 @@
|
|||||||
<Header />
|
<Header />
|
||||||
|
|
||||||
<main class="flex-1 overflow-auto p-4">
|
<main class="flex-1 overflow-auto p-4">
|
||||||
<Router {routes} />
|
<div class="h-full" class:hidden={$currentRoute !== "/"}>
|
||||||
|
<Playground />
|
||||||
|
</div>
|
||||||
|
<div class="h-full" class:hidden={$currentRoute === "/"}>
|
||||||
|
<Router {routes} on:routeLoaded={handleRouteLoaded} />
|
||||||
|
</div>
|
||||||
</main>
|
</main>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -0,0 +1,452 @@
|
|||||||
|
<script lang="ts">
|
||||||
|
import type { ReqRespCapture } from "../lib/types";
|
||||||
|
|
||||||
|
/** Inputs accepted by the capture dialog. */
interface Props {
  /** Capture to display; the template renders no content while this is null. */
  capture: ReqRespCapture | null;
  /** True to show the dialog modally, false to close it. */
  open: boolean;
  /** Called from the dialog element's close handler. */
  onclose: () => void;
}

let { capture, open, onclose }: Props = $props();

// Bound to the <dialog> element via bind:this.
let dialogEl: HTMLDialogElement | undefined = $state();

/** Views a body panel can be switched between. */
type BodyTab = "raw" | "pretty" | "chat";

// Currently selected tab for each body panel.
let reqBodyTab: BodyTab = $state("pretty");
let respBodyTab: BodyTab = $state("pretty");

// Drive the temporary "Copied!" label on each copy button.
let copiedReq = $state(false);
let copiedResp = $state(false);
|
||||||
|
|
||||||
|
// Keep the native <dialog> in step with the `open` prop.
$effect(() => {
  const shouldShow = open;
  const dialog = dialogEl;
  if (!dialog) return;

  if (shouldShow) {
    dialog.showModal();
  } else {
    dialog.close();
  }
});
|
||||||
|
|
||||||
|
// Pick the default tab for each body panel from the capture's content types:
// JSON requests open on "pretty"; event-stream responses open on "chat",
// JSON responses on "pretty", everything else on "raw".
$effect(() => {
  if (!capture) return;

  const requestType = getContentType(capture.req_headers);
  const responseType = getContentType(capture.resp_headers);

  reqBodyTab = requestType.includes("json") ? "pretty" : "raw";

  if (responseType.includes("text/event-stream")) {
    respBodyTab = "chat";
  } else if (responseType.includes("json")) {
    respBodyTab = "pretty";
  } else {
    respBodyTab = "raw";
  }
});
|
||||||
|
|
||||||
|
/** Close handler of the <dialog>: forwards the event to the `onclose` prop. */
function handleDialogClose(): void {
  onclose();
}
|
||||||
|
|
||||||
|
/**
 * Decodes a base64 body into UTF-8 text.
 *
 * @param body - base64 text, or null/undefined/empty for "no body"
 * @returns the decoded text, "" when there is no body, or `body` unchanged
 *          when it cannot be base64-decoded
 */
function decodeBody(body: string | null | undefined): string {
  if (!body) {
    return "";
  }

  try {
    const byteString = atob(body);
    // atob yields one char per byte; copy them out so they can be read as UTF-8.
    const buffer = new Uint8Array(byteString.length);
    for (let i = 0; i < byteString.length; i++) {
      buffer[i] = byteString.charCodeAt(i);
    }
    return new TextDecoder().decode(buffer);
  } catch {
    return body;
  }
}
|
||||||
|
|
||||||
|
/**
 * Re-indents JSON text with two spaces.
 *
 * @param str - text expected to contain JSON
 * @returns the re-serialised JSON, or `str` unchanged when it does not parse
 */
function formatJson(str: string): string {
  let value: unknown;
  try {
    value = JSON.parse(str);
  } catch {
    return str;
  }
  return JSON.stringify(value, null, 2);
}
|
||||||
|
|
||||||
|
/**
 * Returns the lower-cased Content-Type value from a header map.
 *
 * The two common spellings ("Content-Type", "content-type") are tried first;
 * any other casing (e.g. "Content-type") is then matched case-insensitively,
 * since HTTP header names are not case-sensitive.
 *
 * @param headers - header name/value map, or null/undefined
 * @returns the lower-cased content type, or "" when none is present
 */
function getContentType(
  headers: Record<string, string> | null | undefined,
): string {
  if (!headers) return "";

  const direct = headers["Content-Type"] || headers["content-type"];
  if (direct) return direct.toLowerCase();

  for (const [name, value] of Object.entries(headers)) {
    if (value && name.toLowerCase() === "content-type") {
      return value.toLowerCase();
    }
  }
  return "";
}
|
||||||
|
|
||||||
|
/**
 * Tells whether a content type belongs to the `image/` family.
 *
 * @param contentType - content type string (callers pass it lower-cased)
 */
function isImageContentType(contentType: string): boolean {
  const prefix = "image/";
  return contentType.slice(0, prefix.length) === prefix;
}
|
||||||
|
|
||||||
|
/**
 * Tells whether a body with this content type can be shown as text.
 *
 * Any type containing "json" counts as text, matching the `includes("json")`
 * test this component uses to offer the Pretty/Raw tabs. Types such as
 * `application/problem+json` or `application/x-ndjson` would otherwise be
 * reported as binary. `+xml` suffixed types are accepted as well.
 *
 * @param contentType - content type string (callers pass it lower-cased)
 */
function isTextContentType(contentType: string): boolean {
  if (contentType.startsWith("text/")) return true;

  const textMarkers = ["json", "application/xml", "+xml", "application/javascript"];
  return textMarkers.some((marker) => contentType.includes(marker));
}
|
||||||
|
|
||||||
|
/**
 * Builds a `data:` URL for a base64 image body.
 *
 * @param body - base64 payload, used as-is
 * @param contentType - content type; anything after the first ";" is dropped
 */
function getImageDataUrl(body: string, contentType: string): string {
  const [mediaType] = contentType.split(";");
  return "data:" + mediaType.trim() + ";base64," + body;
}
|
||||||
|
|
||||||
|
/** Text accumulated from a streamed chat completion. */
interface SSEChat {
  /** Concatenated `delta.reasoning_content` fragments. */
  reasoning: string;
  /** Concatenated `delta.content` fragments. */
  content: string;
}

/**
 * Rebuilds the chat text from a captured server-sent-events body.
 *
 * Each `data:` line is parsed as JSON and the string values of
 * `choices[0].delta.content` / `choices[0].delta.reasoning_content` are
 * appended in order. The space after `data:` is optional in the SSE format,
 * so both `data: {...}` and `data:{...}` are accepted. Comment lines, other
 * fields, the `[DONE]` sentinel, unparseable payloads and non-string deltas
 * are skipped.
 *
 * @param text - raw event-stream body
 * @returns the accumulated reasoning and content (both "" when nothing matched)
 */
function parseSSEChat(text: string): SSEChat {
  type StreamChunk = {
    choices?: Array<{
      delta?: { content?: unknown; reasoning_content?: unknown } | null;
    } | null>;
  } | null;

  const result: SSEChat = { reasoning: "", content: "" };

  for (const line of text.split("\n")) {
    const trimmed = line.trim();
    if (!trimmed.startsWith("data:")) continue;

    const data = trimmed.slice("data:".length).trimStart();
    if (!data || data === "[DONE]") continue;

    let chunk: StreamChunk;
    try {
      chunk = JSON.parse(data);
    } catch {
      // skip unparseable lines
      continue;
    }

    const delta = chunk?.choices?.[0]?.delta;
    // Only append real strings so structured deltas never become "[object Object]".
    if (typeof delta?.content === "string") result.content += delta.content;
    if (typeof delta?.reasoning_content === "string") {
      result.reasoning += delta.reasoning_content;
    }
  }

  return result;
}
|
||||||
|
|
||||||
|
/**
 * Copies text to the clipboard and turns on the matching "copied" flag for
 * 1.5 seconds. A failed clipboard write is ignored and leaves the flags alone.
 *
 * @param text - text to copy
 * @param type - which copy button was used: request or response
 */
async function copyToClipboard(text: string, type: "req" | "resp") {
  try {
    await navigator.clipboard.writeText(text);
  } catch {
    // ignore
    return;
  }

  const isRequest = type === "req";
  if (isRequest) {
    copiedReq = true;
  } else {
    copiedResp = true;
  }

  setTimeout(() => {
    if (isRequest) {
      copiedReq = false;
    } else {
      copiedResp = false;
    }
  }, 1500);
}
|
||||||
|
|
||||||
|
/**
 * Text placed on the clipboard by the response Copy button.
 *
 * On the "chat" tab this is the reasoning (when present) followed by a blank
 * line and the content; on any other tab it is the displayed response body.
 */
function getCopyText(): string {
  if (respBodyTab !== "chat") {
    return displayedResponseBody;
  }

  const reasoningPart = sseChat.reasoning ? sseChat.reasoning + "\n\n" : "";
  return reasoningPart + sseChat.content;
}
|
||||||
|
|
||||||
|
// Request body: content type, decoded text, pretty-printed text, and the
// text shown for the currently selected tab.
let requestContentType = $derived(capture ? getContentType(capture.req_headers) : "");
let isRequestJson = $derived(requestContentType.includes("json"));

let requestBodyRaw = $derived(capture ? decodeBody(capture.req_body) : "");

// Only JSON bodies are re-indented; anything else is shown as decoded.
let requestBodyPretty = $derived(
  isRequestJson ? formatJson(requestBodyRaw) : requestBodyRaw,
);

let displayedRequestBody = $derived.by(() => {
  if (reqBodyTab === "pretty") return requestBodyPretty;
  return requestBodyRaw;
});
|
||||||
|
|
||||||
|
// Response body: content-type classification, decoded text, pretty-printed
// text, reconstructed chat text, and the text shown for the selected tab.
let responseContentType = $derived(capture ? getContentType(capture.resp_headers) : "");
let isResponseImage = $derived(isImageContentType(responseContentType));
let isResponseText = $derived(isTextContentType(responseContentType));
let isResponseJson = $derived(responseContentType.includes("json"));
let isSSE = $derived(responseContentType.includes("text/event-stream"));

let responseBodyRaw = $derived(capture ? decodeBody(capture.resp_body) : "");

// Only JSON bodies are re-indented; anything else is shown as decoded.
let responseBodyPretty = $derived(
  isResponseJson ? formatJson(responseBodyRaw) : responseBodyRaw,
);

// Chat view is only built for non-empty event-stream bodies.
let sseChat: SSEChat = $derived(
  isSSE && responseBodyRaw
    ? parseSSEChat(responseBodyRaw)
    : { reasoning: "", content: "" },
);

let displayedResponseBody = $derived(
  respBodyTab === "pretty" ? responseBodyPretty : responseBodyRaw,
);
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<dialog
|
||||||
|
bind:this={dialogEl}
|
||||||
|
onclose={handleDialogClose}
|
||||||
|
class="bg-surface text-txtmain rounded-lg shadow-xl max-w-4xl w-full max-h-[90vh] p-0 backdrop:bg-black/50 m-auto"
|
||||||
|
>
|
||||||
|
{#if capture}
|
||||||
|
<div class="flex flex-col max-h-[90vh]">
|
||||||
|
<div
|
||||||
|
class="flex justify-between items-center p-4 border-b border-card-border"
|
||||||
|
>
|
||||||
|
<h2 class="text-xl font-bold pb-0">Capture #{capture.id + 1}{#if capture.req_path} <span class="text-base font-mono font-normal text-txtsecondary">{capture.req_path}</span>{/if}</h2>
|
||||||
|
<button
|
||||||
|
onclick={() => dialogEl?.close()}
|
||||||
|
class="text-txtsecondary hover:text-txtmain text-2xl leading-none"
|
||||||
|
>
|
||||||
|
×
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="overflow-y-auto flex-1 p-4 space-y-4">
|
||||||
|
<!-- Request Headers -->
|
||||||
|
<details class="group" open>
|
||||||
|
<summary
|
||||||
|
class="cursor-pointer font-semibold text-sm uppercase tracking-wider text-txtsecondary hover:text-txtmain"
|
||||||
|
>
|
||||||
|
Request Headers
|
||||||
|
</summary>
|
||||||
|
<div
|
||||||
|
class="mt-2 bg-background rounded border border-card-border overflow-auto max-h-48"
|
||||||
|
>
|
||||||
|
<table class="w-full text-sm">
|
||||||
|
<tbody>
|
||||||
|
{#each Object.entries(capture.req_headers || {}) as [key, value]}
|
||||||
|
<tr class="border-b border-card-border-inner last:border-0">
|
||||||
|
<td class="px-3 py-1 font-mono text-primary whitespace-nowrap"
|
||||||
|
>{key}</td
|
||||||
|
>
|
||||||
|
<td class="px-3 py-1 font-mono break-all">{value}</td>
|
||||||
|
</tr>
|
||||||
|
{/each}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</details>
|
||||||
|
|
||||||
|
<!-- Request Body -->
|
||||||
|
<details class="group" open>
|
||||||
|
<summary
|
||||||
|
class="cursor-pointer font-semibold text-sm uppercase tracking-wider text-txtsecondary hover:text-txtmain"
|
||||||
|
>
|
||||||
|
Request Body
|
||||||
|
</summary>
|
||||||
|
{#if requestBodyRaw}
|
||||||
|
<div class="mt-2 flex items-center justify-between">
|
||||||
|
<div class="flex gap-1">
|
||||||
|
{#if isRequestJson}
|
||||||
|
<button
|
||||||
|
class="tab-btn"
|
||||||
|
class:tab-btn-active={reqBodyTab === "pretty"}
|
||||||
|
onclick={() => (reqBodyTab = "pretty")}>Pretty</button
|
||||||
|
>
|
||||||
|
<button
|
||||||
|
class="tab-btn"
|
||||||
|
class:tab-btn-active={reqBodyTab === "raw"}
|
||||||
|
onclick={() => (reqBodyTab = "raw")}>Raw</button
|
||||||
|
>
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
|
<button
|
||||||
|
class="tab-btn"
|
||||||
|
onclick={() =>
|
||||||
|
copyToClipboard(displayedRequestBody, "req")}
|
||||||
|
>
|
||||||
|
{#if copiedReq}
|
||||||
|
Copied!
|
||||||
|
{:else}
|
||||||
|
Copy
|
||||||
|
{/if}
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
<div
|
||||||
|
class="mt-1 bg-background rounded border border-card-border overflow-auto max-h-96"
|
||||||
|
>
|
||||||
|
<pre
|
||||||
|
class="p-3 text-sm font-mono whitespace-pre-wrap break-all">{displayedRequestBody}</pre>
|
||||||
|
</div>
|
||||||
|
{:else}
|
||||||
|
<div
|
||||||
|
class="mt-2 bg-background rounded border border-card-border overflow-auto max-h-96"
|
||||||
|
>
|
||||||
|
<pre class="p-3 text-sm font-mono whitespace-pre-wrap break-all"
|
||||||
|
>(empty)</pre
|
||||||
|
>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
</details>
|
||||||
|
|
||||||
|
<!-- Response Headers -->
|
||||||
|
<details class="group" open>
|
||||||
|
<summary
|
||||||
|
class="cursor-pointer font-semibold text-sm uppercase tracking-wider text-txtsecondary hover:text-txtmain"
|
||||||
|
>
|
||||||
|
Response Headers
|
||||||
|
</summary>
|
||||||
|
<div
|
||||||
|
class="mt-2 bg-background rounded border border-card-border overflow-auto max-h-48"
|
||||||
|
>
|
||||||
|
<table class="w-full text-sm">
|
||||||
|
<tbody>
|
||||||
|
{#each Object.entries(capture.resp_headers || {}) as [key, value]}
|
||||||
|
<tr class="border-b border-card-border-inner last:border-0">
|
||||||
|
<td class="px-3 py-1 font-mono text-primary whitespace-nowrap"
|
||||||
|
>{key}</td
|
||||||
|
>
|
||||||
|
<td class="px-3 py-1 font-mono break-all">{value}</td>
|
||||||
|
</tr>
|
||||||
|
{/each}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</details>
|
||||||
|
|
||||||
|
<!-- Response Body -->
|
||||||
|
<details class="group" open>
|
||||||
|
<summary
|
||||||
|
class="cursor-pointer font-semibold text-sm uppercase tracking-wider text-txtsecondary hover:text-txtmain"
|
||||||
|
>
|
||||||
|
Response Body
|
||||||
|
</summary>
|
||||||
|
{#if isResponseImage && capture.resp_body}
|
||||||
|
<div
|
||||||
|
class="mt-2 bg-background rounded border border-card-border overflow-auto max-h-96"
|
||||||
|
>
|
||||||
|
<div class="p-3 flex justify-center">
|
||||||
|
<img
|
||||||
|
src={getImageDataUrl(capture.resp_body, responseContentType)}
|
||||||
|
alt="Response"
|
||||||
|
class="max-w-full h-auto"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{:else if isSSE || isResponseText}
|
||||||
|
<div class="mt-2 flex items-center justify-between">
|
||||||
|
<div class="flex gap-1">
|
||||||
|
{#if isSSE}
|
||||||
|
<button
|
||||||
|
class="tab-btn"
|
||||||
|
class:tab-btn-active={respBodyTab === "chat"}
|
||||||
|
onclick={() => (respBodyTab = "chat")}>Chat</button
|
||||||
|
>
|
||||||
|
{/if}
|
||||||
|
{#if isResponseJson}
|
||||||
|
<button
|
||||||
|
class="tab-btn"
|
||||||
|
class:tab-btn-active={respBodyTab === "pretty"}
|
||||||
|
onclick={() => (respBodyTab = "pretty")}>Pretty</button
|
||||||
|
>
|
||||||
|
{/if}
|
||||||
|
{#if isSSE || isResponseJson}
|
||||||
|
<button
|
||||||
|
class="tab-btn"
|
||||||
|
class:tab-btn-active={respBodyTab === "raw"}
|
||||||
|
onclick={() => (respBodyTab = "raw")}>Raw</button
|
||||||
|
>
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
|
<button
|
||||||
|
class="tab-btn"
|
||||||
|
onclick={() => copyToClipboard(getCopyText(), "resp")}
|
||||||
|
>
|
||||||
|
{#if copiedResp}
|
||||||
|
Copied!
|
||||||
|
{:else}
|
||||||
|
Copy
|
||||||
|
{/if}
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
<div
|
||||||
|
class="mt-1 bg-background rounded border border-card-border overflow-auto max-h-96"
|
||||||
|
>
|
||||||
|
{#if respBodyTab === "chat"}
|
||||||
|
<div class="p-3 text-sm space-y-3">
|
||||||
|
{#if sseChat.reasoning}
|
||||||
|
<div>
|
||||||
|
<div
|
||||||
|
class="text-xs font-semibold uppercase tracking-wider text-txtsecondary mb-1"
|
||||||
|
>
|
||||||
|
Reasoning
|
||||||
|
</div>
|
||||||
|
<pre
|
||||||
|
class="font-mono whitespace-pre-wrap break-all text-txtsecondary">{sseChat.reasoning}</pre>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
{#if sseChat.content}
|
||||||
|
<div>
|
||||||
|
{#if sseChat.reasoning}
|
||||||
|
<div
|
||||||
|
class="text-xs font-semibold uppercase tracking-wider text-txtsecondary mb-1"
|
||||||
|
>
|
||||||
|
Response
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
<pre
|
||||||
|
class="font-mono whitespace-pre-wrap break-all">{sseChat.content}</pre>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
{#if !sseChat.reasoning && !sseChat.content}
|
||||||
|
<pre class="font-mono">(empty)</pre>
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
|
{:else}
|
||||||
|
<pre
|
||||||
|
class="p-3 text-sm font-mono whitespace-pre-wrap break-all">{displayedResponseBody || "(empty)"}</pre>
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
|
{:else if responseBodyRaw}
|
||||||
|
<div
|
||||||
|
class="mt-2 bg-background rounded border border-card-border overflow-auto max-h-96"
|
||||||
|
>
|
||||||
|
<div class="p-3 text-sm text-txtsecondary italic">
|
||||||
|
(binary data - {responseContentType || "unknown content type"})
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{:else}
|
||||||
|
<div
|
||||||
|
class="mt-2 bg-background rounded border border-card-border overflow-auto max-h-96"
|
||||||
|
>
|
||||||
|
<pre class="p-3 text-sm font-mono">(empty)</pre>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
</details>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="p-4 border-t border-card-border flex justify-end">
|
||||||
|
<button onclick={() => dialogEl?.close()} class="btn"> Close </button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
</dialog>
|
||||||
|
|
||||||
|
<style>
|
||||||
|
.tab-btn {
|
||||||
|
padding: 2px 10px;
|
||||||
|
font-size: 0.75rem;
|
||||||
|
border-radius: 4px;
|
||||||
|
color: var(--color-txtsecondary);
|
||||||
|
cursor: pointer;
|
||||||
|
border: 1px solid transparent;
|
||||||
|
background: transparent;
|
||||||
|
transition: all 0.15s;
|
||||||
|
}
|
||||||
|
.tab-btn:hover {
|
||||||
|
color: var(--color-txtmain);
|
||||||
|
background: var(--color-secondary);
|
||||||
|
}
|
||||||
|
.tab-btn-active {
|
||||||
|
color: var(--color-primary);
|
||||||
|
background: color-mix(in srgb, var(--color-primary) 12%, transparent);
|
||||||
|
border-color: color-mix(in srgb, var(--color-primary) 25%, transparent);
|
||||||
|
}
|
||||||
|
</style>
|
||||||
@@ -1,6 +1,8 @@
|
|||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
import { link, location } from "svelte-spa-router";
|
import { link } from "svelte-spa-router";
|
||||||
import { screenWidth, toggleTheme, isDarkMode, appTitle, isNarrow } from "../stores/theme";
|
import { screenWidth, toggleTheme, isDarkMode, appTitle, isNarrow } from "../stores/theme";
|
||||||
|
import { currentRoute } from "../stores/route";
|
||||||
|
import { playgroundActivity } from "../stores/playgroundActivity";
|
||||||
import ConnectionStatus from "./ConnectionStatus.svelte";
|
import ConnectionStatus from "./ConnectionStatus.svelte";
|
||||||
|
|
||||||
function handleTitleChange(newTitle: string): void {
|
function handleTitleChange(newTitle: string): void {
|
||||||
@@ -22,9 +24,10 @@
|
|||||||
handleTitleChange(target.textContent || "(set title)");
|
handleTitleChange(target.textContent || "(set title)");
|
||||||
}
|
}
|
||||||
|
|
||||||
function isActive(path: string, currentLocation: string): boolean {
|
function isActive(path: string, current: string): boolean {
|
||||||
return path === "/" ? currentLocation === "/" : currentLocation.startsWith(path);
|
return path === "/" ? current === "/" : current.startsWith(path);
|
||||||
}
|
}
|
||||||
|
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<header
|
<header
|
||||||
@@ -47,8 +50,7 @@
|
|||||||
<a
|
<a
|
||||||
href="/"
|
href="/"
|
||||||
use:link
|
use:link
|
||||||
class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap"
|
class="p-1 whitespace-nowrap {isActive('/', $currentRoute) ? 'font-semibold' : ''} {$playgroundActivity ? 'activity-link' : 'text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100'}"
|
||||||
class:font-semibold={isActive("/", $location)}
|
|
||||||
>
|
>
|
||||||
Playground
|
Playground
|
||||||
</a>
|
</a>
|
||||||
@@ -56,7 +58,7 @@
|
|||||||
href="/models"
|
href="/models"
|
||||||
use:link
|
use:link
|
||||||
class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap"
|
class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap"
|
||||||
class:font-semibold={isActive("/models", $location)}
|
class:font-semibold={isActive("/models", $currentRoute)}
|
||||||
>
|
>
|
||||||
Models
|
Models
|
||||||
</a>
|
</a>
|
||||||
@@ -64,7 +66,7 @@
|
|||||||
href="/activity"
|
href="/activity"
|
||||||
use:link
|
use:link
|
||||||
class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap"
|
class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap"
|
||||||
class:font-semibold={isActive("/activity", $location)}
|
class:font-semibold={isActive("/activity", $currentRoute)}
|
||||||
>
|
>
|
||||||
Activity
|
Activity
|
||||||
</a>
|
</a>
|
||||||
@@ -72,7 +74,7 @@
|
|||||||
href="/logs"
|
href="/logs"
|
||||||
use:link
|
use:link
|
||||||
class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap"
|
class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap"
|
||||||
class:font-semibold={isActive("/logs", $location)}
|
class:font-semibold={isActive("/logs", $currentRoute)}
|
||||||
>
|
>
|
||||||
Logs
|
Logs
|
||||||
</a>
|
</a>
|
||||||
@@ -96,3 +98,23 @@
|
|||||||
<ConnectionStatus />
|
<ConnectionStatus />
|
||||||
</menu>
|
</menu>
|
||||||
</header>
|
</header>
|
||||||
|
|
||||||
|
<style>
|
||||||
|
.activity-link {
|
||||||
|
background: linear-gradient(90deg, #6366f1, #8b5cf6, #a855f7, #8b5cf6, #6366f1);
|
||||||
|
background-size: 200% 100%;
|
||||||
|
-webkit-background-clip: text;
|
||||||
|
background-clip: text;
|
||||||
|
-webkit-text-fill-color: transparent;
|
||||||
|
animation: gradient-shift 2s linear infinite;
|
||||||
|
}
|
||||||
|
|
||||||
|
@keyframes gradient-shift {
|
||||||
|
0% {
|
||||||
|
background-position: 0% 50%;
|
||||||
|
}
|
||||||
|
100% {
|
||||||
|
background-position: 200% 50%;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
import { models } from "../../stores/api";
|
import { models } from "../../stores/api";
|
||||||
import { persistentStore } from "../../stores/persistent";
|
import { persistentStore } from "../../stores/persistent";
|
||||||
import { transcribeAudio } from "../../lib/audioApi";
|
import { transcribeAudio } from "../../lib/audioApi";
|
||||||
|
import { playgroundStores } from "../../stores/playgroundActivity";
|
||||||
import ModelSelector from "./ModelSelector.svelte";
|
import ModelSelector from "./ModelSelector.svelte";
|
||||||
|
|
||||||
const selectedModelStore = persistentStore<string>("playground-audio-model", "");
|
const selectedModelStore = persistentStore<string>("playground-audio-model", "");
|
||||||
@@ -22,6 +23,10 @@
|
|||||||
|
|
||||||
let canTranscribe = $derived(selectedFile !== null && $selectedModelStore !== "" && !isTranscribing);
|
let canTranscribe = $derived(selectedFile !== null && $selectedModelStore !== "" && !isTranscribing);
|
||||||
|
|
||||||
|
$effect(() => {
|
||||||
|
playgroundStores.audioTranscribing.set(isTranscribing);
|
||||||
|
});
|
||||||
|
|
||||||
function validateFile(file: File): { valid: boolean; error?: string } {
|
function validateFile(file: File): { valid: boolean; error?: string } {
|
||||||
const ext = '.' + file.name.split('.').pop()?.toLowerCase();
|
const ext = '.' + file.name.split('.').pop()?.toLowerCase();
|
||||||
|
|
||||||
@@ -141,9 +146,6 @@
|
|||||||
<!-- Model selector -->
|
<!-- Model selector -->
|
||||||
<div class="shrink-0 flex flex-wrap gap-2 mb-4">
|
<div class="shrink-0 flex flex-wrap gap-2 mb-4">
|
||||||
<ModelSelector bind:value={$selectedModelStore} placeholder="Select an audio model..." disabled={isTranscribing} />
|
<ModelSelector bind:value={$selectedModelStore} placeholder="Select an audio model..." disabled={isTranscribing} />
|
||||||
<button class="btn" onclick={clearAll} disabled={!selectedFile && !transcriptionResult && !error}>
|
|
||||||
Clear
|
|
||||||
</button>
|
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- Empty state for no models configured -->
|
<!-- Empty state for no models configured -->
|
||||||
@@ -241,6 +243,13 @@
|
|||||||
>
|
>
|
||||||
Transcribe
|
Transcribe
|
||||||
</button>
|
</button>
|
||||||
|
<button
|
||||||
|
class="btn"
|
||||||
|
onclick={clearAll}
|
||||||
|
disabled={!selectedFile && !transcriptionResult && !error}
|
||||||
|
>
|
||||||
|
Clear
|
||||||
|
</button>
|
||||||
{/if}
|
{/if}
|
||||||
</div>
|
</div>
|
||||||
{/if}
|
{/if}
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
import { models } from "../../stores/api";
|
import { models } from "../../stores/api";
|
||||||
import { persistentStore } from "../../stores/persistent";
|
import { persistentStore } from "../../stores/persistent";
|
||||||
import { streamChatCompletion } from "../../lib/chatApi";
|
import { streamChatCompletion } from "../../lib/chatApi";
|
||||||
|
import { playgroundStores } from "../../stores/playgroundActivity";
|
||||||
import type { ChatMessage, ContentPart } from "../../lib/types";
|
import type { ChatMessage, ContentPart } from "../../lib/types";
|
||||||
import ChatMessageComponent from "./ChatMessage.svelte";
|
import ChatMessageComponent from "./ChatMessage.svelte";
|
||||||
import ModelSelector from "./ModelSelector.svelte";
|
import ModelSelector from "./ModelSelector.svelte";
|
||||||
@@ -11,7 +12,16 @@
|
|||||||
const systemPromptStore = persistentStore<string>("playground-system-prompt", "");
|
const systemPromptStore = persistentStore<string>("playground-system-prompt", "");
|
||||||
const temperatureStore = persistentStore<number>("playground-temperature", 0.7);
|
const temperatureStore = persistentStore<number>("playground-temperature", 0.7);
|
||||||
|
|
||||||
let messages = $state<ChatMessage[]>([]);
|
/**
 * Loads the saved playground conversation from localStorage.
 *
 * @returns the array stored under "playground-messages", or an empty array
 *          when nothing is stored, storage is unavailable, the value is not
 *          valid JSON, or the parsed value is not an array
 */
function loadMessages(): ChatMessage[] {
  try {
    const saved = localStorage.getItem("playground-messages");
    if (!saved) return [];

    const parsed: unknown = JSON.parse(saved);
    // A stale or corrupted entry (object, string, number, ...) must not become
    // the messages state, which is read as an array.
    return Array.isArray(parsed) ? (parsed as ChatMessage[]) : [];
  } catch {
    return [];
  }
}
|
||||||
|
|
||||||
|
let messages = $state<ChatMessage[]>(loadMessages());
|
||||||
let userInput = $state("");
|
let userInput = $state("");
|
||||||
let isStreaming = $state(false);
|
let isStreaming = $state(false);
|
||||||
let isReasoning = $state(false);
|
let isReasoning = $state(false);
|
||||||
@@ -24,21 +34,52 @@
|
|||||||
let imageError = $state<string | null>(null);
|
let imageError = $state<string | null>(null);
|
||||||
|
|
||||||
let hasModels = $derived($models.some((m) => !m.unlisted));
|
let hasModels = $derived($models.some((m) => !m.unlisted));
|
||||||
|
let userScrolledUp = $state(false);
|
||||||
|
|
||||||
// Auto-scroll when messages change
|
|
||||||
$effect(() => {
|
$effect(() => {
|
||||||
if (messages.length > 0 && messagesContainer) {
|
playgroundStores.chatStreaming.set(isStreaming);
|
||||||
|
});
|
||||||
|
|
||||||
|
/**
 * Scroll handler for the messages container: records whether the view is
 * more than 40px away from the bottom.
 */
function handleMessagesScroll() {
  const container = messagesContainer;
  if (!container) return;

  // Consider "at bottom" if within 40px of the bottom
  const distanceFromBottom =
    container.scrollHeight - container.scrollTop - container.clientHeight;
  userScrolledUp = distanceFromBottom > 40;
}
|
||||||
|
|
||||||
|
// Auto-scroll when messages change — skip if user scrolled up
|
||||||
|
$effect(() => {
|
||||||
|
if (messages.length > 0 && messagesContainer && !userScrolledUp) {
|
||||||
messagesContainer.scrollTo({
|
messagesContainer.scrollTo({
|
||||||
top: messagesContainer.scrollHeight,
|
top: messagesContainer.scrollHeight,
|
||||||
behavior: "smooth",
|
behavior: isStreaming ? "instant" : "smooth",
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Save the conversation to localStorage, writing at most once every 2s.
// A change inside the 2s window schedules a delayed write; the effect's
// cleanup cancels that timer when the effect re-runs or is torn down.
let lastSaveTime = 0;
$effect(() => {
  const json = JSON.stringify(messages);

  const persist = () => {
    try {
      localStorage.setItem("playground-messages", json);
    } catch {}
    lastSaveTime = Date.now();
  };

  const remaining = 2000 - (Date.now() - lastSaveTime);
  if (remaining <= 0) {
    persist();
    return;
  }

  const timer = setTimeout(persist, remaining);
  return () => clearTimeout(timer);
});
|
||||||
|
|
||||||
async function sendMessage() {
|
async function sendMessage() {
|
||||||
const trimmedInput = userInput.trim();
|
const trimmedInput = userInput.trim();
|
||||||
if ((!trimmedInput && attachedImages.length === 0) || !$selectedModelStore || isStreaming) return;
|
if ((!trimmedInput && attachedImages.length === 0) || !$selectedModelStore || isStreaming) return;
|
||||||
|
|
||||||
|
userScrolledUp = false;
|
||||||
|
|
||||||
// Build message content (multimodal if images attached)
|
// Build message content (multimodal if images attached)
|
||||||
let content: string | ContentPart[];
|
let content: string | ContentPart[];
|
||||||
if (attachedImages.length > 0) {
|
if (attachedImages.length > 0) {
|
||||||
@@ -321,6 +362,7 @@
|
|||||||
<div
|
<div
|
||||||
class="flex-1 overflow-y-auto mb-4 px-2"
|
class="flex-1 overflow-y-auto mb-4 px-2"
|
||||||
bind:this={messagesContainer}
|
bind:this={messagesContainer}
|
||||||
|
onscroll={handleMessagesScroll}
|
||||||
>
|
>
|
||||||
{#if messages.length === 0}
|
{#if messages.length === 0}
|
||||||
<div class="h-full flex items-center justify-center text-txtsecondary">
|
<div class="h-full flex items-center justify-center text-txtsecondary">
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
import { renderMarkdown, escapeHtml } from "../../lib/markdown";
|
import { renderMarkdown, escapeHtml, renderStreamingMarkdown, createStreamingCache } from "../../lib/markdown";
|
||||||
|
import type { RenderedBlock } from "../../lib/markdown";
|
||||||
import { Copy, Check, Pencil, X, Save, RefreshCw, ChevronDown, ChevronRight, Brain, Code } from "lucide-svelte";
|
import { Copy, Check, Pencil, X, Save, RefreshCw, ChevronDown, ChevronRight, Brain, Code } from "lucide-svelte";
|
||||||
import { getTextContent, getImageUrls } from "../../lib/types";
|
import { getTextContent, getImageUrls } from "../../lib/types";
|
||||||
import type { ContentPart } from "../../lib/types";
|
import type { ContentPart } from "../../lib/types";
|
||||||
@@ -22,11 +23,17 @@
|
|||||||
let hasImages = $derived(imageUrls.length > 0);
|
let hasImages = $derived(imageUrls.length > 0);
|
||||||
let canEdit = $derived(onEdit !== undefined && !hasImages);
|
let canEdit = $derived(onEdit !== undefined && !hasImages);
|
||||||
|
|
||||||
let renderedContent = $derived(
|
let streamingCache = createStreamingCache();
|
||||||
role === "assistant" && !isStreaming
|
let renderedParts = $derived.by(() => {
|
||||||
? renderMarkdown(textContent)
|
if (role !== "assistant") {
|
||||||
: escapeHtml(textContent).replace(/\n/g, '<br>')
|
return { blocks: [{ id: -1, html: escapeHtml(textContent).replace(/\n/g, '<br>') }] as RenderedBlock[], pendingHtml: "" };
|
||||||
);
|
}
|
||||||
|
if (!isStreaming) {
|
||||||
|
streamingCache = createStreamingCache();
|
||||||
|
return { blocks: [{ id: -1, html: renderMarkdown(textContent) }] as RenderedBlock[], pendingHtml: "" };
|
||||||
|
}
|
||||||
|
return renderStreamingMarkdown(textContent, streamingCache);
|
||||||
|
});
|
||||||
let copied = $state(false);
|
let copied = $state(false);
|
||||||
let showRaw = $state(false);
|
let showRaw = $state(false);
|
||||||
let isEditing = $state(false);
|
let isEditing = $state(false);
|
||||||
@@ -113,9 +120,9 @@
|
|||||||
|
|
||||||
<div class="flex {role === 'user' ? 'justify-end' : 'justify-start'} mb-4">
|
<div class="flex {role === 'user' ? 'justify-end' : 'justify-start'} mb-4">
|
||||||
<div
|
<div
|
||||||
class="relative group max-w-[85%] rounded-lg px-4 py-2 {role === 'user'
|
class="relative group rounded-lg px-4 py-2 {role === 'user'
|
||||||
? 'bg-primary text-btn-primary-text'
|
? 'max-w-[85%] bg-primary text-btn-primary-text'
|
||||||
: 'bg-surface border border-gray-200 dark:border-white/10'}"
|
: 'w-full sm:w-4/5 bg-surface border border-gray-200 dark:border-white/10'}"
|
||||||
>
|
>
|
||||||
{#if role === "assistant"}
|
{#if role === "assistant"}
|
||||||
{#if reasoning_content || isReasoning}
|
{#if reasoning_content || isReasoning}
|
||||||
@@ -168,7 +175,10 @@
|
|||||||
<div class="whitespace-pre-wrap font-mono text-sm">{textContent}</div>
|
<div class="whitespace-pre-wrap font-mono text-sm">{textContent}</div>
|
||||||
{:else}
|
{:else}
|
||||||
<div class="prose prose-sm dark:prose-invert max-w-none">
|
<div class="prose prose-sm dark:prose-invert max-w-none">
|
||||||
{@html renderedContent}
|
{#each renderedParts.blocks as block (block.id)}
|
||||||
|
{@html block.html}
|
||||||
|
{/each}
|
||||||
|
{@html renderedParts.pendingHtml}
|
||||||
{#if isStreaming && !isReasoning}
|
{#if isStreaming && !isReasoning}
|
||||||
<span class="inline-block w-2 h-4 bg-current animate-pulse ml-0.5"></span>
|
<span class="inline-block w-2 h-4 bg-current animate-pulse ml-0.5"></span>
|
||||||
{/if}
|
{/if}
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
|
import { untrack } from "svelte";
|
||||||
import { Maximize2, X } from "lucide-svelte";
|
import { Maximize2, X } from "lucide-svelte";
|
||||||
|
|
||||||
interface Props {
|
interface Props {
|
||||||
@@ -45,7 +46,8 @@
|
|||||||
$effect(() => {
|
$effect(() => {
|
||||||
if (isExpanded && expandedTextarea) {
|
if (isExpanded && expandedTextarea) {
|
||||||
expandedTextarea.focus();
|
expandedTextarea.focus();
|
||||||
expandedTextarea.setSelectionRange(expandedValue.length, expandedValue.length);
|
const len = untrack(() => expandedValue.length);
|
||||||
|
expandedTextarea.setSelectionRange(len, len);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
</script>
|
</script>
|
||||||
@@ -60,7 +62,7 @@
|
|||||||
{disabled}
|
{disabled}
|
||||||
></textarea>
|
></textarea>
|
||||||
<button
|
<button
|
||||||
class="absolute top-2 right-2 p-1.5 rounded-lg opacity-0 group-hover:opacity-100 transition-opacity bg-surface/90 hover:bg-surface border border-gray-200 dark:border-white/10 shadow-sm"
|
class="absolute top-2 right-2 p-1.5 rounded-lg opacity-60 md:opacity-0 group-hover:opacity-100 transition-opacity bg-surface/90 hover:bg-surface border border-gray-200 dark:border-white/10 shadow-sm"
|
||||||
onclick={openExpanded}
|
onclick={openExpanded}
|
||||||
title="Expand to edit"
|
title="Expand to edit"
|
||||||
type="button"
|
type="button"
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
import { models } from "../../stores/api";
|
import { models } from "../../stores/api";
|
||||||
import { persistentStore } from "../../stores/persistent";
|
import { persistentStore } from "../../stores/persistent";
|
||||||
import { generateImage } from "../../lib/imageApi";
|
import { generateImage } from "../../lib/imageApi";
|
||||||
|
import { playgroundStores } from "../../stores/playgroundActivity";
|
||||||
import ModelSelector from "./ModelSelector.svelte";
|
import ModelSelector from "./ModelSelector.svelte";
|
||||||
import ExpandableTextarea from "./ExpandableTextarea.svelte";
|
import ExpandableTextarea from "./ExpandableTextarea.svelte";
|
||||||
|
|
||||||
@@ -17,6 +18,10 @@
|
|||||||
|
|
||||||
let hasModels = $derived($models.some((m) => !m.unlisted));
|
let hasModels = $derived($models.some((m) => !m.unlisted));
|
||||||
|
|
||||||
|
$effect(() => {
|
||||||
|
playgroundStores.imageGenerating.set(isGenerating);
|
||||||
|
});
|
||||||
|
|
||||||
async function generate() {
|
async function generate() {
|
||||||
const trimmedPrompt = prompt.trim();
|
const trimmedPrompt = prompt.trim();
|
||||||
if (!trimmedPrompt || !$selectedModelStore || isGenerating) return;
|
if (!trimmedPrompt || !$selectedModelStore || isGenerating) return;
|
||||||
@@ -60,6 +65,7 @@
|
|||||||
function clearImage() {
|
function clearImage() {
|
||||||
generatedImage = null;
|
generatedImage = null;
|
||||||
error = null;
|
error = null;
|
||||||
|
prompt = "";
|
||||||
}
|
}
|
||||||
|
|
||||||
function downloadImage() {
|
function downloadImage() {
|
||||||
@@ -117,9 +123,6 @@
|
|||||||
<option value="1024x1792">1024x1792 (SDXL)</option>
|
<option value="1024x1792">1024x1792 (SDXL)</option>
|
||||||
</optgroup>
|
</optgroup>
|
||||||
</select>
|
</select>
|
||||||
<button class="btn" onclick={clearImage} disabled={!generatedImage && !error}>
|
|
||||||
Clear
|
|
||||||
</button>
|
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- Empty state for no models configured -->
|
<!-- Empty state for no models configured -->
|
||||||
@@ -192,6 +195,13 @@
|
|||||||
>
|
>
|
||||||
Generate
|
Generate
|
||||||
</button>
|
</button>
|
||||||
|
<button
|
||||||
|
class="btn flex-1 md:flex-none"
|
||||||
|
onclick={clearImage}
|
||||||
|
disabled={!generatedImage && !error && !prompt.trim()}
|
||||||
|
>
|
||||||
|
Clear
|
||||||
|
</button>
|
||||||
{/if}
|
{/if}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
import { models } from "../../stores/api";
|
import { models } from "../../stores/api";
|
||||||
import { persistentStore } from "../../stores/persistent";
|
import { persistentStore } from "../../stores/persistent";
|
||||||
import { generateSpeech } from "../../lib/speechApi";
|
import { generateSpeech } from "../../lib/speechApi";
|
||||||
|
import { playgroundStores } from "../../stores/playgroundActivity";
|
||||||
import ModelSelector from "./ModelSelector.svelte";
|
import ModelSelector from "./ModelSelector.svelte";
|
||||||
import ExpandableTextarea from "./ExpandableTextarea.svelte";
|
import ExpandableTextarea from "./ExpandableTextarea.svelte";
|
||||||
|
|
||||||
@@ -12,7 +13,6 @@
|
|||||||
let inputText = $state("");
|
let inputText = $state("");
|
||||||
let isGenerating = $state(false);
|
let isGenerating = $state(false);
|
||||||
let generatedAudioUrl = $state<string | null>(null);
|
let generatedAudioUrl = $state<string | null>(null);
|
||||||
let generatedText = $state<string | null>(null);
|
|
||||||
let generatedVoice = $state<string | null>(null);
|
let generatedVoice = $state<string | null>(null);
|
||||||
let generatedTimestamp = $state<Date | null>(null);
|
let generatedTimestamp = $state<Date | null>(null);
|
||||||
let error = $state<string | null>(null);
|
let error = $state<string | null>(null);
|
||||||
@@ -21,11 +21,9 @@
|
|||||||
let availableVoices = $state<string[]>(["coral", "alloy", "echo", "fable", "onyx", "nova", "shimmer"]);
|
let availableVoices = $state<string[]>(["coral", "alloy", "echo", "fable", "onyx", "nova", "shimmer"]);
|
||||||
let isLoadingVoices = $state(false);
|
let isLoadingVoices = $state(false);
|
||||||
|
|
||||||
// Default voices to fall back to if API call fails
|
|
||||||
const defaultVoices = ["coral", "alloy", "echo", "fable", "onyx", "nova", "shimmer"];
|
const defaultVoices = ["coral", "alloy", "echo", "fable", "onyx", "nova", "shimmer"];
|
||||||
const CACHE_KEY = "playground-speech-voices-cache";
|
const CACHE_KEY = "playground-speech-voices-cache";
|
||||||
|
|
||||||
// Load voices cache from localStorage
|
|
||||||
function getVoicesCache(): Record<string, string[]> {
|
function getVoicesCache(): Record<string, string[]> {
|
||||||
if (typeof window === "undefined") return {};
|
if (typeof window === "undefined") return {};
|
||||||
try {
|
try {
|
||||||
@@ -36,7 +34,6 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Save voices cache to localStorage
|
|
||||||
function saveVoicesCache(cache: Record<string, string[]>) {
|
function saveVoicesCache(cache: Record<string, string[]>) {
|
||||||
if (typeof window === "undefined") return;
|
if (typeof window === "undefined") return;
|
||||||
try {
|
try {
|
||||||
@@ -48,9 +45,12 @@
|
|||||||
|
|
||||||
let hasModels = $derived($models.some((m) => !m.unlisted));
|
let hasModels = $derived($models.some((m) => !m.unlisted));
|
||||||
|
|
||||||
// Track if this is the initial page load to avoid fetching on refresh
|
|
||||||
let isInitialLoad = $state(true);
|
let isInitialLoad = $state(true);
|
||||||
|
|
||||||
|
$effect(() => {
|
||||||
|
playgroundStores.speechGenerating.set(isGenerating);
|
||||||
|
});
|
||||||
|
|
||||||
// On page load, restore cached voices for the selected model if available
|
// On page load, restore cached voices for the selected model if available
|
||||||
$effect(() => {
|
$effect(() => {
|
||||||
const model = $selectedModelStore;
|
const model = $selectedModelStore;
|
||||||
@@ -148,7 +148,6 @@
|
|||||||
|
|
||||||
// Create object URL for the audio blob and store metadata
|
// Create object URL for the audio blob and store metadata
|
||||||
generatedAudioUrl = URL.createObjectURL(audioBlob);
|
generatedAudioUrl = URL.createObjectURL(audioBlob);
|
||||||
generatedText = trimmedText;
|
|
||||||
generatedVoice = $selectedVoiceStore;
|
generatedVoice = $selectedVoiceStore;
|
||||||
generatedTimestamp = new Date();
|
generatedTimestamp = new Date();
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
@@ -167,18 +166,6 @@
|
|||||||
abortController?.abort();
|
abortController?.abort();
|
||||||
}
|
}
|
||||||
|
|
||||||
function clearAudio() {
|
|
||||||
if (generatedAudioUrl) {
|
|
||||||
URL.revokeObjectURL(generatedAudioUrl);
|
|
||||||
}
|
|
||||||
generatedAudioUrl = null;
|
|
||||||
generatedText = null;
|
|
||||||
generatedVoice = null;
|
|
||||||
generatedTimestamp = null;
|
|
||||||
error = null;
|
|
||||||
inputText = "";
|
|
||||||
}
|
|
||||||
|
|
||||||
function clearInput() {
|
function clearInput() {
|
||||||
inputText = "";
|
inputText = "";
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
import { describe, it, expect } from "vitest";
|
import { describe, it, expect } from "vitest";
|
||||||
import { renderMarkdown, escapeHtml } from "./markdown";
|
import { renderMarkdown, escapeHtml, splitCompleteBlocks, closePendingBlock, normalizeLatexDelimiters, renderStreamingMarkdown, createStreamingCache } from "./markdown";
|
||||||
|
|
||||||
describe("renderMarkdown", () => {
|
describe("renderMarkdown", () => {
|
||||||
describe("basic markdown", () => {
|
describe("basic markdown", () => {
|
||||||
@@ -130,6 +130,35 @@ More text here.
|
|||||||
expect(result).toContain("katex");
|
expect(result).toContain("katex");
|
||||||
expect(result).toContain("sqrt");
|
expect(result).toContain("sqrt");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("renders \\[...\\] display math", () => {
|
||||||
|
const result = renderMarkdown("\\[\nx^2 + y^2 = z^2\n\\]");
|
||||||
|
expect(result).toContain("katex");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("renders \\(...\\) inline math", () => {
|
||||||
|
const result = renderMarkdown("The equation \\(E = mc^2\\) is famous.");
|
||||||
|
expect(result).toContain("katex");
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe("normalizeLatexDelimiters", () => {
|
||||||
|
it("converts \\[...\\] to $$...$$", () => {
|
||||||
|
expect(normalizeLatexDelimiters("\\[\nx^2\n\\]")).toBe("$$\nx^2\n$$");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("converts \\(...\\) to $...$", () => {
|
||||||
|
expect(normalizeLatexDelimiters("\\(x^2\\)")).toBe("$x^2$");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("leaves $$ and $ delimiters unchanged", () => {
|
||||||
|
const text = "$$x^2$$ and $y$";
|
||||||
|
expect(normalizeLatexDelimiters(text)).toBe(text);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("handles multiple occurrences", () => {
|
||||||
|
expect(normalizeLatexDelimiters("\\(a\\) and \\(b\\)")).toBe("$a$ and $b$");
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("escapeHtml", () => {
|
describe("escapeHtml", () => {
|
||||||
@@ -158,3 +187,237 @@ More text here.
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe("splitCompleteBlocks", () => {
|
||||||
|
it("returns everything as pending when no blank line", () => {
|
||||||
|
const result = splitCompleteBlocks("Hello world");
|
||||||
|
expect(result.complete).toBe("");
|
||||||
|
expect(result.pending).toBe("Hello world");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("returns empty for empty input", () => {
|
||||||
|
const result = splitCompleteBlocks("");
|
||||||
|
expect(result.complete).toBe("");
|
||||||
|
expect(result.pending).toBe("");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("splits on blank line between paragraphs", () => {
|
||||||
|
const result = splitCompleteBlocks("First paragraph.\n\nSecond paragraph");
|
||||||
|
expect(result.complete).toBe("First paragraph.\n");
|
||||||
|
expect(result.pending).toBe("Second paragraph");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("splits multiple paragraphs at last blank line", () => {
|
||||||
|
const result = splitCompleteBlocks("Para 1.\n\nPara 2.\n\nPara 3");
|
||||||
|
expect(result.complete).toBe("Para 1.\n\nPara 2.\n");
|
||||||
|
expect(result.pending).toBe("Para 3");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("treats closed code fence as complete boundary", () => {
|
||||||
|
const text = "```js\nconst x = 1;\n```\nMore text";
|
||||||
|
const result = splitCompleteBlocks(text);
|
||||||
|
expect(result.complete).toBe("```js\nconst x = 1;\n```");
|
||||||
|
expect(result.pending).toBe("More text");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("treats unclosed code fence as pending", () => {
|
||||||
|
const text = "Done paragraph.\n\n```js\nconst x = 1;";
|
||||||
|
const result = splitCompleteBlocks(text);
|
||||||
|
expect(result.complete).toBe("Done paragraph.\n");
|
||||||
|
expect(result.pending).toBe("```js\nconst x = 1;");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("does not split on blank lines inside code fences", () => {
|
||||||
|
const text = "```\nline1\n\nline2\n```";
|
||||||
|
const result = splitCompleteBlocks(text);
|
||||||
|
expect(result.complete).toBe("```\nline1\n\nline2\n```");
|
||||||
|
expect(result.pending).toBe("");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("handles tilde fences", () => {
|
||||||
|
const text = "~~~py\nprint('hi')\n~~~\nAfter";
|
||||||
|
const result = splitCompleteBlocks(text);
|
||||||
|
expect(result.complete).toBe("~~~py\nprint('hi')\n~~~");
|
||||||
|
expect(result.pending).toBe("After");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("does not close backtick fence with tilde fence", () => {
|
||||||
|
const text = "```\ncode\n~~~\nstill code";
|
||||||
|
const result = splitCompleteBlocks(text);
|
||||||
|
// The ~~~ should not close a backtick fence, so everything from ``` onward is pending
|
||||||
|
expect(result.complete).toBe("");
|
||||||
|
expect(result.pending).toBe("```\ncode\n~~~\nstill code");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("treats closed math block as complete boundary", () => {
|
||||||
|
const text = "$$\nx^2\n$$\nAfter";
|
||||||
|
const result = splitCompleteBlocks(text);
|
||||||
|
expect(result.complete).toBe("$$\nx^2\n$$");
|
||||||
|
expect(result.pending).toBe("After");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("treats unclosed math block as pending", () => {
|
||||||
|
const text = "Before.\n\n$$\nx^2";
|
||||||
|
const result = splitCompleteBlocks(text);
|
||||||
|
expect(result.complete).toBe("Before.\n");
|
||||||
|
expect(result.pending).toBe("$$\nx^2");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("treats closed \\[...\\] math block as complete boundary", () => {
|
||||||
|
const text = "\\[\nx^2\n\\]\nAfter";
|
||||||
|
const result = splitCompleteBlocks(text);
|
||||||
|
expect(result.complete).toBe("\\[\nx^2\n\\]");
|
||||||
|
expect(result.pending).toBe("After");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("treats unclosed \\[ math block as pending", () => {
|
||||||
|
const text = "Before.\n\n\\[\nx^2";
|
||||||
|
const result = splitCompleteBlocks(text);
|
||||||
|
expect(result.complete).toBe("Before.\n");
|
||||||
|
expect(result.pending).toBe("\\[\nx^2");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("handles trailing blank line making everything complete", () => {
|
||||||
|
const text = "Hello world.\n";
|
||||||
|
const result = splitCompleteBlocks(text);
|
||||||
|
// Last line is empty string after split, which is a blank line
|
||||||
|
expect(result.complete).toBe("Hello world.\n");
|
||||||
|
expect(result.pending).toBe("");
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe("closePendingBlock", () => {
|
||||||
|
it("returns empty string for empty input", () => {
|
||||||
|
expect(closePendingBlock("")).toBe("");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("returns plain text unchanged", () => {
|
||||||
|
expect(closePendingBlock("Hello world")).toBe("Hello world");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("closes an open backtick code fence", () => {
|
||||||
|
const result = closePendingBlock("```python\nprint('hi')");
|
||||||
|
expect(result).toBe("```python\nprint('hi')\n```");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("closes an open tilde code fence", () => {
|
||||||
|
const result = closePendingBlock("~~~js\nconst x = 1;");
|
||||||
|
expect(result).toBe("~~~js\nconst x = 1;\n~~~");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("does not modify already-closed code fence", () => {
|
||||||
|
const text = "```py\ncode\n```";
|
||||||
|
expect(closePendingBlock(text)).toBe(text);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("closes an open math block", () => {
|
||||||
|
const result = closePendingBlock("$$\nx^2 + y^2");
|
||||||
|
expect(result).toBe("$$\nx^2 + y^2\n$$");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("does not modify already-closed math block", () => {
|
||||||
|
const text = "$$\nx^2\n$$";
|
||||||
|
expect(closePendingBlock(text)).toBe(text);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("closes an open \\[ math block with \\]", () => {
|
||||||
|
const result = closePendingBlock("\\[\nx^2 + y^2");
|
||||||
|
expect(result).toBe("\\[\nx^2 + y^2\n\\]");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("does not modify already-closed \\[...\\] math block", () => {
|
||||||
|
const text = "\\[\nx^2\n\\]";
|
||||||
|
expect(closePendingBlock(text)).toBe(text);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("closes code fence when preceded by regular text", () => {
|
||||||
|
const result = closePendingBlock("Some text\n```\ncode");
|
||||||
|
expect(result).toBe("Some text\n```\ncode\n```");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("leaves headers unchanged", () => {
|
||||||
|
expect(closePendingBlock("## Hello")).toBe("## Hello");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("leaves tables unchanged", () => {
|
||||||
|
const table = "| a | b |\n| --- | --- |\n| 1 | 2 |";
|
||||||
|
expect(closePendingBlock(table)).toBe(table);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("leaves lists unchanged", () => {
|
||||||
|
expect(closePendingBlock("- item 1\n- item 2")).toBe("- item 1\n- item 2");
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe("renderStreamingMarkdown", () => {
|
||||||
|
it("renders complete blocks and pending as markdown", () => {
|
||||||
|
const cache = createStreamingCache();
|
||||||
|
const text = "# Hello\n\nWorld";
|
||||||
|
const { blocks, pendingHtml } = renderStreamingMarkdown(text, cache);
|
||||||
|
expect(blocks).toHaveLength(1);
|
||||||
|
expect(blocks[0].html).toContain("<h1>Hello</h1>");
|
||||||
|
expect(pendingHtml).toContain("World");
|
||||||
|
expect(pendingHtml).toContain("<p>");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("preserves existing blocks when complete portion is unchanged", () => {
|
||||||
|
const cache = createStreamingCache();
|
||||||
|
renderStreamingMarkdown("# Hello\n\nWor", cache);
|
||||||
|
const firstBlocks = cache.blocks;
|
||||||
|
|
||||||
|
const { blocks } = renderStreamingMarkdown("# Hello\n\nWorld", cache);
|
||||||
|
// Same block array reference — nothing changed in the complete section
|
||||||
|
expect(blocks).toBe(firstBlocks);
|
||||||
|
expect(cache.completeKey).toBe("# Hello\n");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("appends a new block when a new section completes", () => {
|
||||||
|
const cache = createStreamingCache();
|
||||||
|
renderStreamingMarkdown("# Hello\n\nParagraph", cache);
|
||||||
|
expect(cache.blocks).toHaveLength(1);
|
||||||
|
const firstBlock = cache.blocks[0];
|
||||||
|
|
||||||
|
renderStreamingMarkdown("# Hello\n\nParagraph.\n\nMore", cache);
|
||||||
|
expect(cache.blocks).toHaveLength(2);
|
||||||
|
// First block is preserved with the same id and html
|
||||||
|
expect(cache.blocks[0].id).toBe(firstBlock.id);
|
||||||
|
expect(cache.blocks[0].html).toBe(firstBlock.html);
|
||||||
|
// Second block contains the new paragraph
|
||||||
|
expect(cache.blocks[1].html).toContain("Paragraph.");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("assigns unique stable ids to each block", () => {
|
||||||
|
const cache = createStreamingCache();
|
||||||
|
renderStreamingMarkdown("A.\n\nB.\n\nC", cache);
|
||||||
|
expect(cache.blocks).toHaveLength(1);
|
||||||
|
const id0 = cache.blocks[0].id;
|
||||||
|
|
||||||
|
renderStreamingMarkdown("A.\n\nB.\n\nC.\n\nD", cache);
|
||||||
|
expect(cache.blocks).toHaveLength(2);
|
||||||
|
expect(cache.blocks[0].id).toBe(id0);
|
||||||
|
expect(cache.blocks[1].id).toBe(id0 + 1);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("renders pending code block with syntax highlighting", () => {
|
||||||
|
const cache = createStreamingCache();
|
||||||
|
const text = "Done.\n\n```python\nprint('hello')";
|
||||||
|
const { pendingHtml } = renderStreamingMarkdown(text, cache);
|
||||||
|
expect(pendingHtml).toContain("<code");
|
||||||
|
expect(pendingHtml).toContain("hljs");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("renders pending table as markdown", () => {
|
||||||
|
const cache = createStreamingCache();
|
||||||
|
const text = "Done.\n\n| a | b |\n| --- | --- |\n| 1 | 2 |";
|
||||||
|
const { pendingHtml } = renderStreamingMarkdown(text, cache);
|
||||||
|
expect(pendingHtml).toContain("<table>");
|
||||||
|
expect(pendingHtml).toContain("<td>");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("renders pending portion through markdown pipeline", () => {
|
||||||
|
const cache = createStreamingCache();
|
||||||
|
const text = "Done.\n\nSome **bold** text";
|
||||||
|
const { pendingHtml } = renderStreamingMarkdown(text, cache);
|
||||||
|
expect(pendingHtml).toContain("<strong>bold</strong>");
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|||||||
@@ -69,13 +69,189 @@ const processor = unified()
|
|||||||
.use(rehypeHighlight)
|
.use(rehypeHighlight)
|
||||||
.use(rehypeStringify, { allowDangerousHtml: true });
|
.use(rehypeStringify, { allowDangerousHtml: true });
|
||||||
|
|
||||||
|
/**
 * Split streaming markdown into a stable "complete" prefix and a still-growing
 * "pending" suffix.
 *
 * The text is scanned line by line. A boundary is recorded after:
 *   - a blank line outside any fence or math block,
 *   - the closing line of a fenced code block (``` or ~~~),
 *   - the closing line of a display-math block ($$ ... $$ or \[ ... \]).
 * Everything up to and including the last boundary is `complete`; the rest is
 * `pending`. Blank lines inside an open fence or math block never count.
 *
 * Fence handling follows CommonMark: a closing fence must use the same
 * character as the opener and be at least as long, and a backtick line whose
 * trailing text contains a backtick is inline code, not a fence opener.
 * A math block is only closed by the delimiter matching its opener.
 *
 * @param text Raw markdown received so far.
 * @returns `complete` and `pending`, split at a line break; the newline that
 *          separated them is dropped, so they are re-joined with "\n".
 */
export function splitCompleteBlocks(text: string): { complete: string; pending: string } {
  if (!text) {
    return { complete: "", pending: "" };
  }

  // Compiled once per call instead of once per line.
  // Group 1: the fence run, group 2: whatever follows it on the line.
  const fenceLine = /^\s*(`{3,}|~{3,})(.*)$/;

  const lines = text.split("\n");
  let lastCompleteBoundary = -1; // index of the last line that ends a complete block
  let openFence: { char: string; length: number } | null = null;
  let mathCloser: "$$" | "\\]" | null = null;

  for (let i = 0; i < lines.length; i++) {
    const trimmed = lines[i].trimEnd();

    if (openFence) {
      const closing = fenceLine.exec(trimmed);
      // Same character, nothing after the run, and at least as long as the opener.
      if (
        closing &&
        closing[2] === "" &&
        closing[1][0] === openFence.char &&
        closing[1].length >= openFence.length
      ) {
        openFence = null;
        lastCompleteBoundary = i;
      }
      continue;
    }

    if (mathCloser) {
      // Only the delimiter paired with the opener ends the block.
      if (trimmed === mathCloser) {
        mathCloser = null;
        lastCompleteBoundary = i;
      }
      continue;
    }

    const opening = fenceLine.exec(trimmed);
    // A backtick "info string" containing a backtick means this is inline code.
    if (opening && !(opening[1][0] === "`" && opening[2].includes("`"))) {
      openFence = { char: opening[1][0], length: opening[1].length };
      continue;
    }

    if (trimmed === "$$") {
      mathCloser = "$$";
      continue;
    }
    if (trimmed === "\\[") {
      mathCloser = "\\]";
      continue;
    }

    // Outside fences/math: a blank line marks a complete boundary.
    if (trimmed === "") {
      lastCompleteBoundary = i;
    }
  }

  if (lastCompleteBoundary < 0) {
    return { complete: "", pending: text };
  }

  return {
    complete: lines.slice(0, lastCompleteBoundary + 1).join("\n"),
    pending: lines.slice(lastCompleteBoundary + 1).join("\n"),
  };
}
|
||||||
|
|
||||||
|
/**
 * Make a partially streamed markdown fragment self-contained by appending the
 * closing delimiter of whatever block is still open at its end.
 *
 * Handles fenced code blocks (``` / ~~~) and display math ($$ / \[ ... \]).
 * Text with no open block (paragraphs, headings, lists, tables) is returned
 * unchanged.
 *
 * Fence rules mirror CommonMark: a fence is closed only by a run of the same
 * character that is at least as long as the opener, so the appended closer
 * repeats the full opening run rather than a fixed three characters. A
 * backtick line whose trailing text contains a backtick is inline code and
 * does not open a fence. A math block is only closed by its own delimiter.
 *
 * @param pending The trailing, possibly incomplete markdown.
 * @returns `pending`, followed by "\n" plus the closer when a block is open.
 */
export function closePendingBlock(pending: string): string {
  if (!pending) return "";

  // Group 1: the fence run, group 2: whatever follows it on the line.
  const fenceLine = /^\s*(`{3,}|~{3,})(.*)$/;

  let openFence: string | null = null; // the full opening run, e.g. "````"
  let mathCloser: "$$" | "\\]" | null = null;

  for (const line of pending.split("\n")) {
    const trimmed = line.trimEnd();
    const fence = fenceLine.exec(trimmed);

    if (openFence !== null) {
      if (
        fence &&
        fence[2] === "" &&
        fence[1][0] === openFence[0] &&
        fence[1].length >= openFence.length
      ) {
        openFence = null;
      }
      continue;
    }

    if (mathCloser !== null) {
      if (trimmed === mathCloser) {
        mathCloser = null;
      }
      continue;
    }

    // Backticks after a backtick run mean inline code, not a fence opener.
    if (fence && !(fence[1][0] === "`" && fence[2].includes("`"))) {
      openFence = fence[1];
      continue;
    }

    if (trimmed === "$$") {
      mathCloser = "$$";
    } else if (trimmed === "\\[") {
      mathCloser = "\\]";
    }
  }

  if (openFence !== null) return pending + "\n" + openFence;
  if (mathCloser !== null) return pending + "\n" + mathCloser;
  return pending;
}
|
||||||
|
|
||||||
|
/** One independently rendered chunk of completed markdown. */
export interface RenderedBlock {
  /** Identifier taken from `StreamingCache.nextId`; never reused within a cache. */
  id: number;
  /** HTML produced by `renderMarkdown` for this chunk. */
  html: string;
}

/** Mutable state carried between successive `renderStreamingMarkdown` calls. */
export interface StreamingCache {
  /** Blocks rendered so far for the complete prefix. */
  blocks: RenderedBlock[];
  /** Next id to hand out. */
  nextId: number;
  /** The complete prefix that `blocks` currently represents. */
  completeKey: string;
}

/** Create an empty cache for a new streaming message. */
export function createStreamingCache(): StreamingCache {
  return { blocks: [], nextId: 0, completeKey: "" };
}

/**
 * Render streaming markdown incrementally.
 *
 * The text is split into a complete prefix and a pending suffix. The complete
 * prefix is rendered at most once per change and remembered in `cache`:
 *   - unchanged prefix: `cache.blocks` is returned as-is (same array);
 *   - prefix grew by appending: only the appended text is rendered and added
 *     as a new block, leaving earlier blocks and their ids untouched;
 *   - prefix changed any other way: it is re-rendered as a single block;
 *   - no complete prefix: the cache is emptied.
 * Appended text that is only whitespace (for example an extra blank line)
 * adds no block and consumes no id, since it would render to nothing.
 *
 * The pending suffix is re-rendered on every call after any open fence or
 * math block is closed so that it parses on its own.
 *
 * @param text  Full markdown received so far.
 * @param cache State from `createStreamingCache`; mutated in place.
 * @returns The cached blocks plus the HTML for the pending suffix
 *          ("" when there is nothing pending).
 */
export function renderStreamingMarkdown(
  text: string,
  cache: StreamingCache,
): { blocks: RenderedBlock[]; pendingHtml: string } {
  const { complete, pending } = splitCompleteBlocks(text);

  if (!complete) {
    // Nothing is complete any more: drop whatever was cached.
    if (cache.blocks.length > 0) {
      cache.blocks = [];
    }
    cache.completeKey = "";
  } else if (cache.completeKey !== complete) {
    const grewByAppending = cache.completeKey.length > 0 && complete.startsWith(cache.completeKey);

    if (grewByAppending) {
      const newPart = complete.slice(cache.completeKey.length);
      // Whitespace-only growth renders to nothing; keep the array (and ids) stable.
      if (newPart.trim() !== "") {
        cache.blocks = [...cache.blocks, { id: cache.nextId++, html: renderMarkdown(newPart) }];
      }
    } else if (complete.trim() !== "") {
      // Prefix changed unexpectedly: re-render it as a single block.
      cache.blocks = [{ id: cache.nextId++, html: renderMarkdown(complete) }];
    } else if (cache.blocks.length > 0) {
      cache.blocks = [];
    }

    cache.completeKey = complete;
  }

  const pendingHtml = pending ? renderMarkdown(closePendingBlock(pending)) : "";

  return { blocks: cache.blocks, pendingHtml };
}
|
||||||
|
|
||||||
|
/**
 * Rewrite LaTeX-style math delimiters to dollar delimiters:
 * `\[ ... \]` becomes `$$ ... $$` (may span lines) and `\( ... \)` becomes
 * `$ ... $`. Existing `$` / `$$` delimiters are left alone.
 *
 * Code is never rewritten: lines inside fenced code blocks (``` / ~~~) and
 * single-line inline code spans are copied through verbatim, so samples such
 * as regexes or shell snippets containing `\(` or `\[` are not corrupted.
 *
 * @param text Markdown source.
 * @returns The text with math delimiters normalized outside of code.
 */
export function normalizeLatexDelimiters(text: string): string {
  // Each pattern first tries an inline code span (group 1 = its backtick run)
  // so that code is consumed untouched; group 2 is the math body.
  const displayOrCode = /(`+)(?!`)[^\n]*?[^`\n]\1(?!`)|\\\[([\s\S]*?)\\\]/g;
  const inlineOrCode = /(`+)(?!`)[^\n]*?[^`\n]\1(?!`)|\\\(([\s\S]*?)\\\)/g;
  // Group 1: the fence run, group 2: whatever follows it on the line.
  const fenceLine = /^\s*(`{3,}|~{3,})(.*)$/;

  const convertProse = (prose: string): string =>
    prose
      // Display math first, then inline math, matching the original order.
      .replace(displayOrCode, (match: string, _ticks: string | undefined, inner: string | undefined) =>
        inner === undefined ? match : "$$" + inner + "$$",
      )
      .replace(inlineOrCode, (match: string, _ticks: string | undefined, inner: string | undefined) =>
        inner === undefined ? match : "$" + inner + "$",
      );

  const out: string[] = [];
  let prose: string[] = []; // consecutive non-code lines, converted as one unit
  let openFence: { char: string; length: number } | null = null;

  const flushProse = (): void => {
    if (prose.length > 0) {
      out.push(convertProse(prose.join("\n")));
      prose = [];
    }
  };

  for (const line of text.split("\n")) {
    const fence = fenceLine.exec(line.trimEnd());

    if (openFence) {
      out.push(line);
      if (
        fence &&
        fence[2] === "" &&
        fence[1][0] === openFence.char &&
        fence[1].length >= openFence.length
      ) {
        openFence = null;
      }
      continue;
    }

    // Backticks after a backtick run mean inline code, not a fence opener.
    if (fence && !(fence[1][0] === "`" && fence[2].includes("`"))) {
      flushProse();
      openFence = { char: fence[1][0], length: fence[1].length };
      out.push(line);
      continue;
    }

    prose.push(line);
  }
  flushProse();

  return out.join("\n");
}
|
||||||
|
|
||||||
export function renderMarkdown(content: string): string {
|
export function renderMarkdown(content: string): string {
|
||||||
if (!content) {
|
if (!content) {
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const result = processor.processSync(content);
|
const result = processor.processSync(normalizeLatexDelimiters(content));
|
||||||
return String(result);
|
return String(result);
|
||||||
} catch {
|
} catch {
|
||||||
// Fallback to escaped plain text if markdown parsing fails
|
// Fallback to escaped plain text if markdown parsing fails
|
||||||
|
|||||||
@@ -21,6 +21,16 @@ export interface Metrics {
|
|||||||
prompt_per_second: number;
|
prompt_per_second: number;
|
||||||
tokens_per_second: number;
|
tokens_per_second: number;
|
||||||
duration_ms: number;
|
duration_ms: number;
|
||||||
|
has_capture: boolean;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface ReqRespCapture {
|
||||||
|
id: number;
|
||||||
|
req_path: string;
|
||||||
|
req_headers: Record<string, string>;
|
||||||
|
req_body: string; // base64 encoded bytes
|
||||||
|
resp_headers: Record<string, string>;
|
||||||
|
resp_body: string; // base64 encoded bytes
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface LogData {
|
export interface LogData {
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
import { metrics } from "../stores/api";
|
import { metrics, getCapture } from "../stores/api";
|
||||||
import Tooltip from "../components/Tooltip.svelte";
|
import Tooltip from "../components/Tooltip.svelte";
|
||||||
|
import CaptureDialog from "../components/CaptureDialog.svelte";
|
||||||
|
import type { ReqRespCapture } from "../lib/types";
|
||||||
|
|
||||||
function formatSpeed(speed: number): string {
|
function formatSpeed(speed: number): string {
|
||||||
return speed < 0 ? "unknown" : speed.toFixed(2) + " t/s";
|
return speed < 0 ? "unknown" : speed.toFixed(2) + " t/s";
|
||||||
@@ -38,6 +40,25 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
let sortedMetrics = $derived([...$metrics].sort((a, b) => b.id - a.id));
|
let sortedMetrics = $derived([...$metrics].sort((a, b) => b.id - a.id));
|
||||||
|
|
||||||
|
let selectedCapture = $state<ReqRespCapture | null>(null);
|
||||||
|
let dialogOpen = $state(false);
|
||||||
|
let loadingCaptureId = $state<number | null>(null);
|
||||||
|
|
||||||
|
async function viewCapture(id: number) {
|
||||||
|
loadingCaptureId = id;
|
||||||
|
const capture = await getCapture(id);
|
||||||
|
loadingCaptureId = null;
|
||||||
|
if (capture) {
|
||||||
|
selectedCapture = capture;
|
||||||
|
dialogOpen = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function closeDialog() {
|
||||||
|
dialogOpen = false;
|
||||||
|
selectedCapture = null;
|
||||||
|
}
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<div class="p-2">
|
<div class="p-2">
|
||||||
@@ -65,6 +86,7 @@
|
|||||||
<th class="px-6 py-3">Prompt Processing</th>
|
<th class="px-6 py-3">Prompt Processing</th>
|
||||||
<th class="px-6 py-3">Generation Speed</th>
|
<th class="px-6 py-3">Generation Speed</th>
|
||||||
<th class="px-6 py-3">Duration</th>
|
<th class="px-6 py-3">Duration</th>
|
||||||
|
<th class="px-6 py-3">Capture</th>
|
||||||
</tr>
|
</tr>
|
||||||
</thead>
|
</thead>
|
||||||
<tbody class="divide-y">
|
<tbody class="divide-y">
|
||||||
@@ -79,6 +101,19 @@
|
|||||||
<td class="px-6 py-4">{formatSpeed(metric.prompt_per_second)}</td>
|
<td class="px-6 py-4">{formatSpeed(metric.prompt_per_second)}</td>
|
||||||
<td class="px-6 py-4">{formatSpeed(metric.tokens_per_second)}</td>
|
<td class="px-6 py-4">{formatSpeed(metric.tokens_per_second)}</td>
|
||||||
<td class="px-6 py-4">{formatDuration(metric.duration_ms)}</td>
|
<td class="px-6 py-4">{formatDuration(metric.duration_ms)}</td>
|
||||||
|
<td class="px-6 py-4">
|
||||||
|
{#if metric.has_capture}
|
||||||
|
<button
|
||||||
|
onclick={() => viewCapture(metric.id)}
|
||||||
|
disabled={loadingCaptureId === metric.id}
|
||||||
|
class="btn btn--sm"
|
||||||
|
>
|
||||||
|
{loadingCaptureId === metric.id ? "..." : "View"}
|
||||||
|
</button>
|
||||||
|
{:else}
|
||||||
|
<span class="text-txtsecondary">-</span>
|
||||||
|
{/if}
|
||||||
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
{/each}
|
{/each}
|
||||||
</tbody>
|
</tbody>
|
||||||
@@ -86,3 +121,5 @@
|
|||||||
</div>
|
</div>
|
||||||
{/if}
|
{/if}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<CaptureDialog capture={selectedCapture} open={dialogOpen} onclose={closeDialog} />
|
||||||
|
|||||||
@@ -0,0 +1 @@
|
|||||||
|
<!-- empty: real Playground is always mounted in App.svelte -->
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
import { writable } from "svelte/store";
|
import { writable } from "svelte/store";
|
||||||
import type { Model, Metrics, VersionInfo, LogData, APIEventEnvelope } from "../lib/types";
|
import type { Model, Metrics, VersionInfo, LogData, APIEventEnvelope, ReqRespCapture } from "../lib/types";
|
||||||
import { connectionState } from "./theme";
|
import { connectionState } from "./theme";
|
||||||
|
|
||||||
const LOG_LENGTH_LIMIT = 1024 * 100; /* 100KB of log data */
|
const LOG_LENGTH_LIMIT = 1024 * 100; /* 100KB of log data */
|
||||||
@@ -172,3 +172,19 @@ export async function loadModel(model: string): Promise<void> {
|
|||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Fetch a stored request/response capture by its ID.
 *
 * Issues `GET /api/captures/{id}` and resolves to the decoded JSON body.
 * This is a best-effort lookup that never rejects: an unusable ID, a 404,
 * any other non-2xx status, a network failure, or a body that cannot be
 * decoded as JSON all resolve to `null`. Every case except the 404 is
 * logged with `console.error`.
 *
 * @param id - Capture identifier, interpolated into the request path.
 * @returns The capture, or `null` when it is missing or could not be loaded.
 */
export async function getCapture(id: number): Promise<ReqRespCapture | null> {
  // NaN, Infinity and fractional values would be interpolated verbatim into
  // the URL (e.g. "/api/captures/NaN"). Presumably capture IDs are integers
  // (confirm against the server route), so skip the pointless round trip.
  if (!Number.isInteger(id)) {
    console.error("Failed to fetch capture:", `invalid capture id ${id}`);
    return null;
  }

  try {
    const response = await fetch(`/api/captures/${id}`);

    // A missing capture is an expected outcome, not an error: stay quiet.
    if (response.status === 404) {
      return null;
    }

    // Report other HTTP failures directly instead of throwing an Error that
    // the catch below would immediately swallow and log a second time.
    if (!response.ok) {
      console.error("Failed to fetch capture:", `unexpected status ${response.status}`);
      return null;
    }

    // The payload shape is trusted here; json() itself is untyped.
    const capture: ReqRespCapture = await response.json();
    return capture;
  } catch (error: unknown) {
    // Network errors and malformed JSON end up here.
    console.error("Failed to fetch capture:", error);
    return null;
  }
}
|
||||||
|
|||||||
@@ -0,0 +1,18 @@
|
|||||||
|
import { writable, derived } from "svelte/store";
|
||||||
|
|
||||||
|
// Four module-private boolean stores, all initialised to false.
// NOTE(review): judging by the names, each flag tracks one playground
// feature being busy — confirm against the components that set them.
const chatStreaming = writable(false),
  imageGenerating = writable(false),
  speechGenerating = writable(false),
  audioTranscribing = writable(false);

/**
 * Derived store that ORs the four flags together: truthy as soon as any one
 * of them is, falsy when none are.
 */
export const playgroundActivity = derived(
  [chatStreaming, imageGenerating, speechGenerating, audioTranscribing],
  // Left-to-right fold with `||`, same result as chaining the four values.
  (flags) => flags.reduce((anyBusy, flag) => anyBusy || flag),
);

/** The individual writable flags, exported together as one object. */
export const playgroundStores = {
  chatStreaming: chatStreaming,
  imageGenerating: imageGenerating,
  speechGenerating: speechGenerating,
  audioTranscribing: audioTranscribing,
};
|
||||||
@@ -0,0 +1,3 @@
|
|||||||
|
import { writable } from "svelte/store";
|
||||||
|
|
||||||
|
/** Writable store holding a route string; its initial value is "/". */
export const currentRoute = writable<string>("/");
|
||||||
Reference in New Issue
Block a user