Compare commits

...

10 Commits

Author SHA1 Message Date
Benson Wong b45102bde8 ui: smart auto-scroll in LogPanel (#530)
Pause auto-scroll when the user scrolls up to review logs, and resume
when they scroll back to the bottom.

- add `userScrolledUp` state variable
- add `handleScroll` to detect scroll position with 40px threshold
- guard the auto-scroll effect with `!userScrolledUp`

closes #529
2026-02-18 19:47:37 -08:00
Brian Mendonca 1688bdd1e9 proxy, ui: add pending requests count to the main dashboard (#516)
add a real time counter of pending (inflight) requests to the UI.
2026-02-16 09:41:15 -08:00
Benson Wong d33d51fa75 .coderabbit.yaml,AGENTS.md: small tweaks 2026-02-15 21:31:30 -08:00
Benson Wong e3bf065574 ui: persist playground state across route navigation (#525)
- Keep Playground component mounted when navigating away, preserving
streaming/generating state
- Add animated gradient effect on Playground nav link when activity is
in progress
2026-02-15 21:30:52 -08:00
Benson Wong 3e52144058 ui-svelte: incremental rendering of chat messages in the Playground (#520)
add incremental rendering to Playground > Chat
2026-02-15 11:00:44 -08:00
Benson Wong d5e52d7d00 build: disable provenance attestations in container builds (#523)
## Summary
- Add `--provenance=false` to docker build commands in
`build-container.sh`
- BuildKit attestation manifests are stored as untagged images in GHCR,
and the `delete-untagged-containers` cleanup job deletes them, breaking
the manifest list and causing `manifest unknown` errors on pull
- ref: https://github.com/actions/delete-package-versions/issues/162
2026-02-14 10:23:08 -08:00
Benson Wong 17e5263a76 .github/workflows: fix expired token in publishing images (#522)
Fixes: #517
2026-02-14 10:06:05 -08:00
Benson Wong 8d6d949ec3 proxy: support timings for /infill from llama-server (#510)
fixes: #463
2026-02-07 17:16:27 -08:00
Benson Wong b5fde8eb6d proxy,ui-svelte: add request/response capturing (#508)
Add saving request and response headers and bodies that go through
llama-swap in memory.

- captureBuffer added to configuration. Captures are enabled by default.
- 5MB of memory is allocated for req/response captures in a ring buffer.
Setting captureBuffer to 0 will disable captures.
- UI elements to view captured data added to Activity page. Includes
some QOL features like json formatting and recombining SSE chat streams
- capture saving is done at the byte level and has minimal impact on
llama-swap performance

Fixes #464 
Ref #503
2026-02-07 15:40:01 -08:00
Nuno 7eef5defb8 docs: add stable-diffusion.cpp references (#506)
Signed-off-by: rare-magma <rare-magma@posteo.eu>
2026-02-04 20:20:39 -08:00
35 changed files with 1842 additions and 155 deletions
+1 -1
View File
@@ -4,7 +4,7 @@ early_access: false
reviews: reviews:
profile: "chill" profile: "chill"
request_changes_workflow: false request_changes_workflow: false
high_level_summary: true high_level_summary: false
poem: false poem: false
review_status: true review_status: true
collapse_walkthrough: false collapse_walkthrough: false
+7
View File
@@ -17,6 +17,13 @@ on:
- 'docker/build-container.sh' - 'docker/build-container.sh'
- 'docker/*.Containerfile' - 'docker/*.Containerfile'
# grant permissions on GITHUB_TOKEN to publish packages
# ref: https://docs.github.com/en/packages/managing-github-packages-using-github-actions-workflows/publishing-and-installing-a-package-with-github-actions#publishing-a-package-using-an-action
permissions:
contents: read
packages: write
id-token: write
jobs: jobs:
build-and-push: build-and-push:
runs-on: ubuntu-latest runs-on: ubuntu-latest
+50
View File
@@ -0,0 +1,50 @@
## Project Description:
llama-swap is a lightweight, transparent proxy server that provides automatic model swapping to llama.cpp's server.
## Tech stack
- golang
- typescript, vite and Svelte 5 for UI (located in ui/)
## Workflow Tasks
- when summarizing changes only include details that require further action
- just say "Done." when there is no further action
- use the github CLI `gh` to create pull requests and work with github
- Rules for creating pull requests:
- keep them short and focused on changes.
- never include a test plan
- write the summary using the same style rules as commit message
## Testing
- Follow test naming conventions like `TestProxyManager_<test name>`, `TestProcessGroup_<test name>`, etc.
- Use `go test -v -run <name pattern for new tests>` to run any new tests you've written.
- Use `make test-dev` after running new tests for a quick overall test run. This runs `go test` and `staticcheck`. Fix any static checking errors. Use this only when changes are made to any code under the `proxy/` directory
- Use `make test-all` before completing work. This includes long running concurrency tests.
### Commit message example format:
```
proxy: add new feature
Add new feature that implements functionality X and Y.
- key change 1
- key change 2
- key change 3
fixes #123
```
## Code Reviews
- use three levels High, Medium, Low severity
- label each discovered issue with a label like H1, M2, L3 respectively
- High severity are must fix issues (security, race conditions, critical bugs)
- Medium severity are recommended improvements (coding style, missing functionality, inconsistencies)
- Low severity are nice to have changes and nits
- Include a suggestion with each discovered item
- Limit your code review to three items with the highest priority first
- Double check your discovered items and recommended remediations
+1 -49
View File
@@ -1,49 +1 @@
## Project Description: @AGENTS.md
llama-swap is a light weight, transparent proxy server that provides automatic model swapping to llama.cpp's server.
## Tech stack
- golang
- typescript, vite and react for UI (located in ui/)
## Workflow Tasks
- when summarizing changes only include details that require further action
- just say "Done." when there is no further action
- use `gh` to create PRs and load issues
- do not include Co-Authored-By or created by when committing changes or creating PRs
- keep PR descriptions short and focused on changes.
- never include a test plan
## Testing
- Follow test naming conventions like `TestProxyManager_<test name>`, `TestProcessGroup_<test name>`, etc.
- Use `go test -v -run <name pattern for new tests>` to run any new tests you've written.
- Use `make test-dev` after running new tests for a quick over all test run. This runs `go test` and `staticcheck`. Fix any static checking errors. Use this only when changes are made to any code under the `proxy/` directory
- Use `make test-all` before completing work. This includes long running concurrency tests.
### Commit message example format:
```
proxy: add new feature
Add new feature that implements functionality X and Y.
- key change 1
- key change 2
- key change 3
fixes #123
```
## Code Reviews
- use three levels High, Medium, Low severity
- label each discovered issue with a label like H1, M2, L3 respectively
- High severity are must fix issues (security, race conditions, critical bugs)
- Medium severity are recommended improvements (coding style, missing functionality, inconsistencies)
- Low severity are nice to have changes and nits
- Include a suggestion with each discovered item
- Limit your code review to three items with the highest priority first
- Double check your discovered items and recommended remediations
+2 -1
View File
@@ -13,7 +13,7 @@ Built in Go for performance and simplicity, llama-swap has zero dependencies and
- ✅ Easy to deploy and configure: one binary, one configuration file. no external dependencies - ✅ Easy to deploy and configure: one binary, one configuration file. no external dependencies
- ✅ On-demand model switching - ✅ On-demand model switching
- ✅ Use any local OpenAI compatible server (llama.cpp, vllm, tabbyAPI, etc.) - ✅ Use any local OpenAI compatible server (llama.cpp, vllm, tabbyAPI, stable-diffusion.cpp, etc.)
- future proof, upgrade your inference servers at any time. - future proof, upgrade your inference servers at any time.
- ✅ OpenAI API supported endpoints: - ✅ OpenAI API supported endpoints:
- `v1/completions` - `v1/completions`
@@ -69,6 +69,7 @@ llama-swap can be installed in multiple ways
### Docker Install ([download images](https://github.com/mostlygeek/llama-swap/pkgs/container/llama-swap)) ### Docker Install ([download images](https://github.com/mostlygeek/llama-swap/pkgs/container/llama-swap))
Nightly container images with llama-swap and llama-server are built for multiple platforms (cuda, vulkan, intel, etc.) including [non-root variants with improved security](docs/container-security.md). Nightly container images with llama-swap and llama-server are built for multiple platforms (cuda, vulkan, intel, etc.) including [non-root variants with improved security](docs/container-security.md).
The stable-diffusion.cpp server is also included for the musa and vulkan platforms.
```shell ```shell
$ docker pull ghcr.io/mostlygeek/llama-swap:cuda $ docker pull ghcr.io/mostlygeek/llama-swap:cuda
+6
View File
@@ -87,6 +87,12 @@
"default": 1000, "default": 1000,
"description": "Maximum number of metrics to keep in memory. Controls how many metrics are stored before older ones are discarded." "description": "Maximum number of metrics to keep in memory. Controls how many metrics are stored before older ones are discarded."
}, },
"captureBuffer": {
"type": "integer",
"minimum": 0,
"default": 5,
"description": "Size in megabytes of the buffer for storing request/response captures. Set to 0 to disable captures."
},
"startPort": { "startPort": {
"type": "integer", "type": "integer",
"default": 5800, "default": 5800,
+5
View File
@@ -50,6 +50,11 @@ logToStdout: "proxy"
# - useful for limiting memory usage when processing large volumes of metrics # - useful for limiting memory usage when processing large volumes of metrics
metricsMaxInMemory: 1000 metricsMaxInMemory: 1000
# captureBuffer: how many MBs to allocate for storing request/response captures
# - optional, default: 5
# - set to 0 to disable
captureBuffer: 15
# startPort: sets the starting port number for the automatic ${PORT} macro. # startPort: sets the starting port number for the automatic ${PORT} macro.
# - optional, default: 5800 # - optional, default: 5800
# - the ${PORT} macro can be used in model.cmd and model.proxy settings # - the ${PORT} macro can be used in model.cmd and model.proxy settings
+2 -2
View File
@@ -142,7 +142,7 @@ for CONTAINER_TYPE in non-root root; do
fi fi
log_info "Building $CONTAINER_TYPE $CONTAINER_TAG $LS_VER" log_info "Building $CONTAINER_TYPE $CONTAINER_TAG $LS_VER"
docker build -f llama-swap.Containerfile --build-arg BASE_TAG=${BASE_TAG} --build-arg LS_VER=${LS_VER} --build-arg UID=${USER_UID} \ docker build --provenance=false -f llama-swap.Containerfile --build-arg BASE_TAG=${BASE_TAG} --build-arg LS_VER=${LS_VER} --build-arg UID=${USER_UID} \
--build-arg LS_REPO=${LS_REPO} --build-arg GID=${USER_GID} --build-arg USER_HOME=${USER_HOME} -t ${CONTAINER_TAG} -t ${CONTAINER_LATEST} \ --build-arg LS_REPO=${LS_REPO} --build-arg GID=${USER_GID} --build-arg USER_HOME=${USER_HOME} -t ${CONTAINER_TAG} -t ${CONTAINER_LATEST} \
--build-arg BASE_IMAGE=${BASE_IMAGE} . --build-arg BASE_IMAGE=${BASE_IMAGE} .
@@ -150,7 +150,7 @@ for CONTAINER_TYPE in non-root root; do
case "$ARCH" in case "$ARCH" in
"musa" | "vulkan") "musa" | "vulkan")
log_info "Adding sd-server to $CONTAINER_TAG" log_info "Adding sd-server to $CONTAINER_TAG"
docker build -f llama-swap-sd.Containerfile \ docker build --provenance=false -f llama-swap-sd.Containerfile \
--build-arg BASE=${CONTAINER_TAG} \ --build-arg BASE=${CONTAINER_TAG} \
--build-arg SD_IMAGE=${SD_IMAGE} --build-arg SD_TAG=${SD_TAG} \ --build-arg SD_IMAGE=${SD_IMAGE} --build-arg SD_TAG=${SD_TAG} \
--build-arg UID=${USER_UID} --build-arg GID=${USER_GID} \ --build-arg UID=${USER_UID} --build-arg GID=${USER_GID} \
+2
View File
@@ -123,6 +123,7 @@ type Config struct {
LogTimeFormat string `yaml:"logTimeFormat"` LogTimeFormat string `yaml:"logTimeFormat"`
LogToStdout string `yaml:"logToStdout"` LogToStdout string `yaml:"logToStdout"`
MetricsMaxInMemory int `yaml:"metricsMaxInMemory"` MetricsMaxInMemory int `yaml:"metricsMaxInMemory"`
CaptureBuffer int `yaml:"captureBuffer"`
Models map[string]ModelConfig `yaml:"models"` /* key is model ID */ Models map[string]ModelConfig `yaml:"models"` /* key is model ID */
Profiles map[string][]string `yaml:"profiles"` Profiles map[string][]string `yaml:"profiles"`
Groups map[string]GroupConfig `yaml:"groups"` /* key is group ID */ Groups map[string]GroupConfig `yaml:"groups"` /* key is group ID */
@@ -201,6 +202,7 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
LogTimeFormat: "", LogTimeFormat: "",
LogToStdout: LogToStdoutProxy, LogToStdout: LogToStdoutProxy,
MetricsMaxInMemory: 1000, MetricsMaxInMemory: 1000,
CaptureBuffer: 5,
} }
if err = yaml.Unmarshal([]byte(yamlStr), &config); err != nil { if err = yaml.Unmarshal([]byte(yamlStr), &config); err != nil {
return Config{}, err return Config{}, err
+1
View File
@@ -215,6 +215,7 @@ groups:
}, },
HealthCheckTimeout: 15, HealthCheckTimeout: 15,
MetricsMaxInMemory: 1000, MetricsMaxInMemory: 1000,
CaptureBuffer: 5,
Profiles: map[string][]string{ Profiles: map[string][]string{
"test": {"model1", "model2"}, "test": {"model1", "model2"},
}, },
+1
View File
@@ -204,6 +204,7 @@ groups:
}, },
HealthCheckTimeout: 15, HealthCheckTimeout: 15,
MetricsMaxInMemory: 1000, MetricsMaxInMemory: 1000,
CaptureBuffer: 5,
Profiles: map[string][]string{ Profiles: map[string][]string{
"test": {"model1", "model2"}, "test": {"model1", "model2"},
}, },
+9
View File
@@ -8,6 +8,7 @@ const ConfigFileChangedEventID = 0x03
const LogDataEventID = 0x04 const LogDataEventID = 0x04
const TokenMetricsEventID = 0x05 const TokenMetricsEventID = 0x05
const ModelPreloadedEventID = 0x06 const ModelPreloadedEventID = 0x06
const InFlightRequestsEventID = 0x07
type ProcessStateChangeEvent struct { type ProcessStateChangeEvent struct {
ProcessName string ProcessName string
@@ -58,3 +59,11 @@ type ModelPreloadedEvent struct {
func (e ModelPreloadedEvent) Type() uint32 { func (e ModelPreloadedEvent) Type() uint32 {
return ModelPreloadedEventID return ModelPreloadedEventID
} }
type InFlightRequestsEvent struct {
Total int
}
func (e InFlightRequestsEvent) Type() uint32 {
return InFlightRequestsEventID
}
+165 -9
View File
@@ -28,6 +28,28 @@ type TokenMetrics struct {
PromptPerSecond float64 `json:"prompt_per_second"` PromptPerSecond float64 `json:"prompt_per_second"`
TokensPerSecond float64 `json:"tokens_per_second"` TokensPerSecond float64 `json:"tokens_per_second"`
DurationMs int `json:"duration_ms"` DurationMs int `json:"duration_ms"`
HasCapture bool `json:"has_capture"`
}
type ReqRespCapture struct {
ID int `json:"id"`
ReqPath string `json:"req_path"`
ReqHeaders map[string]string `json:"req_headers"`
ReqBody []byte `json:"req_body"`
RespHeaders map[string]string `json:"resp_headers"`
RespBody []byte `json:"resp_body"`
}
// Size returns the approximate memory usage of this capture in bytes
func (c *ReqRespCapture) Size() int {
size := len(c.ReqPath) + len(c.ReqBody) + len(c.RespBody)
for k, v := range c.ReqHeaders {
size += len(k) + len(v)
}
for k, v := range c.RespHeaders {
size += len(k) + len(v)
}
return size
} }
// TokenMetricsEvent represents a token metrics event // TokenMetricsEvent represents a token metrics event
@@ -46,19 +68,32 @@ type metricsMonitor struct {
maxMetrics int maxMetrics int
nextID int nextID int
logger *LogMonitor logger *LogMonitor
// capture fields
enableCaptures bool
captures map[int]ReqRespCapture // map for O(1) lookup by ID
captureOrder []int // track insertion order for FIFO eviction
captureSize int // current total size in bytes
maxCaptureSize int // max bytes for captures
} }
func newMetricsMonitor(logger *LogMonitor, maxMetrics int) *metricsMonitor { // newMetricsMonitor creates a new metricsMonitor. captureBufferMB is the
mp := &metricsMonitor{ // capture buffer size in megabytes; 0 disables captures.
func newMetricsMonitor(logger *LogMonitor, maxMetrics int, captureBufferMB int) *metricsMonitor {
return &metricsMonitor{
logger: logger, logger: logger,
maxMetrics: maxMetrics, maxMetrics: maxMetrics,
enableCaptures: captureBufferMB > 0,
captures: make(map[int]ReqRespCapture),
captureOrder: make([]int, 0),
captureSize: 0,
maxCaptureSize: captureBufferMB * 1024 * 1024,
}
} }
return mp // addMetrics adds a new metric to the collection and publishes an event.
} // Returns the assigned metric ID.
func (mp *metricsMonitor) addMetrics(metric TokenMetrics) int {
// addMetrics adds a new metric to the collection and publishes an event
func (mp *metricsMonitor) addMetrics(metric TokenMetrics) {
mp.mu.Lock() mp.mu.Lock()
defer mp.mu.Unlock() defer mp.mu.Unlock()
@@ -69,6 +104,49 @@ func (mp *metricsMonitor) addMetrics(metric TokenMetrics) {
mp.metrics = mp.metrics[len(mp.metrics)-mp.maxMetrics:] mp.metrics = mp.metrics[len(mp.metrics)-mp.maxMetrics:]
} }
event.Emit(TokenMetricsEvent{Metrics: metric}) event.Emit(TokenMetricsEvent{Metrics: metric})
return metric.ID
}
// addCapture adds a new capture to the buffer with size-based eviction.
// Captures are skipped if enableCaptures is false or if capture exceeds maxCaptureSize.
func (mp *metricsMonitor) addCapture(capture ReqRespCapture) {
if !mp.enableCaptures {
return
}
mp.mu.Lock()
defer mp.mu.Unlock()
captureSize := capture.Size()
if captureSize > mp.maxCaptureSize {
mp.logger.Warnf("capture size %d exceeds max %d, skipping", captureSize, mp.maxCaptureSize)
return
}
// Evict oldest (FIFO) until room available
for mp.captureSize+captureSize > mp.maxCaptureSize && len(mp.captureOrder) > 0 {
oldestID := mp.captureOrder[0]
mp.captureOrder = mp.captureOrder[1:]
if evicted, exists := mp.captures[oldestID]; exists {
mp.captureSize -= evicted.Size()
delete(mp.captures, oldestID)
}
}
mp.captures[capture.ID] = capture
mp.captureOrder = append(mp.captureOrder, capture.ID)
mp.captureSize += captureSize
}
// getCaptureByID returns a capture by its ID, or nil if not found.
func (mp *metricsMonitor) getCaptureByID(id int) *ReqRespCapture {
mp.mu.RLock()
defer mp.mu.RUnlock()
if capture, exists := mp.captures[id]; exists {
return &capture
}
return nil
} }
// getMetrics returns a copy of the current metrics // getMetrics returns a copy of the current metrics
@@ -97,6 +175,28 @@ func (mp *metricsMonitor) wrapHandler(
request *http.Request, request *http.Request,
next func(modelID string, w http.ResponseWriter, r *http.Request) error, next func(modelID string, w http.ResponseWriter, r *http.Request) error,
) error { ) error {
// Capture request body and headers if captures enabled
var reqBody []byte
var reqHeaders map[string]string
if mp.enableCaptures {
if request.Body != nil {
var err error
reqBody, err = io.ReadAll(request.Body)
if err != nil {
return fmt.Errorf("failed to read request body for capture: %w", err)
}
request.Body.Close()
request.Body = io.NopCloser(bytes.NewBuffer(reqBody))
}
reqHeaders = make(map[string]string)
for key, values := range request.Header {
if len(values) > 0 {
reqHeaders[key] = values[0]
}
}
redactHeaders(reqHeaders)
}
recorder := newBodyCopier(writer) recorder := newBodyCopier(writer)
// Filter Accept-Encoding to only include encodings we can decompress for metrics // Filter Accept-Encoding to only include encodings we can decompress for metrics
@@ -140,7 +240,6 @@ func (mp *metricsMonitor) wrapHandler(
return nil return nil
} }
} }
if strings.Contains(recorder.Header().Get("Content-Type"), "text/event-stream") { if strings.Contains(recorder.Header().Get("Content-Type"), "text/event-stream") {
if parsed, err := processStreamingResponse(modelID, recorder.StartTime(), body); err != nil { if parsed, err := processStreamingResponse(modelID, recorder.StartTime(), body); err != nil {
mp.logger.Warnf("error processing streaming response: %v, path=%s, recording minimal metrics", err, request.URL.Path) mp.logger.Warnf("error processing streaming response: %v, path=%s, recording minimal metrics", err, request.URL.Path)
@@ -153,6 +252,14 @@ func (mp *metricsMonitor) wrapHandler(
usage := parsed.Get("usage") usage := parsed.Get("usage")
timings := parsed.Get("timings") timings := parsed.Get("timings")
// extract timings for infill - response is an array, timings are in the last element
// see #463
if strings.HasPrefix(request.URL.Path, "/infill") {
if arr := parsed.Array(); len(arr) > 0 {
timings = arr[len(arr)-1].Get("timings")
}
}
if usage.Exists() || timings.Exists() { if usage.Exists() || timings.Exists() {
if parsedMetrics, err := parseMetrics(modelID, recorder.StartTime(), usage, timings); err != nil { if parsedMetrics, err := parseMetrics(modelID, recorder.StartTime(), usage, timings); err != nil {
mp.logger.Warnf("error parsing metrics: %v, path=%s, recording minimal metrics", err, request.URL.Path) mp.logger.Warnf("error parsing metrics: %v, path=%s, recording minimal metrics", err, request.URL.Path)
@@ -165,7 +272,38 @@ func (mp *metricsMonitor) wrapHandler(
} }
} }
mp.addMetrics(tm) // Build capture if enabled and determine if it will be stored
var capture *ReqRespCapture
if mp.enableCaptures {
respHeaders := make(map[string]string)
for key, values := range recorder.Header() {
if len(values) > 0 {
respHeaders[key] = values[0]
}
}
redactHeaders(respHeaders)
delete(respHeaders, "Content-Encoding")
capture = &ReqRespCapture{
ReqPath: request.URL.Path,
ReqHeaders: reqHeaders,
ReqBody: reqBody,
RespHeaders: respHeaders,
RespBody: body,
}
// Only set HasCapture if the capture will actually be stored (not too large)
if capture.Size() <= mp.maxCaptureSize {
tm.HasCapture = true
}
}
metricID := mp.addMetrics(tm)
// Store capture if enabled
if capture != nil {
capture.ID = metricID
mp.addCapture(*capture)
}
return nil return nil
} }
@@ -336,6 +474,24 @@ func (w *responseBodyCopier) StartTime() time.Time {
return w.start return w.start
} }
// sensitiveHeaders lists headers that should be redacted in captures
var sensitiveHeaders = map[string]bool{
"authorization": true,
"proxy-authorization": true,
"cookie": true,
"set-cookie": true,
"x-api-key": true,
}
// redactHeaders replaces sensitive header values in-place with "[REDACTED]"
func redactHeaders(headers map[string]string) {
for key := range headers {
if sensitiveHeaders[strings.ToLower(key)] {
headers[key] = "[REDACTED]"
}
}
}
// filterAcceptEncoding filters the Accept-Encoding header to only include // filterAcceptEncoding filters the Accept-Encoding header to only include
// encodings we can decompress (gzip, deflate). This respects the client's // encodings we can decompress (gzip, deflate). This respects the client's
// preferences while ensuring we can parse response bodies for metrics. // preferences while ensuring we can parse response bodies for metrics.
+323 -29
View File
@@ -18,7 +18,7 @@ import (
func TestMetricsMonitor_AddMetrics(t *testing.T) { func TestMetricsMonitor_AddMetrics(t *testing.T) {
t.Run("adds metrics and assigns ID", func(t *testing.T) { t.Run("adds metrics and assigns ID", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10) mm := newMetricsMonitor(testLogger, 10, 0)
metric := TokenMetrics{ metric := TokenMetrics{
Model: "test-model", Model: "test-model",
@@ -37,7 +37,7 @@ func TestMetricsMonitor_AddMetrics(t *testing.T) {
}) })
t.Run("increments ID for each metric", func(t *testing.T) { t.Run("increments ID for each metric", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10) mm := newMetricsMonitor(testLogger, 10, 0)
for i := 0; i < 5; i++ { for i := 0; i < 5; i++ {
mm.addMetrics(TokenMetrics{Model: "model"}) mm.addMetrics(TokenMetrics{Model: "model"})
@@ -51,7 +51,7 @@ func TestMetricsMonitor_AddMetrics(t *testing.T) {
}) })
t.Run("respects max metrics limit", func(t *testing.T) { t.Run("respects max metrics limit", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 3) mm := newMetricsMonitor(testLogger, 3, 0)
// Add 5 metrics // Add 5 metrics
for i := 0; i < 5; i++ { for i := 0; i < 5; i++ {
@@ -71,7 +71,7 @@ func TestMetricsMonitor_AddMetrics(t *testing.T) {
}) })
t.Run("emits TokenMetricsEvent", func(t *testing.T) { t.Run("emits TokenMetricsEvent", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10) mm := newMetricsMonitor(testLogger, 10, 0)
receivedEvent := make(chan TokenMetricsEvent, 1) receivedEvent := make(chan TokenMetricsEvent, 1)
cancel := event.On(func(e TokenMetricsEvent) { cancel := event.On(func(e TokenMetricsEvent) {
@@ -101,14 +101,14 @@ func TestMetricsMonitor_AddMetrics(t *testing.T) {
func TestMetricsMonitor_GetMetrics(t *testing.T) { func TestMetricsMonitor_GetMetrics(t *testing.T) {
t.Run("returns empty slice when no metrics", func(t *testing.T) { t.Run("returns empty slice when no metrics", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10) mm := newMetricsMonitor(testLogger, 10, 0)
metrics := mm.getMetrics() metrics := mm.getMetrics()
assert.NotNil(t, metrics) assert.NotNil(t, metrics)
assert.Equal(t, 0, len(metrics)) assert.Equal(t, 0, len(metrics))
}) })
t.Run("returns copy of metrics", func(t *testing.T) { t.Run("returns copy of metrics", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10) mm := newMetricsMonitor(testLogger, 10, 0)
mm.addMetrics(TokenMetrics{Model: "model1"}) mm.addMetrics(TokenMetrics{Model: "model1"})
mm.addMetrics(TokenMetrics{Model: "model2"}) mm.addMetrics(TokenMetrics{Model: "model2"})
@@ -128,7 +128,7 @@ func TestMetricsMonitor_GetMetrics(t *testing.T) {
func TestMetricsMonitor_GetMetricsJSON(t *testing.T) { func TestMetricsMonitor_GetMetricsJSON(t *testing.T) {
t.Run("returns valid JSON for empty metrics", func(t *testing.T) { t.Run("returns valid JSON for empty metrics", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10) mm := newMetricsMonitor(testLogger, 10, 0)
jsonData, err := mm.getMetricsJSON() jsonData, err := mm.getMetricsJSON()
assert.NoError(t, err) assert.NoError(t, err)
assert.NotNil(t, jsonData) assert.NotNil(t, jsonData)
@@ -140,7 +140,7 @@ func TestMetricsMonitor_GetMetricsJSON(t *testing.T) {
}) })
t.Run("returns valid JSON with metrics", func(t *testing.T) { t.Run("returns valid JSON with metrics", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10) mm := newMetricsMonitor(testLogger, 10, 0)
mm.addMetrics(TokenMetrics{ mm.addMetrics(TokenMetrics{
Model: "model1", Model: "model1",
InputTokens: 100, InputTokens: 100,
@@ -168,7 +168,7 @@ func TestMetricsMonitor_GetMetricsJSON(t *testing.T) {
func TestMetricsMonitor_WrapHandler(t *testing.T) { func TestMetricsMonitor_WrapHandler(t *testing.T) {
t.Run("successful non-streaming request with usage data", func(t *testing.T) { t.Run("successful non-streaming request with usage data", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10) mm := newMetricsMonitor(testLogger, 10, 0)
responseBody := `{ responseBody := `{
"usage": { "usage": {
@@ -199,7 +199,7 @@ func TestMetricsMonitor_WrapHandler(t *testing.T) {
}) })
t.Run("successful request with timings data", func(t *testing.T) { t.Run("successful request with timings data", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10) mm := newMetricsMonitor(testLogger, 10, 0)
responseBody := `{ responseBody := `{
"timings": { "timings": {
@@ -239,7 +239,7 @@ func TestMetricsMonitor_WrapHandler(t *testing.T) {
}) })
t.Run("streaming request with SSE format", func(t *testing.T) { t.Run("streaming request with SSE format", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10) mm := newMetricsMonitor(testLogger, 10, 0)
// Note: SSE format requires proper line breaks - each data line followed by blank line // Note: SSE format requires proper line breaks - each data line followed by blank line
responseBody := `data: {"choices":[{"text":"Hello"}]} responseBody := `data: {"choices":[{"text":"Hello"}]}
@@ -275,7 +275,7 @@ data: [DONE]
}) })
t.Run("non-OK status code does not record metrics", func(t *testing.T) { t.Run("non-OK status code does not record metrics", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10) mm := newMetricsMonitor(testLogger, 10, 0)
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error { nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
w.WriteHeader(http.StatusBadRequest) w.WriteHeader(http.StatusBadRequest)
@@ -295,7 +295,7 @@ data: [DONE]
}) })
t.Run("empty response body records minimal metrics", func(t *testing.T) { t.Run("empty response body records minimal metrics", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10) mm := newMetricsMonitor(testLogger, 10, 0)
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error { nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
w.WriteHeader(http.StatusOK) w.WriteHeader(http.StatusOK)
@@ -317,7 +317,7 @@ data: [DONE]
}) })
t.Run("invalid JSON records minimal metrics", func(t *testing.T) { t.Run("invalid JSON records minimal metrics", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10) mm := newMetricsMonitor(testLogger, 10, 0)
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error { nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
w.Header().Set("Content-Type", "application/json") w.Header().Set("Content-Type", "application/json")
@@ -341,7 +341,7 @@ data: [DONE]
}) })
t.Run("next handler error is propagated", func(t *testing.T) { t.Run("next handler error is propagated", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10) mm := newMetricsMonitor(testLogger, 10, 0)
expectedErr := assert.AnError expectedErr := assert.AnError
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error { nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
@@ -360,7 +360,7 @@ data: [DONE]
}) })
t.Run("response without usage or timings records minimal metrics", func(t *testing.T) { t.Run("response without usage or timings records minimal metrics", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10) mm := newMetricsMonitor(testLogger, 10, 0)
responseBody := `{"result": "ok"}` responseBody := `{"result": "ok"}`
@@ -384,6 +384,75 @@ data: [DONE]
assert.Equal(t, 0, metrics[0].InputTokens) assert.Equal(t, 0, metrics[0].InputTokens)
assert.Equal(t, 0, metrics[0].OutputTokens) assert.Equal(t, 0, metrics[0].OutputTokens)
}) })
t.Run("infill request extracts timings from last array element", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10, 0)
// Infill response is an array with timings in the last element
responseBody := `[
{"content": "first chunk"},
{"content": "second chunk"},
{"content": "final", "timings": {
"prompt_n": 150,
"predicted_n": 75,
"prompt_per_second": 200.5,
"predicted_per_second": 35.5,
"prompt_ms": 600.0,
"predicted_ms": 1800.0,
"cache_n": 30
}}
]`
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
w.Write([]byte(responseBody))
return nil
}
req := httptest.NewRequest("POST", "/infill", nil)
rec := httptest.NewRecorder()
ginCtx, _ := gin.CreateTestContext(rec)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
assert.NoError(t, err)
metrics := mm.getMetrics()
assert.Equal(t, 1, len(metrics))
assert.Equal(t, "test-model", metrics[0].Model)
assert.Equal(t, 150, metrics[0].InputTokens)
assert.Equal(t, 75, metrics[0].OutputTokens)
assert.Equal(t, 30, metrics[0].CachedTokens)
assert.Equal(t, 200.5, metrics[0].PromptPerSecond)
assert.Equal(t, 35.5, metrics[0].TokensPerSecond)
assert.Equal(t, 2400, metrics[0].DurationMs) // 600 + 1800
})
t.Run("infill request with empty array records minimal metrics", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10, 0)
responseBody := `[]`
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
w.Write([]byte(responseBody))
return nil
}
req := httptest.NewRequest("POST", "/infill", nil)
rec := httptest.NewRecorder()
ginCtx, _ := gin.CreateTestContext(rec)
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
assert.NoError(t, err)
metrics := mm.getMetrics()
assert.Equal(t, 1, len(metrics))
assert.Equal(t, "test-model", metrics[0].Model)
assert.Equal(t, 0, metrics[0].InputTokens)
assert.Equal(t, 0, metrics[0].OutputTokens)
})
} }
func TestMetricsMonitor_ResponseBodyCopier(t *testing.T) { func TestMetricsMonitor_ResponseBodyCopier(t *testing.T) {
@@ -437,7 +506,7 @@ func TestMetricsMonitor_ResponseBodyCopier(t *testing.T) {
func TestMetricsMonitor_Concurrent(t *testing.T) { func TestMetricsMonitor_Concurrent(t *testing.T) {
t.Run("concurrent addMetrics is safe", func(t *testing.T) { t.Run("concurrent addMetrics is safe", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 1000) mm := newMetricsMonitor(testLogger, 1000, 0)
var wg sync.WaitGroup var wg sync.WaitGroup
numGoroutines := 10 numGoroutines := 10
@@ -464,7 +533,7 @@ func TestMetricsMonitor_Concurrent(t *testing.T) {
}) })
t.Run("concurrent reads and writes are safe", func(t *testing.T) { t.Run("concurrent reads and writes are safe", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 100) mm := newMetricsMonitor(testLogger, 100, 0)
done := make(chan bool) done := make(chan bool)
@@ -502,7 +571,7 @@ func TestMetricsMonitor_Concurrent(t *testing.T) {
func TestMetricsMonitor_ParseMetrics(t *testing.T) { func TestMetricsMonitor_ParseMetrics(t *testing.T) {
t.Run("prefers timings over usage data", func(t *testing.T) { t.Run("prefers timings over usage data", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10) mm := newMetricsMonitor(testLogger, 10, 0)
// Timings should take precedence over usage // Timings should take precedence over usage
responseBody := `{ responseBody := `{
@@ -542,7 +611,7 @@ func TestMetricsMonitor_ParseMetrics(t *testing.T) {
}) })
t.Run("handles missing cache_n in timings", func(t *testing.T) { t.Run("handles missing cache_n in timings", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10) mm := newMetricsMonitor(testLogger, 10, 0)
responseBody := `{ responseBody := `{
"timings": { "timings": {
@@ -577,7 +646,7 @@ func TestMetricsMonitor_ParseMetrics(t *testing.T) {
func TestMetricsMonitor_StreamingResponse(t *testing.T) { func TestMetricsMonitor_StreamingResponse(t *testing.T) {
t.Run("finds metrics in last valid SSE data", func(t *testing.T) { t.Run("finds metrics in last valid SSE data", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10) mm := newMetricsMonitor(testLogger, 10, 0)
// Metrics should be found in the last data line before [DONE] // Metrics should be found in the last data line before [DONE]
responseBody := `data: {"choices":[{"text":"First"}]} responseBody := `data: {"choices":[{"text":"First"}]}
@@ -611,7 +680,7 @@ data: [DONE]
}) })
t.Run("handles streaming with no valid JSON records minimal metrics", func(t *testing.T) { t.Run("handles streaming with no valid JSON records minimal metrics", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10) mm := newMetricsMonitor(testLogger, 10, 0)
responseBody := `data: not json responseBody := `data: not json
@@ -641,7 +710,7 @@ data: [DONE]
}) })
t.Run("handles empty streaming response records minimal metrics", func(t *testing.T) { t.Run("handles empty streaming response records minimal metrics", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10) mm := newMetricsMonitor(testLogger, 10, 0)
responseBody := `` responseBody := ``
@@ -669,7 +738,7 @@ data: [DONE]
// Benchmark tests // Benchmark tests
func BenchmarkMetricsMonitor_AddMetrics(b *testing.B) { func BenchmarkMetricsMonitor_AddMetrics(b *testing.B) {
mm := newMetricsMonitor(testLogger, 1000) mm := newMetricsMonitor(testLogger, 1000, 0)
metric := TokenMetrics{ metric := TokenMetrics{
Model: "test-model", Model: "test-model",
@@ -690,7 +759,7 @@ func BenchmarkMetricsMonitor_AddMetrics(b *testing.B) {
func BenchmarkMetricsMonitor_AddMetrics_SmallBuffer(b *testing.B) { func BenchmarkMetricsMonitor_AddMetrics_SmallBuffer(b *testing.B) {
// Test performance with a smaller buffer where wrapping occurs more frequently // Test performance with a smaller buffer where wrapping occurs more frequently
mm := newMetricsMonitor(testLogger, 100) mm := newMetricsMonitor(testLogger, 100, 0)
metric := TokenMetrics{ metric := TokenMetrics{
Model: "test-model", Model: "test-model",
@@ -711,7 +780,7 @@ func BenchmarkMetricsMonitor_AddMetrics_SmallBuffer(b *testing.B) {
func TestMetricsMonitor_WrapHandler_Compression(t *testing.T) { func TestMetricsMonitor_WrapHandler_Compression(t *testing.T) {
t.Run("gzip encoded response", func(t *testing.T) { t.Run("gzip encoded response", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10) mm := newMetricsMonitor(testLogger, 10, 0)
responseBody := `{"usage": {"prompt_tokens": 100, "completion_tokens": 50}}` responseBody := `{"usage": {"prompt_tokens": 100, "completion_tokens": 50}}`
@@ -745,7 +814,7 @@ func TestMetricsMonitor_WrapHandler_Compression(t *testing.T) {
}) })
t.Run("deflate encoded response", func(t *testing.T) { t.Run("deflate encoded response", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10) mm := newMetricsMonitor(testLogger, 10, 0)
responseBody := `{"usage": {"prompt_tokens": 200, "completion_tokens": 75}}` responseBody := `{"usage": {"prompt_tokens": 200, "completion_tokens": 75}}`
@@ -779,7 +848,7 @@ func TestMetricsMonitor_WrapHandler_Compression(t *testing.T) {
}) })
t.Run("invalid gzip data records minimal metrics", func(t *testing.T) { t.Run("invalid gzip data records minimal metrics", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10) mm := newMetricsMonitor(testLogger, 10, 0)
// Invalid compressed data // Invalid compressed data
invalidData := []byte("this is not gzip data") invalidData := []byte("this is not gzip data")
@@ -807,7 +876,7 @@ func TestMetricsMonitor_WrapHandler_Compression(t *testing.T) {
}) })
t.Run("unknown encoding treated as uncompressed", func(t *testing.T) { t.Run("unknown encoding treated as uncompressed", func(t *testing.T) {
mm := newMetricsMonitor(testLogger, 10) mm := newMetricsMonitor(testLogger, 10, 0)
responseBody := `{"usage": {"prompt_tokens": 300, "completion_tokens": 100}}` responseBody := `{"usage": {"prompt_tokens": 300, "completion_tokens": 100}}`
@@ -832,3 +901,228 @@ func TestMetricsMonitor_WrapHandler_Compression(t *testing.T) {
assert.Equal(t, 100, metrics[0].OutputTokens) assert.Equal(t, 100, metrics[0].OutputTokens)
}) })
} }
// TestReqRespCapture_Size verifies the byte total reported by
// ReqRespCapture.Size for a populated capture and for the zero value.
func TestReqRespCapture_Size(t *testing.T) {
	t.Run("calculates size correctly", func(t *testing.T) {
		reqHeaders := map[string]string{
			"Content-Type": "application/json", // 12 + 16 = 28
		}
		respHeaders := map[string]string{
			"X-Test": "value", // 6 + 5 = 11
		}

		populated := ReqRespCapture{
			ID:          1,
			ReqPath:     "/v1/chat/completions", // 20 bytes
			ReqHeaders:  reqHeaders,
			ReqBody:     []byte("request body"), // 12 bytes
			RespHeaders: respHeaders,
			RespBody:    []byte("response body"), // 13 bytes
		}

		// path + request body + response body + request headers + response headers
		const wantBytes = 20 + 12 + 13 + 28 + 11
		assert.Equal(t, wantBytes, populated.Size())
	})

	t.Run("handles empty capture", func(t *testing.T) {
		var zero ReqRespCapture
		assert.Equal(t, 0, zero.Size())
	})
}
// TestMetricsMonitor_AddCapture covers addCapture: the disabled case (third
// constructor argument 0), a plain store/retrieve round trip, eviction of the
// oldest capture once maxCaptureSize is exceeded, and rejection of a capture
// that is larger than maxCaptureSize on its own.
func TestMetricsMonitor_AddCapture(t *testing.T) {
	t.Run("does nothing when captures disabled", func(t *testing.T) {
		mm := newMetricsMonitor(testLogger, 10, 0)

		capture := ReqRespCapture{
			ID:      0,
			ReqBody: []byte("test"),
		}
		mm.addCapture(capture)

		// Should not store capture
		assert.Nil(t, mm.getCaptureByID(0))
	})

	t.Run("adds capture when enabled", func(t *testing.T) {
		mm := newMetricsMonitor(testLogger, 10, 5)

		capture := ReqRespCapture{
			ID:       0,
			ReqBody:  []byte("test request"),
			RespBody: []byte("test response"),
		}
		mm.addCapture(capture)

		retrieved := mm.getCaptureByID(0)
		// assert.NotNil does not stop the test; bail out explicitly so a
		// missing capture is reported as a failure instead of a nil-pointer
		// panic that takes down the whole test binary.
		if !assert.NotNil(t, retrieved) {
			return
		}
		assert.Equal(t, 0, retrieved.ID)
		assert.Equal(t, []byte("test request"), retrieved.ReqBody)
		assert.Equal(t, []byte("test response"), retrieved.RespBody)
	})

	t.Run("evicts oldest when exceeding max size", func(t *testing.T) {
		mm := newMetricsMonitor(testLogger, 10, 5)
		mm.maxCaptureSize = 100 // Set small limit for test

		// Three 40-byte captures: the third pushes the total past the limit.
		capture1 := ReqRespCapture{ID: 0, ReqBody: make([]byte, 40)}
		capture2 := ReqRespCapture{ID: 1, ReqBody: make([]byte, 40)}
		capture3 := ReqRespCapture{ID: 2, ReqBody: make([]byte, 40)}

		mm.addCapture(capture1)
		mm.addCapture(capture2)
		// Adding capture3 should evict capture1
		mm.addCapture(capture3)

		assert.Nil(t, mm.getCaptureByID(0), "capture 0 should be evicted")
		assert.NotNil(t, mm.getCaptureByID(1), "capture 1 should exist")
		assert.NotNil(t, mm.getCaptureByID(2), "capture 2 should exist")
	})

	t.Run("skips capture larger than max size", func(t *testing.T) {
		mm := newMetricsMonitor(testLogger, 10, 5)
		mm.maxCaptureSize = 100

		// Add a capture larger than max
		largeCapture := ReqRespCapture{ID: 0, ReqBody: make([]byte, 200)}
		mm.addCapture(largeCapture)

		assert.Nil(t, mm.getCaptureByID(0), "oversized capture should not be stored")
	})
}
// TestMetricsMonitor_GetCaptureByID checks lookups by ID: an unknown ID yields
// nil and a previously added capture is returned with its ID intact.
func TestMetricsMonitor_GetCaptureByID(t *testing.T) {
	t.Run("returns nil for non-existent ID", func(t *testing.T) {
		mm := newMetricsMonitor(testLogger, 10, 5)
		assert.Nil(t, mm.getCaptureByID(999))
	})

	t.Run("returns capture by ID", func(t *testing.T) {
		mm := newMetricsMonitor(testLogger, 10, 5)

		capture := ReqRespCapture{
			ID:      42,
			ReqBody: []byte("test"),
		}
		mm.addCapture(capture)

		retrieved := mm.getCaptureByID(42)
		// Stop here on nil: assert.NotNil only records the failure, and the
		// field access below would otherwise panic.
		if !assert.NotNil(t, retrieved) {
			return
		}
		assert.Equal(t, 42, retrieved.ID)
	})
}
// TestRedactHeaders verifies that redactHeaders rewrites credential-bearing
// header values in place to "[REDACTED]", regardless of header-name casing,
// while leaving other headers untouched.
func TestRedactHeaders(t *testing.T) {
	t.Run("redacts sensitive headers", func(t *testing.T) {
		headers := map[string]string{
			"Authorization":       "Bearer secret-token",
			"Proxy-Authorization": "Basic creds",
			"Cookie":              "session=abc123",
			"Set-Cookie":          "session=xyz789",
			"X-Api-Key":           "sk-12345",
			"Content-Type":        "application/json",
			"X-Custom":            "safe-value",
		}

		redactHeaders(headers)

		sensitive := []string{
			"Authorization",
			"Proxy-Authorization",
			"Cookie",
			"Set-Cookie",
			"X-Api-Key",
		}
		for _, name := range sensitive {
			assert.Equal(t, "[REDACTED]", headers[name])
		}

		// Non-sensitive headers keep their original values.
		assert.Equal(t, "application/json", headers["Content-Type"])
		assert.Equal(t, "safe-value", headers["X-Custom"])
	})

	t.Run("handles mixed case header names", func(t *testing.T) {
		headers := map[string]string{
			"authorization": "Bearer token",
			"COOKIE":        "session=abc",
			"x-api-key":     "key123",
		}

		redactHeaders(headers)

		for _, name := range []string{"authorization", "COOKIE", "x-api-key"} {
			assert.Equal(t, "[REDACTED]", headers[name])
		}
	})

	t.Run("handles empty headers", func(t *testing.T) {
		headers := make(map[string]string)
		redactHeaders(headers)
		assert.Empty(t, headers)
	})
}
// TestMetricsMonitor_WrapHandler_Capture drives wrapHandler end to end and
// checks that a request/response capture is stored under the metric's ID when
// capturing is enabled, and that nothing is stored when it is disabled.
func TestMetricsMonitor_WrapHandler_Capture(t *testing.T) {
	t.Run("captures request and response when enabled", func(t *testing.T) {
		mm := newMetricsMonitor(testLogger, 10, 5)

		requestBody := `{"model": "test", "prompt": "hello"}`
		responseBody := `{"usage": {"prompt_tokens": 100, "completion_tokens": 50}}`

		nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
			w.Header().Set("Content-Type", "application/json")
			w.Header().Set("X-Custom", "header-value")
			w.WriteHeader(http.StatusOK)
			w.Write([]byte(responseBody))
			return nil
		}

		req := httptest.NewRequest("POST", "/test", bytes.NewBufferString(requestBody))
		req.Header.Set("Content-Type", "application/json")
		req.Header.Set("Authorization", "Bearer secret")
		rec := httptest.NewRecorder()
		ginCtx, _ := gin.CreateTestContext(rec)

		err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
		assert.NoError(t, err)

		// Check metric was recorded. The assertion is non-fatal, so return on
		// failure rather than indexing into an empty slice.
		metrics := mm.getMetrics()
		if !assert.Equal(t, 1, len(metrics)) {
			return
		}
		metricID := metrics[0].ID

		// Check capture was stored with same ID. Return on nil so the field
		// assertions below cannot dereference a nil capture.
		capture := mm.getCaptureByID(metricID)
		if !assert.NotNil(t, capture) {
			return
		}
		assert.Equal(t, metricID, capture.ID)
		assert.Equal(t, []byte(requestBody), capture.ReqBody)
		assert.Equal(t, []byte(responseBody), capture.RespBody)
		assert.Equal(t, "/test", capture.ReqPath)
		assert.Equal(t, "application/json", capture.ReqHeaders["Content-Type"])
		// The Authorization request header must be stored redacted.
		assert.Equal(t, "[REDACTED]", capture.ReqHeaders["Authorization"])
		assert.Equal(t, "application/json", capture.RespHeaders["Content-Type"])
		assert.Equal(t, "header-value", capture.RespHeaders["X-Custom"])
	})

	t.Run("does not capture when disabled", func(t *testing.T) {
		mm := newMetricsMonitor(testLogger, 10, 0)

		requestBody := `{"model": "test"}`
		responseBody := `{"usage": {"prompt_tokens": 100, "completion_tokens": 50}}`

		nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
			w.Header().Set("Content-Type", "application/json")
			w.WriteHeader(http.StatusOK)
			w.Write([]byte(responseBody))
			return nil
		}

		req := httptest.NewRequest("POST", "/test", bytes.NewBufferString(requestBody))
		rec := httptest.NewRecorder()
		ginCtx, _ := gin.CreateTestContext(rec)

		err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
		assert.NoError(t, err)

		// Metrics should still be recorded
		metrics := mm.getMetrics()
		if !assert.Equal(t, 1, len(metrics)) {
			return
		}

		// But no capture
		capture := mm.getCaptureByID(metrics[0].ID)
		assert.Nil(t, capture)
	})
}
+66 -20
View File
@@ -28,6 +28,40 @@ const (
type proxyCtxKey string type proxyCtxKey string
// InflightCounter is a mutex-guarded count of requests currently in flight.
type InflightCounter struct {
	mu    sync.Mutex
	total int
}

// newInflightCounter returns a counter starting at zero.
func newInflightCounter() *InflightCounter {
	return new(InflightCounter)
}

// Current returns the present count.
func (ic *InflightCounter) Current() int {
	ic.mu.Lock()
	defer ic.mu.Unlock()
	return ic.total
}

// Increment adds one to the count and returns the updated value.
func (ic *InflightCounter) Increment() int {
	ic.mu.Lock()
	defer ic.mu.Unlock()
	ic.total++
	return ic.total
}

// Decrement subtracts one from the count and returns the updated value.
// The count is only lowered while it is positive, so it never goes below zero.
func (ic *InflightCounter) Decrement() int {
	ic.mu.Lock()
	defer ic.mu.Unlock()
	if ic.total <= 0 {
		return ic.total
	}
	ic.total--
	return ic.total
}
type ProxyManager struct { type ProxyManager struct {
sync.Mutex sync.Mutex
@@ -43,6 +77,8 @@ type ProxyManager struct {
processGroups map[string]*ProcessGroup processGroups map[string]*ProcessGroup
inFlightCounter *InflightCounter
// shutdown signaling // shutdown signaling
shutdownCtx context.Context shutdownCtx context.Context
shutdownCancel context.CancelFunc shutdownCancel context.CancelFunc
@@ -151,10 +187,12 @@ func New(proxyConfig config.Config) *ProxyManager {
muxLogger: muxLogger, muxLogger: muxLogger,
upstreamLogger: upstreamLogger, upstreamLogger: upstreamLogger,
metricsMonitor: newMetricsMonitor(proxyLogger, maxMetrics), metricsMonitor: newMetricsMonitor(proxyLogger, maxMetrics, proxyConfig.CaptureBuffer),
processGroups: make(map[string]*ProcessGroup), processGroups: make(map[string]*ProcessGroup),
inFlightCounter: newInflightCounter(),
shutdownCtx: shutdownCtx, shutdownCtx: shutdownCtx,
shutdownCancel: shutdownCancel, shutdownCancel: shutdownCancel,
@@ -276,37 +314,37 @@ func (pm *ProxyManager) setupGinEngine() {
// Set up routes using the Gin engine // Set up routes using the Gin engine
// Protected routes use pm.apiKeyAuth() middleware // Protected routes use pm.apiKeyAuth() middleware
pm.ginEngine.POST("/v1/chat/completions", pm.apiKeyAuth(), pm.proxyInferenceHandler) pm.ginEngine.POST("/v1/chat/completions", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
pm.ginEngine.POST("/v1/responses", pm.apiKeyAuth(), pm.proxyInferenceHandler) pm.ginEngine.POST("/v1/responses", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
// Support legacy /v1/completions api, see issue #12 // Support legacy /v1/completions api, see issue #12
pm.ginEngine.POST("/v1/completions", pm.apiKeyAuth(), pm.proxyInferenceHandler) pm.ginEngine.POST("/v1/completions", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
// Support anthropic /v1/messages (added https://github.com/ggml-org/llama.cpp/pull/17570) // Support anthropic /v1/messages (added https://github.com/ggml-org/llama.cpp/pull/17570)
pm.ginEngine.POST("/v1/messages", pm.apiKeyAuth(), pm.proxyInferenceHandler) pm.ginEngine.POST("/v1/messages", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
// Support anthropic count_tokens API (Also added in the above PR) // Support anthropic count_tokens API (Also added in the above PR)
pm.ginEngine.POST("/v1/messages/count_tokens", pm.apiKeyAuth(), pm.proxyInferenceHandler) pm.ginEngine.POST("/v1/messages/count_tokens", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
// Support embeddings and reranking // Support embeddings and reranking
pm.ginEngine.POST("/v1/embeddings", pm.apiKeyAuth(), pm.proxyInferenceHandler) pm.ginEngine.POST("/v1/embeddings", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
// llama-server's /reranking endpoint + aliases // llama-server's /reranking endpoint + aliases
pm.ginEngine.POST("/reranking", pm.apiKeyAuth(), pm.proxyInferenceHandler) pm.ginEngine.POST("/reranking", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
pm.ginEngine.POST("/rerank", pm.apiKeyAuth(), pm.proxyInferenceHandler) pm.ginEngine.POST("/rerank", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
pm.ginEngine.POST("/v1/rerank", pm.apiKeyAuth(), pm.proxyInferenceHandler) pm.ginEngine.POST("/v1/rerank", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
pm.ginEngine.POST("/v1/reranking", pm.apiKeyAuth(), pm.proxyInferenceHandler) pm.ginEngine.POST("/v1/reranking", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
// llama-server's /infill endpoint for code infilling // llama-server's /infill endpoint for code infilling
pm.ginEngine.POST("/infill", pm.apiKeyAuth(), pm.proxyInferenceHandler) pm.ginEngine.POST("/infill", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
// llama-server's /completion endpoint // llama-server's /completion endpoint
pm.ginEngine.POST("/completion", pm.apiKeyAuth(), pm.proxyInferenceHandler) pm.ginEngine.POST("/completion", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
// Support audio/speech endpoint // Support audio/speech endpoint
pm.ginEngine.POST("/v1/audio/speech", pm.apiKeyAuth(), pm.proxyInferenceHandler) pm.ginEngine.POST("/v1/audio/speech", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
pm.ginEngine.POST("/v1/audio/voices", pm.apiKeyAuth(), pm.proxyInferenceHandler) pm.ginEngine.POST("/v1/audio/voices", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
pm.ginEngine.GET("/v1/audio/voices", pm.apiKeyAuth(), pm.proxyGETModelHandler) pm.ginEngine.GET("/v1/audio/voices", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyGETModelHandler)
pm.ginEngine.POST("/v1/audio/transcriptions", pm.apiKeyAuth(), pm.proxyOAIPostFormHandler) pm.ginEngine.POST("/v1/audio/transcriptions", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyOAIPostFormHandler)
pm.ginEngine.POST("/v1/images/generations", pm.apiKeyAuth(), pm.proxyInferenceHandler) pm.ginEngine.POST("/v1/images/generations", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
pm.ginEngine.POST("/v1/images/edits", pm.apiKeyAuth(), pm.proxyOAIPostFormHandler) pm.ginEngine.POST("/v1/images/edits", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyOAIPostFormHandler)
pm.ginEngine.GET("/v1/models", pm.apiKeyAuth(), pm.listModelsHandler) pm.ginEngine.GET("/v1/models", pm.apiKeyAuth(), pm.listModelsHandler)
@@ -325,7 +363,7 @@ func (pm *ProxyManager) setupGinEngine() {
pm.ginEngine.GET("/upstream", func(c *gin.Context) { pm.ginEngine.GET("/upstream", func(c *gin.Context) {
c.Redirect(http.StatusFound, "/ui/models") c.Redirect(http.StatusFound, "/ui/models")
}) })
pm.ginEngine.Any("/upstream/*upstreamPath", pm.apiKeyAuth(), pm.proxyToUpstream) pm.ginEngine.Any("/upstream/*upstreamPath", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyToUpstream)
pm.ginEngine.GET("/unload", pm.apiKeyAuth(), pm.unloadAllModelsHandler) pm.ginEngine.GET("/unload", pm.apiKeyAuth(), pm.unloadAllModelsHandler)
pm.ginEngine.GET("/running", pm.apiKeyAuth(), pm.listRunningProcessesHandler) pm.ginEngine.GET("/running", pm.apiKeyAuth(), pm.listRunningProcessesHandler)
pm.ginEngine.GET("/health", func(c *gin.Context) { pm.ginEngine.GET("/health", func(c *gin.Context) {
@@ -389,6 +427,14 @@ func (pm *ProxyManager) setupGinEngine() {
gin.DisableConsoleColor() gin.DisableConsoleColor()
} }
// trackInflight returns gin middleware that counts a request as in flight for
// the duration of the downstream handlers. It increments pm.inFlightCounter
// and emits an InFlightRequestsEvent with the new total before calling
// c.Next(), then decrements and emits again once the handlers have returned.
func (pm *ProxyManager) trackInflight() gin.HandlerFunc {
	return func(c *gin.Context) {
		event.Emit(InFlightRequestsEvent{Total: pm.inFlightCounter.Increment()})

		// The decrement has to live inside a deferred closure. Arguments of a
		// deferred call are evaluated when the defer statement executes, so
		// `defer event.Emit(...Decrement())` would decrement immediately
		// (before c.Next()) and later emit a stale total.
		defer func() {
			event.Emit(InFlightRequestsEvent{Total: pm.inFlightCounter.Decrement()})
		}()

		c.Next()
	}
}
// ServeHTTP implements http.Handler interface // ServeHTTP implements http.Handler interface
func (pm *ProxyManager) ServeHTTP(w http.ResponseWriter, r *http.Request) { func (pm *ProxyManager) ServeHTTP(w http.ResponseWriter, r *http.Request) {
pm.ginEngine.ServeHTTP(w, r) pm.ginEngine.ServeHTTP(w, r)
+40
View File
@@ -6,6 +6,7 @@ import (
"fmt" "fmt"
"net/http" "net/http"
"sort" "sort"
"strconv"
"strings" "strings"
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
@@ -31,6 +32,7 @@ func addApiHandlers(pm *ProxyManager) {
apiGroup.GET("/events", pm.apiSendEvents) apiGroup.GET("/events", pm.apiSendEvents)
apiGroup.GET("/metrics", pm.apiGetMetrics) apiGroup.GET("/metrics", pm.apiGetMetrics)
apiGroup.GET("/version", pm.apiGetVersion) apiGroup.GET("/version", pm.apiGetVersion)
apiGroup.GET("/captures/:id", pm.apiGetCapture)
} }
} }
@@ -105,6 +107,7 @@ const (
msgTypeModelStatus messageType = "modelStatus" msgTypeModelStatus messageType = "modelStatus"
msgTypeLogData messageType = "logData" msgTypeLogData messageType = "logData"
msgTypeMetrics messageType = "metrics" msgTypeMetrics messageType = "metrics"
msgTypeInFlight messageType = "inflight"
) )
type messageEnvelope struct { type messageEnvelope struct {
@@ -164,6 +167,18 @@ func (pm *ProxyManager) apiSendEvents(c *gin.Context) {
} }
} }
// sendInFlight queues an "inflight" message carrying {"total": total} on
// sendBuffer. A JSON marshalling error is ignored and nothing is sent.
sendInFlight := func(total int) {
jsonData, err := json.Marshal(gin.H{"total": total})
if err == nil {
// Non-blocking send: if neither the buffer nor ctx.Done() is ready the
// default case runs and this update is dropped.
select {
case sendBuffer <- messageEnvelope{Type: msgTypeInFlight, Data: string(jsonData)}:
case <-ctx.Done():
return
default:
}
}
}
/** /**
* Send updated models list * Send updated models list
*/ */
@@ -191,11 +206,19 @@ func (pm *ProxyManager) apiSendEvents(c *gin.Context) {
sendMetrics([]TokenMetrics{e.Metrics}) sendMetrics([]TokenMetrics{e.Metrics})
})() })()
/**
* Send in-flight request stats related to token stats "Waiting: N" count.
*/
defer event.On(func(e InFlightRequestsEvent) {
sendInFlight(e.Total)
})()
// send initial batch of data // send initial batch of data
sendLogData("proxy", pm.proxyLogger.GetHistory()) sendLogData("proxy", pm.proxyLogger.GetHistory())
sendLogData("upstream", pm.upstreamLogger.GetHistory()) sendLogData("upstream", pm.upstreamLogger.GetHistory())
sendModels() sendModels()
sendMetrics(pm.metricsMonitor.getMetrics()) sendMetrics(pm.metricsMonitor.getMetrics())
sendInFlight(pm.inFlightCounter.Current())
for { for {
select { select {
@@ -250,3 +273,20 @@ func (pm *ProxyManager) apiGetVersion(c *gin.Context) {
"build_date": pm.buildDate, "build_date": pm.buildDate,
}) })
} }
// apiGetCapture serves a single stored request/response capture as JSON.
// The ":id" path parameter must parse as an integer (400 otherwise); an ID
// with no stored capture yields 404.
func (pm *ProxyManager) apiGetCapture(c *gin.Context) {
	captureID, parseErr := strconv.Atoi(c.Param("id"))
	if parseErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid capture ID"})
		return
	}

	if found := pm.metricsMonitor.getCaptureByID(captureID); found != nil {
		c.JSON(http.StatusOK, found)
		return
	}

	c.JSON(http.StatusNotFound, gin.H{"error": "capture not found"})
}
+7
View File
@@ -925,6 +925,7 @@
"integrity": "sha512-Y1Cs7hhTc+a5E9Va/xwKlAJoariQyHY+5zBgCZg4PFWNYQ1nMN9sjK1zhw1gK69DuqVP++sht/1GZg1aRwmAXQ==", "integrity": "sha512-Y1Cs7hhTc+a5E9Va/xwKlAJoariQyHY+5zBgCZg4PFWNYQ1nMN9sjK1zhw1gK69DuqVP++sht/1GZg1aRwmAXQ==",
"dev": true, "dev": true,
"license": "MIT", "license": "MIT",
"peer": true,
"dependencies": { "dependencies": {
"@sveltejs/vite-plugin-svelte-inspector": "^4.0.1", "@sveltejs/vite-plugin-svelte-inspector": "^4.0.1",
"debug": "^4.4.1", "debug": "^4.4.1",
@@ -1307,6 +1308,7 @@
"integrity": "sha512-t7frlewr6+cbx+9Ohpl0NOTKXZNV9xHRmNOvql47BFJKcEG1CxtxlPEEe+gR9uhVWM4DwhnvTF110mIL4yP9RA==", "integrity": "sha512-t7frlewr6+cbx+9Ohpl0NOTKXZNV9xHRmNOvql47BFJKcEG1CxtxlPEEe+gR9uhVWM4DwhnvTF110mIL4yP9RA==",
"dev": true, "dev": true,
"license": "MIT", "license": "MIT",
"peer": true,
"dependencies": { "dependencies": {
"undici-types": "~7.16.0" "undici-types": "~7.16.0"
} }
@@ -1439,6 +1441,7 @@
"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz",
"integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
"license": "MIT", "license": "MIT",
"peer": true,
"bin": { "bin": {
"acorn": "bin/acorn" "acorn": "bin/acorn"
}, },
@@ -3449,6 +3452,7 @@
"integrity": "sha512-e5lPJi/aui4TO1LpAXIRLySmwXSE8k3b9zoGfd42p67wzxog4WHjiZF3M2uheQih4DGyc25QEV4yRBbpueNiUA==", "integrity": "sha512-e5lPJi/aui4TO1LpAXIRLySmwXSE8k3b9zoGfd42p67wzxog4WHjiZF3M2uheQih4DGyc25QEV4yRBbpueNiUA==",
"dev": true, "dev": true,
"license": "MIT", "license": "MIT",
"peer": true,
"dependencies": { "dependencies": {
"@types/estree": "1.0.8" "@types/estree": "1.0.8"
}, },
@@ -3561,6 +3565,7 @@
"resolved": "https://registry.npmjs.org/svelte/-/svelte-5.48.5.tgz", "resolved": "https://registry.npmjs.org/svelte/-/svelte-5.48.5.tgz",
"integrity": "sha512-NB3o70OxfmnE5UPyLr8uH3IV02Q43qJVAuWigYmsSOYsS0s/rHxP0TF81blG0onF/xkhNvZw4G8NfzIX+By5ZQ==", "integrity": "sha512-NB3o70OxfmnE5UPyLr8uH3IV02Q43qJVAuWigYmsSOYsS0s/rHxP0TF81blG0onF/xkhNvZw4G8NfzIX+By5ZQ==",
"license": "MIT", "license": "MIT",
"peer": true,
"dependencies": { "dependencies": {
"@jridgewell/remapping": "^2.3.4", "@jridgewell/remapping": "^2.3.4",
"@jridgewell/sourcemap-codec": "^1.5.0", "@jridgewell/sourcemap-codec": "^1.5.0",
@@ -3716,6 +3721,7 @@
"integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==", "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==",
"dev": true, "dev": true,
"license": "Apache-2.0", "license": "Apache-2.0",
"peer": true,
"bin": { "bin": {
"tsc": "bin/tsc", "tsc": "bin/tsc",
"tsserver": "bin/tsserver" "tsserver": "bin/tsserver"
@@ -3894,6 +3900,7 @@
"integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==", "integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==",
"dev": true, "dev": true,
"license": "MIT", "license": "MIT",
"peer": true,
"dependencies": { "dependencies": {
"esbuild": "^0.25.0", "esbuild": "^0.25.0",
"fdir": "^6.4.4", "fdir": "^6.4.4",
+15 -5
View File
@@ -6,23 +6,28 @@
import Models from "./routes/Models.svelte"; import Models from "./routes/Models.svelte";
import Activity from "./routes/Activity.svelte"; import Activity from "./routes/Activity.svelte";
import Playground from "./routes/Playground.svelte"; import Playground from "./routes/Playground.svelte";
import PlaygroundStub from "./routes/PlaygroundStub.svelte";
import { enableAPIEvents } from "./stores/api"; import { enableAPIEvents } from "./stores/api";
import { initScreenWidth, isDarkMode, appTitle, connectionState } from "./stores/theme"; import { initScreenWidth, isDarkMode, appTitle, connectionState } from "./stores/theme";
import { currentRoute } from "./stores/route";
const routes = { const routes = {
"/": Playground, "/": PlaygroundStub,
"/models": Models, "/models": Models,
"/logs": LogViewer, "/logs": LogViewer,
"/activity": Activity, "/activity": Activity,
"*": Playground, "*": PlaygroundStub,
}; };
// Sync theme to document attribute function handleRouteLoaded(event: { detail: { route: string | RegExp } }) {
const route = event.detail.route;
currentRoute.set(typeof route === "string" ? route : "/");
}
$effect(() => { $effect(() => {
document.documentElement.setAttribute("data-theme", $isDarkMode ? "dark" : "light"); document.documentElement.setAttribute("data-theme", $isDarkMode ? "dark" : "light");
}); });
// Sync title to document
$effect(() => { $effect(() => {
const icon = $connectionState === "connecting" ? "\u{1F7E1}" : $connectionState === "connected" ? "\u{1F7E2}" : "\u{1F534}"; const icon = $connectionState === "connecting" ? "\u{1F7E1}" : $connectionState === "connected" ? "\u{1F7E2}" : "\u{1F534}";
document.title = `${icon} ${$appTitle}`; document.title = `${icon} ${$appTitle}`;
@@ -43,6 +48,11 @@
<Header /> <Header />
<main class="flex-1 overflow-auto p-4"> <main class="flex-1 overflow-auto p-4">
<Router {routes} /> <div class="h-full" class:hidden={$currentRoute !== "/"}>
<Playground />
</div>
<div class="h-full" class:hidden={$currentRoute === "/"}>
<Router {routes} on:routeLoaded={handleRouteLoaded} />
</div>
</main> </main>
</div> </div>
@@ -0,0 +1,452 @@
<script lang="ts">
import type { ReqRespCapture } from "../lib/types";
/** Props accepted by the capture dialog component. */
interface Props {
/** Capture to display; the dialog body is only rendered when this is non-null. */
capture: ReqRespCapture | null;
/** Whether the dialog should be shown (drives showModal()/close()). */
open: boolean;
/** Called when the underlying <dialog> element fires its close event. */
onclose: () => void;
}
let { capture, open, onclose }: Props = $props();
// Reference to the <dialog> element, populated via bind:this.
let dialogEl: HTMLDialogElement | undefined = $state();
// Which rendering of a body is shown: undecoded text, formatted JSON, or the
// reassembled chat text of a streamed response.
type BodyTab = "raw" | "pretty" | "chat";
let reqBodyTab: BodyTab = $state("pretty");
let respBodyTab: BodyTab = $state("pretty");
// Set to true for 1500 ms after a successful copy of the request / response.
let copiedReq = $state(false);
let copiedResp = $state(false);
// Keep the native <dialog> in sync with the `open` prop: show it modally when
// `open` is true and close it when `open` is false. Nothing happens until the
// element reference is available.
$effect(() => {
if (open && dialogEl) {
dialogEl.showModal();
} else if (!open && dialogEl) {
dialogEl.close();
}
});
// Reset the selected body tabs whenever a capture is set, based on its
// Content-Type headers:
//   request:  JSON -> "pretty", anything else -> "raw"
//   response: text/event-stream -> "chat", JSON -> "pretty", else "raw"
$effect(() => {
if (capture) {
const reqCt = getContentType(capture.req_headers);
const respCt = getContentType(capture.resp_headers);
reqBodyTab = reqCt.includes("json") ? "pretty" : "raw";
respBodyTab = respCt.includes("text/event-stream")
? "chat"
: respCt.includes("json")
? "pretty"
: "raw";
}
});
/** Forwards the <dialog> element's close event to the `onclose` prop. */
function handleDialogClose(): void {
  onclose();
}
/**
 * Decodes a base64-encoded body into UTF-8 text.
 *
 * @param body - Base64 string, or null/undefined/empty when there is no body.
 * @returns The decoded text, "" for a missing body, or the input unchanged
 *   when it cannot be base64-decoded.
 */
function decodeBody(body: string | null | undefined): string {
  if (body === null || body === undefined || body === "") {
    return "";
  }
  try {
    // atob yields one character per byte; copy those byte values out so the
    // sequence can be interpreted as UTF-8.
    const byteString = atob(body);
    const utf8 = new Uint8Array(byteString.length);
    for (let i = 0; i < byteString.length; i += 1) {
      utf8[i] = byteString.charCodeAt(i);
    }
    return new TextDecoder().decode(utf8);
  } catch {
    return body;
  }
}
/**
 * Pretty-prints a JSON string with two-space indentation.
 *
 * @param str - Text that may contain JSON.
 * @returns The re-serialised JSON, or `str` unchanged when it does not parse.
 */
function formatJson(str: string): string {
  let value: unknown;
  try {
    value = JSON.parse(str);
  } catch {
    return str;
  }
  return JSON.stringify(value, null, 2);
}
/**
 * Returns the lower-cased Content-Type value from a header map.
 *
 * The two common spellings ("Content-Type", "content-type") are checked first;
 * because HTTP field names are case-insensitive, any other casing
 * (e.g. "CONTENT-TYPE") is then matched as a fallback.
 *
 * @param headers - Header name/value map, or null/undefined when absent.
 * @returns The lower-cased header value, or "" when no non-empty value exists.
 */
function getContentType(
  headers: Record<string, string> | null | undefined,
): string {
  if (!headers) return "";

  const common = headers["Content-Type"] || headers["content-type"];
  if (common) return common.toLowerCase();

  for (const [name, value] of Object.entries(headers)) {
    if (value && name.toLowerCase() === "content-type") {
      return value.toLowerCase();
    }
  }
  return "";
}
/** Reports whether a content type begins with the "image/" prefix. */
function isImageContentType(contentType: string): boolean {
  const imagePrefix = "image/";
  return contentType.slice(0, imagePrefix.length) === imagePrefix;
}
/**
 * Reports whether a content type is treated as text: anything starting with
 * "text/", or containing a JSON, XML or JavaScript application type.
 */
function isTextContentType(contentType: string): boolean {
  if (contentType.startsWith("text/")) {
    return true;
  }
  const textualTypes = [
    "application/json",
    "application/xml",
    "application/javascript",
  ];
  return textualTypes.some((candidate) => contentType.includes(candidate));
}
/**
 * Builds a base64 `data:` URL for an image body.
 *
 * @param body - Base64-encoded image bytes, used as-is.
 * @param contentType - Content-Type value; everything from the first ";" on
 *   (parameters such as charset) is dropped and the remainder trimmed.
 */
function getImageDataUrl(body: string, contentType: string): string {
  const paramStart = contentType.indexOf(";");
  const mediaType = paramStart === -1 ? contentType : contentType.slice(0, paramStart);
  return "data:" + mediaType.trim() + ";base64," + body;
}
/** Text reassembled from a streamed chat completion. */
interface SSEChat {
  /** Concatenated `delta.reasoning_content` fragments. */
  reasoning: string;
  /** Concatenated `delta.content` fragments. */
  content: string;
}

/**
 * Reassembles chat text from a server-sent-events body.
 *
 * Each `data` line is parsed as JSON and the string values of
 * `choices[0].delta.content` and `choices[0].delta.reasoning_content` are
 * appended in order. The `[DONE]` sentinel, non-`data` lines and payloads
 * that are not valid JSON are skipped.
 *
 * @param text - Raw SSE body.
 * @returns The accumulated reasoning and content text (both "" if none).
 */
function parseSSEChat(text: string): SSEChat {
  const result: SSEChat = { reasoning: "", content: "" };
  const fieldPrefix = "data:";

  for (const rawLine of text.split("\n")) {
    const line = rawLine.trim();
    // The space after the colon is optional in the SSE format, so accept
    // both "data: {...}" and "data:{...}".
    if (!line.startsWith(fieldPrefix)) continue;

    const payload = line.slice(fieldPrefix.length).trimStart();
    if (payload === "" || payload === "[DONE]") continue;

    try {
      const chunk = JSON.parse(payload) as {
        choices?: Array<{
          delta?: { content?: unknown; reasoning_content?: unknown } | null;
        } | null>;
      } | null;
      const delta = chunk?.choices?.[0]?.delta;
      // Only append strings so a non-text delta can never be stringified
      // into the output.
      if (typeof delta?.content === "string") {
        result.content += delta.content;
      }
      if (typeof delta?.reasoning_content === "string") {
        result.reasoning += delta.reasoning_content;
      }
    } catch {
      // skip unparseable lines
    }
  }
  return result;
}
/**
 * Copies text to the system clipboard and briefly raises the matching
 * "copied" flag (request or response) for 1500 ms. Clipboard failures are
 * ignored and leave the flags untouched.
 *
 * @param text - Text to place on the clipboard.
 * @param type - Which flag to raise: "req" for the request, otherwise response.
 */
async function copyToClipboard(text: string, type: "req" | "resp") {
  try {
    await navigator.clipboard.writeText(text);
    switch (type) {
      case "req":
        copiedReq = true;
        setTimeout(() => {
          copiedReq = false;
        }, 1500);
        break;
      default:
        copiedResp = true;
        setTimeout(() => {
          copiedResp = false;
        }, 1500);
        break;
    }
  } catch {
    // clipboard unavailable or denied: nothing to do
  }
}
/**
 * Returns the response text to copy for the currently selected tab.
 * On the "chat" tab this is the reasoning (when present) followed by a blank
 * line and the content; on every other tab it is the displayed response body.
 */
function getCopyText(): string {
  if (respBodyTab !== "chat") {
    return displayedResponseBody;
  }
  const reasoningPart = sseChat.reasoning ? sseChat.reasoning + "\n\n" : "";
  return reasoningPart + sseChat.content;
}
// Request body derivations: values computed from the current capture's
// request headers and body.

// Lower-cased request Content-Type, or "" when there is no capture.
let requestContentType = $derived(
capture ? getContentType(capture.req_headers) : "",
);
// True when the request Content-Type mentions "json".
let isRequestJson = $derived(requestContentType.includes("json"));
// Request body decoded from base64 to text ("" when there is no capture).
let requestBodyRaw = $derived.by(() => {
if (!capture) return "";
return decodeBody(capture.req_body);
});
// Indented JSON for JSON requests; otherwise identical to the raw text.
let requestBodyPretty = $derived.by(() => {
if (!isRequestJson) return requestBodyRaw;
return formatJson(requestBodyRaw);
});
// Text shown for the request: the pretty form on the "pretty" tab, the raw
// text on any other tab.
let displayedRequestBody = $derived(
reqBodyTab === "pretty" ? requestBodyPretty : requestBodyRaw,
);
// Response body: classify by content type, obtain the body text, and pick what the active tab shows
let responseContentType = $derived.by(() => {
  if (!capture) return "";
  return getContentType(capture.resp_headers);
});
let isResponseImage = $derived(isImageContentType(responseContentType));
let isResponseText = $derived(isTextContentType(responseContentType));
let isResponseJson = $derived(responseContentType.includes("json"));
let isSSE = $derived(responseContentType.includes("text/event-stream"));
let responseBodyRaw = $derived(capture ? decodeBody(capture.resp_body) : "");
// Pretty form only differs from the raw form for JSON responses
let responseBodyPretty = $derived(isResponseJson ? formatJson(responseBodyRaw) : responseBodyRaw);
// Chat view data; stays empty unless the response is a non-empty event stream
let sseChat = $derived.by((): SSEChat => {
  if (isSSE && responseBodyRaw) return parseSSEChat(responseBodyRaw);
  return { reasoning: "", content: "" };
});
let displayedResponseBody = $derived(
  respBodyTab === "pretty" ? responseBodyPretty : responseBodyRaw,
);
</script>
<dialog
bind:this={dialogEl}
onclose={handleDialogClose}
class="bg-surface text-txtmain rounded-lg shadow-xl max-w-4xl w-full max-h-[90vh] p-0 backdrop:bg-black/50 m-auto"
>
{#if capture}
<div class="flex flex-col max-h-[90vh]">
<div
class="flex justify-between items-center p-4 border-b border-card-border"
>
<h2 class="text-xl font-bold pb-0">Capture #{capture.id + 1}{#if capture.req_path} <span class="text-base font-mono font-normal text-txtsecondary">{capture.req_path}</span>{/if}</h2>
<!-- Icon-only close control: aria-label supplies the accessible name the "×" glyph lacks -->
<button
onclick={() => dialogEl?.close()}
class="text-txtsecondary hover:text-txtmain text-2xl leading-none"
aria-label="Close"
>
&times;
</button>
</div>
<div class="overflow-y-auto flex-1 p-4 space-y-4">
<!-- Request Headers -->
<details class="group" open>
<summary
class="cursor-pointer font-semibold text-sm uppercase tracking-wider text-txtsecondary hover:text-txtmain"
>
Request Headers
</summary>
<div
class="mt-2 bg-background rounded border border-card-border overflow-auto max-h-48"
>
<table class="w-full text-sm">
<tbody>
{#each Object.entries(capture.req_headers || {}) as [key, value]}
<tr class="border-b border-card-border-inner last:border-0">
<td class="px-3 py-1 font-mono text-primary whitespace-nowrap"
>{key}</td
>
<td class="px-3 py-1 font-mono break-all">{value}</td>
</tr>
{/each}
</tbody>
</table>
</div>
</details>
<!-- Request Body -->
<details class="group" open>
<summary
class="cursor-pointer font-semibold text-sm uppercase tracking-wider text-txtsecondary hover:text-txtmain"
>
Request Body
</summary>
{#if requestBodyRaw}
<div class="mt-2 flex items-center justify-between">
<div class="flex gap-1">
{#if isRequestJson}
<button
class="tab-btn"
class:tab-btn-active={reqBodyTab === "pretty"}
onclick={() => (reqBodyTab = "pretty")}>Pretty</button
>
<button
class="tab-btn"
class:tab-btn-active={reqBodyTab === "raw"}
onclick={() => (reqBodyTab = "raw")}>Raw</button
>
{/if}
</div>
<button
class="tab-btn"
onclick={() =>
copyToClipboard(displayedRequestBody, "req")}
>
{#if copiedReq}
Copied!
{:else}
Copy
{/if}
</button>
</div>
<div
class="mt-1 bg-background rounded border border-card-border overflow-auto max-h-96"
>
<pre
class="p-3 text-sm font-mono whitespace-pre-wrap break-all">{displayedRequestBody}</pre>
</div>
{:else}
<div
class="mt-2 bg-background rounded border border-card-border overflow-auto max-h-96"
>
<pre class="p-3 text-sm font-mono whitespace-pre-wrap break-all"
>(empty)</pre
>
</div>
{/if}
</details>
<!-- Response Headers -->
<details class="group" open>
<summary
class="cursor-pointer font-semibold text-sm uppercase tracking-wider text-txtsecondary hover:text-txtmain"
>
Response Headers
</summary>
<div
class="mt-2 bg-background rounded border border-card-border overflow-auto max-h-48"
>
<table class="w-full text-sm">
<tbody>
{#each Object.entries(capture.resp_headers || {}) as [key, value]}
<tr class="border-b border-card-border-inner last:border-0">
<td class="px-3 py-1 font-mono text-primary whitespace-nowrap"
>{key}</td
>
<td class="px-3 py-1 font-mono break-all">{value}</td>
</tr>
{/each}
</tbody>
</table>
</div>
</details>
<!-- Response Body -->
<details class="group" open>
<summary
class="cursor-pointer font-semibold text-sm uppercase tracking-wider text-txtsecondary hover:text-txtmain"
>
Response Body
</summary>
{#if isResponseImage && capture.resp_body}
<div
class="mt-2 bg-background rounded border border-card-border overflow-auto max-h-96"
>
<div class="p-3 flex justify-center">
<img
src={getImageDataUrl(capture.resp_body, responseContentType)}
alt="Response"
class="max-w-full h-auto"
/>
</div>
</div>
{:else if isSSE || isResponseText}
<div class="mt-2 flex items-center justify-between">
<div class="flex gap-1">
{#if isSSE}
<button
class="tab-btn"
class:tab-btn-active={respBodyTab === "chat"}
onclick={() => (respBodyTab = "chat")}>Chat</button
>
{/if}
{#if isResponseJson}
<button
class="tab-btn"
class:tab-btn-active={respBodyTab === "pretty"}
onclick={() => (respBodyTab = "pretty")}>Pretty</button
>
{/if}
{#if isSSE || isResponseJson}
<button
class="tab-btn"
class:tab-btn-active={respBodyTab === "raw"}
onclick={() => (respBodyTab = "raw")}>Raw</button
>
{/if}
</div>
<button
class="tab-btn"
onclick={() => copyToClipboard(getCopyText(), "resp")}
>
{#if copiedResp}
Copied!
{:else}
Copy
{/if}
</button>
</div>
<div
class="mt-1 bg-background rounded border border-card-border overflow-auto max-h-96"
>
{#if respBodyTab === "chat"}
<div class="p-3 text-sm space-y-3">
{#if sseChat.reasoning}
<div>
<div
class="text-xs font-semibold uppercase tracking-wider text-txtsecondary mb-1"
>
Reasoning
</div>
<pre
class="font-mono whitespace-pre-wrap break-all text-txtsecondary">{sseChat.reasoning}</pre>
</div>
{/if}
{#if sseChat.content}
<div>
{#if sseChat.reasoning}
<div
class="text-xs font-semibold uppercase tracking-wider text-txtsecondary mb-1"
>
Response
</div>
{/if}
<pre
class="font-mono whitespace-pre-wrap break-all">{sseChat.content}</pre>
</div>
{/if}
{#if !sseChat.reasoning && !sseChat.content}
<pre class="font-mono">(empty)</pre>
{/if}
</div>
{:else}
<pre
class="p-3 text-sm font-mono whitespace-pre-wrap break-all">{displayedResponseBody || "(empty)"}</pre>
{/if}
</div>
{:else if responseBodyRaw}
<div
class="mt-2 bg-background rounded border border-card-border overflow-auto max-h-96"
>
<div class="p-3 text-sm text-txtsecondary italic">
(binary data - {responseContentType || "unknown content type"})
</div>
</div>
{:else}
<div
class="mt-2 bg-background rounded border border-card-border overflow-auto max-h-96"
>
<pre class="p-3 text-sm font-mono">(empty)</pre>
</div>
{/if}
</details>
</div>
<div class="p-4 border-t border-card-border flex justify-end">
<button onclick={() => dialogEl?.close()} class="btn"> Close </button>
</div>
</div>
{/if}
</dialog>
<style>
/* Base look shared by the body tab buttons and the Copy buttons: small, borderless, transparent */
.tab-btn {
padding: 2px 10px;
font-size: 0.75rem;
border-radius: 4px;
color: var(--color-txtsecondary);
cursor: pointer;
/* Transparent border reserves the 1px that the active state colours in */
border: 1px solid transparent;
background: transparent;
transition: all 0.15s;
}
/* Hover: stronger text colour on the secondary background */
.tab-btn:hover {
color: var(--color-txtmain);
background: var(--color-secondary);
}
/* Selected tab: primary text with a 12% primary tint and a 25% primary border */
.tab-btn-active {
color: var(--color-primary);
background: color-mix(in srgb, var(--color-primary) 12%, transparent);
border-color: color-mix(in srgb, var(--color-primary) 25%, transparent);
}
</style>
+30 -8
View File
@@ -1,6 +1,8 @@
<script lang="ts"> <script lang="ts">
import { link, location } from "svelte-spa-router"; import { link } from "svelte-spa-router";
import { screenWidth, toggleTheme, isDarkMode, appTitle, isNarrow } from "../stores/theme"; import { screenWidth, toggleTheme, isDarkMode, appTitle, isNarrow } from "../stores/theme";
import { currentRoute } from "../stores/route";
import { playgroundActivity } from "../stores/playgroundActivity";
import ConnectionStatus from "./ConnectionStatus.svelte"; import ConnectionStatus from "./ConnectionStatus.svelte";
function handleTitleChange(newTitle: string): void { function handleTitleChange(newTitle: string): void {
@@ -22,9 +24,10 @@
handleTitleChange(target.textContent || "(set title)"); handleTitleChange(target.textContent || "(set title)");
} }
function isActive(path: string, currentLocation: string): boolean { function isActive(path: string, current: string): boolean {
return path === "/" ? currentLocation === "/" : currentLocation.startsWith(path); return path === "/" ? current === "/" : current.startsWith(path);
} }
</script> </script>
<header <header
@@ -47,8 +50,7 @@
<a <a
href="/" href="/"
use:link use:link
class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap" class="p-1 whitespace-nowrap {isActive('/', $currentRoute) ? 'font-semibold' : ''} {$playgroundActivity ? 'activity-link' : 'text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100'}"
class:font-semibold={isActive("/", $location)}
> >
Playground Playground
</a> </a>
@@ -56,7 +58,7 @@
href="/models" href="/models"
use:link use:link
class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap" class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap"
class:font-semibold={isActive("/models", $location)} class:font-semibold={isActive("/models", $currentRoute)}
> >
Models Models
</a> </a>
@@ -64,7 +66,7 @@
href="/activity" href="/activity"
use:link use:link
class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap" class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap"
class:font-semibold={isActive("/activity", $location)} class:font-semibold={isActive("/activity", $currentRoute)}
> >
Activity Activity
</a> </a>
@@ -72,7 +74,7 @@
href="/logs" href="/logs"
use:link use:link
class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap" class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap"
class:font-semibold={isActive("/logs", $location)} class:font-semibold={isActive("/logs", $currentRoute)}
> >
Logs Logs
</a> </a>
@@ -96,3 +98,23 @@
<ConnectionStatus /> <ConnectionStatus />
</menu> </menu>
</header> </header>
<style>
/* Animated gradient text applied to the Playground nav link while $playgroundActivity is truthy */
.activity-link {
background: linear-gradient(90deg, #6366f1, #8b5cf6, #a855f7, #8b5cf6, #6366f1);
/* Background is twice the element width so shifting its position scrolls the gradient */
background-size: 200% 100%;
/* Clip the gradient to the glyphs and make the text fill transparent so it shows through */
-webkit-background-clip: text;
background-clip: text;
-webkit-text-fill-color: transparent;
animation: gradient-shift 2s linear infinite;
}
/* Moves the background from 0% to 200% horizontally over one animation cycle */
@keyframes gradient-shift {
0% {
background-position: 0% 50%;
}
100% {
background-position: 200% 50%;
}
}
</style>
+10 -3
View File
@@ -65,10 +65,17 @@
}); });
let preElement: HTMLPreElement; let preElement: HTMLPreElement;
let userScrolledUp = $state(false);
// Auto scroll to bottom when logs change function handleScroll() {
if (!preElement) return;
const { scrollTop, scrollHeight, clientHeight } = preElement;
userScrolledUp = scrollHeight - scrollTop - clientHeight > 40;
}
// Auto scroll to bottom when logs change, unless user has scrolled up
$effect(() => { $effect(() => {
if (preElement && filteredLogs) { if (preElement && filteredLogs && !userScrolledUp) {
preElement.scrollTop = preElement.scrollHeight; preElement.scrollTop = preElement.scrollHeight;
} }
}); });
@@ -127,6 +134,6 @@
{/if} {/if}
</div> </div>
<div class="rounded-lg bg-background font-mono text-sm flex-1 overflow-hidden"> <div class="rounded-lg bg-background font-mono text-sm flex-1 overflow-hidden">
<pre bind:this={preElement} class="{textWrapClass} {fontSizeClass} h-full overflow-auto p-4">{filteredLogs}</pre> <pre bind:this={preElement} onscroll={handleScroll} class="{textWrapClass} {fontSizeClass} h-full overflow-auto p-4">{filteredLogs}</pre>
</div> </div>
</div> </div>
+24 -4
View File
@@ -1,5 +1,5 @@
<script lang="ts"> <script lang="ts">
import { metrics } from "../stores/api"; import { inFlightRequests, metrics } from "../stores/api";
import TokenHistogram from "./TokenHistogram.svelte"; import TokenHistogram from "./TokenHistogram.svelte";
interface HistogramData { interface HistogramData {
@@ -15,7 +15,14 @@
let stats = $derived.by(() => { let stats = $derived.by(() => {
const totalRequests = $metrics.length; const totalRequests = $metrics.length;
if (totalRequests === 0) { if (totalRequests === 0) {
return { totalRequests: 0, totalInputTokens: 0, totalOutputTokens: 0, tokenStats: { p99: "0", p95: "0", p50: "0" }, histogramData: null }; return {
totalRequests: 0,
totalInputTokens: 0,
totalOutputTokens: 0,
inFlightRequests: $inFlightRequests,
tokenStats: { p99: "0", p95: "0", p50: "0" },
histogramData: null,
};
} }
const totalInputTokens = $metrics.reduce((sum, m) => sum + m.input_tokens, 0); const totalInputTokens = $metrics.reduce((sum, m) => sum + m.input_tokens, 0);
@@ -24,7 +31,14 @@
// Calculate token statistics using output_tokens and duration_ms // Calculate token statistics using output_tokens and duration_ms
const validMetrics = $metrics.filter((m) => m.duration_ms > 0 && m.output_tokens > 0); const validMetrics = $metrics.filter((m) => m.duration_ms > 0 && m.output_tokens > 0);
if (validMetrics.length === 0) { if (validMetrics.length === 0) {
return { totalRequests, totalInputTokens, totalOutputTokens, tokenStats: { p99: "0", p95: "0", p50: "0" }, histogramData: null }; return {
totalRequests,
totalInputTokens,
totalOutputTokens,
inFlightRequests: $inFlightRequests,
tokenStats: { p99: "0", p95: "0", p50: "0" },
histogramData: null,
};
} }
// Calculate tokens/second for each valid metric // Calculate tokens/second for each valid metric
@@ -63,6 +77,7 @@
totalRequests, totalRequests,
totalInputTokens, totalInputTokens,
totalOutputTokens, totalOutputTokens,
inFlightRequests: $inFlightRequests,
tokenStats: { tokenStats: {
p99: p99.toFixed(2), p99: p99.toFixed(2),
p95: p95.toFixed(2), p95: p95.toFixed(2),
@@ -95,7 +110,12 @@
<tbody class="bg-surface divide-y divide-card-border-inner"> <tbody class="bg-surface divide-y divide-card-border-inner">
<tr class="hover:bg-secondary"> <tr class="hover:bg-secondary">
<td class="px-4 py-4 text-sm font-semibold text-gray-900 dark:text-white">{stats.totalRequests}</td> <td class="px-4 py-4 text-sm font-semibold text-gray-900 dark:text-white">
<div class="flex flex-col gap-1">
<span class="text-xs font-medium text-gray-500 dark:text-gray-400">Completed: {nf.format(stats.totalRequests)}</span>
<span class="text-xs font-medium text-gray-500 dark:text-gray-400">Waiting: {nf.format(stats.inFlightRequests)}</span>
</div>
</td>
<td class="px-4 py-4 text-sm text-gray-700 dark:text-gray-300 border-l border-gray-200 dark:border-white/10"> <td class="px-4 py-4 text-sm text-gray-700 dark:text-gray-300 border-l border-gray-200 dark:border-white/10">
<div class="flex items-center gap-2"> <div class="flex items-center gap-2">
@@ -2,6 +2,7 @@
import { models } from "../../stores/api"; import { models } from "../../stores/api";
import { persistentStore } from "../../stores/persistent"; import { persistentStore } from "../../stores/persistent";
import { transcribeAudio } from "../../lib/audioApi"; import { transcribeAudio } from "../../lib/audioApi";
import { playgroundStores } from "../../stores/playgroundActivity";
import ModelSelector from "./ModelSelector.svelte"; import ModelSelector from "./ModelSelector.svelte";
const selectedModelStore = persistentStore<string>("playground-audio-model", ""); const selectedModelStore = persistentStore<string>("playground-audio-model", "");
@@ -22,6 +23,10 @@
let canTranscribe = $derived(selectedFile !== null && $selectedModelStore !== "" && !isTranscribing); let canTranscribe = $derived(selectedFile !== null && $selectedModelStore !== "" && !isTranscribing);
// Mirror the local isTranscribing flag into the playgroundStores.audioTranscribing store
$effect(() => {
playgroundStores.audioTranscribing.set(isTranscribing);
});
function validateFile(file: File): { valid: boolean; error?: string } { function validateFile(file: File): { valid: boolean; error?: string } {
const ext = '.' + file.name.split('.').pop()?.toLowerCase(); const ext = '.' + file.name.split('.').pop()?.toLowerCase();
@@ -2,6 +2,7 @@
import { models } from "../../stores/api"; import { models } from "../../stores/api";
import { persistentStore } from "../../stores/persistent"; import { persistentStore } from "../../stores/persistent";
import { streamChatCompletion } from "../../lib/chatApi"; import { streamChatCompletion } from "../../lib/chatApi";
import { playgroundStores } from "../../stores/playgroundActivity";
import type { ChatMessage, ContentPart } from "../../lib/types"; import type { ChatMessage, ContentPart } from "../../lib/types";
import ChatMessageComponent from "./ChatMessage.svelte"; import ChatMessageComponent from "./ChatMessage.svelte";
import ModelSelector from "./ModelSelector.svelte"; import ModelSelector from "./ModelSelector.svelte";
@@ -11,7 +12,16 @@
const systemPromptStore = persistentStore<string>("playground-system-prompt", ""); const systemPromptStore = persistentStore<string>("playground-system-prompt", "");
const temperatureStore = persistentStore<number>("playground-temperature", 0.7); const temperatureStore = persistentStore<number>("playground-temperature", 0.7);
let messages = $state<ChatMessage[]>([]); function loadMessages(): ChatMessage[] {
try {
const saved = localStorage.getItem("playground-messages");
return saved ? JSON.parse(saved) : [];
} catch {
return [];
}
}
let messages = $state<ChatMessage[]>(loadMessages());
let userInput = $state(""); let userInput = $state("");
let isStreaming = $state(false); let isStreaming = $state(false);
let isReasoning = $state(false); let isReasoning = $state(false);
@@ -24,21 +34,52 @@
let imageError = $state<string | null>(null); let imageError = $state<string | null>(null);
let hasModels = $derived($models.some((m) => !m.unlisted)); let hasModels = $derived($models.some((m) => !m.unlisted));
let userScrolledUp = $state(false);
// Auto-scroll when messages change
$effect(() => { $effect(() => {
if (messages.length > 0 && messagesContainer) { playgroundStores.chatStreaming.set(isStreaming);
});
/**
 * Scroll handler for the messages container: records whether the user has
 * moved away from the bottom. Anything more than 40px above the bottom edge
 * counts as "scrolled up".
 */
function handleMessagesScroll() {
  const container = messagesContainer;
  if (!container) return;
  const distanceFromBottom = container.scrollHeight - container.scrollTop - container.clientHeight;
  userScrolledUp = distanceFromBottom > 40;
}
// Auto-scroll when messages change — skip if user scrolled up
$effect(() => {
if (messages.length > 0 && messagesContainer && !userScrolledUp) {
messagesContainer.scrollTo({ messagesContainer.scrollTo({
top: messagesContainer.scrollHeight, top: messagesContainer.scrollHeight,
behavior: "smooth", behavior: isStreaming ? "instant" : "smooth",
}); });
} }
}); });
// Persist messages to localStorage (throttled to once per 2s)
// lastSaveTime is the Date.now() value of the most recent save attempt (0 = never saved).
let lastSaveTime = 0;
$effect(() => {
// Serialize up front so the deferred save writes the snapshot from this run
const json = JSON.stringify(messages);
const elapsed = Date.now() - lastSaveTime;
const save = () => {
// Storage errors are deliberately ignored; the timestamp is updated either way
try { localStorage.setItem("playground-messages", json); } catch {}
lastSaveTime = Date.now();
};
// At least 2s since the last save: write immediately, no timer needed
if (elapsed >= 2000) {
save();
return;
}
// Otherwise defer the write until the 2s window has passed
const timer = setTimeout(save, 2000 - elapsed);
// Teardown cancels the pending write
return () => clearTimeout(timer);
});
async function sendMessage() { async function sendMessage() {
const trimmedInput = userInput.trim(); const trimmedInput = userInput.trim();
if ((!trimmedInput && attachedImages.length === 0) || !$selectedModelStore || isStreaming) return; if ((!trimmedInput && attachedImages.length === 0) || !$selectedModelStore || isStreaming) return;
userScrolledUp = false;
// Build message content (multimodal if images attached) // Build message content (multimodal if images attached)
let content: string | ContentPart[]; let content: string | ContentPart[];
if (attachedImages.length > 0) { if (attachedImages.length > 0) {
@@ -321,6 +362,7 @@
<div <div
class="flex-1 overflow-y-auto mb-4 px-2" class="flex-1 overflow-y-auto mb-4 px-2"
bind:this={messagesContainer} bind:this={messagesContainer}
onscroll={handleMessagesScroll}
> >
{#if messages.length === 0} {#if messages.length === 0}
<div class="h-full flex items-center justify-center text-txtsecondary"> <div class="h-full flex items-center justify-center text-txtsecondary">
@@ -1,5 +1,6 @@
<script lang="ts"> <script lang="ts">
import { renderMarkdown, escapeHtml } from "../../lib/markdown"; import { renderMarkdown, escapeHtml, renderStreamingMarkdown, createStreamingCache } from "../../lib/markdown";
import type { RenderedBlock } from "../../lib/markdown";
import { Copy, Check, Pencil, X, Save, RefreshCw, ChevronDown, ChevronRight, Brain, Code } from "lucide-svelte"; import { Copy, Check, Pencil, X, Save, RefreshCw, ChevronDown, ChevronRight, Brain, Code } from "lucide-svelte";
import { getTextContent, getImageUrls } from "../../lib/types"; import { getTextContent, getImageUrls } from "../../lib/types";
import type { ContentPart } from "../../lib/types"; import type { ContentPart } from "../../lib/types";
@@ -22,11 +23,17 @@
let hasImages = $derived(imageUrls.length > 0); let hasImages = $derived(imageUrls.length > 0);
let canEdit = $derived(onEdit !== undefined && !hasImages); let canEdit = $derived(onEdit !== undefined && !hasImages);
let renderedContent = $derived( let streamingCache = createStreamingCache();
role === "assistant" && !isStreaming let renderedParts = $derived.by(() => {
? renderMarkdown(textContent) if (role !== "assistant") {
: escapeHtml(textContent).replace(/\n/g, '<br>') return { blocks: [{ id: -1, html: escapeHtml(textContent).replace(/\n/g, '<br>') }] as RenderedBlock[], pendingHtml: "" };
); }
if (!isStreaming) {
streamingCache = createStreamingCache();
return { blocks: [{ id: -1, html: renderMarkdown(textContent) }] as RenderedBlock[], pendingHtml: "" };
}
return renderStreamingMarkdown(textContent, streamingCache);
});
let copied = $state(false); let copied = $state(false);
let showRaw = $state(false); let showRaw = $state(false);
let isEditing = $state(false); let isEditing = $state(false);
@@ -113,9 +120,9 @@
<div class="flex {role === 'user' ? 'justify-end' : 'justify-start'} mb-4"> <div class="flex {role === 'user' ? 'justify-end' : 'justify-start'} mb-4">
<div <div
class="relative group max-w-[85%] rounded-lg px-4 py-2 {role === 'user' class="relative group rounded-lg px-4 py-2 {role === 'user'
? 'bg-primary text-btn-primary-text' ? 'max-w-[85%] bg-primary text-btn-primary-text'
: 'bg-surface border border-gray-200 dark:border-white/10'}" : 'w-full sm:w-4/5 bg-surface border border-gray-200 dark:border-white/10'}"
> >
{#if role === "assistant"} {#if role === "assistant"}
{#if reasoning_content || isReasoning} {#if reasoning_content || isReasoning}
@@ -168,7 +175,10 @@
<div class="whitespace-pre-wrap font-mono text-sm">{textContent}</div> <div class="whitespace-pre-wrap font-mono text-sm">{textContent}</div>
{:else} {:else}
<div class="prose prose-sm dark:prose-invert max-w-none"> <div class="prose prose-sm dark:prose-invert max-w-none">
{@html renderedContent} {#each renderedParts.blocks as block (block.id)}
{@html block.html}
{/each}
{@html renderedParts.pendingHtml}
{#if isStreaming && !isReasoning} {#if isStreaming && !isReasoning}
<span class="inline-block w-2 h-4 bg-current animate-pulse ml-0.5"></span> <span class="inline-block w-2 h-4 bg-current animate-pulse ml-0.5"></span>
{/if} {/if}
@@ -2,6 +2,7 @@
import { models } from "../../stores/api"; import { models } from "../../stores/api";
import { persistentStore } from "../../stores/persistent"; import { persistentStore } from "../../stores/persistent";
import { generateImage } from "../../lib/imageApi"; import { generateImage } from "../../lib/imageApi";
import { playgroundStores } from "../../stores/playgroundActivity";
import ModelSelector from "./ModelSelector.svelte"; import ModelSelector from "./ModelSelector.svelte";
import ExpandableTextarea from "./ExpandableTextarea.svelte"; import ExpandableTextarea from "./ExpandableTextarea.svelte";
@@ -17,6 +18,10 @@
let hasModels = $derived($models.some((m) => !m.unlisted)); let hasModels = $derived($models.some((m) => !m.unlisted));
// Mirror the local isGenerating flag into the playgroundStores.imageGenerating store
$effect(() => {
playgroundStores.imageGenerating.set(isGenerating);
});
async function generate() { async function generate() {
const trimmedPrompt = prompt.trim(); const trimmedPrompt = prompt.trim();
if (!trimmedPrompt || !$selectedModelStore || isGenerating) return; if (!trimmedPrompt || !$selectedModelStore || isGenerating) return;
@@ -2,6 +2,7 @@
import { models } from "../../stores/api"; import { models } from "../../stores/api";
import { persistentStore } from "../../stores/persistent"; import { persistentStore } from "../../stores/persistent";
import { generateSpeech } from "../../lib/speechApi"; import { generateSpeech } from "../../lib/speechApi";
import { playgroundStores } from "../../stores/playgroundActivity";
import ModelSelector from "./ModelSelector.svelte"; import ModelSelector from "./ModelSelector.svelte";
import ExpandableTextarea from "./ExpandableTextarea.svelte"; import ExpandableTextarea from "./ExpandableTextarea.svelte";
@@ -20,11 +21,9 @@
let availableVoices = $state<string[]>(["coral", "alloy", "echo", "fable", "onyx", "nova", "shimmer"]); let availableVoices = $state<string[]>(["coral", "alloy", "echo", "fable", "onyx", "nova", "shimmer"]);
let isLoadingVoices = $state(false); let isLoadingVoices = $state(false);
// Default voices to fall back to if API call fails
const defaultVoices = ["coral", "alloy", "echo", "fable", "onyx", "nova", "shimmer"]; const defaultVoices = ["coral", "alloy", "echo", "fable", "onyx", "nova", "shimmer"];
const CACHE_KEY = "playground-speech-voices-cache"; const CACHE_KEY = "playground-speech-voices-cache";
// Load voices cache from localStorage
function getVoicesCache(): Record<string, string[]> { function getVoicesCache(): Record<string, string[]> {
if (typeof window === "undefined") return {}; if (typeof window === "undefined") return {};
try { try {
@@ -35,7 +34,6 @@
} }
} }
// Save voices cache to localStorage
function saveVoicesCache(cache: Record<string, string[]>) { function saveVoicesCache(cache: Record<string, string[]>) {
if (typeof window === "undefined") return; if (typeof window === "undefined") return;
try { try {
@@ -47,9 +45,12 @@
let hasModels = $derived($models.some((m) => !m.unlisted)); let hasModels = $derived($models.some((m) => !m.unlisted));
// Track if this is the initial page load to avoid fetching on refresh
let isInitialLoad = $state(true); let isInitialLoad = $state(true);
// Mirror the local isGenerating flag into the playgroundStores.speechGenerating store
$effect(() => {
playgroundStores.speechGenerating.set(isGenerating);
});
// On page load, restore cached voices for the selected model if available // On page load, restore cached voices for the selected model if available
$effect(() => { $effect(() => {
const model = $selectedModelStore; const model = $selectedModelStore;
+264 -1
View File
@@ -1,5 +1,5 @@
import { describe, it, expect } from "vitest"; import { describe, it, expect } from "vitest";
import { renderMarkdown, escapeHtml } from "./markdown"; import { renderMarkdown, escapeHtml, splitCompleteBlocks, closePendingBlock, normalizeLatexDelimiters, renderStreamingMarkdown, createStreamingCache } from "./markdown";
describe("renderMarkdown", () => { describe("renderMarkdown", () => {
describe("basic markdown", () => { describe("basic markdown", () => {
@@ -130,6 +130,35 @@ More text here.
expect(result).toContain("katex"); expect(result).toContain("katex");
expect(result).toContain("sqrt"); expect(result).toContain("sqrt");
}); });
it("renders \\[...\\] display math", () => {
const result = renderMarkdown("\\[\nx^2 + y^2 = z^2\n\\]");
expect(result).toContain("katex");
});
it("renders \\(...\\) inline math", () => {
const result = renderMarkdown("The equation \\(E = mc^2\\) is famous.");
expect(result).toContain("katex");
});
});
describe("normalizeLatexDelimiters", () => {
it("converts \\[...\\] to $$...$$", () => {
expect(normalizeLatexDelimiters("\\[\nx^2\n\\]")).toBe("$$\nx^2\n$$");
});
it("converts \\(...\\) to $...$", () => {
expect(normalizeLatexDelimiters("\\(x^2\\)")).toBe("$x^2$");
});
it("leaves $$ and $ delimiters unchanged", () => {
const text = "$$x^2$$ and $y$";
expect(normalizeLatexDelimiters(text)).toBe(text);
});
it("handles multiple occurrences", () => {
expect(normalizeLatexDelimiters("\\(a\\) and \\(b\\)")).toBe("$a$ and $b$");
});
}); });
describe("escapeHtml", () => { describe("escapeHtml", () => {
@@ -158,3 +187,237 @@ More text here.
}); });
}); });
}); });
// splitCompleteBlocks: each case pins the expected { complete, pending } pair for a given input.
describe("splitCompleteBlocks", () => {
// --- plain paragraphs ---
it("returns everything as pending when no blank line", () => {
const result = splitCompleteBlocks("Hello world");
expect(result.complete).toBe("");
expect(result.pending).toBe("Hello world");
});
it("returns empty for empty input", () => {
const result = splitCompleteBlocks("");
expect(result.complete).toBe("");
expect(result.pending).toBe("");
});
it("splits on blank line between paragraphs", () => {
const result = splitCompleteBlocks("First paragraph.\n\nSecond paragraph");
expect(result.complete).toBe("First paragraph.\n");
expect(result.pending).toBe("Second paragraph");
});
it("splits multiple paragraphs at last blank line", () => {
const result = splitCompleteBlocks("Para 1.\n\nPara 2.\n\nPara 3");
expect(result.complete).toBe("Para 1.\n\nPara 2.\n");
expect(result.pending).toBe("Para 3");
});
// --- code fences (backtick and tilde) ---
it("treats closed code fence as complete boundary", () => {
const text = "```js\nconst x = 1;\n```\nMore text";
const result = splitCompleteBlocks(text);
expect(result.complete).toBe("```js\nconst x = 1;\n```");
expect(result.pending).toBe("More text");
});
it("treats unclosed code fence as pending", () => {
const text = "Done paragraph.\n\n```js\nconst x = 1;";
const result = splitCompleteBlocks(text);
expect(result.complete).toBe("Done paragraph.\n");
expect(result.pending).toBe("```js\nconst x = 1;");
});
it("does not split on blank lines inside code fences", () => {
const text = "```\nline1\n\nline2\n```";
const result = splitCompleteBlocks(text);
expect(result.complete).toBe("```\nline1\n\nline2\n```");
expect(result.pending).toBe("");
});
it("handles tilde fences", () => {
const text = "~~~py\nprint('hi')\n~~~\nAfter";
const result = splitCompleteBlocks(text);
expect(result.complete).toBe("~~~py\nprint('hi')\n~~~");
expect(result.pending).toBe("After");
});
it("does not close backtick fence with tilde fence", () => {
const text = "```\ncode\n~~~\nstill code";
const result = splitCompleteBlocks(text);
// The ~~~ should not close a backtick fence, so everything from ``` onward is pending
expect(result.complete).toBe("");
expect(result.pending).toBe("```\ncode\n~~~\nstill code");
});
// --- display math blocks ($$...$$ and \[...\]) ---
it("treats closed math block as complete boundary", () => {
const text = "$$\nx^2\n$$\nAfter";
const result = splitCompleteBlocks(text);
expect(result.complete).toBe("$$\nx^2\n$$");
expect(result.pending).toBe("After");
});
it("treats unclosed math block as pending", () => {
const text = "Before.\n\n$$\nx^2";
const result = splitCompleteBlocks(text);
expect(result.complete).toBe("Before.\n");
expect(result.pending).toBe("$$\nx^2");
});
it("treats closed \\[...\\] math block as complete boundary", () => {
const text = "\\[\nx^2\n\\]\nAfter";
const result = splitCompleteBlocks(text);
expect(result.complete).toBe("\\[\nx^2\n\\]");
expect(result.pending).toBe("After");
});
it("treats unclosed \\[ math block as pending", () => {
const text = "Before.\n\n\\[\nx^2";
const result = splitCompleteBlocks(text);
expect(result.complete).toBe("Before.\n");
expect(result.pending).toBe("\\[\nx^2");
});
// --- trailing newline ---
it("handles trailing blank line making everything complete", () => {
const text = "Hello world.\n";
const result = splitCompleteBlocks(text);
// Last line is empty string after split, which is a blank line
expect(result.complete).toBe("Hello world.\n");
expect(result.pending).toBe("");
});
});
// closePendingBlock: an unterminated fence or display-math block gets its closing delimiter
// appended on a new line; every other input is expected back unchanged.
describe("closePendingBlock", () => {
it("returns empty string for empty input", () => {
expect(closePendingBlock("")).toBe("");
});
it("returns plain text unchanged", () => {
expect(closePendingBlock("Hello world")).toBe("Hello world");
});
// --- code fences ---
it("closes an open backtick code fence", () => {
const result = closePendingBlock("```python\nprint('hi')");
expect(result).toBe("```python\nprint('hi')\n```");
});
it("closes an open tilde code fence", () => {
const result = closePendingBlock("~~~js\nconst x = 1;");
expect(result).toBe("~~~js\nconst x = 1;\n~~~");
});
it("does not modify already-closed code fence", () => {
const text = "```py\ncode\n```";
expect(closePendingBlock(text)).toBe(text);
});
// --- display math blocks ---
it("closes an open math block", () => {
const result = closePendingBlock("$$\nx^2 + y^2");
expect(result).toBe("$$\nx^2 + y^2\n$$");
});
it("does not modify already-closed math block", () => {
const text = "$$\nx^2\n$$";
expect(closePendingBlock(text)).toBe(text);
});
it("closes an open \\[ math block with \\]", () => {
const result = closePendingBlock("\\[\nx^2 + y^2");
expect(result).toBe("\\[\nx^2 + y^2\n\\]");
});
it("does not modify already-closed \\[...\\] math block", () => {
const text = "\\[\nx^2\n\\]";
expect(closePendingBlock(text)).toBe(text);
});
it("closes code fence when preceded by regular text", () => {
const result = closePendingBlock("Some text\n```\ncode");
expect(result).toBe("Some text\n```\ncode\n```");
});
// --- constructs that need no closing delimiter ---
it("leaves headers unchanged", () => {
expect(closePendingBlock("## Hello")).toBe("## Hello");
});
it("leaves tables unchanged", () => {
const table = "| a | b |\n| --- | --- |\n| 1 | 2 |";
expect(closePendingBlock(table)).toBe(table);
});
it("leaves lists unchanged", () => {
expect(closePendingBlock("- item 1\n- item 2")).toBe("- item 1\n- item 2");
});
});
describe("renderStreamingMarkdown", () => {
  it("renders complete blocks and pending as markdown", () => {
    const cache = createStreamingCache();
    const rendered = renderStreamingMarkdown("# Hello\n\nWorld", cache);
    expect(rendered.blocks).toHaveLength(1);
    expect(rendered.blocks[0].html).toContain("<h1>Hello</h1>");
    expect(rendered.pendingHtml).toContain("World");
    expect(rendered.pendingHtml).toContain("<p>");
  });

  it("preserves existing blocks when complete portion is unchanged", () => {
    const cache = createStreamingCache();
    renderStreamingMarkdown("# Hello\n\nWor", cache);
    const blocksBefore = cache.blocks;
    const blocksAfter = renderStreamingMarkdown("# Hello\n\nWorld", cache).blocks;
    // Only the pending tail changed, so the very same array instance must come back.
    expect(blocksAfter).toBe(blocksBefore);
    expect(cache.completeKey).toBe("# Hello\n");
  });

  it("appends a new block when a new section completes", () => {
    const cache = createStreamingCache();
    renderStreamingMarkdown("# Hello\n\nParagraph", cache);
    expect(cache.blocks).toHaveLength(1);
    const original = cache.blocks[0];

    renderStreamingMarkdown("# Hello\n\nParagraph.\n\nMore", cache);
    expect(cache.blocks).toHaveLength(2);
    // The earlier block keeps its id and html.
    expect(cache.blocks[0].id).toBe(original.id);
    expect(cache.blocks[0].html).toBe(original.html);
    // The newly completed paragraph lands in the second block.
    expect(cache.blocks[1].html).toContain("Paragraph.");
  });

  it("assigns unique stable ids to each block", () => {
    const cache = createStreamingCache();
    renderStreamingMarkdown("A.\n\nB.\n\nC", cache);
    expect(cache.blocks).toHaveLength(1);
    const firstId = cache.blocks[0].id;

    renderStreamingMarkdown("A.\n\nB.\n\nC.\n\nD", cache);
    expect(cache.blocks).toHaveLength(2);
    expect(cache.blocks[0].id).toBe(firstId);
    expect(cache.blocks[1].id).toBe(firstId + 1);
  });

  it("renders pending code block with syntax highlighting", () => {
    const cache = createStreamingCache();
    const rendered = renderStreamingMarkdown("Done.\n\n```python\nprint('hello')", cache);
    expect(rendered.pendingHtml).toContain("<code");
    expect(rendered.pendingHtml).toContain("hljs");
  });

  it("renders pending table as markdown", () => {
    const cache = createStreamingCache();
    const rendered = renderStreamingMarkdown("Done.\n\n| a | b |\n| --- | --- |\n| 1 | 2 |", cache);
    expect(rendered.pendingHtml).toContain("<table>");
    expect(rendered.pendingHtml).toContain("<td>");
  });

  it("renders pending portion through markdown pipeline", () => {
    const cache = createStreamingCache();
    const rendered = renderStreamingMarkdown("Done.\n\nSome **bold** text", cache);
    expect(rendered.pendingHtml).toContain("<strong>bold</strong>");
  });
});
+177 -1
View File
@@ -69,13 +69,189 @@ const processor = unified()
.use(rehypeHighlight) .use(rehypeHighlight)
.use(rehypeStringify, { allowDangerousHtml: true }); .use(rehypeStringify, { allowDangerousHtml: true });
/**
 * Split streamed markdown into a prefix that will no longer change and a tail
 * that is still being written.
 *
 * A line ends a complete block when it is
 *  - a blank line outside any fence or math block,
 *  - the closing line of a fenced code block, or
 *  - the closing line of a display-math block (`$$` or `\]`).
 *
 * Fences follow the CommonMark rule: the closing fence must use the same
 * character as the opening fence and be at least as long, so a four-backtick
 * fence may contain three-backtick lines. A math block is closed only by the
 * delimiter that matches its opener (`$$` with `$$`, `\[` with `\]`).
 *
 * @param text Markdown received so far.
 * @returns `complete` holds every line up to and including the last boundary
 *   line; `pending` holds the remaining lines. Both are joined with "\n", so
 *   `complete + "\n" + pending` reproduces `text` whenever a boundary exists.
 *   With no boundary, `complete` is "" and `pending` is the whole text.
 */
export function splitCompleteBlocks(text: string): { complete: string; pending: string } {
  if (!text) {
    return { complete: "", pending: "" };
  }

  // Compiled once per call instead of once per line inside the loop.
  const fenceOpenRe = /^\s*(`{3,}|~{3,})/; // opening fence, info string may follow
  const fenceRunRe = /^\s*(`+|~+)\s*$/; // a line holding nothing but one marker run

  const lines = text.split("\n");
  let lastCompleteBoundary = -1; // index of last line that ends a complete block
  let fenceChar = ""; // '`' or '~' while inside a fence
  let fenceLen = 0; // length of the opening fence; 0 means "not in a fence"
  let mathClose = ""; // closer expected for the open math block; "" means none

  for (let i = 0; i < lines.length; i++) {
    const trimmed = lines[i].trimEnd();

    if (fenceLen > 0) {
      // Closing fence: same character, at least as long as the opener, no other content.
      const run = fenceRunRe.exec(trimmed);
      if (run && run[1][0] === fenceChar && run[1].length >= fenceLen) {
        fenceLen = 0;
        fenceChar = "";
        lastCompleteBoundary = i;
      }
      continue;
    }

    if (mathClose) {
      if (trimmed === mathClose) {
        mathClose = "";
        lastCompleteBoundary = i;
      }
      continue;
    }

    // Not inside anything: a marker run at line start can only be an opener.
    const opening = fenceOpenRe.exec(trimmed);
    if (opening) {
      fenceChar = opening[1][0];
      fenceLen = opening[1].length;
      continue;
    }

    if (trimmed === "$$") {
      mathClose = "$$";
      continue;
    }
    if (trimmed === "\\[") {
      mathClose = "\\]";
      continue;
    }

    // Outside fences/math: a blank line marks a complete boundary.
    if (trimmed === "") {
      lastCompleteBoundary = i;
    }
  }

  if (lastCompleteBoundary < 0) {
    return { complete: "", pending: text };
  }

  return {
    complete: lines.slice(0, lastCompleteBoundary + 1).join("\n"),
    pending: lines.slice(lastCompleteBoundary + 1).join("\n"),
  };
}
/**
 * Append the missing closing delimiter to a still-streaming markdown fragment
 * so that it can be rendered as if its last block were finished.
 *
 * Only fenced code blocks and display-math blocks are tracked; every other
 * construct (headers, lists, tables, paragraphs) is returned untouched.
 *
 * - Fences follow CommonMark: the closer must use the same character as the
 *   opener and be at least as long. The appended closer repeats the full
 *   opening run, so a four-backtick fence is closed with four backticks.
 * - A math block is closed only by the delimiter matching its opener
 *   (`$$` with `$$`, `\[` with `\]`).
 *
 * @param pending Trailing, possibly unfinished markdown.
 * @returns `pending` unchanged when nothing is open, otherwise `pending`
 *   followed by "\n" and the required closer. Empty input yields "".
 */
export function closePendingBlock(pending: string): string {
  if (!pending) return "";

  // Compiled once per call instead of once per line inside the loop.
  const fenceOpenRe = /^\s*(`{3,}|~{3,})/; // opening fence, info string may follow
  const fenceRunRe = /^\s*(`+|~+)\s*$/; // a line holding nothing but one marker run

  let openFence = ""; // full opening marker run; "" means "not in a fence"
  let mathClose = ""; // closer expected for the open math block; "" means none

  for (const line of pending.split("\n")) {
    const trimmed = line.trimEnd();

    if (openFence) {
      const run = fenceRunRe.exec(trimmed);
      if (run && run[1][0] === openFence[0] && run[1].length >= openFence.length) {
        openFence = "";
      }
      continue;
    }

    if (mathClose) {
      if (trimmed === mathClose) {
        mathClose = "";
      }
      continue;
    }

    const opening = fenceOpenRe.exec(trimmed);
    if (opening) {
      openFence = opening[1];
      continue;
    }

    if (trimmed === "$$") {
      mathClose = "$$";
      continue;
    }
    if (trimmed === "\\[") {
      mathClose = "\\]";
      continue;
    }
  }

  if (openFence) return pending + "\n" + openFence;
  if (mathClose) return pending + "\n" + mathClose;
  return pending;
}
/** One rendered chunk of the completed part of a streamed markdown document. */
export interface RenderedBlock {
// Taken from the owning cache's `nextId` counter when the block is created.
id: number;
// HTML string returned by `renderMarkdown` for this chunk.
html: string;
}
/** Mutable state that `renderStreamingMarkdown` carries between calls. */
export interface StreamingCache {
// Blocks rendered for the completed text; reassigned to a new array whenever it changes.
blocks: RenderedBlock[];
// Id that the next created block will receive; incremented on each new block.
nextId: number;
// The completed text that `blocks` was rendered from; "" when there is none.
completeKey: string;
}
/**
 * Build a fresh cache for `renderStreamingMarkdown`: no blocks, ids starting
 * at 0 and an empty complete-text key. Every call returns a new object.
 */
export function createStreamingCache(): StreamingCache {
  const emptyCache: StreamingCache = {
    completeKey: "",
    nextId: 0,
    blocks: [],
  };
  return emptyCache;
}
/**
 * Render streamed markdown incrementally.
 *
 * The text is split with `splitCompleteBlocks`. The completed prefix is
 * rendered into cached blocks so earlier HTML is not regenerated on every
 * chunk; the pending tail is closed with `closePendingBlock` and rendered
 * from scratch on each call.
 *
 * Cache handling:
 *  - completed text unchanged: `cache.blocks` is returned as is (same array);
 *  - completed text extends the cached text: only the added suffix is
 *    rendered and appended as one new block;
 *  - completed text differs otherwise: everything is re-rendered as a single block;
 *  - no completed text: any cached blocks are dropped.
 *
 * @param text Full markdown received so far.
 * @param cache State from `createStreamingCache`; mutated in place.
 * @returns The cached blocks plus the HTML for the pending tail ("" if none).
 */
export function renderStreamingMarkdown(
  text: string,
  cache: StreamingCache,
): { blocks: RenderedBlock[]; pendingHtml: string } {
  const split = splitCompleteBlocks(text);
  const stable = split.complete;
  const tail = split.pending;

  if (!stable) {
    // Nothing is complete (any more): forget whatever was rendered before.
    if (cache.blocks.length > 0) {
      cache.blocks = [];
      cache.completeKey = "";
    }
  } else if (stable !== cache.completeKey) {
    const previous = cache.completeKey;
    const grewFromPrevious = previous.length > 0 && stable.startsWith(previous);
    const id = cache.nextId++;

    if (grewFromPrevious) {
      // Render just the newly completed suffix and keep the earlier blocks.
      const appended: RenderedBlock = { id, html: renderMarkdown(stable.slice(previous.length)) };
      cache.blocks = [...cache.blocks, appended];
    } else {
      // First completed text, or the prefix changed: start over with one block.
      cache.blocks = [{ id, html: renderMarkdown(stable) }];
    }
    cache.completeKey = stable;
  }

  const pendingHtml = tail ? renderMarkdown(closePendingBlock(tail)) : "";
  return { blocks: cache.blocks, pendingHtml };
}
/**
 * Convert LaTeX bracket delimiters to dollar delimiters:
 * `\[...\]` becomes `$$...$$` (may span lines) and `\(...\)` becomes `$...$`.
 *
 * Code is left untouched so that source such as `grep "\[error\]"` is not
 * rewritten into math:
 *  - fenced code blocks (``` or ~~~, closed by a run of the same character
 *    that is at least as long; an unclosed fence runs to the end of the text);
 *  - inline code spans delimited by equal-length backtick runs that do not
 *    cross a blank line.
 * Indented (four-space) code blocks are not detected.
 *
 * @param text Markdown source.
 * @returns The text with math delimiters normalised outside of code.
 */
export function normalizeLatexDelimiters(text: string): string {
  const fenceOpenRe = /^\s*(`{3,}|~{3,})/; // opening fence, info string may follow
  const fenceRunRe = /^\s*(`+|~+)\s*$/; // a line holding nothing but one marker run
  // Each pattern matches an inline code span first (group 1 set, kept verbatim)
  // or, failing that, the math delimiter pair (inner text in group 2).
  const displayRe = /(`+)(?:(?!\n\s*\n)[\s\S])*?\1|\\\[([\s\S]*?)\\\]/g;
  const inlineRe = /(`+)(?:(?!\n\s*\n)[\s\S])*?\1|\\\(([\s\S]*?)\\\)/g;

  // Display math is converted before inline math, matching the original order.
  const convertProse = (prose: string): string =>
    prose
      .replace(displayRe, (match: string, ticks: string | undefined, inner: string | undefined) =>
        ticks !== undefined ? match : `$$${inner ?? ""}$$`,
      )
      .replace(inlineRe, (match: string, ticks: string | undefined, inner: string | undefined) =>
        ticks !== undefined ? match : `$${inner ?? ""}$`,
      );

  const output: string[] = [];
  let prose: string[] = []; // consecutive non-code lines awaiting conversion
  let fenceChar = "";
  let fenceLen = 0; // 0 means "not in a fence"

  const flushProse = (): void => {
    if (prose.length > 0) {
      output.push(convertProse(prose.join("\n")));
      prose = [];
    }
  };

  for (const line of text.split("\n")) {
    if (fenceLen > 0) {
      output.push(line); // code lines pass through verbatim
      const run = fenceRunRe.exec(line);
      if (run && run[1][0] === fenceChar && run[1].length >= fenceLen) {
        fenceLen = 0;
        fenceChar = "";
      }
      continue;
    }

    const opening = fenceOpenRe.exec(line);
    if (opening) {
      flushProse();
      fenceChar = opening[1][0];
      fenceLen = opening[1].length;
      output.push(line);
      continue;
    }

    prose.push(line);
  }
  flushProse();

  return output.join("\n");
}
export function renderMarkdown(content: string): string { export function renderMarkdown(content: string): string {
if (!content) { if (!content) {
return ""; return "";
} }
try { try {
const result = processor.processSync(content); const result = processor.processSync(normalizeLatexDelimiters(content));
return String(result); return String(result);
} catch { } catch {
// Fallback to escaped plain text if markdown parsing fails // Fallback to escaped plain text if markdown parsing fails
+15 -1
View File
@@ -21,6 +21,16 @@ export interface Metrics {
prompt_per_second: number; prompt_per_second: number;
tokens_per_second: number; tokens_per_second: number;
duration_ms: number; duration_ms: number;
has_capture: boolean;
}
export interface ReqRespCapture {
id: number;
req_path: string;
req_headers: Record<string, string>;
req_body: string; // base64 encoded bytes
resp_headers: Record<string, string>;
resp_body: string; // base64 encoded bytes
} }
export interface LogData { export interface LogData {
@@ -28,8 +38,12 @@ export interface LogData {
data: string; data: string;
} }
/** Shape the `data` field of an "inflight" API event is parsed into. */
export interface InFlightStats {
// Count of in-flight requests; the event handler stores this in `inFlightRequests`.
total: number;
}
export interface APIEventEnvelope { export interface APIEventEnvelope {
type: "modelStatus" | "logData" | "metrics"; type: "modelStatus" | "logData" | "metrics" | "inflight";
data: string; data: string;
} }
+38 -1
View File
@@ -1,6 +1,8 @@
<script lang="ts"> <script lang="ts">
import { metrics } from "../stores/api"; import { metrics, getCapture } from "../stores/api";
import Tooltip from "../components/Tooltip.svelte"; import Tooltip from "../components/Tooltip.svelte";
import CaptureDialog from "../components/CaptureDialog.svelte";
import type { ReqRespCapture } from "../lib/types";
function formatSpeed(speed: number): string { function formatSpeed(speed: number): string {
return speed < 0 ? "unknown" : speed.toFixed(2) + " t/s"; return speed < 0 ? "unknown" : speed.toFixed(2) + " t/s";
@@ -38,6 +40,25 @@
} }
let sortedMetrics = $derived([...$metrics].sort((a, b) => b.id - a.id)); let sortedMetrics = $derived([...$metrics].sort((a, b) => b.id - a.id));
// Capture passed to <CaptureDialog>; null when none is selected.
let selectedCapture = $state<ReqRespCapture | null>(null);
// Bound to the dialog's `open` prop.
let dialogOpen = $state(false);
// Id of the metric whose capture is being fetched; that row's button is disabled meanwhile.
let loadingCaptureId = $state<number | null>(null);
/**
 * Fetch the capture for a metric row and show it in the dialog.
 *
 * Only the most recently requested capture is shown. If another row is
 * clicked while this request is in flight, this response is dropped and the
 * newer request's loading indicator is left alone. A null result from
 * `getCapture` (not found or failed) leaves the dialog closed.
 *
 * @param id Id of the metric whose capture should be displayed.
 */
async function viewCapture(id: number): Promise<void> {
  loadingCaptureId = id;
  try {
    const capture = await getCapture(id);
    // A newer click superseded this request; ignore the stale response.
    if (loadingCaptureId !== id) return;
    if (capture) {
      selectedCapture = capture;
      dialogOpen = true;
    }
  } finally {
    // Clear the loading marker only if it still belongs to this request.
    if (loadingCaptureId === id) {
      loadingCaptureId = null;
    }
  }
}
// Passed to <CaptureDialog> as `onclose`: hides the dialog and drops the selected capture.
function closeDialog() {
dialogOpen = false;
selectedCapture = null;
}
</script> </script>
<div class="p-2"> <div class="p-2">
@@ -65,6 +86,7 @@
<th class="px-6 py-3">Prompt Processing</th> <th class="px-6 py-3">Prompt Processing</th>
<th class="px-6 py-3">Generation Speed</th> <th class="px-6 py-3">Generation Speed</th>
<th class="px-6 py-3">Duration</th> <th class="px-6 py-3">Duration</th>
<th class="px-6 py-3">Capture</th>
</tr> </tr>
</thead> </thead>
<tbody class="divide-y"> <tbody class="divide-y">
@@ -79,6 +101,19 @@
<td class="px-6 py-4">{formatSpeed(metric.prompt_per_second)}</td> <td class="px-6 py-4">{formatSpeed(metric.prompt_per_second)}</td>
<td class="px-6 py-4">{formatSpeed(metric.tokens_per_second)}</td> <td class="px-6 py-4">{formatSpeed(metric.tokens_per_second)}</td>
<td class="px-6 py-4">{formatDuration(metric.duration_ms)}</td> <td class="px-6 py-4">{formatDuration(metric.duration_ms)}</td>
<td class="px-6 py-4">
{#if metric.has_capture}
<button
onclick={() => viewCapture(metric.id)}
disabled={loadingCaptureId === metric.id}
class="btn btn--sm"
>
{loadingCaptureId === metric.id ? "..." : "View"}
</button>
{:else}
<span class="text-txtsecondary">-</span>
{/if}
</td>
</tr> </tr>
{/each} {/each}
</tbody> </tbody>
@@ -86,3 +121,5 @@
</div> </div>
{/if} {/if}
</div> </div>
<CaptureDialog capture={selectedCapture} open={dialogOpen} onclose={closeDialog} />
@@ -0,0 +1 @@
<!-- empty: real Playground is always mounted in App.svelte -->
+25 -1
View File
@@ -1,5 +1,5 @@
import { writable } from "svelte/store"; import { writable } from "svelte/store";
import type { Model, Metrics, VersionInfo, LogData, APIEventEnvelope } from "../lib/types"; import type { Model, Metrics, VersionInfo, LogData, APIEventEnvelope, ReqRespCapture, InFlightStats } from "../lib/types";
import { connectionState } from "./theme"; import { connectionState } from "./theme";
const LOG_LENGTH_LIMIT = 1024 * 100; /* 100KB of log data */ const LOG_LENGTH_LIMIT = 1024 * 100; /* 100KB of log data */
@@ -9,6 +9,7 @@ export const models = writable<Model[]>([]);
export const proxyLogs = writable<string>(""); export const proxyLogs = writable<string>("");
export const upstreamLogs = writable<string>(""); export const upstreamLogs = writable<string>("");
export const metrics = writable<Metrics[]>([]); export const metrics = writable<Metrics[]>([]);
export const inFlightRequests = writable<number>(0);
export const versionInfo = writable<VersionInfo>({ export const versionInfo = writable<VersionInfo>({
build_date: "unknown", build_date: "unknown",
commit: "unknown", commit: "unknown",
@@ -29,6 +30,7 @@ export function enableAPIEvents(enabled: boolean): void {
apiEventSource?.close(); apiEventSource?.close();
apiEventSource = null; apiEventSource = null;
metrics.set([]); metrics.set([]);
inFlightRequests.set(0);
return; return;
} }
@@ -46,6 +48,7 @@ export function enableAPIEvents(enabled: boolean): void {
proxyLogs.set(""); proxyLogs.set("");
upstreamLogs.set(""); upstreamLogs.set("");
metrics.set([]); metrics.set([]);
inFlightRequests.set(0);
models.set([]); models.set([]);
retryCount = 0; retryCount = 0;
connectionState.set("connected"); connectionState.set("connected");
@@ -83,6 +86,11 @@ export function enableAPIEvents(enabled: boolean): void {
metrics.update((prevMetrics) => [...newMetrics, ...prevMetrics]); metrics.update((prevMetrics) => [...newMetrics, ...prevMetrics]);
break; break;
} }
case "inflight": {
const stats = JSON.parse(message.data) as InFlightStats;
inFlightRequests.set(stats.total ?? 0);
break;
}
} }
} catch (err) { } catch (err) {
console.error(e.data, err); console.error(e.data, err);
@@ -172,3 +180,19 @@ export async function loadModel(model: string): Promise<void> {
throw error; throw error;
} }
} }
/**
 * Fetch a stored request/response capture from `/api/captures/{id}`.
 *
 * @param id Capture id to look up.
 * @returns The parsed JSON body, or null when the server answers 404 or the
 *   request fails for any other reason (non-404 failures are logged to the console).
 */
export async function getCapture(id: number): Promise<ReqRespCapture | null> {
  try {
    const res = await fetch(`/api/captures/${id}`);
    if (res.status !== 404) {
      if (res.ok) {
        // Awaited inside the try so a JSON parse failure is caught below.
        return await res.json();
      }
      throw new Error(`Failed to fetch capture: ${res.status}`);
    }
    return null;
  } catch (error) {
    console.error("Failed to fetch capture:", error);
    return null;
  }
}
@@ -0,0 +1,18 @@
import { writable, derived } from "svelte/store";
// One busy flag per playground feature, all starting out idle.
const busyFlag = () => writable(false);
const chatStreaming = busyFlag();
const imageGenerating = busyFlag();
const speechGenerating = busyFlag();
const audioTranscribing = busyFlag();

// True while at least one of the four feature flags is set.
export const playgroundActivity = derived(
  [chatStreaming, imageGenerating, speechGenerating, audioTranscribing],
  (flags) => flags.some(Boolean),
);

// The writable flags, grouped under one export.
export const playgroundStores = {
  chatStreaming,
  imageGenerating,
  speechGenerating,
  audioTranscribing,
};
+3
View File
@@ -0,0 +1,3 @@
import { writable } from "svelte/store";
// Store holding the current route string; starts at "/".
export const currentRoute = writable("/");