Compare commits
36 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 5de387dbf9 | |||
| 6f8e7ccb57 | |||
| 4384315b44 | |||
| 6439ab1515 | |||
| f94226122c | |||
| 7493618fdc | |||
| 205efd40a1 | |||
| 14207f8492 | |||
| 4e850c2834 | |||
| 75fced579e | |||
| b73f367f22 | |||
| 8f2137c72b | |||
| 124007cc98 | |||
| eb5bfff0b0 | |||
| 3edb180c08 | |||
| 66d555e625 | |||
| 4f863fd9fc | |||
| 267c030457 | |||
| c19309fe7e | |||
| 4413881b2d | |||
| 8df5e8563b | |||
| 7931212d3e | |||
| 3dc36032fb | |||
| addb98646f | |||
| 37d74efc2d | |||
| 22e098ac8b | |||
| 9864f9f517 | |||
| 53b32f3601 | |||
| 565c44766d | |||
| e6a9e210ba | |||
| d3f329f924 | |||
| 98879b38c1 | |||
| 7b3b0f5eae | |||
| 021ccceef1 | |||
| f03871c50a | |||
| dc00d17abe |
@@ -8,8 +8,15 @@ reviews:
|
|||||||
poem: false
|
poem: false
|
||||||
review_status: true
|
review_status: true
|
||||||
collapse_walkthrough: false
|
collapse_walkthrough: false
|
||||||
|
sequence_diagrams: false
|
||||||
|
finishing_touches:
|
||||||
|
docstrings:
|
||||||
|
enabled: false
|
||||||
auto_review:
|
auto_review:
|
||||||
enabled: true
|
enabled: true
|
||||||
drafts: false
|
drafts: false
|
||||||
chat:
|
chat:
|
||||||
auto_reply: true
|
auto_reply: true
|
||||||
|
issue_enrichment:
|
||||||
|
planning:
|
||||||
|
enabled: false
|
||||||
|
|||||||
@@ -10,17 +10,36 @@ on:
|
|||||||
# Allows manual triggering of the workflow
|
# Allows manual triggering of the workflow
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
|
||||||
|
# Run on workflow file changes (without pushing)
|
||||||
|
push:
|
||||||
|
paths:
|
||||||
|
- '.github/workflows/containers.yml'
|
||||||
|
- 'docker/build-container.sh'
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build-and-push:
|
build-and-push:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
platform: [intel, cuda, vulkan, cpu, musa]
|
platform: [intel, cuda, vulkan, cpu, musa, rocm]
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Free up disk space
|
||||||
|
if: matrix.platform == 'rocm'
|
||||||
|
run: |
|
||||||
|
echo "Before cleanup:"
|
||||||
|
df -h
|
||||||
|
sudo rm -rf /usr/share/dotnet
|
||||||
|
sudo rm -rf /usr/local/lib/android
|
||||||
|
sudo rm -rf /opt/ghc
|
||||||
|
sudo rm -rf /opt/hostedtoolcache/CodeQL
|
||||||
|
sudo docker system prune -af
|
||||||
|
echo "After cleanup:"
|
||||||
|
df -h
|
||||||
|
|
||||||
- name: Log in to GitHub Container Registry
|
- name: Log in to GitHub Container Registry
|
||||||
uses: docker/login-action@v2
|
uses: docker/login-action@v2
|
||||||
with:
|
with:
|
||||||
@@ -31,7 +50,7 @@ jobs:
|
|||||||
- name: Run build-container
|
- name: Run build-container
|
||||||
env:
|
env:
|
||||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
run: ./docker/build-container.sh ${{ matrix.platform }} true
|
run: ./docker/build-container.sh ${{ matrix.platform }} ${{ github.event_name != 'push' }}
|
||||||
|
|
||||||
# note make sure mostlygeek/llama-swap has admin rights to the llama-swap package
|
# note make sure mostlygeek/llama-swap has admin rights to the llama-swap package
|
||||||
# see: https://github.com/actions/delete-package-versions/issues/74
|
# see: https://github.com/actions/delete-package-versions/issues/74
|
||||||
|
|||||||
@@ -3,13 +3,13 @@ name: goreleaser
|
|||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
tags:
|
tags:
|
||||||
- '*'
|
- "*"
|
||||||
|
|
||||||
# Allows manual triggering of the workflow
|
# Allows manual triggering of the workflow
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
inputs:
|
inputs:
|
||||||
tag:
|
tag:
|
||||||
description: 'Tag version to release (e.g. v144)'
|
description: "Tag version to release (e.g. v144)"
|
||||||
required: true
|
required: true
|
||||||
|
|
||||||
permissions:
|
permissions:
|
||||||
@@ -19,35 +19,30 @@ jobs:
|
|||||||
goreleaser:
|
goreleaser:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
-
|
- name: Checkout
|
||||||
name: Checkout
|
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
ref: ${{ github.event.inputs.tag || github.ref }}
|
ref: ${{ github.event.inputs.tag || github.ref }}
|
||||||
-
|
- name: Set up Go
|
||||||
name: Set up Go
|
|
||||||
uses: actions/setup-go@v5
|
uses: actions/setup-go@v5
|
||||||
-
|
- name: Set up Node.js
|
||||||
name: Set up Node.js
|
|
||||||
uses: actions/setup-node@v4
|
uses: actions/setup-node@v4
|
||||||
with:
|
with:
|
||||||
node-version: '23'
|
node-version: "23"
|
||||||
-
|
- name: Install dependencies and build UI
|
||||||
name: Install dependencies and build UI
|
|
||||||
run: |
|
run: |
|
||||||
cd ui
|
cd ui-svelte
|
||||||
npm ci
|
npm ci
|
||||||
npm run build
|
npm run build
|
||||||
|
|
||||||
-
|
- name: Run GoReleaser
|
||||||
name: Run GoReleaser
|
|
||||||
uses: goreleaser/goreleaser-action@v6
|
uses: goreleaser/goreleaser-action@v6
|
||||||
with:
|
with:
|
||||||
# either 'goreleaser' (default) or 'goreleaser-pro'
|
# either 'goreleaser' (default) or 'goreleaser-pro'
|
||||||
distribution: goreleaser
|
distribution: goreleaser
|
||||||
# 'latest', 'nightly', or a semver
|
# 'latest', 'nightly', or a semver
|
||||||
version: '~> v2'
|
version: "~> v2"
|
||||||
args: release --clean
|
args: release --clean
|
||||||
env:
|
env:
|
||||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
@@ -76,4 +71,4 @@ jobs:
|
|||||||
"release": {
|
"release": {
|
||||||
"tag_name": "${{ steps.tag.outputs.tag }}"
|
"tag_name": "${{ steps.tag.outputs.tag }}"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,3 @@
|
|||||||
# Project: llama-swap
|
|
||||||
|
|
||||||
## Project Description:
|
## Project Description:
|
||||||
|
|
||||||
llama-swap is a lightweight, transparent proxy server that provides automatic model swapping to llama.cpp's server.
|
llama-swap is a lightweight, transparent proxy server that provides automatic model swapping to llama.cpp's server.
|
||||||
@@ -7,37 +5,45 @@ llama-swap is a light weight, transparent proxy server that provides automatic m
|
|||||||
## Tech stack
|
## Tech stack
|
||||||
|
|
||||||
- golang
|
- golang
|
||||||
- typescript, vite and react for UI (ui/)
|
- typescript, vite and react for UI (located in ui/)
|
||||||
|
|
||||||
## Testing
|
|
||||||
|
|
||||||
- `make test-dev` - Use this when making iterative changes. Runs `go test` and `staticcheck`. Fix any static checking errors. Use this only when changes are made to any code under the `proxy/` directory
|
|
||||||
- `make test-all` - runs at the end before completing work. Includes long running concurrency tests.
|
|
||||||
|
|
||||||
## Workflow Tasks
|
## Workflow Tasks
|
||||||
|
|
||||||
### Plan Improvements
|
- when summarizing changes only include details that require further action
|
||||||
|
- just say "Done." when there is no further action
|
||||||
|
- use `gh` to create PRs and load issues
|
||||||
|
- do include Co-Authored-By or created by when committing changes or creating PRs
|
||||||
|
- keep PR descriptions short and focused on changes.
|
||||||
|
- never include a test plan
|
||||||
|
|
||||||
Work plans are located in ai-plans/. Plans written by the user may be incomplete, contain inconsistencies or errors.
|
## Testing
|
||||||
|
|
||||||
When the user asks to improve a plan follow these guidelines for expanding and improving it.
|
- Follow test naming conventions like `TestProxyManager_<test name>`, `TestProcessGroup_<test name>`, etc.
|
||||||
|
- Use `go test -v -run <name pattern for new tests>` to run any new tests you've written.
|
||||||
|
- Use `make test-dev` after running new tests for a quick overall test run. This runs `go test` and `staticcheck`. Fix any static checking errors. Use this only when changes are made to any code under the `proxy/` directory.
|
||||||
|
- Use `make test-all` before completing work. This includes long running concurrency tests.
|
||||||
|
|
||||||
- Identify any inconsistencies.
|
### Commit message example format:
|
||||||
- Expand plans out to be detailed specification of requirements and changes to be made.
|
|
||||||
- Plans should have at least these sections:
|
|
||||||
- Title - very short, describes changes
|
|
||||||
- Overview: A more detailed summary of goal and outcomes desired
|
|
||||||
- Design Requirements: Detailed descriptions of what needs to be done
|
|
||||||
- Testing Plan: Tests to be implemented
|
|
||||||
- Checklist: A detailed list of changes to be made
|
|
||||||
|
|
||||||
Look for "plan expansion" as explicit instructions to improve a plan.
|
```
|
||||||
|
proxy: add new feature
|
||||||
|
|
||||||
### Implementation of plans
|
Add new feature that implements functionality X and Y.
|
||||||
|
|
||||||
When the user says "paint it", respond with "commencing automated assembly". Then implement the changes as described by the plan. Update the checklist as you complete items.
|
- key change 1
|
||||||
|
- key change 2
|
||||||
|
- key change 3
|
||||||
|
|
||||||
## General Rules
|
fixes #123
|
||||||
|
```
|
||||||
|
|
||||||
- when summarizing changes only include details that require further action (action items)
|
## Code Reviews
|
||||||
- when there are no action items, just say "Done."
|
|
||||||
|
- use three levels High, Medium, Low severity
|
||||||
|
- label each discovered issue with a label like H1, M2, L3 respectively
|
||||||
|
- High severity are must fix issues (security, race conditions, critical bugs)
|
||||||
|
- Medium severity are recommended improvements (coding style, missing functionality, inconsistencies)
|
||||||
|
- Low severity are nice to have changes and nits
|
||||||
|
- Include a suggestion with each discovered item
|
||||||
|
- Limit your code review to three items with the highest priority first
|
||||||
|
- Double check your discovered items and recommended remediations
|
||||||
|
|||||||
@@ -36,11 +36,11 @@ test-all: proxy/ui_dist/placeholder.txt
|
|||||||
go test -race -count=1 ./proxy/...
|
go test -race -count=1 ./proxy/...
|
||||||
|
|
||||||
ui/node_modules:
|
ui/node_modules:
|
||||||
cd ui && npm install
|
cd ui-svelte && npm install
|
||||||
|
|
||||||
# build react UI
|
# build react UI
|
||||||
ui: ui/node_modules
|
ui: ui/node_modules
|
||||||
cd ui && npm run build
|
cd ui-svelte && npm run build
|
||||||
|
|
||||||
# Build OSX binary
|
# Build OSX binary
|
||||||
mac: ui
|
mac: ui
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||

|

|
||||||

|

|
||||||

|

|
||||||

|

|
||||||
@@ -13,14 +13,21 @@ Built in Go for performance and simplicity, llama-swap has zero dependencies and
|
|||||||
|
|
||||||
- ✅ Easy to deploy and configure: one binary, one configuration file. no external dependencies
|
- ✅ Easy to deploy and configure: one binary, one configuration file. no external dependencies
|
||||||
- ✅ On-demand model switching
|
- ✅ On-demand model switching
|
||||||
- ✅ Use any local OpenAI compatible server (llama.cpp, vllm, tabbyAPI, etc)
|
- ✅ Use any local OpenAI compatible server (llama.cpp, vllm, tabbyAPI, etc.)
|
||||||
- future proof, upgrade your inference servers at any time.
|
- future proof, upgrade your inference servers at any time.
|
||||||
- ✅ OpenAI API supported endpoints:
|
- ✅ OpenAI API supported endpoints:
|
||||||
- `v1/completions`
|
- `v1/completions`
|
||||||
- `v1/chat/completions`
|
- `v1/chat/completions`
|
||||||
|
- `v1/responses`
|
||||||
- `v1/embeddings`
|
- `v1/embeddings`
|
||||||
- `v1/audio/speech` ([#36](https://github.com/mostlygeek/llama-swap/issues/36))
|
- `v1/audio/speech` ([#36](https://github.com/mostlygeek/llama-swap/issues/36))
|
||||||
- `v1/audio/transcriptions` ([docs](https://github.com/mostlygeek/llama-swap/issues/41#issuecomment-2722637867))
|
- `v1/audio/transcriptions` ([docs](https://github.com/mostlygeek/llama-swap/issues/41#issuecomment-2722637867))
|
||||||
|
- `v1/audio/voices`
|
||||||
|
- `v1/images/generations`
|
||||||
|
- `v1/images/edits`
|
||||||
|
- ✅ Anthropic API supported endpoints:
|
||||||
|
- `v1/messages`
|
||||||
|
- `v1/messages/count_tokens`
|
||||||
- ✅ llama-server (llama.cpp) supported endpoints
|
- ✅ llama-server (llama.cpp) supported endpoints
|
||||||
- `v1/rerank`, `v1/reranking`, `/rerank`
|
- `v1/rerank`, `v1/reranking`, `/rerank`
|
||||||
- `/infill` - for code infilling
|
- `/infill` - for code infilling
|
||||||
@@ -32,6 +39,7 @@ Built in Go for performance and simplicity, llama-swap has zero dependencies and
|
|||||||
- `/running` - list currently running models ([#61](https://github.com/mostlygeek/llama-swap/issues/61))
|
- `/running` - list currently running models ([#61](https://github.com/mostlygeek/llama-swap/issues/61))
|
||||||
- `/log` - remote log monitoring
|
- `/log` - remote log monitoring
|
||||||
- `/health` - just returns "OK"
|
- `/health` - just returns "OK"
|
||||||
|
- ✅ API Key support - define keys to restrict access to API endpoints
|
||||||
- ✅ Customizable
|
- ✅ Customizable
|
||||||
- Run multiple models at once with `Groups` ([#107](https://github.com/mostlygeek/llama-swap/issues/107))
|
- Run multiple models at once with `Groups` ([#107](https://github.com/mostlygeek/llama-swap/issues/107))
|
||||||
- Automatic unloading of models after timeout by setting a `ttl`
|
- Automatic unloading of models after timeout by setting a `ttl`
|
||||||
@@ -44,7 +52,6 @@ llama-swap includes a real time web interface for monitoring logs and controllin
|
|||||||
|
|
||||||
<img width="1164" height="745" alt="image" src="https://github.com/user-attachments/assets/bacf3f9d-819f-430b-9ed2-1bfaa8d54579" />
|
<img width="1164" height="745" alt="image" src="https://github.com/user-attachments/assets/bacf3f9d-819f-430b-9ed2-1bfaa8d54579" />
|
||||||
|
|
||||||
|
|
||||||
The Activity Page shows recent requests:
|
The Activity Page shows recent requests:
|
||||||
|
|
||||||
<img width="1360" height="963" alt="image" src="https://github.com/user-attachments/assets/5f3edee6-d03a-4ae5-ae06-b20ac1f135bd" />
|
<img width="1360" height="963" alt="image" src="https://github.com/user-attachments/assets/5f3edee6-d03a-4ae5-ae06-b20ac1f135bd" />
|
||||||
@@ -61,7 +68,7 @@ llama-swap can be installed in multiple ways
|
|||||||
|
|
||||||
### Docker Install ([download images](https://github.com/mostlygeek/llama-swap/pkgs/container/llama-swap))
|
### Docker Install ([download images](https://github.com/mostlygeek/llama-swap/pkgs/container/llama-swap))
|
||||||
|
|
||||||
Nightly container images with llama-swap and llama-server are built for multiple platforms (cuda, vulkan, intel, etc).
|
Nightly container images with llama-swap and llama-server are built for multiple platforms (cuda, vulkan, intel, etc.) including [non-root variants with improved security](docs/container-security.md).
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
$ docker pull ghcr.io/mostlygeek/llama-swap:cuda
|
$ docker pull ghcr.io/mostlygeek/llama-swap:cuda
|
||||||
@@ -71,6 +78,14 @@ $ docker run -it --rm --runtime nvidia -p 9292:8080 \
|
|||||||
-v /path/to/models:/models \
|
-v /path/to/models:/models \
|
||||||
-v /path/to/custom/config.yaml:/app/config.yaml \
|
-v /path/to/custom/config.yaml:/app/config.yaml \
|
||||||
ghcr.io/mostlygeek/llama-swap:cuda
|
ghcr.io/mostlygeek/llama-swap:cuda
|
||||||
|
|
||||||
|
# configuration hot reload supported with a
|
||||||
|
# directory volume mount
|
||||||
|
$ docker run -it --rm --runtime nvidia -p 9292:8080 \
|
||||||
|
-v /path/to/models:/models \
|
||||||
|
-v /path/to/custom/config.yaml:/app/config.yaml \
|
||||||
|
-v /path/to/config:/config \
|
||||||
|
ghcr.io/mostlygeek/llama-swap:cuda -config /config/config.yaml -watch-config
|
||||||
```
|
```
|
||||||
|
|
||||||
<details>
|
<details>
|
||||||
@@ -89,6 +104,9 @@ docker pull ghcr.io/mostlygeek/llama-swap:musa
|
|||||||
# tagged llama-swap, platform and llama-server version images
|
# tagged llama-swap, platform and llama-server version images
|
||||||
docker pull ghcr.io/mostlygeek/llama-swap:v166-cuda-b6795
|
docker pull ghcr.io/mostlygeek/llama-swap:v166-cuda-b6795
|
||||||
|
|
||||||
|
# non-root cuda
|
||||||
|
docker pull ghcr.io/mostlygeek/llama-swap:cuda-non-root
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
</details>
|
</details>
|
||||||
@@ -191,23 +209,26 @@ As a safeguard, llama-swap also sets `X-Accel-Buffering: no` on SSE responses. H
|
|||||||
|
|
||||||
## Monitoring Logs on the CLI
|
## Monitoring Logs on the CLI
|
||||||
|
|
||||||
```shell
|
```sh
|
||||||
# sends up to the last 10KB of logs
|
# sends up to the last 10KB of logs
|
||||||
curl http://host/logs'
|
$ curl http://host/logs
|
||||||
|
|
||||||
# streams combined logs
|
# streams combined logs
|
||||||
curl -Ns 'http://host/logs/stream'
|
curl -Ns http://host/logs/stream
|
||||||
|
|
||||||
# just llama-swap's logs
|
# stream llama-swap's proxy status logs
|
||||||
curl -Ns 'http://host/logs/stream/proxy'
|
curl -Ns http://host/logs/stream/proxy
|
||||||
|
|
||||||
# just upstream's logs
|
# stream logs from upstream processes that llama-swap loads
|
||||||
curl -Ns 'http://host/logs/stream/upstream'
|
curl -Ns http://host/logs/stream/upstream
|
||||||
|
|
||||||
|
# stream logs only from a specific model
|
||||||
|
curl -Ns http://host/logs/stream/{model_id}
|
||||||
|
|
||||||
# stream and filter logs with linux pipes
|
# stream and filter logs with linux pipes
|
||||||
curl -Ns http://host/logs/stream | grep 'eval time'
|
curl -Ns http://host/logs/stream | grep 'eval time'
|
||||||
|
|
||||||
# skips history and just streams new log entries
|
# appending ?no-history will disable sending buffered history first
|
||||||
curl -Ns 'http://host/logs/stream?no-history'
|
curl -Ns 'http://host/logs/stream?no-history'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,85 @@
|
|||||||
|
# Replace ring.Ring with Efficient Circular Byte Buffer
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
Replace the inefficient `container/ring.Ring` implementation in `logMonitor.go` with a simple circular byte buffer that uses a single contiguous `[]byte` slice. This eliminates per-write allocations, improves cache locality, and correctly implements a 10KB buffer.
|
||||||
|
|
||||||
|
## Current Issues
|
||||||
|
|
||||||
|
1. `ring.New(10 * 1024)` creates 10,240 ring **elements**, not 10KB of storage
|
||||||
|
2. Every `Write()` call allocates a new `[]byte` slice inside the lock
|
||||||
|
3. `GetHistory()` iterates all 10,240 elements and appends repeatedly (geometric reallocs)
|
||||||
|
4. Linked list structure has poor cache locality and pointer overhead
|
||||||
|
|
||||||
|
## Design Requirements
|
||||||
|
|
||||||
|
### New CircularBuffer Type
|
||||||
|
|
||||||
|
Create a simple circular byte buffer with:
|
||||||
|
- Single pre-allocated `[]byte` of fixed capacity (10KB)
|
||||||
|
- `head` and `size` integers to track write position and data length
|
||||||
|
- No per-write allocations
|
||||||
|
|
||||||
|
### API Requirements
|
||||||
|
|
||||||
|
The new buffer must support:
|
||||||
|
1. **Write(p []byte)** - Append bytes, overwriting oldest data when full
|
||||||
|
2. **GetHistory() []byte** - Return all buffered data in correct order (oldest to newest)
|
||||||
|
|
||||||
|
### Implementation Details
|
||||||
|
|
||||||
|
```go
|
||||||
|
type circularBuffer struct {
|
||||||
|
data []byte // pre-allocated capacity
|
||||||
|
head int // next write position
|
||||||
|
size int // current number of bytes stored (0 to cap)
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Write logic:**
|
||||||
|
- If `len(p) >= capacity`: just keep the last `capacity` bytes
|
||||||
|
- Otherwise: write bytes at `head`, wrapping around if needed
|
||||||
|
- Update `head` and `size` accordingly
|
||||||
|
- Data is copied into the internal buffer (not stored by reference)
|
||||||
|
|
||||||
|
**GetHistory logic:**
|
||||||
|
- Calculate start position: `(head - size + cap) % cap`
|
||||||
|
- If not wrapped: single slice copy
|
||||||
|
- If wrapped: two copies (end of buffer + beginning)
|
||||||
|
- Returns a **new slice** (copy), not a view into internal buffer
|
||||||
|
|
||||||
|
### Immutability Guarantees (must preserve)
|
||||||
|
|
||||||
|
Per existing tests:
|
||||||
|
1. Modifying input `[]byte` after `Write()` must not affect stored data
|
||||||
|
2. `GetHistory()` returns independent copy - modifications don't affect buffer
|
||||||
|
|
||||||
|
## Files to Modify
|
||||||
|
|
||||||
|
- `proxy/logMonitor.go` - Replace `buffer *ring.Ring` with new circular buffer
|
||||||
|
|
||||||
|
## Testing Plan
|
||||||
|
|
||||||
|
Existing tests in `logMonitor_test.go` should continue to pass:
|
||||||
|
- `TestLogMonitor` - Basic write/read and subscriber notification
|
||||||
|
- `TestWrite_ImmutableBuffer` - Verify writes don't affect returned history
|
||||||
|
- `TestWrite_LogTimeFormat` - Timestamp formatting
|
||||||
|
|
||||||
|
Add new tests:
|
||||||
|
- Test buffer wrap-around behavior
|
||||||
|
- Test large writes that exceed buffer capacity
|
||||||
|
- Test exact capacity boundary conditions
|
||||||
|
|
||||||
|
## Checklist
|
||||||
|
|
||||||
|
- [ ] Create `circularBuffer` struct in `logMonitor.go`
|
||||||
|
- [ ] Implement `Write()` method for circular buffer
|
||||||
|
- [ ] Implement `GetHistory()` method for circular buffer
|
||||||
|
- [ ] Update `LogMonitor` struct to use new buffer
|
||||||
|
- [ ] Update `NewLogMonitorWriter()` to initialize new buffer
|
||||||
|
- [ ] Update `LogMonitor.Write()` to use new buffer
|
||||||
|
- [ ] Update `LogMonitor.GetHistory()` to use new buffer
|
||||||
|
- [ ] Remove `"container/ring"` import
|
||||||
|
- [ ] Run `make test-dev` to verify existing tests pass
|
||||||
|
- [ ] Add wrap-around test case
|
||||||
|
- [ ] Run `make test-all` for final validation
|
||||||
@@ -188,11 +188,17 @@
|
|||||||
"default": "",
|
"default": "",
|
||||||
"pattern": "^[a-zA-Z0-9_, ]*$",
|
"pattern": "^[a-zA-Z0-9_, ]*$",
|
||||||
"description": "Comma separated list of parameters to remove from the request. Used for server-side enforcement of sampling parameters."
|
"description": "Comma separated list of parameters to remove from the request. Used for server-side enforcement of sampling parameters."
|
||||||
|
},
|
||||||
|
"setParams": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": true,
|
||||||
|
"default": {},
|
||||||
|
"description": "Dictionary of parameters to set/override in requests. Useful for enforcing specific parameter values. Protected params like 'model' cannot be overridden. Values can be strings, numbers, booleans, arrays, or objects."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
"default": {},
|
"default": {},
|
||||||
"description": "Dictionary of filter settings. Only stripParams is supported."
|
"description": "Dictionary of filter settings. Supports stripParams and setParams."
|
||||||
},
|
},
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
@@ -273,6 +279,78 @@
|
|||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
"description": "A dictionary of event triggers and actions. Only supported hook is on_startup."
|
"description": "A dictionary of event triggers and actions. Only supported hook is on_startup."
|
||||||
|
},
|
||||||
|
"logToStdout": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"proxy",
|
||||||
|
"upstream",
|
||||||
|
"both",
|
||||||
|
"none"
|
||||||
|
],
|
||||||
|
"default": "proxy",
|
||||||
|
"description": "Controls what is logged to stdout. 'proxy': logs generated by llama-swap, 'upstream': copy of upstream process stdout logs, 'both': both interleaved together, 'none': no logs written to stdout."
|
||||||
|
},
|
||||||
|
"apiKeys": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string",
|
||||||
|
"minLength": 1
|
||||||
|
},
|
||||||
|
"default": [],
|
||||||
|
"description": "Require an API key when making requests to inference endpoints. When empty, authorization will not be checked. Each key is a non-empty string."
|
||||||
|
},
|
||||||
|
"peers": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": {
|
||||||
|
"type": "object",
|
||||||
|
"required": [
|
||||||
|
"proxy",
|
||||||
|
"models"
|
||||||
|
],
|
||||||
|
"properties": {
|
||||||
|
"proxy": {
|
||||||
|
"type": "string",
|
||||||
|
"format": "uri",
|
||||||
|
"description": "A valid base URL to proxy requests to. Requested path to llama-swap will be appended to the end of the proxy value."
|
||||||
|
},
|
||||||
|
"apiKey": {
|
||||||
|
"type": "string",
|
||||||
|
"default": "",
|
||||||
|
"description": "A string key to be injected into the request. If blank, no key will be added. Key will be injected into headers: Authorization: Bearer <key> and x-api-key: <key>."
|
||||||
|
},
|
||||||
|
"models": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string",
|
||||||
|
"minLength": 1
|
||||||
|
},
|
||||||
|
"description": "A list of models served by the peer."
|
||||||
|
},
|
||||||
|
"filters": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"stripParams": {
|
||||||
|
"type": "string",
|
||||||
|
"default": "",
|
||||||
|
"pattern": "^[a-zA-Z0-9_, ]*$",
|
||||||
|
"description": "Comma separated list of parameters to remove from the request. Useful for removing parameters that the peer doesn't support."
|
||||||
|
},
|
||||||
|
"setParams": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": true,
|
||||||
|
"default": {},
|
||||||
|
"description": "Dictionary of parameters to set/override in requests to this peer. Useful for injecting provider-specific settings. Protected params like 'model' cannot be overridden. Values can be strings, numbers, booleans, arrays, or objects."
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"default": {},
|
||||||
|
"description": "Dictionary of filter settings for peer requests. Supports stripParams and setParams."
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"default": {},
|
||||||
|
"description": "A dictionary of remote peers and models they provide. Peers can be another llama-swap or any server that provides the /v1/ generative API endpoints supported by llama-swap."
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -34,6 +34,16 @@ logLevel: info
|
|||||||
# - For more info, read: https://pkg.go.dev/time#pkg-constants
|
# - For more info, read: https://pkg.go.dev/time#pkg-constants
|
||||||
logTimeFormat: ""
|
logTimeFormat: ""
|
||||||
|
|
||||||
|
# logToStdout: controls what is logged to stdout
|
||||||
|
# - optional, default: "proxy"
|
||||||
|
# - valid values:
|
||||||
|
# - "proxy": logs generated by llama-swap when swapping models,
|
||||||
|
# handling requests, etc.
|
||||||
|
# - "upstream": a copy of an upstream process's stdout logs
|
||||||
|
# - "both": both the proxy and upstream logs interleaved together
|
||||||
|
# - "none": no logs are ever written to stdout
|
||||||
|
logToStdout: "proxy"
|
||||||
|
|
||||||
# metricsMaxInMemory: maximum number of metrics to keep in memory
|
# metricsMaxInMemory: maximum number of metrics to keep in memory
|
||||||
# - optional, default: 1000
|
# - optional, default: 1000
|
||||||
# - controls how many metrics are stored in memory before older ones are discarded
|
# - controls how many metrics are stored in memory before older ones are discarded
|
||||||
@@ -70,6 +80,9 @@ includeAliasesInList: false
|
|||||||
# - macro names must not be a reserved name: PORT or MODEL_ID
|
# - macro names must not be a reserved name: PORT or MODEL_ID
|
||||||
# - macro values can be numbers, bools, or strings
|
# - macro values can be numbers, bools, or strings
|
||||||
# - macros can contain other macros, but they must be defined before they are used
|
# - macros can contain other macros, but they must be defined before they are used
|
||||||
|
# - environment variables can be referenced with ${env.VAR_NAME} syntax
|
||||||
|
# - env macros are substituted first, before regular macros
|
||||||
|
# - if the env var is not set, config loading will fail with an error
|
||||||
macros:
|
macros:
|
||||||
# Example of a multi-line macro
|
# Example of a multi-line macro
|
||||||
"latest-llama": >
|
"latest-llama": >
|
||||||
@@ -82,6 +95,24 @@ macros:
|
|||||||
# but they must be previously declared.
|
# but they must be previously declared.
|
||||||
"default_args": "--ctx-size ${default_ctx}"
|
"default_args": "--ctx-size ${default_ctx}"
|
||||||
|
|
||||||
|
# Example of environment variable macros
|
||||||
|
# - ${env.VAR_NAME} pulls the value from the system environment
|
||||||
|
# - useful for paths, secrets, or machine-specific configuration
|
||||||
|
"models_dir": "${env.HOME}/models"
|
||||||
|
|
||||||
|
# apiKeys: require an API key when making requests to inference endpoints
|
||||||
|
# - optional, default: []
|
||||||
|
# - when empty (the default) authorization will not be checked as llama-swap is default-allow
|
||||||
|
# - each key is a non-empty string
|
||||||
|
apiKeys:
|
||||||
|
- "sk-hunter2"
|
||||||
|
# tip, one liner: printf "sk-%s\n" "$(head -c 48 /dev/urandom | base64 )"
|
||||||
|
- "sk-gyCPiKUcIfPlaM4OSMZekkprgijPx6+OsmQs8Rsg0xZ9qpy6gKWsIKqHOk+cgXVx"
|
||||||
|
|
||||||
|
# use environment variable macros to keep secrets out of the config
|
||||||
|
- "${env.API_KEY_1}"
|
||||||
|
- "${env.API_KEY_2}"
|
||||||
|
|
||||||
# models: a dictionary of model configurations
|
# models: a dictionary of model configurations
|
||||||
# - required
|
# - required
|
||||||
# - each key is the model's ID, used in API requests
|
# - each key is the model's ID, used in API requests
|
||||||
@@ -165,7 +196,7 @@ models:
|
|||||||
|
|
||||||
# filters: a dictionary of filter settings
|
# filters: a dictionary of filter settings
|
||||||
# - optional, default: empty dictionary
|
# - optional, default: empty dictionary
|
||||||
# - only stripParams is currently supported
|
# - same capabilities as peer filters (stripParams, setParams)
|
||||||
filters:
|
filters:
|
||||||
# stripParams: a comma separated list of parameters to remove from the request
|
# stripParams: a comma separated list of parameters to remove from the request
|
||||||
# - optional, default: ""
|
# - optional, default: ""
|
||||||
@@ -175,6 +206,16 @@ models:
|
|||||||
# - recommended to stick to sampling parameters
|
# - recommended to stick to sampling parameters
|
||||||
stripParams: "temperature, top_p, top_k"
|
stripParams: "temperature, top_p, top_k"
|
||||||
|
|
||||||
|
# setParams: a dictionary of parameters to set/override in requests
|
||||||
|
# - optional, default: empty dictionary
|
||||||
|
# - useful for enforcing specific parameter values
|
||||||
|
# - protected params like "model" cannot be overridden
|
||||||
|
# - values can be strings, numbers, booleans, arrays, or objects
|
||||||
|
setParams:
|
||||||
|
# Example: enforce specific sampling parameters
|
||||||
|
temperature: 0.7
|
||||||
|
top_p: 0.9
|
||||||
|
|
||||||
# metadata: a dictionary of arbitrary values that are included in /v1/models
|
# metadata: a dictionary of arbitrary values that are included in /v1/models
|
||||||
# - optional, default: empty dictionary
|
# - optional, default: empty dictionary
|
||||||
# - while metadata can contain complex types it is recommended to keep it simple
|
# - while metadata can contain complex types it is recommended to keep it simple
|
||||||
@@ -321,3 +362,56 @@ hooks:
|
|||||||
# otherwise models will be loaded and swapped out
|
# otherwise models will be loaded and swapped out
|
||||||
preload:
|
preload:
|
||||||
- "llama"
|
- "llama"
|
||||||
|
|
||||||
|
# peers: a dictionary of remote peers and models they provide
|
||||||
|
# - optional, default: empty dictionary
|
||||||
|
# - peers can be another llama-swap
|
||||||
|
# - peers can be any server that provides the /v1/ generative api endpoints supported by llama-swap
|
||||||
|
peers:
|
||||||
|
# each key is the peer's ID
|
||||||
|
llama-swap-peer:
|
||||||
|
# proxy: a valid base URL to proxy requests to
|
||||||
|
# - required
|
||||||
|
# - requested path to llama-swap will be appended to the end of the proxy value
|
||||||
|
proxy: http://192.168.1.23
|
||||||
|
# models: a list of models served by the peer
|
||||||
|
# - required
|
||||||
|
models:
|
||||||
|
- model_a
|
||||||
|
- model_b
|
||||||
|
- embeddings/model_c
|
||||||
|
openrouter:
|
||||||
|
proxy: https://openrouter.ai/api
|
||||||
|
# apiKey: a string key to be injected into the request
|
||||||
|
# - optional, default: ""
|
||||||
|
# - if blank, no key will be added to the request
|
||||||
|
# - key will be injected into headers: Authorization: Bearer <key> and x-api-key: <key>
|
||||||
|
# - can be a string or a macro
|
||||||
|
apiKey: ${env.OPENROUTER_API_KEY}
|
||||||
|
models:
|
||||||
|
- meta-llama/llama-3.1-8b-instruct
|
||||||
|
- qwen/qwen3-235b-a22b-2507
|
||||||
|
- deepseek/deepseek-v3.2
|
||||||
|
- z-ai/glm-4.7
|
||||||
|
- moonshotai/kimi-k2-0905
|
||||||
|
- minimax/minimax-m2.1
|
||||||
|
# filters: a dictionary of filter settings for peer requests
|
||||||
|
# - optional, default: empty dictionary
|
||||||
|
# - same capabilities as model filters (stripParams, setParams)
|
||||||
|
filters:
|
||||||
|
# stripParams: a comma separated list of parameters to remove from the request
|
||||||
|
# - optional, default: ""
|
||||||
|
# - useful for removing parameters that the peer doesn't support
|
||||||
|
# - the `model` parameter can never be removed
|
||||||
|
stripParams: "temperature, top_p"
|
||||||
|
|
||||||
|
# setParams: a dictionary of parameters to set/override in requests to this peer
|
||||||
|
# - optional, default: empty dictionary
|
||||||
|
# - useful for injecting provider-specific settings like data retention policies
|
||||||
|
# - protected params like "model" cannot be overridden
|
||||||
|
# - values can be strings, numbers, booleans, arrays, or objects
|
||||||
|
setParams:
|
||||||
|
# Example: enforce zero-data-retention for OpenRouter
|
||||||
|
provider:
|
||||||
|
data_collection: "deny"
|
||||||
|
zdr: true
|
||||||
|
|||||||
@@ -2,21 +2,37 @@
|
|||||||
|
|
||||||
cd $(dirname "$0")
|
cd $(dirname "$0")
|
||||||
|
|
||||||
|
# use this to test locally, example:
|
||||||
|
# GITHUB_TOKEN=$(gh auth token) LOG_DEBUG=1 DEBUG_ABORT_BUILD=1 ./docker/build-container.sh rocm
|
||||||
|
# you need the read:packages scope on the token. Generate a personal access token with
|
||||||
|
# the scopes: gist, read:org, repo, write:packages
|
||||||
|
# then: gh auth login (and copy/paste the new token)
|
||||||
|
|
||||||
|
log_debug() {
|
||||||
|
if [ "$LOG_DEBUG" = "1" ]; then
|
||||||
|
echo "[DEBUG] $*"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
log_info() {
|
||||||
|
echo "[INFO] $*"
|
||||||
|
}
|
||||||
|
|
||||||
ARCH=$1
|
ARCH=$1
|
||||||
PUSH_IMAGES=${2:-false}
|
PUSH_IMAGES=${2:-false}
|
||||||
|
|
||||||
# List of allowed architectures
|
# List of allowed architectures
|
||||||
ALLOWED_ARCHS=("intel" "vulkan" "musa" "cuda" "cpu")
|
ALLOWED_ARCHS=("intel" "vulkan" "musa" "cuda" "cpu" "rocm")
|
||||||
|
|
||||||
# Check if ARCH is in the allowed list
|
# Check if ARCH is in the allowed list
|
||||||
if [[ ! " ${ALLOWED_ARCHS[@]} " =~ " ${ARCH} " ]]; then
|
if [[ ! " ${ALLOWED_ARCHS[@]} " =~ " ${ARCH} " ]]; then
|
||||||
echo "Error: ARCH must be one of the following: ${ALLOWED_ARCHS[@]}"
|
log_info "Error: ARCH must be one of the following: ${ALLOWED_ARCHS[@]}"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Check if GITHUB_TOKEN is set and not empty
|
# Check if GITHUB_TOKEN is set and not empty
|
||||||
if [[ -z "$GITHUB_TOKEN" ]]; then
|
if [[ -z "$GITHUB_TOKEN" ]]; then
|
||||||
echo "Error: GITHUB_TOKEN is not set or is empty."
|
log_info "Error: GITHUB_TOKEN is not set or is empty."
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@@ -32,25 +48,74 @@ LS_REPO=${GITHUB_REPOSITORY:-mostlygeek/llama-swap}
|
|||||||
# have to strip out the 'v' due to .tar.gz file naming
|
# have to strip out the 'v' due to .tar.gz file naming
|
||||||
LS_VER=$(curl -s https://api.github.com/repos/${LS_REPO}/releases/latest | jq -r .tag_name | sed 's/v//')
|
LS_VER=$(curl -s https://api.github.com/repos/${LS_REPO}/releases/latest | jq -r .tag_name | sed 's/v//')
|
||||||
|
|
||||||
|
# Fetches the most recent llama.cpp tag matching the given prefix
|
||||||
|
# Handles pagination to search beyond the first 100 results
|
||||||
|
# $1 - tag_prefix (e.g., "server" or "server-vulkan")
|
||||||
|
# Returns: the version number extracted from the tag
|
||||||
|
fetch_llama_tag() {
|
||||||
|
local tag_prefix=$1
|
||||||
|
local page=1
|
||||||
|
local per_page=100
|
||||||
|
|
||||||
|
while true; do
|
||||||
|
log_debug "Fetching page $page for tag prefix: $tag_prefix"
|
||||||
|
|
||||||
|
local response=$(curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \
|
||||||
|
"https://api.github.com/users/ggml-org/packages/container/llama.cpp/versions?per_page=${per_page}&page=${page}")
|
||||||
|
|
||||||
|
# Check for API errors
|
||||||
|
if echo "$response" | jq -e '.message' > /dev/null 2>&1; then
|
||||||
|
local error_msg=$(echo "$response" | jq -r '.message')
|
||||||
|
log_info "GitHub API error: $error_msg"
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Check if response is empty array (no more pages)
|
||||||
|
if [ "$(echo "$response" | jq 'length')" -eq 0 ]; then
|
||||||
|
log_debug "No more pages (empty response)"
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Extract matching tag from this page
|
||||||
|
local found_tag=$(echo "$response" | jq -r \
|
||||||
|
".[] | select(.metadata.container.tags[]? | startswith(\"$tag_prefix\")) | .metadata.container.tags[] | select(startswith(\"$tag_prefix\"))" \
|
||||||
|
| sort -r | head -n1)
|
||||||
|
|
||||||
|
if [ -n "$found_tag" ]; then
|
||||||
|
log_debug "Found tag: $found_tag on page $page"
|
||||||
|
echo "$found_tag" | awk -F '-' '{print $NF}'
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
page=$((page + 1))
|
||||||
|
|
||||||
|
# Safety limit to prevent infinite loops
|
||||||
|
if [ $page -gt 50 ]; then
|
||||||
|
log_info "Reached pagination safety limit (50 pages)"
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
if [ "$ARCH" == "cpu" ]; then
|
if [ "$ARCH" == "cpu" ]; then
|
||||||
# cpu only containers just use the server tag
|
LCPP_TAG=$(fetch_llama_tag "server")
|
||||||
LCPP_TAG=$(curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \
|
|
||||||
"https://api.github.com/users/ggml-org/packages/container/llama.cpp/versions" \
|
|
||||||
| jq -r '.[] | select(.metadata.container.tags[] | startswith("server")) | .metadata.container.tags[]' \
|
|
||||||
| sort -r | head -n1 | awk -F '-' '{print $3}')
|
|
||||||
BASE_TAG=server-${LCPP_TAG}
|
BASE_TAG=server-${LCPP_TAG}
|
||||||
else
|
else
|
||||||
LCPP_TAG=$(curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \
|
LCPP_TAG=$(fetch_llama_tag "server-${ARCH}")
|
||||||
"https://api.github.com/users/ggml-org/packages/container/llama.cpp/versions" \
|
|
||||||
| jq -r --arg arch "$ARCH" '.[] | select(.metadata.container.tags[] | startswith("server-\($arch)")) | .metadata.container.tags[]' \
|
|
||||||
| sort -r | head -n1 | awk -F '-' '{print $3}')
|
|
||||||
BASE_TAG=server-${ARCH}-${LCPP_TAG}
|
BASE_TAG=server-${ARCH}-${LCPP_TAG}
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Abort if LCPP_TAG is empty.
|
# Abort if LCPP_TAG is empty.
|
||||||
if [[ -z "$LCPP_TAG" ]]; then
|
if [[ -z "$LCPP_TAG" ]]; then
|
||||||
echo "Abort: Could not find llama-server container for arch: $ARCH"
|
log_info "Abort: Could not find llama-server container for arch: $ARCH"
|
||||||
exit 1
|
exit 1
|
||||||
|
else
|
||||||
|
log_info "LCPP_TAG: $LCPP_TAG"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ ! -z "$DEBUG_ABORT_BUILD" ]]; then
|
||||||
|
log_info "Abort: DEBUG_ABORT_BUILD set"
|
||||||
|
exit 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
for CONTAINER_TYPE in non-root root; do
|
for CONTAINER_TYPE in non-root root; do
|
||||||
@@ -68,7 +133,7 @@ for CONTAINER_TYPE in non-root root; do
|
|||||||
USER_HOME=/app
|
USER_HOME=/app
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "Building $CONTAINER_TYPE $CONTAINER_TAG $LS_VER"
|
log_info "Building $CONTAINER_TYPE $CONTAINER_TAG $LS_VER"
|
||||||
docker build -f llama-swap.Containerfile --build-arg BASE_TAG=${BASE_TAG} --build-arg LS_VER=${LS_VER} --build-arg UID=${USER_UID} \
|
docker build -f llama-swap.Containerfile --build-arg BASE_TAG=${BASE_TAG} --build-arg LS_VER=${LS_VER} --build-arg UID=${USER_UID} \
|
||||||
--build-arg LS_REPO=${LS_REPO} --build-arg GID=${USER_GID} --build-arg USER_HOME=${USER_HOME} -t ${CONTAINER_TAG} -t ${CONTAINER_LATEST} \
|
--build-arg LS_REPO=${LS_REPO} --build-arg GID=${USER_GID} --build-arg USER_HOME=${USER_HOME} -t ${CONTAINER_TAG} -t ${CONTAINER_LATEST} \
|
||||||
--build-arg BASE_IMAGE=${BASE_IMAGE} .
|
--build-arg BASE_IMAGE=${BASE_IMAGE} .
|
||||||
|
|||||||
@@ -29,6 +29,10 @@ RUN chown --recursive $UID:$GID $HOME /app
|
|||||||
USER $UID:$GID
|
USER $UID:$GID
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Add /app to PATH
|
||||||
|
ENV PATH="/app:${PATH}"
|
||||||
|
|
||||||
RUN \
|
RUN \
|
||||||
curl -LO "https://github.com/${LS_REPO}/releases/download/v${LS_VER}/llama-swap_${LS_VER}_linux_amd64.tar.gz" && \
|
curl -LO "https://github.com/${LS_REPO}/releases/download/v${LS_VER}/llama-swap_${LS_VER}_linux_amd64.tar.gz" && \
|
||||||
tar -zxf "llama-swap_${LS_VER}_linux_amd64.tar.gz" && \
|
tar -zxf "llama-swap_${LS_VER}_linux_amd64.tar.gz" && \
|
||||||
|
|||||||
|
Before Width: | Height: | Size: 261 KiB After Width: | Height: | Size: 261 KiB |
|
Before Width: | Height: | Size: 351 KiB After Width: | Height: | Size: 351 KiB |
|
After Width: | Height: | Size: 198 KiB |
@@ -86,9 +86,12 @@ llama-swap supports many more features to customize how you want to manage your
|
|||||||
## Full Configuration Example
|
## Full Configuration Example
|
||||||
|
|
||||||
> [!NOTE]
|
> [!NOTE]
|
||||||
> This is a copy of `config.example.yaml`. Always check that for the most up to date examples.
|
> Always check [config.example.yaml](https://github.com/mostlygeek/llama-swap/blob/main/config.example.yaml) for the most up to date reference for all example configurations.
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
|
# add this modeline for validation in vscode
|
||||||
|
# yaml-language-server: $schema=https://raw.githubusercontent.com/mostlygeek/llama-swap/refs/heads/main/config-schema.json
|
||||||
|
#
|
||||||
# llama-swap YAML configuration example
|
# llama-swap YAML configuration example
|
||||||
# -------------------------------------
|
# -------------------------------------
|
||||||
#
|
#
|
||||||
@@ -114,6 +117,24 @@ healthCheckTimeout: 500
|
|||||||
# - Valid log levels: debug, info, warn, error
|
# - Valid log levels: debug, info, warn, error
|
||||||
logLevel: info
|
logLevel: info
|
||||||
|
|
||||||
|
# logTimeFormat: enables and sets the logging timestamp format
|
||||||
|
# - optional, default (disabled): ""
|
||||||
|
# - Valid values: "", "ansic", "unixdate", "rubydate", "rfc822", "rfc822z",
|
||||||
|
# "rfc850", "rfc1123", "rfc1123z", "rfc3339", "rfc3339nano", "kitchen",
|
||||||
|
# "stamp", "stampmilli", "stampmicro", and "stampnano".
|
||||||
|
# - For more info, read: https://pkg.go.dev/time#pkg-constants
|
||||||
|
logTimeFormat: ""
|
||||||
|
|
||||||
|
# logToStdout: controls what is logged to stdout
|
||||||
|
# - optional, default: "proxy"
|
||||||
|
# - valid values:
|
||||||
|
# - "proxy": logs generated by llama-swap when swapping models,
|
||||||
|
# handling requests, etc.
|
||||||
|
# - "upstream": a copy of the upstream process's stdout logs
|
||||||
|
# - "both": both the proxy and upstream logs interleaved together
|
||||||
|
# - "none": no logs are ever written to stdout
|
||||||
|
logToStdout: "proxy"
|
||||||
|
|
||||||
# metricsMaxInMemory: maximum number of metrics to keep in memory
|
# metricsMaxInMemory: maximum number of metrics to keep in memory
|
||||||
# - optional, default: 1000
|
# - optional, default: 1000
|
||||||
# - controls how many metrics are stored in memory before older ones are discarded
|
# - controls how many metrics are stored in memory before older ones are discarded
|
||||||
@@ -126,6 +147,30 @@ metricsMaxInMemory: 1000
|
|||||||
# - it is automatically incremented for every model that uses it
|
# - it is automatically incremented for every model that uses it
|
||||||
startPort: 10001
|
startPort: 10001
|
||||||
|
|
||||||
|
# sendLoadingState: inject loading status updates into the reasoning (thinking)
|
||||||
|
# field
|
||||||
|
# - optional, default: false
|
||||||
|
# - when true, a stream of loading messages will be sent to the client in the
|
||||||
|
# reasoning field so chat UIs can show that loading is in progress.
|
||||||
|
# - see #366 for more details
|
||||||
|
sendLoadingState: true
|
||||||
|
|
||||||
|
# includeAliasesInList: present aliases within the /v1/models OpenAI API listing
|
||||||
|
# - optional, default: false
|
||||||
|
# - when true, model aliases will be output to the API model listing duplicating
|
||||||
|
# all fields except for Id so chat UIs can use the alias equivalent to the original.
|
||||||
|
includeAliasesInList: false
|
||||||
|
|
||||||
|
# apiKeys: require an API key when making requests to inference endpoints
|
||||||
|
# - optional, default: []
|
||||||
|
# - when empty (the default) authorization will not be checked as llama-swap is default-allow
|
||||||
|
# - each key is a non-empty string
|
||||||
|
apiKeys:
|
||||||
|
- "sk-hunter2"
|
||||||
|
# hint, one liner: printf "sk-%s\n" "$(head -c 48 /dev/urandom | base64 )"
|
||||||
|
- "sk-gyCPiKUcIfPlaM4OSMZekkprgijPx6+OsmQs8Rsg0xZ9qpy6gKWsIKqHOk+cgXVx"
|
||||||
|
- "sk-+QtIn0Zjj4UHjiaZYiZEnru4mrwKM9RzhmJeK5SobNXLl8QMFXxGz1/2lEuvQpkb"
|
||||||
|
|
||||||
# macros: a dictionary of string substitutions
|
# macros: a dictionary of string substitutions
|
||||||
# - optional, default: empty dictionary
|
# - optional, default: empty dictionary
|
||||||
# - macros are reusable snippets
|
# - macros are reusable snippets
|
||||||
@@ -274,6 +319,10 @@ models:
|
|||||||
# - recommended to be omitted and the default used
|
# - recommended to be omitted and the default used
|
||||||
concurrencyLimit: 0
|
concurrencyLimit: 0
|
||||||
|
|
||||||
|
# sendLoadingState: overrides the global sendLoadingState setting for this model
|
||||||
|
# - optional, default: undefined (use global setting)
|
||||||
|
sendLoadingState: false
|
||||||
|
|
||||||
# Unlisted model example:
|
# Unlisted model example:
|
||||||
"qwen-unlisted":
|
"qwen-unlisted":
|
||||||
# unlisted: boolean, true or false
|
# unlisted: boolean, true or false
|
||||||
@@ -383,4 +432,36 @@ hooks:
|
|||||||
# otherwise models will be loaded and swapped out
|
# otherwise models will be loaded and swapped out
|
||||||
preload:
|
preload:
|
||||||
- "llama"
|
- "llama"
|
||||||
|
|
||||||
|
# peers: a dictionary of remote peers and models they provide
|
||||||
|
# - optional, default: empty dictionary
|
||||||
|
# - peers can be another llama-swap
|
||||||
|
# - peers can be any server that provides the /v1/ generative api endpoints supported by llama-swap
|
||||||
|
peers:
|
||||||
|
# each key is the peer's ID
|
||||||
|
llama-swap-peer:
|
||||||
|
# proxy: a valid base URL to proxy requests to
|
||||||
|
# - required
|
||||||
|
# - requested path to llama-swap will be appended to the end of the proxy value
|
||||||
|
proxy: http://192.168.1.23
|
||||||
|
# models: a list of models served by the peer
|
||||||
|
# - required
|
||||||
|
models:
|
||||||
|
- model_a
|
||||||
|
- model_b
|
||||||
|
- embeddings/model_c
|
||||||
|
openrouter:
|
||||||
|
proxy: https://openrouter.ai/api
|
||||||
|
# apiKey: a string key to be injected into the request
|
||||||
|
# - optional, default: ""
|
||||||
|
# - if blank, no key will be added to the request
|
||||||
|
# - key will be injected into headers: Authorization: Bearer <key> and x-api-key: <key>
|
||||||
|
apiKey: sk-your-openrouter-key
|
||||||
|
models:
|
||||||
|
- meta-llama/llama-3.1-8b-instruct
|
||||||
|
- qwen/qwen3-235b-a22b-2507
|
||||||
|
- deepseek/deepseek-v3.2
|
||||||
|
- z-ai/glm-4.7
|
||||||
|
- moonshotai/kimi-k2-0905
|
||||||
|
- minimax/minimax-m2.1
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -0,0 +1,9 @@
|
|||||||
|
## Container Security
|
||||||
|
|
||||||
|
For convenience, the default container images use the **root** user within the container. This permits simplified access to host resources including volume mounts and hardware devices under `/dev/dri` (_for Vulkan support_). But this can widen the attack surface to privilege escalation exploits.
|
||||||
|
|
||||||
|
Alternative images, tagged as `non-root`, are also available. For example, `llama-swap:cpu-non-root` uses the unprivileged **app** user by default. Depending on deployment requirements, additional configuration may be necessary to ensure that the container retains access to required host resources. This might entail customizing host filesystem permissions/ownership appropriately or injecting host group membership into the container.
|
||||||
|
|
||||||
|
Docker offers a [system-wide option enabling user namespace remapping](https://docs.docker.com/engine/security/userns-remap/) to accommodate situations where a **root** container user is required but also mentions that _"The best way to prevent privilege-escalation attacks from within a container is to configure your container's applications to run as unprivileged users."_ Podman offers similar capability, per-container, to [set UID/GID mapping in a new user namespace](https://docs.podman.io/en/latest/markdown/podman-run.1.html#set-uid-gid-mapping-in-a-new-user-namespace).
|
||||||
|
|
||||||
|
The Large Language Model (_LLM/AI_) ecosystem is rapidly evolving and [serious security vulnerabilities have surfaced in the past](https://huggingface.co/docs/hub/security-pickle). These alternative _non-root_ images could reduce the impact of future unknown problems. However, proper planning and configuration are recommended to utilize them.
|
||||||
@@ -15,6 +15,12 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const DEFAULT_GROUP_ID = "(default)"
|
const DEFAULT_GROUP_ID = "(default)"
|
||||||
|
const (
|
||||||
|
LogToStdoutProxy = "proxy"
|
||||||
|
LogToStdoutUpstream = "upstream"
|
||||||
|
LogToStdoutBoth = "both"
|
||||||
|
LogToStdoutNone = "none"
|
||||||
|
)
|
||||||
|
|
||||||
type MacroEntry struct {
|
type MacroEntry struct {
|
||||||
Name string
|
Name string
|
||||||
@@ -81,6 +87,7 @@ type GroupConfig struct {
|
|||||||
var (
|
var (
|
||||||
macroNameRegex = regexp.MustCompile(`^[a-zA-Z0-9_-]+$`)
|
macroNameRegex = regexp.MustCompile(`^[a-zA-Z0-9_-]+$`)
|
||||||
macroPatternRegex = regexp.MustCompile(`\$\{([a-zA-Z0-9_-]+)\}`)
|
macroPatternRegex = regexp.MustCompile(`\$\{([a-zA-Z0-9_-]+)\}`)
|
||||||
|
envMacroRegex = regexp.MustCompile(`\$\{env\.([a-zA-Z_][a-zA-Z0-9_]*)\}`)
|
||||||
)
|
)
|
||||||
|
|
||||||
// set default values for GroupConfig
|
// set default values for GroupConfig
|
||||||
@@ -114,6 +121,7 @@ type Config struct {
|
|||||||
LogRequests bool `yaml:"logRequests"`
|
LogRequests bool `yaml:"logRequests"`
|
||||||
LogLevel string `yaml:"logLevel"`
|
LogLevel string `yaml:"logLevel"`
|
||||||
LogTimeFormat string `yaml:"logTimeFormat"`
|
LogTimeFormat string `yaml:"logTimeFormat"`
|
||||||
|
LogToStdout string `yaml:"logToStdout"`
|
||||||
MetricsMaxInMemory int `yaml:"metricsMaxInMemory"`
|
MetricsMaxInMemory int `yaml:"metricsMaxInMemory"`
|
||||||
Models map[string]ModelConfig `yaml:"models"` /* key is model ID */
|
Models map[string]ModelConfig `yaml:"models"` /* key is model ID */
|
||||||
Profiles map[string][]string `yaml:"profiles"`
|
Profiles map[string][]string `yaml:"profiles"`
|
||||||
@@ -136,6 +144,12 @@ type Config struct {
|
|||||||
|
|
||||||
// present aliases to /v1/models OpenAI API listing
|
// present aliases to /v1/models OpenAI API listing
|
||||||
IncludeAliasesInList bool `yaml:"includeAliasesInList"`
|
IncludeAliasesInList bool `yaml:"includeAliasesInList"`
|
||||||
|
|
||||||
|
// support API keys, see issue #433, #50, #251
|
||||||
|
RequiredAPIKeys []string `yaml:"apiKeys"`
|
||||||
|
|
||||||
|
// support remote peers, see issue #433, #296
|
||||||
|
Peers PeerDictionaryConfig `yaml:"peers"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Config) RealModelName(search string) (string, bool) {
|
func (c *Config) RealModelName(search string) (string, bool) {
|
||||||
@@ -170,22 +184,29 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return Config{}, err
|
return Config{}, err
|
||||||
}
|
}
|
||||||
|
yamlStr := string(data)
|
||||||
|
|
||||||
// default configuration values
|
// Phase 1: Substitute all ${env.VAR} macros at string level
|
||||||
|
// This is safe because env values are simple strings without YAML formatting
|
||||||
|
yamlStr, err = substituteEnvMacros(yamlStr)
|
||||||
|
if err != nil {
|
||||||
|
return Config{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Unmarshal into full Config with defaults
|
||||||
config := Config{
|
config := Config{
|
||||||
HealthCheckTimeout: 120,
|
HealthCheckTimeout: 120,
|
||||||
StartPort: 5800,
|
StartPort: 5800,
|
||||||
LogLevel: "info",
|
LogLevel: "info",
|
||||||
LogTimeFormat: "",
|
LogTimeFormat: "",
|
||||||
|
LogToStdout: LogToStdoutProxy,
|
||||||
MetricsMaxInMemory: 1000,
|
MetricsMaxInMemory: 1000,
|
||||||
}
|
}
|
||||||
err = yaml.Unmarshal(data, &config)
|
if err = yaml.Unmarshal([]byte(yamlStr), &config); err != nil {
|
||||||
if err != nil {
|
|
||||||
return Config{}, err
|
return Config{}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if config.HealthCheckTimeout < 15 {
|
if config.HealthCheckTimeout < 15 {
|
||||||
// set a minimum of 15 seconds
|
|
||||||
config.HealthCheckTimeout = 15
|
config.HealthCheckTimeout = 15
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -193,6 +214,12 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
return Config{}, fmt.Errorf("startPort must be greater than 1")
|
return Config{}, fmt.Errorf("startPort must be greater than 1")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
switch config.LogToStdout {
|
||||||
|
case LogToStdoutProxy, LogToStdoutUpstream, LogToStdoutBoth, LogToStdoutNone:
|
||||||
|
default:
|
||||||
|
return Config{}, fmt.Errorf("logToStdout must be one of: proxy, upstream, both, none")
|
||||||
|
}
|
||||||
|
|
||||||
// Populate the aliases map
|
// Populate the aliases map
|
||||||
config.aliases = make(map[string]string)
|
config.aliases = make(map[string]string)
|
||||||
for modelName, modelConfig := range config.Models {
|
for modelName, modelConfig := range config.Models {
|
||||||
@@ -204,55 +231,46 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* check macro constraint rules:
|
// Validate global macros
|
||||||
|
|
||||||
- name must fit the regex ^[a-zA-Z0-9_-]+$
|
|
||||||
- names must be less than 64 characters (no reason, just cause)
|
|
||||||
- name can not be any reserved macros: PORT, MODEL_ID
|
|
||||||
- macro values must be less than 1024 characters
|
|
||||||
*/
|
|
||||||
for _, macro := range config.Macros {
|
for _, macro := range config.Macros {
|
||||||
if err = validateMacro(macro.Name, macro.Value); err != nil {
|
if err = validateMacro(macro.Name, macro.Value); err != nil {
|
||||||
return Config{}, err
|
return Config{}, err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get and sort all model IDs first, makes testing more consistent
|
// Get and sort all model IDs for consistent port assignment
|
||||||
modelIds := make([]string, 0, len(config.Models))
|
modelIds := make([]string, 0, len(config.Models))
|
||||||
for modelId := range config.Models {
|
for modelId := range config.Models {
|
||||||
modelIds = append(modelIds, modelId)
|
modelIds = append(modelIds, modelId)
|
||||||
}
|
}
|
||||||
sort.Strings(modelIds) // This guarantees stable iteration order
|
sort.Strings(modelIds)
|
||||||
|
|
||||||
nextPort := config.StartPort
|
nextPort := config.StartPort
|
||||||
for _, modelId := range modelIds {
|
for _, modelId := range modelIds {
|
||||||
modelConfig := config.Models[modelId]
|
modelConfig := config.Models[modelId]
|
||||||
|
|
||||||
// Strip comments from command fields before macro expansion
|
// Strip comments from command fields
|
||||||
modelConfig.Cmd = StripComments(modelConfig.Cmd)
|
modelConfig.Cmd = StripComments(modelConfig.Cmd)
|
||||||
modelConfig.CmdStop = StripComments(modelConfig.CmdStop)
|
modelConfig.CmdStop = StripComments(modelConfig.CmdStop)
|
||||||
|
|
||||||
// validate model macros
|
// Validate model macros
|
||||||
for _, macro := range modelConfig.Macros {
|
for _, macro := range modelConfig.Macros {
|
||||||
if err = validateMacro(macro.Name, macro.Value); err != nil {
|
if err = validateMacro(macro.Name, macro.Value); err != nil {
|
||||||
return Config{}, fmt.Errorf("model %s: %s", modelId, err.Error())
|
return Config{}, fmt.Errorf("model %s: %s", modelId, err.Error())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Merge global config and model macros. Model macros take precedence
|
// Build merged macro list: MODEL_ID + global macros + model macros (model overrides global)
|
||||||
mergedMacros := make(MacroList, 0, len(config.Macros)+len(modelConfig.Macros))
|
mergedMacros := make(MacroList, 0, len(config.Macros)+len(modelConfig.Macros)+1)
|
||||||
mergedMacros = append(mergedMacros, MacroEntry{Name: "MODEL_ID", Value: modelId})
|
mergedMacros = append(mergedMacros, MacroEntry{Name: "MODEL_ID", Value: modelId})
|
||||||
|
|
||||||
// Add global macros first
|
|
||||||
mergedMacros = append(mergedMacros, config.Macros...)
|
mergedMacros = append(mergedMacros, config.Macros...)
|
||||||
|
|
||||||
// Add model macros (can override global)
|
// Add model macros (override globals with same name)
|
||||||
for _, entry := range modelConfig.Macros {
|
for _, entry := range modelConfig.Macros {
|
||||||
// Remove any existing global macro with same name
|
|
||||||
found := false
|
found := false
|
||||||
for i, existing := range mergedMacros {
|
for i, existing := range mergedMacros {
|
||||||
if existing.Name == entry.Name {
|
if existing.Name == entry.Name {
|
||||||
mergedMacros[i] = entry // Override
|
mergedMacros[i] = entry
|
||||||
found = true
|
found = true
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
@@ -262,23 +280,20 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// First pass: Substitute user-defined macros in reverse order (LIFO - last defined first)
|
// Substitute remaining macros in model fields (LIFO order)
|
||||||
// This allows later macros to reference earlier ones
|
|
||||||
for i := len(mergedMacros) - 1; i >= 0; i-- {
|
for i := len(mergedMacros) - 1; i >= 0; i-- {
|
||||||
entry := mergedMacros[i]
|
entry := mergedMacros[i]
|
||||||
macroSlug := fmt.Sprintf("${%s}", entry.Name)
|
macroSlug := fmt.Sprintf("${%s}", entry.Name)
|
||||||
macroStr := fmt.Sprintf("%v", entry.Value)
|
macroStr := fmt.Sprintf("%v", entry.Value)
|
||||||
|
|
||||||
// Substitute in command fields
|
|
||||||
modelConfig.Cmd = strings.ReplaceAll(modelConfig.Cmd, macroSlug, macroStr)
|
modelConfig.Cmd = strings.ReplaceAll(modelConfig.Cmd, macroSlug, macroStr)
|
||||||
modelConfig.CmdStop = strings.ReplaceAll(modelConfig.CmdStop, macroSlug, macroStr)
|
modelConfig.CmdStop = strings.ReplaceAll(modelConfig.CmdStop, macroSlug, macroStr)
|
||||||
modelConfig.Proxy = strings.ReplaceAll(modelConfig.Proxy, macroSlug, macroStr)
|
modelConfig.Proxy = strings.ReplaceAll(modelConfig.Proxy, macroSlug, macroStr)
|
||||||
modelConfig.CheckEndpoint = strings.ReplaceAll(modelConfig.CheckEndpoint, macroSlug, macroStr)
|
modelConfig.CheckEndpoint = strings.ReplaceAll(modelConfig.CheckEndpoint, macroSlug, macroStr)
|
||||||
modelConfig.Filters.StripParams = strings.ReplaceAll(modelConfig.Filters.StripParams, macroSlug, macroStr)
|
modelConfig.Filters.StripParams = strings.ReplaceAll(modelConfig.Filters.StripParams, macroSlug, macroStr)
|
||||||
|
|
||||||
// Substitute in metadata (recursive)
|
// Substitute in metadata (type-preserving)
|
||||||
if len(modelConfig.Metadata) > 0 {
|
if len(modelConfig.Metadata) > 0 {
|
||||||
var err error
|
|
||||||
result, err := substituteMacroInValue(modelConfig.Metadata, entry.Name, entry.Value)
|
result, err := substituteMacroInValue(modelConfig.Metadata, entry.Name, entry.Value)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return Config{}, fmt.Errorf("model %s metadata: %s", modelId, err.Error())
|
return Config{}, fmt.Errorf("model %s metadata: %s", modelId, err.Error())
|
||||||
@@ -287,18 +302,14 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Final pass: check if PORT macro is needed after macro expansion
|
// Handle PORT macro - only allocate if cmd uses it
|
||||||
// ${PORT} is a resource on the local machine so a new port is only allocated
|
|
||||||
// if it is required in either cmd or proxy keys
|
|
||||||
cmdHasPort := strings.Contains(modelConfig.Cmd, "${PORT}")
|
cmdHasPort := strings.Contains(modelConfig.Cmd, "${PORT}")
|
||||||
proxyHasPort := strings.Contains(modelConfig.Proxy, "${PORT}")
|
proxyHasPort := strings.Contains(modelConfig.Proxy, "${PORT}")
|
||||||
if cmdHasPort || proxyHasPort { // either has it
|
if cmdHasPort || proxyHasPort {
|
||||||
if !cmdHasPort && proxyHasPort { // but both don't have it
|
if !cmdHasPort && proxyHasPort {
|
||||||
return Config{}, fmt.Errorf("model %s: proxy uses ${PORT} but cmd does not - ${PORT} is only available when used in cmd", modelId)
|
return Config{}, fmt.Errorf("model %s: proxy uses ${PORT} but cmd does not - ${PORT} is only available when used in cmd", modelId)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add PORT macro and substitute it
|
|
||||||
portEntry := MacroEntry{Name: "PORT", Value: nextPort}
|
|
||||||
macroSlug := "${PORT}"
|
macroSlug := "${PORT}"
|
||||||
macroStr := fmt.Sprintf("%v", nextPort)
|
macroStr := fmt.Sprintf("%v", nextPort)
|
||||||
|
|
||||||
@@ -306,10 +317,8 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
modelConfig.CmdStop = strings.ReplaceAll(modelConfig.CmdStop, macroSlug, macroStr)
|
modelConfig.CmdStop = strings.ReplaceAll(modelConfig.CmdStop, macroSlug, macroStr)
|
||||||
modelConfig.Proxy = strings.ReplaceAll(modelConfig.Proxy, macroSlug, macroStr)
|
modelConfig.Proxy = strings.ReplaceAll(modelConfig.Proxy, macroSlug, macroStr)
|
||||||
|
|
||||||
// Substitute PORT in metadata
|
|
||||||
if len(modelConfig.Metadata) > 0 {
|
if len(modelConfig.Metadata) > 0 {
|
||||||
var err error
|
result, err := substituteMacroInValue(modelConfig.Metadata, "PORT", nextPort)
|
||||||
result, err := substituteMacroInValue(modelConfig.Metadata, portEntry.Name, portEntry.Value)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return Config{}, fmt.Errorf("model %s metadata: %s", modelId, err.Error())
|
return Config{}, fmt.Errorf("model %s metadata: %s", modelId, err.Error())
|
||||||
}
|
}
|
||||||
@@ -319,7 +328,7 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
nextPort++
|
nextPort++
|
||||||
}
|
}
|
||||||
|
|
||||||
// make sure there are no unknown macros that have not been replaced
|
// Validate no unknown macros remain
|
||||||
fieldMap := map[string]string{
|
fieldMap := map[string]string{
|
||||||
"cmd": modelConfig.Cmd,
|
"cmd": modelConfig.Cmd,
|
||||||
"cmdStop": modelConfig.CmdStop,
|
"cmdStop": modelConfig.CmdStop,
|
||||||
@@ -333,35 +342,27 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
for _, match := range matches {
|
for _, match := range matches {
|
||||||
macroName := match[1]
|
macroName := match[1]
|
||||||
if macroName == "PID" && fieldName == "cmdStop" {
|
if macroName == "PID" && fieldName == "cmdStop" {
|
||||||
continue // this is ok, has to be replaced by process later
|
continue // replaced at runtime
|
||||||
}
|
}
|
||||||
// Reserved macros are always valid (they should have been substituted already)
|
|
||||||
if macroName == "PORT" || macroName == "MODEL_ID" {
|
if macroName == "PORT" || macroName == "MODEL_ID" {
|
||||||
return Config{}, fmt.Errorf("macro '${%s}' should have been substituted in %s.%s", macroName, modelId, fieldName)
|
return Config{}, fmt.Errorf("macro '${%s}' should have been substituted in %s.%s", macroName, modelId, fieldName)
|
||||||
}
|
}
|
||||||
// Any other macro is unknown
|
|
||||||
return Config{}, fmt.Errorf("unknown macro '${%s}' found in %s.%s", macroName, modelId, fieldName)
|
return Config{}, fmt.Errorf("unknown macro '${%s}' found in %s.%s", macroName, modelId, fieldName)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check for unknown macros in metadata
|
|
||||||
if len(modelConfig.Metadata) > 0 {
|
if len(modelConfig.Metadata) > 0 {
|
||||||
if err := validateMetadataForUnknownMacros(modelConfig.Metadata, modelId); err != nil {
|
if err := validateNestedForUnknownMacros(modelConfig.Metadata, fmt.Sprintf("model %s metadata", modelId)); err != nil {
|
||||||
return Config{}, err
|
return Config{}, err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Validate the proxy URL.
|
|
||||||
if _, err := url.Parse(modelConfig.Proxy); err != nil {
|
if _, err := url.Parse(modelConfig.Proxy); err != nil {
|
||||||
return Config{}, fmt.Errorf(
|
return Config{}, fmt.Errorf("model %s: invalid proxy URL: %w", modelId, err)
|
||||||
"model %s: invalid proxy URL: %w", modelId, err,
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// if sendLoadingState is nil, set it to the global config value
|
|
||||||
// see #366
|
|
||||||
if modelConfig.SendLoadingState == nil {
|
if modelConfig.SendLoadingState == nil {
|
||||||
v := config.SendLoadingState // copy it
|
v := config.SendLoadingState
|
||||||
modelConfig.SendLoadingState = &v
|
modelConfig.SendLoadingState = &v
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -369,18 +370,17 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
config = AddDefaultGroupToConfig(config)
|
config = AddDefaultGroupToConfig(config)
|
||||||
// check that members are all unique in the groups
|
|
||||||
memberUsage := make(map[string]string) // maps member to group it appears in
|
// Validate group members
|
||||||
|
memberUsage := make(map[string]string)
|
||||||
for groupID, groupConfig := range config.Groups {
|
for groupID, groupConfig := range config.Groups {
|
||||||
prevSet := make(map[string]bool)
|
prevSet := make(map[string]bool)
|
||||||
for _, member := range groupConfig.Members {
|
for _, member := range groupConfig.Members {
|
||||||
// Check for duplicates within this group
|
|
||||||
if _, found := prevSet[member]; found {
|
if _, found := prevSet[member]; found {
|
||||||
return Config{}, fmt.Errorf("duplicate model member %s found in group: %s", member, groupID)
|
return Config{}, fmt.Errorf("duplicate model member %s found in group: %s", member, groupID)
|
||||||
}
|
}
|
||||||
prevSet[member] = true
|
prevSet[member] = true
|
||||||
|
|
||||||
// Check if member is used in another group
|
|
||||||
if existingGroup, exists := memberUsage[member]; exists {
|
if existingGroup, exists := memberUsage[member]; exists {
|
||||||
return Config{}, fmt.Errorf("model member %s is used in multiple groups: %s and %s", member, existingGroup, groupID)
|
return Config{}, fmt.Errorf("model member %s is used in multiple groups: %s and %s", member, existingGroup, groupID)
|
||||||
}
|
}
|
||||||
@@ -388,7 +388,7 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// clean up hooks preload
|
// Clean up hooks preload
|
||||||
if len(config.Hooks.OnStartup.Preload) > 0 {
|
if len(config.Hooks.OnStartup.Preload) > 0 {
|
||||||
var toPreload []string
|
var toPreload []string
|
||||||
for _, modelID := range config.Hooks.OnStartup.Preload {
|
for _, modelID := range config.Hooks.OnStartup.Preload {
|
||||||
@@ -400,10 +400,56 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
toPreload = append(toPreload, real)
|
toPreload = append(toPreload, real)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
config.Hooks.OnStartup.Preload = toPreload
|
config.Hooks.OnStartup.Preload = toPreload
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Validate API keys (env macros already substituted at string level)
|
||||||
|
for i, apikey := range config.RequiredAPIKeys {
|
||||||
|
if apikey == "" {
|
||||||
|
return Config{}, fmt.Errorf("empty api key found in apiKeys")
|
||||||
|
}
|
||||||
|
if strings.Contains(apikey, " ") {
|
||||||
|
return Config{}, fmt.Errorf("api key cannot contain spaces: `%s`", apikey)
|
||||||
|
}
|
||||||
|
config.RequiredAPIKeys[i] = apikey
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process peers with global macro substitution
|
||||||
|
for peerName, peerConfig := range config.Peers {
|
||||||
|
// Substitute global macros (LIFO order)
|
||||||
|
for i := len(config.Macros) - 1; i >= 0; i-- {
|
||||||
|
entry := config.Macros[i]
|
||||||
|
macroSlug := fmt.Sprintf("${%s}", entry.Name)
|
||||||
|
macroStr := fmt.Sprintf("%v", entry.Value)
|
||||||
|
|
||||||
|
peerConfig.ApiKey = strings.ReplaceAll(peerConfig.ApiKey, macroSlug, macroStr)
|
||||||
|
peerConfig.Filters.StripParams = strings.ReplaceAll(peerConfig.Filters.StripParams, macroSlug, macroStr)
|
||||||
|
|
||||||
|
// Substitute in setParams (type-preserving)
|
||||||
|
if len(peerConfig.Filters.SetParams) > 0 {
|
||||||
|
result, err := substituteMacroInValue(peerConfig.Filters.SetParams, entry.Name, entry.Value)
|
||||||
|
if err != nil {
|
||||||
|
return Config{}, fmt.Errorf("peers.%s.filters.setParams: %w", peerName, err)
|
||||||
|
}
|
||||||
|
peerConfig.Filters.SetParams = result.(map[string]any)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Validate no unknown macros remain
|
||||||
|
if matches := macroPatternRegex.FindAllStringSubmatch(peerConfig.ApiKey, -1); len(matches) > 0 {
|
||||||
|
return Config{}, fmt.Errorf("peers.%s.apiKey: unknown macro '${%s}'", peerName, matches[0][1])
|
||||||
|
}
|
||||||
|
if matches := macroPatternRegex.FindAllStringSubmatch(peerConfig.Filters.StripParams, -1); len(matches) > 0 {
|
||||||
|
return Config{}, fmt.Errorf("peers.%s.filters.stripParams: unknown macro '${%s}'", peerName, matches[0][1])
|
||||||
|
}
|
||||||
|
if len(peerConfig.Filters.SetParams) > 0 {
|
||||||
|
if err := validateNestedForUnknownMacros(peerConfig.Filters.SetParams, fmt.Sprintf("peers.%s.filters.setParams", peerName)); err != nil {
|
||||||
|
return Config{}, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
config.Peers[peerName] = peerConfig
|
||||||
|
}
|
||||||
|
|
||||||
return config, nil
|
return config, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -534,20 +580,26 @@ func validateMacro(name string, value any) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// validateMetadataForUnknownMacros recursively checks for any remaining macro references in metadata
|
// validateNestedForUnknownMacros recursively checks for any remaining macro references in nested structures
|
||||||
func validateMetadataForUnknownMacros(value any, modelId string) error {
|
func validateNestedForUnknownMacros(value any, context string) error {
|
||||||
switch v := value.(type) {
|
switch v := value.(type) {
|
||||||
case string:
|
case string:
|
||||||
matches := macroPatternRegex.FindAllStringSubmatch(v, -1)
|
matches := macroPatternRegex.FindAllStringSubmatch(v, -1)
|
||||||
for _, match := range matches {
|
for _, match := range matches {
|
||||||
macroName := match[1]
|
macroName := match[1]
|
||||||
return fmt.Errorf("model %s metadata: unknown macro '${%s}'", modelId, macroName)
|
return fmt.Errorf("%s: unknown macro '${%s}'", context, macroName)
|
||||||
|
}
|
||||||
|
// Check for unsubstituted env macros
|
||||||
|
envMatches := envMacroRegex.FindAllStringSubmatch(v, -1)
|
||||||
|
for _, match := range envMatches {
|
||||||
|
varName := match[1]
|
||||||
|
return fmt.Errorf("%s: environment variable '%s' not set", context, varName)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
|
|
||||||
case map[string]any:
|
case map[string]any:
|
||||||
for _, val := range v {
|
for _, val := range v {
|
||||||
if err := validateMetadataForUnknownMacros(val, modelId); err != nil {
|
if err := validateNestedForUnknownMacros(val, context); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -555,7 +607,7 @@ func validateMetadataForUnknownMacros(value any, modelId string) error {
|
|||||||
|
|
||||||
case []any:
|
case []any:
|
||||||
for _, val := range v {
|
for _, val := range v {
|
||||||
if err := validateMetadataForUnknownMacros(val, modelId); err != nil {
|
if err := validateNestedForUnknownMacros(val, context); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -614,3 +666,46 @@ func substituteMacroInValue(value any, macroName string, macroValue any) (any, e
|
|||||||
return value, nil
|
return value, nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// substituteEnvMacros replaces ${env.VAR_NAME} with environment variable values
|
||||||
|
// Returns error if any env var is not set or contains invalid characters
|
||||||
|
func substituteEnvMacros(s string) (string, error) {
|
||||||
|
result := s
|
||||||
|
matches := envMacroRegex.FindAllStringSubmatch(s, -1)
|
||||||
|
for _, match := range matches {
|
||||||
|
fullMatch := match[0] // ${env.VAR_NAME}
|
||||||
|
varName := match[1] // VAR_NAME
|
||||||
|
|
||||||
|
value, exists := os.LookupEnv(varName)
|
||||||
|
if !exists {
|
||||||
|
return "", fmt.Errorf("environment variable '%s' is not set", varName)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sanitize the value for safe YAML substitution
|
||||||
|
value, err := sanitizeEnvValueForYAML(value, varName)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
result = strings.ReplaceAll(result, fullMatch, value)
|
||||||
|
}
|
||||||
|
return result, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// sanitizeEnvValueForYAML ensures an environment variable value is safe for YAML substitution.
|
||||||
|
// It rejects values with characters that break YAML structure and escapes quotes/backslashes
|
||||||
|
// for compatibility with double-quoted YAML strings.
|
||||||
|
func sanitizeEnvValueForYAML(value, varName string) (string, error) {
|
||||||
|
// Reject values that would break YAML structure regardless of quoting context
|
||||||
|
if strings.ContainsAny(value, "\n\r\x00") {
|
||||||
|
return "", fmt.Errorf("environment variable '%s' contains newlines or null bytes which are not allowed in YAML substitution", varName)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Escape backslashes and double quotes for safe use in double-quoted YAML strings.
|
||||||
|
// In unquoted contexts, these escapes appear literally (harmless for most use cases).
|
||||||
|
// In double-quoted contexts, they are interpreted correctly.
|
||||||
|
value = strings.ReplaceAll(value, `\`, `\\`)
|
||||||
|
value = strings.ReplaceAll(value, `"`, `\"`)
|
||||||
|
|
||||||
|
return value, nil
|
||||||
|
}
|
||||||
|
|||||||
@@ -166,6 +166,7 @@ groups:
|
|||||||
expected := Config{
|
expected := Config{
|
||||||
LogLevel: "info",
|
LogLevel: "info",
|
||||||
LogTimeFormat: "",
|
LogTimeFormat: "",
|
||||||
|
LogToStdout: LogToStdoutProxy,
|
||||||
StartPort: 5800,
|
StartPort: 5800,
|
||||||
Macros: MacroList{
|
Macros: MacroList{
|
||||||
{"svr-path", "path/to/server"},
|
{"svr-path", "path/to/server"},
|
||||||
|
|||||||
@@ -761,3 +761,551 @@ models:
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestConfig_APIKeys_Invalid(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
content string
|
||||||
|
expectedErr string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "empty string",
|
||||||
|
content: `apiKeys: [""]`,
|
||||||
|
expectedErr: "empty api key found in apiKeys",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "blank spaces only",
|
||||||
|
content: `apiKeys: [" "]`,
|
||||||
|
expectedErr: "api key cannot contain spaces: ` `",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "contains leading space",
|
||||||
|
content: `apiKeys: [" key123"]`,
|
||||||
|
expectedErr: "api key cannot contain spaces: ` key123`",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "contains trailing space",
|
||||||
|
content: `apiKeys: ["key123 "]`,
|
||||||
|
expectedErr: "api key cannot contain spaces: `key123 `",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "contains middle space",
|
||||||
|
content: `apiKeys: ["key 123"]`,
|
||||||
|
expectedErr: "api key cannot contain spaces: `key 123`",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "empty in list with valid keys",
|
||||||
|
content: `apiKeys: ["valid-key", "", "another-key"]`,
|
||||||
|
expectedErr: "empty api key found in apiKeys",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
_, err := LoadConfigFromReader(strings.NewReader(tt.content))
|
||||||
|
if assert.Error(t, err) {
|
||||||
|
assert.Equal(t, tt.expectedErr, err.Error())
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestConfig_APIKeys_EnvMacros(t *testing.T) {
|
||||||
|
t.Run("env substitution in apiKeys", func(t *testing.T) {
|
||||||
|
t.Setenv("TEST_API_KEY", "secret-key-123")
|
||||||
|
|
||||||
|
content := `apiKeys: ["${env.TEST_API_KEY}"]`
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, []string{"secret-key-123"}, config.RequiredAPIKeys)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("multiple env substitutions in apiKeys", func(t *testing.T) {
|
||||||
|
t.Setenv("TEST_API_KEY_1", "key-one")
|
||||||
|
t.Setenv("TEST_API_KEY_2", "key-two")
|
||||||
|
|
||||||
|
content := `apiKeys: ["${env.TEST_API_KEY_1}", "${env.TEST_API_KEY_2}", "static-key"]`
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, []string{"key-one", "key-two", "static-key"}, config.RequiredAPIKeys)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("missing env var in apiKeys", func(t *testing.T) {
|
||||||
|
content := `apiKeys: ["${env.NONEXISTENT_API_KEY}"]`
|
||||||
|
_, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.Error(t, err)
|
||||||
|
// With string-level env substitution, error only includes var name
|
||||||
|
assert.Contains(t, err.Error(), "NONEXISTENT_API_KEY")
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("env substitution results in empty key", func(t *testing.T) {
|
||||||
|
t.Setenv("TEST_EMPTY_KEY", "")
|
||||||
|
|
||||||
|
content := `apiKeys: ["${env.TEST_EMPTY_KEY}"]`
|
||||||
|
_, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.Error(t, err)
|
||||||
|
assert.Equal(t, "empty api key found in apiKeys", err.Error())
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestConfig_EnvMacros(t *testing.T) {
|
||||||
|
t.Run("basic env substitution in cmd", func(t *testing.T) {
|
||||||
|
t.Setenv("TEST_MODEL_PATH", "/opt/models")
|
||||||
|
|
||||||
|
content := `
|
||||||
|
models:
|
||||||
|
test:
|
||||||
|
cmd: "${env.TEST_MODEL_PATH}/llama-server"
|
||||||
|
proxy: "http://localhost:8080"
|
||||||
|
`
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, "/opt/models/llama-server", config.Models["test"].Cmd)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("env substitution in multiple fields", func(t *testing.T) {
|
||||||
|
t.Setenv("TEST_HOST", "myserver")
|
||||||
|
t.Setenv("TEST_PORT", "9999")
|
||||||
|
|
||||||
|
content := `
|
||||||
|
models:
|
||||||
|
test:
|
||||||
|
cmd: "server --host ${env.TEST_HOST}"
|
||||||
|
proxy: "http://${env.TEST_HOST}:${env.TEST_PORT}"
|
||||||
|
checkEndpoint: "http://${env.TEST_HOST}/health"
|
||||||
|
`
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, "server --host myserver", config.Models["test"].Cmd)
|
||||||
|
assert.Equal(t, "http://myserver:9999", config.Models["test"].Proxy)
|
||||||
|
assert.Equal(t, "http://myserver/health", config.Models["test"].CheckEndpoint)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("env in global macro value", func(t *testing.T) {
|
||||||
|
t.Setenv("TEST_BASE_PATH", "/usr/local")
|
||||||
|
|
||||||
|
content := `
|
||||||
|
macros:
|
||||||
|
SERVER_PATH: "${env.TEST_BASE_PATH}/bin/server"
|
||||||
|
models:
|
||||||
|
test:
|
||||||
|
cmd: "${SERVER_PATH} --port 8080"
|
||||||
|
proxy: "http://localhost:8080"
|
||||||
|
`
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, "/usr/local/bin/server --port 8080", config.Models["test"].Cmd)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("env in model-level macro value", func(t *testing.T) {
|
||||||
|
t.Setenv("TEST_MODEL_DIR", "/models/llama")
|
||||||
|
|
||||||
|
content := `
|
||||||
|
models:
|
||||||
|
test:
|
||||||
|
macros:
|
||||||
|
MODEL_FILE: "${env.TEST_MODEL_DIR}/model.gguf"
|
||||||
|
cmd: "server --model ${MODEL_FILE}"
|
||||||
|
proxy: "http://localhost:8080"
|
||||||
|
`
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, "server --model /models/llama/model.gguf", config.Models["test"].Cmd)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("env in metadata", func(t *testing.T) {
|
||||||
|
t.Setenv("TEST_API_KEY", "secret123")
|
||||||
|
|
||||||
|
content := `
|
||||||
|
models:
|
||||||
|
test:
|
||||||
|
cmd: "server"
|
||||||
|
proxy: "http://localhost:8080"
|
||||||
|
metadata:
|
||||||
|
api_key: "${env.TEST_API_KEY}"
|
||||||
|
nested:
|
||||||
|
key: "${env.TEST_API_KEY}"
|
||||||
|
`
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, "secret123", config.Models["test"].Metadata["api_key"])
|
||||||
|
nested := config.Models["test"].Metadata["nested"].(map[string]any)
|
||||||
|
assert.Equal(t, "secret123", nested["key"])
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("env in filters.stripParams", func(t *testing.T) {
|
||||||
|
t.Setenv("TEST_STRIP_PARAMS", "temperature,top_p")
|
||||||
|
|
||||||
|
content := `
|
||||||
|
models:
|
||||||
|
test:
|
||||||
|
cmd: "server"
|
||||||
|
proxy: "http://localhost:8080"
|
||||||
|
filters:
|
||||||
|
stripParams: "${env.TEST_STRIP_PARAMS}"
|
||||||
|
`
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, "temperature,top_p", config.Models["test"].Filters.StripParams)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("env in cmdStop", func(t *testing.T) {
|
||||||
|
t.Setenv("TEST_KILL_SIGNAL", "SIGTERM")
|
||||||
|
|
||||||
|
content := `
|
||||||
|
models:
|
||||||
|
test:
|
||||||
|
cmd: "server --port ${PORT}"
|
||||||
|
cmdStop: "kill -${env.TEST_KILL_SIGNAL} ${PID}"
|
||||||
|
proxy: "http://localhost:${PORT}"
|
||||||
|
`
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Contains(t, config.Models["test"].CmdStop, "-SIGTERM")
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("missing env var returns error", func(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
models:
|
||||||
|
test:
|
||||||
|
cmd: "${env.UNDEFINED_VAR_12345}/server"
|
||||||
|
proxy: "http://localhost:8080"
|
||||||
|
`
|
||||||
|
_, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
if assert.Error(t, err) {
|
||||||
|
assert.Contains(t, err.Error(), "UNDEFINED_VAR_12345")
|
||||||
|
assert.Contains(t, err.Error(), "not set")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("missing env var in global macro", func(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
macros:
|
||||||
|
PATH: "${env.UNDEFINED_GLOBAL_VAR}"
|
||||||
|
models:
|
||||||
|
test:
|
||||||
|
cmd: "server"
|
||||||
|
proxy: "http://localhost:8080"
|
||||||
|
`
|
||||||
|
_, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
if assert.Error(t, err) {
|
||||||
|
assert.Contains(t, err.Error(), "UNDEFINED_GLOBAL_VAR")
|
||||||
|
assert.Contains(t, err.Error(), "not set")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("missing env var in model macro", func(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
models:
|
||||||
|
test:
|
||||||
|
macros:
|
||||||
|
MY_PATH: "${env.UNDEFINED_MODEL_VAR}"
|
||||||
|
cmd: "server"
|
||||||
|
proxy: "http://localhost:8080"
|
||||||
|
`
|
||||||
|
_, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
if assert.Error(t, err) {
|
||||||
|
assert.Contains(t, err.Error(), "UNDEFINED_MODEL_VAR")
|
||||||
|
assert.Contains(t, err.Error(), "not set")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("missing env var in metadata", func(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
models:
|
||||||
|
test:
|
||||||
|
cmd: "server"
|
||||||
|
proxy: "http://localhost:8080"
|
||||||
|
metadata:
|
||||||
|
key: "${env.UNDEFINED_META_VAR}"
|
||||||
|
`
|
||||||
|
_, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
if assert.Error(t, err) {
|
||||||
|
assert.Contains(t, err.Error(), "UNDEFINED_META_VAR")
|
||||||
|
assert.Contains(t, err.Error(), "not set")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("env combined with regular macros", func(t *testing.T) {
|
||||||
|
t.Setenv("TEST_ROOT", "/data")
|
||||||
|
|
||||||
|
content := `
|
||||||
|
macros:
|
||||||
|
MODEL_BASE: "${env.TEST_ROOT}/models"
|
||||||
|
models:
|
||||||
|
test:
|
||||||
|
cmd: "server --model ${MODEL_BASE}/${MODEL_ID}.gguf"
|
||||||
|
proxy: "http://localhost:8080"
|
||||||
|
`
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, "server --model /data/models/test.gguf", config.Models["test"].Cmd)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("multiple env vars in same string", func(t *testing.T) {
|
||||||
|
t.Setenv("TEST_USER", "admin")
|
||||||
|
t.Setenv("TEST_PASS", "secret")
|
||||||
|
|
||||||
|
content := `
|
||||||
|
models:
|
||||||
|
test:
|
||||||
|
cmd: "server --auth ${env.TEST_USER}:${env.TEST_PASS}"
|
||||||
|
proxy: "http://localhost:8080"
|
||||||
|
`
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, "server --auth admin:secret", config.Models["test"].Cmd)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("env value with newline is rejected", func(t *testing.T) {
|
||||||
|
t.Setenv("TEST_MULTILINE", "line1\nline2")
|
||||||
|
|
||||||
|
content := `
|
||||||
|
models:
|
||||||
|
test:
|
||||||
|
cmd: "server --config ${env.TEST_MULTILINE}"
|
||||||
|
proxy: "http://localhost:8080"
|
||||||
|
`
|
||||||
|
_, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
if assert.Error(t, err) {
|
||||||
|
assert.Contains(t, err.Error(), "TEST_MULTILINE")
|
||||||
|
assert.Contains(t, err.Error(), "newlines")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("env value with carriage return is rejected", func(t *testing.T) {
|
||||||
|
t.Setenv("TEST_CR", "line1\rline2")
|
||||||
|
|
||||||
|
content := `
|
||||||
|
models:
|
||||||
|
test:
|
||||||
|
cmd: "server --config ${env.TEST_CR}"
|
||||||
|
proxy: "http://localhost:8080"
|
||||||
|
`
|
||||||
|
_, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
if assert.Error(t, err) {
|
||||||
|
assert.Contains(t, err.Error(), "TEST_CR")
|
||||||
|
assert.Contains(t, err.Error(), "newlines")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("env value with quotes is escaped for YAML", func(t *testing.T) {
|
||||||
|
t.Setenv("TEST_QUOTED", `value with "quotes"`)
|
||||||
|
|
||||||
|
content := `
|
||||||
|
models:
|
||||||
|
test:
|
||||||
|
cmd: "server --arg \"${env.TEST_QUOTED}\""
|
||||||
|
proxy: "http://localhost:8080"
|
||||||
|
`
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
// Quotes are escaped before YAML parsing, then YAML unescapes them
|
||||||
|
// Final result preserves the original value with quotes
|
||||||
|
assert.Contains(t, config.Models["test"].Cmd, `"quotes"`)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("env value with backslash is escaped for YAML", func(t *testing.T) {
|
||||||
|
t.Setenv("TEST_BACKSLASH", `path\to\file`)
|
||||||
|
|
||||||
|
content := `
|
||||||
|
models:
|
||||||
|
test:
|
||||||
|
cmd: "server --path \"${env.TEST_BACKSLASH}\""
|
||||||
|
proxy: "http://localhost:8080"
|
||||||
|
`
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
// Backslashes are escaped before YAML parsing, then YAML unescapes them
|
||||||
|
// Final result preserves the original value with backslashes
|
||||||
|
assert.Contains(t, config.Models["test"].Cmd, `path\to\file`)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestConfig_PeerApiKey_EnvMacros(t *testing.T) {
|
||||||
|
t.Run("env substitution in peer apiKey", func(t *testing.T) {
|
||||||
|
t.Setenv("TEST_PEER_API_KEY", "sk-peer-secret-123")
|
||||||
|
|
||||||
|
content := `
|
||||||
|
peers:
|
||||||
|
openrouter:
|
||||||
|
proxy: https://openrouter.ai/api
|
||||||
|
apiKey: "${env.TEST_PEER_API_KEY}"
|
||||||
|
models:
|
||||||
|
- llama-3.1-8b
|
||||||
|
`
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, "sk-peer-secret-123", config.Peers["openrouter"].ApiKey)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("missing env var in peer apiKey", func(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
peers:
|
||||||
|
openrouter:
|
||||||
|
proxy: https://openrouter.ai/api
|
||||||
|
apiKey: "${env.NONEXISTENT_PEER_KEY}"
|
||||||
|
models:
|
||||||
|
- llama-3.1-8b
|
||||||
|
`
|
||||||
|
_, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.Error(t, err)
|
||||||
|
// With string-level env substitution, error only includes var name
|
||||||
|
assert.Contains(t, err.Error(), "NONEXISTENT_PEER_KEY")
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("static apiKey unchanged", func(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
peers:
|
||||||
|
openrouter:
|
||||||
|
proxy: https://openrouter.ai/api
|
||||||
|
apiKey: sk-static-key
|
||||||
|
models:
|
||||||
|
- llama-3.1-8b
|
||||||
|
`
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, "sk-static-key", config.Peers["openrouter"].ApiKey)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("multiple peers with env apiKeys", func(t *testing.T) {
|
||||||
|
t.Setenv("TEST_PEER_KEY_1", "key-one")
|
||||||
|
t.Setenv("TEST_PEER_KEY_2", "key-two")
|
||||||
|
|
||||||
|
content := `
|
||||||
|
peers:
|
||||||
|
peer1:
|
||||||
|
proxy: https://peer1.example.com
|
||||||
|
apiKey: "${env.TEST_PEER_KEY_1}"
|
||||||
|
models:
|
||||||
|
- model-a
|
||||||
|
peer2:
|
||||||
|
proxy: https://peer2.example.com
|
||||||
|
apiKey: "${env.TEST_PEER_KEY_2}"
|
||||||
|
models:
|
||||||
|
- model-b
|
||||||
|
`
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, "key-one", config.Peers["peer1"].ApiKey)
|
||||||
|
assert.Equal(t, "key-two", config.Peers["peer2"].ApiKey)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("global macro substitution in peer apiKey", func(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
macros:
|
||||||
|
API_KEY: sk-from-global-macro
|
||||||
|
peers:
|
||||||
|
openrouter:
|
||||||
|
proxy: https://openrouter.ai/api
|
||||||
|
apiKey: "${API_KEY}"
|
||||||
|
models:
|
||||||
|
- llama-3.1-8b
|
||||||
|
`
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, "sk-from-global-macro", config.Peers["openrouter"].ApiKey)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("global macro in peer filters.stripParams", func(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
macros:
|
||||||
|
STRIP_LIST: "temperature, top_p"
|
||||||
|
peers:
|
||||||
|
openrouter:
|
||||||
|
proxy: https://openrouter.ai/api
|
||||||
|
models:
|
||||||
|
- llama-3.1-8b
|
||||||
|
filters:
|
||||||
|
stripParams: "${STRIP_LIST}"
|
||||||
|
`
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, "temperature, top_p", config.Peers["openrouter"].Filters.StripParams)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("global macro in peer filters.setParams", func(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
macros:
|
||||||
|
MAX_TOKENS: 4096
|
||||||
|
peers:
|
||||||
|
openrouter:
|
||||||
|
proxy: https://openrouter.ai/api
|
||||||
|
models:
|
||||||
|
- llama-3.1-8b
|
||||||
|
filters:
|
||||||
|
setParams:
|
||||||
|
max_tokens: "${MAX_TOKENS}"
|
||||||
|
`
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, 4096, config.Peers["openrouter"].Filters.SetParams["max_tokens"])
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("env macro in peer filters.setParams", func(t *testing.T) {
|
||||||
|
t.Setenv("TEST_RETENTION_POLICY", "deny")
|
||||||
|
|
||||||
|
content := `
|
||||||
|
peers:
|
||||||
|
openrouter:
|
||||||
|
proxy: https://openrouter.ai/api
|
||||||
|
models:
|
||||||
|
- llama-3.1-8b
|
||||||
|
filters:
|
||||||
|
setParams:
|
||||||
|
data_collection: "${env.TEST_RETENTION_POLICY}"
|
||||||
|
`
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, "deny", config.Peers["openrouter"].Filters.SetParams["data_collection"])
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("env macro in peer filters.stripParams", func(t *testing.T) {
|
||||||
|
t.Setenv("TEST_STRIP_PARAMS", "frequency_penalty, presence_penalty")
|
||||||
|
|
||||||
|
content := `
|
||||||
|
peers:
|
||||||
|
openrouter:
|
||||||
|
proxy: https://openrouter.ai/api
|
||||||
|
models:
|
||||||
|
- llama-3.1-8b
|
||||||
|
filters:
|
||||||
|
stripParams: "${env.TEST_STRIP_PARAMS}"
|
||||||
|
`
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, "frequency_penalty, presence_penalty", config.Peers["openrouter"].Filters.StripParams)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("unknown macro in peer apiKey fails", func(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
peers:
|
||||||
|
openrouter:
|
||||||
|
proxy: https://openrouter.ai/api
|
||||||
|
apiKey: "${UNDEFINED_MACRO}"
|
||||||
|
models:
|
||||||
|
- llama-3.1-8b
|
||||||
|
`
|
||||||
|
_, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.Error(t, err)
|
||||||
|
assert.Contains(t, err.Error(), "peers.openrouter.apiKey")
|
||||||
|
assert.Contains(t, err.Error(), "unknown macro")
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("unknown macro in peer filters.setParams fails", func(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
peers:
|
||||||
|
openrouter:
|
||||||
|
proxy: https://openrouter.ai/api
|
||||||
|
models:
|
||||||
|
- llama-3.1-8b
|
||||||
|
filters:
|
||||||
|
setParams:
|
||||||
|
value: "${UNDEFINED_MACRO}"
|
||||||
|
`
|
||||||
|
_, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.Error(t, err)
|
||||||
|
assert.Contains(t, err.Error(), "peers.openrouter.filters.setParams")
|
||||||
|
assert.Contains(t, err.Error(), "unknown macro")
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|||||||
@@ -158,6 +158,7 @@ groups:
|
|||||||
expected := Config{
|
expected := Config{
|
||||||
LogLevel: "info",
|
LogLevel: "info",
|
||||||
LogTimeFormat: "",
|
LogTimeFormat: "",
|
||||||
|
LogToStdout: LogToStdoutProxy,
|
||||||
StartPort: 5800,
|
StartPort: 5800,
|
||||||
Macros: MacroList{
|
Macros: MacroList{
|
||||||
{"svr-path", "path/to/server"},
|
{"svr-path", "path/to/server"},
|
||||||
|
|||||||
@@ -0,0 +1,81 @@
|
|||||||
|
package config
|
||||||
|
|
||||||
|
import (
|
||||||
|
"slices"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ProtectedParams is a list of parameters that cannot be set or stripped via filters
|
||||||
|
// These are protected to prevent breaking the proxy's ability to route requests correctly
|
||||||
|
var ProtectedParams = []string{"model"}
|
||||||
|
|
||||||
|
// Filters contains filter settings for modifying request parameters
|
||||||
|
// Used by both models and peers
|
||||||
|
type Filters struct {
|
||||||
|
// StripParams is a comma-separated list of parameters to remove from requests
|
||||||
|
// The "model" parameter can never be removed
|
||||||
|
StripParams string `yaml:"stripParams"`
|
||||||
|
|
||||||
|
// SetParams is a dictionary of parameters to set/override in requests
|
||||||
|
// Protected params (like "model") cannot be set
|
||||||
|
SetParams map[string]any `yaml:"setParams"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// SanitizedStripParams returns a sorted list of parameters to strip,
|
||||||
|
// with duplicates, empty strings, and protected params removed
|
||||||
|
func (f Filters) SanitizedStripParams() []string {
|
||||||
|
if f.StripParams == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
params := strings.Split(f.StripParams, ",")
|
||||||
|
cleaned := make([]string, 0, len(params))
|
||||||
|
seen := make(map[string]bool)
|
||||||
|
|
||||||
|
for _, param := range params {
|
||||||
|
trimmed := strings.TrimSpace(param)
|
||||||
|
// Skip protected params, empty strings, and duplicates
|
||||||
|
if slices.Contains(ProtectedParams, trimmed) || trimmed == "" || seen[trimmed] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seen[trimmed] = true
|
||||||
|
cleaned = append(cleaned, trimmed)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(cleaned) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
slices.Sort(cleaned)
|
||||||
|
return cleaned
|
||||||
|
}
|
||||||
|
|
||||||
|
// SanitizedSetParams returns a copy of SetParams with protected params removed
|
||||||
|
// and keys sorted for consistent iteration order
|
||||||
|
func (f Filters) SanitizedSetParams() (map[string]any, []string) {
|
||||||
|
if len(f.SetParams) == 0 {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
result := make(map[string]any, len(f.SetParams))
|
||||||
|
keys := make([]string, 0, len(f.SetParams))
|
||||||
|
|
||||||
|
for key, value := range f.SetParams {
|
||||||
|
// Skip protected params
|
||||||
|
if slices.Contains(ProtectedParams, key) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
result[key] = value
|
||||||
|
keys = append(keys, key)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sort keys for consistent ordering
|
||||||
|
sort.Strings(keys)
|
||||||
|
|
||||||
|
if len(result) == 0 {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return result, keys
|
||||||
|
}
|
||||||
@@ -0,0 +1,168 @@
|
|||||||
|
package config
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestFilters_SanitizedStripParams(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
stripParams string
|
||||||
|
want []string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "empty string",
|
||||||
|
stripParams: "",
|
||||||
|
want: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "single param",
|
||||||
|
stripParams: "temperature",
|
||||||
|
want: []string{"temperature"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "multiple params",
|
||||||
|
stripParams: "temperature, top_p, top_k",
|
||||||
|
want: []string{"temperature", "top_k", "top_p"}, // sorted
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "model param filtered",
|
||||||
|
stripParams: "model, temperature, top_p",
|
||||||
|
want: []string{"temperature", "top_p"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "only model param",
|
||||||
|
stripParams: "model",
|
||||||
|
want: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "duplicates removed",
|
||||||
|
stripParams: "temperature, top_p, temperature",
|
||||||
|
want: []string{"temperature", "top_p"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "extra whitespace",
|
||||||
|
stripParams: " temperature , top_p ",
|
||||||
|
want: []string{"temperature", "top_p"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "empty values filtered",
|
||||||
|
stripParams: "temperature,,top_p,",
|
||||||
|
want: []string{"temperature", "top_p"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
f := Filters{StripParams: tt.stripParams}
|
||||||
|
got := f.SanitizedStripParams()
|
||||||
|
assert.Equal(t, tt.want, got)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFilters_SanitizedSetParams(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
setParams map[string]any
|
||||||
|
wantParams map[string]any
|
||||||
|
wantKeys []string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "empty setParams",
|
||||||
|
setParams: nil,
|
||||||
|
wantParams: nil,
|
||||||
|
wantKeys: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "empty map",
|
||||||
|
setParams: map[string]any{},
|
||||||
|
wantParams: nil,
|
||||||
|
wantKeys: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "normal params",
|
||||||
|
setParams: map[string]any{
|
||||||
|
"temperature": 0.7,
|
||||||
|
"top_p": 0.9,
|
||||||
|
},
|
||||||
|
wantParams: map[string]any{
|
||||||
|
"temperature": 0.7,
|
||||||
|
"top_p": 0.9,
|
||||||
|
},
|
||||||
|
wantKeys: []string{"temperature", "top_p"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "protected model param filtered",
|
||||||
|
setParams: map[string]any{
|
||||||
|
"model": "should-be-filtered",
|
||||||
|
"temperature": 0.7,
|
||||||
|
},
|
||||||
|
wantParams: map[string]any{
|
||||||
|
"temperature": 0.7,
|
||||||
|
},
|
||||||
|
wantKeys: []string{"temperature"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "only protected param",
|
||||||
|
setParams: map[string]any{
|
||||||
|
"model": "should-be-filtered",
|
||||||
|
},
|
||||||
|
wantParams: nil,
|
||||||
|
wantKeys: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "complex nested values",
|
||||||
|
setParams: map[string]any{
|
||||||
|
"provider": map[string]any{
|
||||||
|
"data_collection": "deny",
|
||||||
|
"allow_fallbacks": false,
|
||||||
|
},
|
||||||
|
"transforms": []string{"middle-out"},
|
||||||
|
},
|
||||||
|
wantParams: map[string]any{
|
||||||
|
"provider": map[string]any{
|
||||||
|
"data_collection": "deny",
|
||||||
|
"allow_fallbacks": false,
|
||||||
|
},
|
||||||
|
"transforms": []string{"middle-out"},
|
||||||
|
},
|
||||||
|
wantKeys: []string{"provider", "transforms"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
f := Filters{SetParams: tt.setParams}
|
||||||
|
gotParams, gotKeys := f.SanitizedSetParams()
|
||||||
|
|
||||||
|
assert.Equal(t, len(tt.wantKeys), len(gotKeys), "keys length mismatch")
|
||||||
|
for i, key := range gotKeys {
|
||||||
|
assert.Equal(t, tt.wantKeys[i], key, "key mismatch at %d", i)
|
||||||
|
}
|
||||||
|
|
||||||
|
if tt.wantParams == nil {
|
||||||
|
assert.Nil(t, gotParams, "expected nil params")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
assert.Equal(t, len(tt.wantParams), len(gotParams), "params length mismatch")
|
||||||
|
for key, wantValue := range tt.wantParams {
|
||||||
|
gotValue, exists := gotParams[key]
|
||||||
|
assert.True(t, exists, "missing key: %s", key)
|
||||||
|
// Simple comparison for basic types
|
||||||
|
switch v := wantValue.(type) {
|
||||||
|
case string, int, float64, bool:
|
||||||
|
assert.Equal(t, v, gotValue, "value mismatch for key %s", key)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestProtectedParams(t *testing.T) {
|
||||||
|
// Verify that "model" is protected
|
||||||
|
assert.Contains(t, ProtectedParams, "model")
|
||||||
|
}
|
||||||
@@ -3,8 +3,6 @@ package config
|
|||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
"runtime"
|
"runtime"
|
||||||
"slices"
|
|
||||||
"strings"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type ModelConfig struct {
|
type ModelConfig struct {
|
||||||
@@ -74,16 +72,15 @@ func (m *ModelConfig) SanitizedCommand() ([]string, error) {
|
|||||||
return SanitizeCommand(m.Cmd)
|
return SanitizeCommand(m.Cmd)
|
||||||
}
|
}
|
||||||
|
|
||||||
// ModelFilters see issue #174
|
// ModelFilters embeds Filters and adds legacy support for strip_params field
|
||||||
|
// See issue #174
|
||||||
type ModelFilters struct {
|
type ModelFilters struct {
|
||||||
StripParams string `yaml:"stripParams"`
|
Filters `yaml:",inline"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *ModelFilters) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
func (m *ModelFilters) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||||
type rawModelFilters ModelFilters
|
type rawModelFilters ModelFilters
|
||||||
defaults := rawModelFilters{
|
defaults := rawModelFilters{}
|
||||||
StripParams: "",
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := unmarshal(&defaults); err != nil {
|
if err := unmarshal(&defaults); err != nil {
|
||||||
return err
|
return err
|
||||||
@@ -104,25 +101,8 @@ func (m *ModelFilters) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SanitizedStripParams wraps Filters.SanitizedStripParams for backwards compatibility
|
||||||
|
// Returns ([]string, error) to match existing API
|
||||||
func (f ModelFilters) SanitizedStripParams() ([]string, error) {
|
func (f ModelFilters) SanitizedStripParams() ([]string, error) {
|
||||||
if f.StripParams == "" {
|
return f.Filters.SanitizedStripParams(), nil
|
||||||
return nil, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
params := strings.Split(f.StripParams, ",")
|
|
||||||
cleaned := make([]string, 0, len(params))
|
|
||||||
seen := make(map[string]bool)
|
|
||||||
|
|
||||||
for _, param := range params {
|
|
||||||
trimmed := strings.TrimSpace(param)
|
|
||||||
if trimmed == "model" || trimmed == "" || seen[trimmed] {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
seen[trimmed] = true
|
|
||||||
cleaned = append(cleaned, trimmed)
|
|
||||||
}
|
|
||||||
|
|
||||||
// sort cleaned
|
|
||||||
slices.Sort(cleaned)
|
|
||||||
return cleaned, nil
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -72,3 +72,35 @@ models:
|
|||||||
assert.True(t, *config.Models["model2"].SendLoadingState)
|
assert.True(t, *config.Models["model2"].SendLoadingState)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestConfig_ModelFiltersWithSetParams(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
models:
|
||||||
|
model1:
|
||||||
|
cmd: path/to/cmd --port ${PORT}
|
||||||
|
filters:
|
||||||
|
stripParams: "top_k"
|
||||||
|
setParams:
|
||||||
|
temperature: 0.7
|
||||||
|
top_p: 0.9
|
||||||
|
stop:
|
||||||
|
- "<|end|>"
|
||||||
|
- "<|stop|>"
|
||||||
|
`
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
modelConfig := config.Models["model1"]
|
||||||
|
|
||||||
|
// Check stripParams
|
||||||
|
stripParams, err := modelConfig.Filters.SanitizedStripParams()
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, []string{"top_k"}, stripParams)
|
||||||
|
|
||||||
|
// Check setParams
|
||||||
|
setParams, keys := modelConfig.Filters.SanitizedSetParams()
|
||||||
|
assert.NotNil(t, setParams)
|
||||||
|
assert.Equal(t, []string{"stop", "temperature", "top_p"}, keys)
|
||||||
|
assert.Equal(t, 0.7, setParams["temperature"])
|
||||||
|
assert.Equal(t, 0.9, setParams["top_p"])
|
||||||
|
}
|
||||||
|
|||||||
@@ -0,0 +1,49 @@
|
|||||||
|
package config
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"net/url"
|
||||||
|
)
|
||||||
|
|
||||||
|
type PeerDictionaryConfig map[string]PeerConfig
|
||||||
|
type PeerConfig struct {
|
||||||
|
Proxy string `yaml:"proxy"`
|
||||||
|
ProxyURL *url.URL `yaml:"-"`
|
||||||
|
ApiKey string `yaml:"apiKey"`
|
||||||
|
Models []string `yaml:"models"`
|
||||||
|
Filters Filters `yaml:"filters"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *PeerConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||||
|
type rawPeerConfig PeerConfig
|
||||||
|
defaults := rawPeerConfig{
|
||||||
|
Proxy: "",
|
||||||
|
ApiKey: "",
|
||||||
|
Models: []string{},
|
||||||
|
Filters: Filters{},
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := unmarshal(&defaults); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Validate proxy is not empty
|
||||||
|
if defaults.Proxy == "" {
|
||||||
|
return fmt.Errorf("proxy is required")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Validate proxy is a valid URL and store the parsed value
|
||||||
|
parsedURL, err := url.Parse(defaults.Proxy)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("invalid peer proxy URL (%s): %w", defaults.Proxy, err)
|
||||||
|
}
|
||||||
|
defaults.ProxyURL = parsedURL
|
||||||
|
|
||||||
|
// Validate models is not empty
|
||||||
|
if len(defaults.Models) == 0 {
|
||||||
|
return fmt.Errorf("peer models can not be empty")
|
||||||
|
}
|
||||||
|
|
||||||
|
*c = PeerConfig(defaults)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
@@ -0,0 +1,209 @@
|
|||||||
|
package config
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"gopkg.in/yaml.v3"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestPeerConfig_UnmarshalYAML(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
yaml string
|
||||||
|
wantErr string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "valid config",
|
||||||
|
yaml: `
|
||||||
|
proxy: http://192.168.1.23
|
||||||
|
models:
|
||||||
|
- model_a
|
||||||
|
- model_b
|
||||||
|
`,
|
||||||
|
wantErr: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "valid config with apiKey",
|
||||||
|
yaml: `
|
||||||
|
proxy: https://openrouter.ai/api
|
||||||
|
apiKey: sk-test-key
|
||||||
|
models:
|
||||||
|
- meta-llama/llama-3.1-8b-instruct
|
||||||
|
`,
|
||||||
|
wantErr: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "missing proxy",
|
||||||
|
yaml: `
|
||||||
|
models:
|
||||||
|
- model_a
|
||||||
|
`,
|
||||||
|
wantErr: "proxy is required",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "empty proxy",
|
||||||
|
yaml: `
|
||||||
|
proxy: ""
|
||||||
|
models:
|
||||||
|
- model_a
|
||||||
|
`,
|
||||||
|
wantErr: "proxy is required",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "invalid proxy URL",
|
||||||
|
yaml: `
|
||||||
|
proxy: "://invalid"
|
||||||
|
models:
|
||||||
|
- model_a
|
||||||
|
`,
|
||||||
|
wantErr: "invalid peer proxy URL",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "missing models",
|
||||||
|
yaml: `
|
||||||
|
proxy: http://localhost:8080
|
||||||
|
`,
|
||||||
|
wantErr: "peer models can not be empty",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "empty models",
|
||||||
|
yaml: `
|
||||||
|
proxy: http://localhost:8080
|
||||||
|
models: []
|
||||||
|
`,
|
||||||
|
wantErr: "peer models can not be empty",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
var config PeerConfig
|
||||||
|
err := yaml.Unmarshal([]byte(tt.yaml), &config)
|
||||||
|
|
||||||
|
if tt.wantErr == "" {
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if err == nil {
|
||||||
|
t.Errorf("expected error containing %q, got nil", tt.wantErr)
|
||||||
|
} else if !contains(err.Error(), tt.wantErr) {
|
||||||
|
t.Errorf("expected error containing %q, got %q", tt.wantErr, err.Error())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPeerConfig_ProxyURL(t *testing.T) {
|
||||||
|
yamlData := `
|
||||||
|
proxy: http://192.168.1.23:8080/api
|
||||||
|
apiKey: sk-test
|
||||||
|
models:
|
||||||
|
- model_a
|
||||||
|
`
|
||||||
|
var config PeerConfig
|
||||||
|
err := yaml.Unmarshal([]byte(yamlData), &config)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if config.ProxyURL == nil {
|
||||||
|
t.Fatal("ProxyURL should not be nil")
|
||||||
|
}
|
||||||
|
|
||||||
|
if config.ProxyURL.Host != "192.168.1.23:8080" {
|
||||||
|
t.Errorf("expected host %q, got %q", "192.168.1.23:8080", config.ProxyURL.Host)
|
||||||
|
}
|
||||||
|
|
||||||
|
if config.ProxyURL.Scheme != "http" {
|
||||||
|
t.Errorf("expected scheme %q, got %q", "http", config.ProxyURL.Scheme)
|
||||||
|
}
|
||||||
|
|
||||||
|
if config.ProxyURL.Path != "/api" {
|
||||||
|
t.Errorf("expected path %q, got %q", "/api", config.ProxyURL.Path)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// contains reports whether substr occurs anywhere in s.
func contains(s, substr string) bool {
	if len(s) < len(substr) {
		return false
	}
	return searchSubstring(s, substr)
}

// searchSubstring slides a window of len(substr) bytes across s and reports
// whether any window equals substr. An empty substr matches any s.
func searchSubstring(s, substr string) bool {
	width := len(substr)
	for end := width; end <= len(s); end++ {
		if s[end-width:end] == substr {
			return true
		}
	}
	return false
}
|
||||||
|
|
||||||
|
func TestPeerConfig_WithFilters(t *testing.T) {
|
||||||
|
yamlData := `
|
||||||
|
proxy: https://openrouter.ai/api
|
||||||
|
apiKey: sk-test
|
||||||
|
models:
|
||||||
|
- model_a
|
||||||
|
filters:
|
||||||
|
setParams:
|
||||||
|
temperature: 0.7
|
||||||
|
provider:
|
||||||
|
data_collection: deny
|
||||||
|
`
|
||||||
|
var config PeerConfig
|
||||||
|
err := yaml.Unmarshal([]byte(yamlData), &config)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if config.Filters.SetParams == nil {
|
||||||
|
t.Fatal("Filters.SetParams should not be nil")
|
||||||
|
}
|
||||||
|
|
||||||
|
if config.Filters.SetParams["temperature"] != 0.7 {
|
||||||
|
t.Errorf("expected temperature 0.7, got %v", config.Filters.SetParams["temperature"])
|
||||||
|
}
|
||||||
|
|
||||||
|
provider, ok := config.Filters.SetParams["provider"].(map[string]any)
|
||||||
|
if !ok {
|
||||||
|
t.Fatal("provider should be a map")
|
||||||
|
}
|
||||||
|
if provider["data_collection"] != "deny" {
|
||||||
|
t.Errorf("expected data_collection deny, got %v", provider["data_collection"])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPeerConfig_WithBothFilters(t *testing.T) {
|
||||||
|
yamlData := `
|
||||||
|
proxy: https://openrouter.ai/api
|
||||||
|
apiKey: sk-test
|
||||||
|
models:
|
||||||
|
- model_a
|
||||||
|
filters:
|
||||||
|
stripParams: "temperature, top_p"
|
||||||
|
setParams:
|
||||||
|
max_tokens: 1000
|
||||||
|
`
|
||||||
|
var config PeerConfig
|
||||||
|
err := yaml.Unmarshal([]byte(yamlData), &config)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check stripParams
|
||||||
|
stripParams := config.Filters.SanitizedStripParams()
|
||||||
|
if len(stripParams) != 2 {
|
||||||
|
t.Errorf("expected 2 strip params, got %d", len(stripParams))
|
||||||
|
}
|
||||||
|
if stripParams[0] != "temperature" || stripParams[1] != "top_p" {
|
||||||
|
t.Errorf("unexpected strip params: %v", stripParams)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check setParams
|
||||||
|
if config.Filters.SetParams == nil {
|
||||||
|
t.Fatal("Filters.SetParams should not be nil")
|
||||||
|
}
|
||||||
|
if config.Filters.SetParams["max_tokens"] != 1000 {
|
||||||
|
t.Errorf("expected max_tokens 1000, got %v", config.Filters.SetParams["max_tokens"])
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,7 +1,6 @@
|
|||||||
package proxy
|
package proxy
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"container/ring"
|
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
@@ -12,6 +11,85 @@ import (
|
|||||||
"github.com/mostlygeek/llama-swap/event"
|
"github.com/mostlygeek/llama-swap/event"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// circularBuffer is a fixed-capacity byte ring. Once it is full, new writes
// overwrite the oldest bytes. Writing costs time proportional to the bytes
// written; reading the history copies every stored byte.
type circularBuffer struct {
	data []byte // backing storage, allocated once at full capacity
	head int    // index at which the next write begins
	size int    // number of valid bytes currently held (0 to len(data))
}

// newCircularBuffer returns an empty buffer able to hold capacity bytes.
func newCircularBuffer(capacity int) *circularBuffer {
	return &circularBuffer{data: make([]byte, capacity)}
}

// Write appends p to the buffer, overwriting the oldest bytes once it is full.
// The bytes are copied into the internal storage; p is not retained.
func (cb *circularBuffer) Write(p []byte) {
	incoming := len(p)
	if incoming == 0 {
		return
	}

	capacity := len(cb.data)

	// Input at least as large as the buffer: only its tail survives, and the
	// buffer restarts from index 0 completely full.
	if incoming >= capacity {
		copy(cb.data, p[incoming-capacity:])
		cb.head, cb.size = 0, capacity
		return
	}

	// copy stops at the end of the backing array, so written is the number of
	// bytes that fit before the wrap point.
	written := copy(cb.data[cb.head:], p)
	if written < incoming {
		// The remainder wraps around to the start of the array.
		copy(cb.data, p[written:])
	}
	cb.head = (cb.head + incoming) % capacity

	// The stored size grows with each write but never beyond capacity.
	if total := cb.size + incoming; total < capacity {
		cb.size = total
	} else {
		cb.size = capacity
	}
}

// GetHistory returns the buffered bytes ordered oldest to newest. The result
// is a freshly allocated copy, or nil when the buffer is empty.
func (cb *circularBuffer) GetHistory() []byte {
	if cb.size == 0 {
		return nil
	}

	capacity := len(cb.data)

	// The oldest byte sits size positions behind head, modulo capacity.
	oldest := (cb.head - cb.size + capacity) % capacity

	out := make([]byte, cb.size)

	// The first copy takes everything from oldest up to the end of the array
	// (or all of it when the data is contiguous); the second copy, if needed,
	// takes the part that wrapped around to the front.
	taken := copy(out, cb.data[oldest:])
	if taken < cb.size {
		copy(out[taken:], cb.data[:cb.size-taken])
	}

	return out
}
|
||||||
|
|
||||||
type LogLevel int
|
type LogLevel int
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@@ -19,12 +97,14 @@ const (
|
|||||||
LevelInfo
|
LevelInfo
|
||||||
LevelWarn
|
LevelWarn
|
||||||
LevelError
|
LevelError
|
||||||
|
|
||||||
|
LogBufferSize = 100 * 1024
|
||||||
)
|
)
|
||||||
|
|
||||||
type LogMonitor struct {
|
type LogMonitor struct {
|
||||||
eventbus *event.Dispatcher
|
eventbus *event.Dispatcher
|
||||||
mu sync.RWMutex
|
mu sync.RWMutex
|
||||||
buffer *ring.Ring
|
buffer *circularBuffer
|
||||||
bufferMu sync.RWMutex
|
bufferMu sync.RWMutex
|
||||||
|
|
||||||
// typically this can be os.Stdout
|
// typically this can be os.Stdout
|
||||||
@@ -45,7 +125,7 @@ func NewLogMonitor() *LogMonitor {
|
|||||||
func NewLogMonitorWriter(stdout io.Writer) *LogMonitor {
|
func NewLogMonitorWriter(stdout io.Writer) *LogMonitor {
|
||||||
return &LogMonitor{
|
return &LogMonitor{
|
||||||
eventbus: event.NewDispatcherConfig(1000),
|
eventbus: event.NewDispatcherConfig(1000),
|
||||||
buffer: ring.New(10 * 1024), // keep 10KB of buffered logs
|
buffer: nil, // lazy initialized on first Write
|
||||||
stdout: stdout,
|
stdout: stdout,
|
||||||
level: LevelInfo,
|
level: LevelInfo,
|
||||||
prefix: "",
|
prefix: "",
|
||||||
@@ -64,12 +144,15 @@ func (w *LogMonitor) Write(p []byte) (n int, err error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
w.bufferMu.Lock()
|
w.bufferMu.Lock()
|
||||||
bufferCopy := make([]byte, len(p))
|
if w.buffer == nil {
|
||||||
copy(bufferCopy, p)
|
w.buffer = newCircularBuffer(LogBufferSize)
|
||||||
w.buffer.Value = bufferCopy
|
}
|
||||||
w.buffer = w.buffer.Next()
|
w.buffer.Write(p)
|
||||||
w.bufferMu.Unlock()
|
w.bufferMu.Unlock()
|
||||||
|
|
||||||
|
// Make a copy for broadcast to preserve immutability
|
||||||
|
bufferCopy := make([]byte, len(p))
|
||||||
|
copy(bufferCopy, p)
|
||||||
w.broadcast(bufferCopy)
|
w.broadcast(bufferCopy)
|
||||||
return n, nil
|
return n, nil
|
||||||
}
|
}
|
||||||
@@ -77,16 +160,18 @@ func (w *LogMonitor) Write(p []byte) (n int, err error) {
|
|||||||
func (w *LogMonitor) GetHistory() []byte {
|
func (w *LogMonitor) GetHistory() []byte {
|
||||||
w.bufferMu.RLock()
|
w.bufferMu.RLock()
|
||||||
defer w.bufferMu.RUnlock()
|
defer w.bufferMu.RUnlock()
|
||||||
|
if w.buffer == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return w.buffer.GetHistory()
|
||||||
|
}
|
||||||
|
|
||||||
var history []byte
|
// Clear releases the buffer memory, making it eligible for GC.
|
||||||
w.buffer.Do(func(p any) {
|
// The buffer will be lazily re-allocated on the next Write.
|
||||||
if p != nil {
|
func (w *LogMonitor) Clear() {
|
||||||
if content, ok := p.([]byte); ok {
|
w.bufferMu.Lock()
|
||||||
history = append(history, content...)
|
w.buffer = nil
|
||||||
}
|
w.bufferMu.Unlock()
|
||||||
}
|
|
||||||
})
|
|
||||||
return history
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (w *LogMonitor) OnLogData(callback func(data []byte)) context.CancelFunc {
|
func (w *LogMonitor) OnLogData(callback func(data []byte)) context.CancelFunc {
|
||||||
|
|||||||
@@ -113,3 +113,204 @@ func TestWrite_LogTimeFormat(t *testing.T) {
|
|||||||
t.Fatalf("Cannot find timestamp: %v", err)
|
t.Fatalf("Cannot find timestamp: %v", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestCircularBuffer_WrapAround(t *testing.T) {
|
||||||
|
// Create a small buffer to test wrap-around
|
||||||
|
cb := newCircularBuffer(10)
|
||||||
|
|
||||||
|
// Write "hello" (5 bytes)
|
||||||
|
cb.Write([]byte("hello"))
|
||||||
|
if got := string(cb.GetHistory()); got != "hello" {
|
||||||
|
t.Errorf("Expected 'hello', got %q", got)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write "world" (5 bytes) - buffer now full
|
||||||
|
cb.Write([]byte("world"))
|
||||||
|
if got := string(cb.GetHistory()); got != "helloworld" {
|
||||||
|
t.Errorf("Expected 'helloworld', got %q", got)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write "12345" (5 bytes) - should overwrite "hello"
|
||||||
|
cb.Write([]byte("12345"))
|
||||||
|
if got := string(cb.GetHistory()); got != "world12345" {
|
||||||
|
t.Errorf("Expected 'world12345', got %q", got)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write data larger than buffer capacity
|
||||||
|
cb.Write([]byte("abcdefghijklmnop")) // 16 bytes, only last 10 kept
|
||||||
|
if got := string(cb.GetHistory()); got != "ghijklmnop" {
|
||||||
|
t.Errorf("Expected 'ghijklmnop', got %q", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCircularBuffer_BoundaryConditions(t *testing.T) {
|
||||||
|
// Test empty buffer
|
||||||
|
cb := newCircularBuffer(10)
|
||||||
|
if got := cb.GetHistory(); got != nil {
|
||||||
|
t.Errorf("Expected nil for empty buffer, got %q", got)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test exact capacity
|
||||||
|
cb.Write([]byte("1234567890"))
|
||||||
|
if got := string(cb.GetHistory()); got != "1234567890" {
|
||||||
|
t.Errorf("Expected '1234567890', got %q", got)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test write exactly at capacity boundary
|
||||||
|
cb = newCircularBuffer(10)
|
||||||
|
cb.Write([]byte("12345"))
|
||||||
|
cb.Write([]byte("67890"))
|
||||||
|
if got := string(cb.GetHistory()); got != "1234567890" {
|
||||||
|
t.Errorf("Expected '1234567890', got %q", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLogMonitor_LazyInit(t *testing.T) {
|
||||||
|
lm := NewLogMonitorWriter(io.Discard)
|
||||||
|
|
||||||
|
// Buffer should be nil before any writes
|
||||||
|
if lm.buffer != nil {
|
||||||
|
t.Error("Expected buffer to be nil before first write")
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetHistory should return nil when buffer is nil
|
||||||
|
if got := lm.GetHistory(); got != nil {
|
||||||
|
t.Errorf("Expected nil history before first write, got %q", got)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write should lazily initialize the buffer
|
||||||
|
lm.Write([]byte("test"))
|
||||||
|
|
||||||
|
if lm.buffer == nil {
|
||||||
|
t.Error("Expected buffer to be initialized after write")
|
||||||
|
}
|
||||||
|
|
||||||
|
if got := string(lm.GetHistory()); got != "test" {
|
||||||
|
t.Errorf("Expected 'test', got %q", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLogMonitor_Clear(t *testing.T) {
|
||||||
|
lm := NewLogMonitorWriter(io.Discard)
|
||||||
|
|
||||||
|
// Write some data
|
||||||
|
lm.Write([]byte("hello"))
|
||||||
|
if got := string(lm.GetHistory()); got != "hello" {
|
||||||
|
t.Errorf("Expected 'hello', got %q", got)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clear should release the buffer
|
||||||
|
lm.Clear()
|
||||||
|
|
||||||
|
if lm.buffer != nil {
|
||||||
|
t.Error("Expected buffer to be nil after Clear")
|
||||||
|
}
|
||||||
|
|
||||||
|
if got := lm.GetHistory(); got != nil {
|
||||||
|
t.Errorf("Expected nil history after Clear, got %q", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLogMonitor_ClearAndReuse(t *testing.T) {
|
||||||
|
lm := NewLogMonitorWriter(io.Discard)
|
||||||
|
|
||||||
|
// Write, clear, then write again
|
||||||
|
lm.Write([]byte("first"))
|
||||||
|
lm.Clear()
|
||||||
|
lm.Write([]byte("second"))
|
||||||
|
|
||||||
|
if got := string(lm.GetHistory()); got != "second" {
|
||||||
|
t.Errorf("Expected 'second' after clear and reuse, got %q", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkLogMonitorWrite(b *testing.B) {
|
||||||
|
// Test data of varying sizes
|
||||||
|
smallMsg := []byte("small message\n")
|
||||||
|
mediumMsg := []byte(strings.Repeat("medium message content ", 10) + "\n")
|
||||||
|
largeMsg := []byte(strings.Repeat("large message content for benchmarking ", 100) + "\n")
|
||||||
|
|
||||||
|
b.Run("SmallWrite", func(b *testing.B) {
|
||||||
|
lm := NewLogMonitorWriter(io.Discard)
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
lm.Write(smallMsg)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
b.Run("MediumWrite", func(b *testing.B) {
|
||||||
|
lm := NewLogMonitorWriter(io.Discard)
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
lm.Write(mediumMsg)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
b.Run("LargeWrite", func(b *testing.B) {
|
||||||
|
lm := NewLogMonitorWriter(io.Discard)
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
lm.Write(largeMsg)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
b.Run("WithSubscribers", func(b *testing.B) {
|
||||||
|
lm := NewLogMonitorWriter(io.Discard)
|
||||||
|
// Add some subscribers
|
||||||
|
for i := 0; i < 5; i++ {
|
||||||
|
lm.OnLogData(func(data []byte) {})
|
||||||
|
}
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
lm.Write(mediumMsg)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
b.Run("GetHistory", func(b *testing.B) {
|
||||||
|
lm := NewLogMonitorWriter(io.Discard)
|
||||||
|
// Pre-populate with data
|
||||||
|
for i := 0; i < 1000; i++ {
|
||||||
|
lm.Write(mediumMsg)
|
||||||
|
}
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
lm.GetHistory()
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
Benchmark Results - MBP M1 Pro
|
||||||
|
|
||||||
|
Before (ring.Ring):
|
||||||
|
| Benchmark | ns/op | bytes/op | allocs/op |
|
||||||
|
|---------------------------------|------------|----------|-----------|
|
||||||
|
| SmallWrite (14B) | 43 ns | 40 B | 2 |
|
||||||
|
| MediumWrite (241B) | 76 ns | 264 B | 2 |
|
||||||
|
| LargeWrite (4KB) | 504 ns | 4,120 B | 2 |
|
||||||
|
| WithSubscribers (5 subs) | 355 ns | 264 B | 2 |
|
||||||
|
| GetHistory (after 1000 writes) | 145,000 ns | 1.2 MB | 22 |
|
||||||
|
|
||||||
|
After (circularBuffer 10KB):
|
||||||
|
| Benchmark | ns/op | bytes/op | allocs/op |
|
||||||
|
|---------------------------------|------------|----------|-----------|
|
||||||
|
| SmallWrite (14B) | 26 ns | 16 B | 1 |
|
||||||
|
| MediumWrite (241B) | 67 ns | 240 B | 1 |
|
||||||
|
| LargeWrite (4KB) | 774 ns | 4,096 B | 1 |
|
||||||
|
| WithSubscribers (5 subs) | 325 ns | 240 B | 1 |
|
||||||
|
| GetHistory (after 1000 writes) | 1,042 ns | 10,240 B | 1 |
|
||||||
|
|
||||||
|
After (circularBuffer 100KB):
|
||||||
|
| Benchmark | ns/op | bytes/op | allocs/op |
|
||||||
|
|---------------------------------|------------|-----------|-----------|
|
||||||
|
| SmallWrite (14B) | 26 ns | 16 B | 1 |
|
||||||
|
| MediumWrite (241B) | 66 ns | 240 B | 1 |
|
||||||
|
| LargeWrite (4KB) | 753 ns | 4,096 B | 1 |
|
||||||
|
| WithSubscribers (5 subs) | 309 ns | 240 B | 1 |
|
||||||
|
| GetHistory (after 1000 writes) | 7,788 ns | 106,496 B | 1 |
|
||||||
|
|
||||||
|
Summary:
|
||||||
|
- GetHistory: 139x faster (10KB), 18x faster (100KB)
|
||||||
|
- Allocations: reduced from 2 to 1 across all operations
|
||||||
|
- Small/medium writes: ~1.1-1.6x faster
|
||||||
|
*/
|
||||||
|
|||||||
@@ -2,6 +2,8 @@ package proxy
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"compress/flate"
|
||||||
|
"compress/gzip"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
@@ -96,6 +98,12 @@ func (mp *metricsMonitor) wrapHandler(
|
|||||||
next func(modelID string, w http.ResponseWriter, r *http.Request) error,
|
next func(modelID string, w http.ResponseWriter, r *http.Request) error,
|
||||||
) error {
|
) error {
|
||||||
recorder := newBodyCopier(writer)
|
recorder := newBodyCopier(writer)
|
||||||
|
|
||||||
|
// Filter Accept-Encoding to only include encodings we can decompress for metrics
|
||||||
|
if ae := request.Header.Get("Accept-Encoding"); ae != "" {
|
||||||
|
request.Header.Set("Accept-Encoding", filterAcceptEncoding(ae))
|
||||||
|
}
|
||||||
|
|
||||||
if err := next(modelID, recorder, request); err != nil {
|
if err := next(modelID, recorder, request); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -108,17 +116,36 @@ func (mp *metricsMonitor) wrapHandler(
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Initialize default metrics - these will always be recorded
|
||||||
|
tm := TokenMetrics{
|
||||||
|
Timestamp: time.Now(),
|
||||||
|
Model: modelID,
|
||||||
|
DurationMs: int(time.Since(recorder.StartTime()).Milliseconds()),
|
||||||
|
}
|
||||||
|
|
||||||
body := recorder.body.Bytes()
|
body := recorder.body.Bytes()
|
||||||
if len(body) == 0 {
|
if len(body) == 0 {
|
||||||
mp.logger.Warn("metrics skipped, empty body")
|
mp.logger.Warn("metrics: empty body, recording minimal metrics")
|
||||||
|
mp.addMetrics(tm)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if strings.Contains(recorder.Header().Get("Content-Type"), "text/event-stream") {
|
// Decompress if needed
|
||||||
if tm, err := processStreamingResponse(modelID, recorder.StartTime(), body); err != nil {
|
if encoding := recorder.Header().Get("Content-Encoding"); encoding != "" {
|
||||||
mp.logger.Warnf("error processing streaming response: %v, path=%s", err, request.URL.Path)
|
var err error
|
||||||
} else {
|
body, err = decompressBody(body, encoding)
|
||||||
|
if err != nil {
|
||||||
|
mp.logger.Warnf("metrics: decompression failed: %v, path=%s, recording minimal metrics", err, request.URL.Path)
|
||||||
mp.addMetrics(tm)
|
mp.addMetrics(tm)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if strings.Contains(recorder.Header().Get("Content-Type"), "text/event-stream") {
|
||||||
|
if parsed, err := processStreamingResponse(modelID, recorder.StartTime(), body); err != nil {
|
||||||
|
mp.logger.Warnf("error processing streaming response: %v, path=%s, recording minimal metrics", err, request.URL.Path)
|
||||||
|
} else {
|
||||||
|
tm = parsed
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if gjson.ValidBytes(body) {
|
if gjson.ValidBytes(body) {
|
||||||
@@ -127,18 +154,18 @@ func (mp *metricsMonitor) wrapHandler(
|
|||||||
timings := parsed.Get("timings")
|
timings := parsed.Get("timings")
|
||||||
|
|
||||||
if usage.Exists() || timings.Exists() {
|
if usage.Exists() || timings.Exists() {
|
||||||
if tm, err := parseMetrics(modelID, recorder.StartTime(), usage, timings); err != nil {
|
if parsedMetrics, err := parseMetrics(modelID, recorder.StartTime(), usage, timings); err != nil {
|
||||||
mp.logger.Warnf("error parsing metrics: %v, path=%s", err, request.URL.Path)
|
mp.logger.Warnf("error parsing metrics: %v, path=%s, recording minimal metrics", err, request.URL.Path)
|
||||||
} else {
|
} else {
|
||||||
mp.addMetrics(tm)
|
tm = parsedMetrics
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
mp.logger.Warnf("metrics skipped, invalid JSON in response body path=%s", request.URL.Path)
|
mp.logger.Warnf("metrics: invalid JSON in response body path=%s, recording minimal metrics", request.URL.Path)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
mp.addMetrics(tm)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -251,6 +278,25 @@ func parseMetrics(modelID string, start time.Time, usage, timings gjson.Result)
|
|||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// decompressBody decompresses the body based on Content-Encoding header
|
||||||
|
func decompressBody(body []byte, encoding string) ([]byte, error) {
|
||||||
|
switch strings.ToLower(strings.TrimSpace(encoding)) {
|
||||||
|
case "gzip":
|
||||||
|
reader, err := gzip.NewReader(bytes.NewReader(body))
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer reader.Close()
|
||||||
|
return io.ReadAll(reader)
|
||||||
|
case "deflate":
|
||||||
|
reader := flate.NewReader(bytes.NewReader(body))
|
||||||
|
defer reader.Close()
|
||||||
|
return io.ReadAll(reader)
|
||||||
|
default:
|
||||||
|
return body, nil // Return as-is for unknown/no encoding
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// responseBodyCopier records the response body and writes to the original response writer
|
// responseBodyCopier records the response body and writes to the original response writer
|
||||||
// while also capturing it in a buffer for later processing
|
// while also capturing it in a buffer for later processing
|
||||||
type responseBodyCopier struct {
|
type responseBodyCopier struct {
|
||||||
@@ -289,3 +335,25 @@ func (w *responseBodyCopier) Header() http.Header {
|
|||||||
func (w *responseBodyCopier) StartTime() time.Time {
|
func (w *responseBodyCopier) StartTime() time.Time {
|
||||||
return w.start
|
return w.start
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// filterAcceptEncoding filters the Accept-Encoding header to only include
|
||||||
|
// encodings we can decompress (gzip, deflate). This respects the client's
|
||||||
|
// preferences while ensuring we can parse response bodies for metrics.
|
||||||
|
func filterAcceptEncoding(acceptEncoding string) string {
|
||||||
|
if acceptEncoding == "" {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
supported := map[string]bool{"gzip": true, "deflate": true}
|
||||||
|
var filtered []string
|
||||||
|
|
||||||
|
for _, part := range strings.Split(acceptEncoding, ",") {
|
||||||
|
// Parse encoding and optional quality value (e.g., "gzip;q=1.0")
|
||||||
|
encoding := strings.TrimSpace(strings.Split(part, ";")[0])
|
||||||
|
if supported[strings.ToLower(encoding)] {
|
||||||
|
filtered = append(filtered, strings.TrimSpace(part))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return strings.Join(filtered, ", ")
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,6 +1,9 @@
|
|||||||
package proxy
|
package proxy
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
|
"compress/flate"
|
||||||
|
"compress/gzip"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/http/httptest"
|
"net/http/httptest"
|
||||||
@@ -291,7 +294,7 @@ data: [DONE]
|
|||||||
assert.Equal(t, 0, len(metrics))
|
assert.Equal(t, 0, len(metrics))
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("empty response body does not record metrics", func(t *testing.T) {
|
t.Run("empty response body records minimal metrics", func(t *testing.T) {
|
||||||
mm := newMetricsMonitor(testLogger, 10)
|
mm := newMetricsMonitor(testLogger, 10)
|
||||||
|
|
||||||
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
||||||
@@ -307,10 +310,13 @@ data: [DONE]
|
|||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
|
|
||||||
metrics := mm.getMetrics()
|
metrics := mm.getMetrics()
|
||||||
assert.Equal(t, 0, len(metrics))
|
assert.Equal(t, 1, len(metrics))
|
||||||
|
assert.Equal(t, "test-model", metrics[0].Model)
|
||||||
|
assert.Equal(t, 0, metrics[0].InputTokens)
|
||||||
|
assert.Equal(t, 0, metrics[0].OutputTokens)
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("invalid JSON does not record metrics", func(t *testing.T) {
|
t.Run("invalid JSON records minimal metrics", func(t *testing.T) {
|
||||||
mm := newMetricsMonitor(testLogger, 10)
|
mm := newMetricsMonitor(testLogger, 10)
|
||||||
|
|
||||||
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
||||||
@@ -328,7 +334,10 @@ data: [DONE]
|
|||||||
assert.NoError(t, err) // Errors after response is sent are logged, not returned
|
assert.NoError(t, err) // Errors after response is sent are logged, not returned
|
||||||
|
|
||||||
metrics := mm.getMetrics()
|
metrics := mm.getMetrics()
|
||||||
assert.Equal(t, 0, len(metrics))
|
assert.Equal(t, 1, len(metrics))
|
||||||
|
assert.Equal(t, "test-model", metrics[0].Model)
|
||||||
|
assert.Equal(t, 0, metrics[0].InputTokens)
|
||||||
|
assert.Equal(t, 0, metrics[0].OutputTokens)
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("next handler error is propagated", func(t *testing.T) {
|
t.Run("next handler error is propagated", func(t *testing.T) {
|
||||||
@@ -350,7 +359,7 @@ data: [DONE]
|
|||||||
assert.Equal(t, 0, len(metrics))
|
assert.Equal(t, 0, len(metrics))
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("response without usage or timings does not record metrics", func(t *testing.T) {
|
t.Run("response without usage or timings records minimal metrics", func(t *testing.T) {
|
||||||
mm := newMetricsMonitor(testLogger, 10)
|
mm := newMetricsMonitor(testLogger, 10)
|
||||||
|
|
||||||
responseBody := `{"result": "ok"}`
|
responseBody := `{"result": "ok"}`
|
||||||
@@ -367,10 +376,13 @@ data: [DONE]
|
|||||||
ginCtx, _ := gin.CreateTestContext(rec)
|
ginCtx, _ := gin.CreateTestContext(rec)
|
||||||
|
|
||||||
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
||||||
assert.NoError(t, err) // Errors after response is sent are logged, not returned
|
assert.NoError(t, err)
|
||||||
|
|
||||||
metrics := mm.getMetrics()
|
metrics := mm.getMetrics()
|
||||||
assert.Equal(t, 0, len(metrics))
|
assert.Equal(t, 1, len(metrics))
|
||||||
|
assert.Equal(t, "test-model", metrics[0].Model)
|
||||||
|
assert.Equal(t, 0, metrics[0].InputTokens)
|
||||||
|
assert.Equal(t, 0, metrics[0].OutputTokens)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -598,7 +610,7 @@ data: [DONE]
|
|||||||
assert.Equal(t, 50, metrics[0].OutputTokens)
|
assert.Equal(t, 50, metrics[0].OutputTokens)
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("handles streaming with no valid JSON", func(t *testing.T) {
|
t.Run("handles streaming with no valid JSON records minimal metrics", func(t *testing.T) {
|
||||||
mm := newMetricsMonitor(testLogger, 10)
|
mm := newMetricsMonitor(testLogger, 10)
|
||||||
|
|
||||||
responseBody := `data: not json
|
responseBody := `data: not json
|
||||||
@@ -619,13 +631,16 @@ data: [DONE]
|
|||||||
ginCtx, _ := gin.CreateTestContext(rec)
|
ginCtx, _ := gin.CreateTestContext(rec)
|
||||||
|
|
||||||
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
||||||
assert.NoError(t, err) // Errors after response is sent are logged, not returned
|
assert.NoError(t, err)
|
||||||
|
|
||||||
metrics := mm.getMetrics()
|
metrics := mm.getMetrics()
|
||||||
assert.Equal(t, 0, len(metrics))
|
assert.Equal(t, 1, len(metrics))
|
||||||
|
assert.Equal(t, "test-model", metrics[0].Model)
|
||||||
|
assert.Equal(t, 0, metrics[0].InputTokens)
|
||||||
|
assert.Equal(t, 0, metrics[0].OutputTokens)
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("handles empty streaming response", func(t *testing.T) {
|
t.Run("handles empty streaming response records minimal metrics", func(t *testing.T) {
|
||||||
mm := newMetricsMonitor(testLogger, 10)
|
mm := newMetricsMonitor(testLogger, 10)
|
||||||
|
|
||||||
responseBody := ``
|
responseBody := ``
|
||||||
@@ -642,11 +657,13 @@ data: [DONE]
|
|||||||
ginCtx, _ := gin.CreateTestContext(rec)
|
ginCtx, _ := gin.CreateTestContext(rec)
|
||||||
|
|
||||||
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
||||||
// Empty body should not trigger WrapHandler processing
|
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
|
|
||||||
metrics := mm.getMetrics()
|
metrics := mm.getMetrics()
|
||||||
assert.Equal(t, 0, len(metrics))
|
assert.Equal(t, 1, len(metrics))
|
||||||
|
assert.Equal(t, "test-model", metrics[0].Model)
|
||||||
|
assert.Equal(t, 0, metrics[0].InputTokens)
|
||||||
|
assert.Equal(t, 0, metrics[0].OutputTokens)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -691,3 +708,127 @@ func BenchmarkMetricsMonitor_AddMetrics_SmallBuffer(b *testing.B) {
|
|||||||
mm.addMetrics(metric)
|
mm.addMetrics(metric)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestMetricsMonitor_WrapHandler_Compression(t *testing.T) {
|
||||||
|
t.Run("gzip encoded response", func(t *testing.T) {
|
||||||
|
mm := newMetricsMonitor(testLogger, 10)
|
||||||
|
|
||||||
|
responseBody := `{"usage": {"prompt_tokens": 100, "completion_tokens": 50}}`
|
||||||
|
|
||||||
|
// Compress with gzip
|
||||||
|
var buf bytes.Buffer
|
||||||
|
gzWriter := gzip.NewWriter(&buf)
|
||||||
|
gzWriter.Write([]byte(responseBody))
|
||||||
|
gzWriter.Close()
|
||||||
|
compressedBody := buf.Bytes()
|
||||||
|
|
||||||
|
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.Header().Set("Content-Encoding", "gzip")
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
w.Write(compressedBody)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
req := httptest.NewRequest("POST", "/test", nil)
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
ginCtx, _ := gin.CreateTestContext(rec)
|
||||||
|
|
||||||
|
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
metrics := mm.getMetrics()
|
||||||
|
assert.Equal(t, 1, len(metrics))
|
||||||
|
assert.Equal(t, "test-model", metrics[0].Model)
|
||||||
|
assert.Equal(t, 100, metrics[0].InputTokens)
|
||||||
|
assert.Equal(t, 50, metrics[0].OutputTokens)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("deflate encoded response", func(t *testing.T) {
|
||||||
|
mm := newMetricsMonitor(testLogger, 10)
|
||||||
|
|
||||||
|
responseBody := `{"usage": {"prompt_tokens": 200, "completion_tokens": 75}}`
|
||||||
|
|
||||||
|
// Compress with deflate
|
||||||
|
var buf bytes.Buffer
|
||||||
|
flateWriter, _ := flate.NewWriter(&buf, flate.DefaultCompression)
|
||||||
|
flateWriter.Write([]byte(responseBody))
|
||||||
|
flateWriter.Close()
|
||||||
|
compressedBody := buf.Bytes()
|
||||||
|
|
||||||
|
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.Header().Set("Content-Encoding", "deflate")
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
w.Write(compressedBody)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
req := httptest.NewRequest("POST", "/test", nil)
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
ginCtx, _ := gin.CreateTestContext(rec)
|
||||||
|
|
||||||
|
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
metrics := mm.getMetrics()
|
||||||
|
assert.Equal(t, 1, len(metrics))
|
||||||
|
assert.Equal(t, "test-model", metrics[0].Model)
|
||||||
|
assert.Equal(t, 200, metrics[0].InputTokens)
|
||||||
|
assert.Equal(t, 75, metrics[0].OutputTokens)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("invalid gzip data records minimal metrics", func(t *testing.T) {
|
||||||
|
mm := newMetricsMonitor(testLogger, 10)
|
||||||
|
|
||||||
|
// Invalid compressed data
|
||||||
|
invalidData := []byte("this is not gzip data")
|
||||||
|
|
||||||
|
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.Header().Set("Content-Encoding", "gzip")
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
w.Write(invalidData)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
req := httptest.NewRequest("POST", "/test", nil)
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
ginCtx, _ := gin.CreateTestContext(rec)
|
||||||
|
|
||||||
|
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
||||||
|
assert.NoError(t, err) // Should not return error, just log warning
|
||||||
|
|
||||||
|
metrics := mm.getMetrics()
|
||||||
|
assert.Equal(t, 1, len(metrics))
|
||||||
|
assert.Equal(t, "test-model", metrics[0].Model)
|
||||||
|
assert.Equal(t, 0, metrics[0].InputTokens)
|
||||||
|
assert.Equal(t, 0, metrics[0].OutputTokens)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("unknown encoding treated as uncompressed", func(t *testing.T) {
|
||||||
|
mm := newMetricsMonitor(testLogger, 10)
|
||||||
|
|
||||||
|
responseBody := `{"usage": {"prompt_tokens": 300, "completion_tokens": 100}}`
|
||||||
|
|
||||||
|
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.Header().Set("Content-Encoding", "unknown-encoding")
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
w.Write([]byte(responseBody))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
req := httptest.NewRequest("POST", "/test", nil)
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
ginCtx, _ := gin.CreateTestContext(rec)
|
||||||
|
|
||||||
|
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
metrics := mm.getMetrics()
|
||||||
|
assert.Equal(t, 1, len(metrics))
|
||||||
|
assert.Equal(t, 300, metrics[0].InputTokens)
|
||||||
|
assert.Equal(t, 100, metrics[0].OutputTokens)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|||||||
@@ -0,0 +1,141 @@
|
|||||||
|
package proxy
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"net"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httputil"
|
||||||
|
"runtime"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/mostlygeek/llama-swap/proxy/config"
|
||||||
|
)
|
||||||
|
|
||||||
|
type peerProxyMember struct {
|
||||||
|
peerID string
|
||||||
|
reverseProxy *httputil.ReverseProxy
|
||||||
|
apiKey string
|
||||||
|
}
|
||||||
|
|
||||||
|
type PeerProxy struct {
|
||||||
|
peers config.PeerDictionaryConfig
|
||||||
|
proxyMap map[string]*peerProxyMember
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewPeerProxy(peers config.PeerDictionaryConfig, proxyLogger *LogMonitor) (*PeerProxy, error) {
|
||||||
|
proxyMap := make(map[string]*peerProxyMember)
|
||||||
|
|
||||||
|
// Sort peer IDs for consistent iteration order
|
||||||
|
peerIDs := make([]string, 0, len(peers))
|
||||||
|
for peerID := range peers {
|
||||||
|
peerIDs = append(peerIDs, peerID)
|
||||||
|
}
|
||||||
|
sort.Strings(peerIDs)
|
||||||
|
|
||||||
|
// Create a shared transport with reasonable timeouts for peer connections
|
||||||
|
// these can be tuned with feedback later
|
||||||
|
peerTransport := &http.Transport{
|
||||||
|
DialContext: (&net.Dialer{
|
||||||
|
Timeout: 30 * time.Second, // Connection timeout
|
||||||
|
KeepAlive: 30 * time.Second,
|
||||||
|
}).DialContext,
|
||||||
|
TLSHandshakeTimeout: 10 * time.Second,
|
||||||
|
ResponseHeaderTimeout: 60 * time.Second, // Time to wait for response headers
|
||||||
|
ExpectContinueTimeout: 1 * time.Second,
|
||||||
|
MaxIdleConns: 100,
|
||||||
|
MaxIdleConnsPerHost: 10,
|
||||||
|
IdleConnTimeout: 90 * time.Second,
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, peerID := range peerIDs {
|
||||||
|
peer := peers[peerID]
|
||||||
|
// Create reverse proxy for this peer
|
||||||
|
reverseProxy := httputil.NewSingleHostReverseProxy(peer.ProxyURL)
|
||||||
|
reverseProxy.Transport = peerTransport
|
||||||
|
|
||||||
|
// Wrap Director to set Host header for remote hosts (not localhost)
|
||||||
|
originalDirector := reverseProxy.Director
|
||||||
|
reverseProxy.Director = func(req *http.Request) {
|
||||||
|
originalDirector(req)
|
||||||
|
// Ensure Host header matches target URL for remote proxying
|
||||||
|
req.Host = req.URL.Host
|
||||||
|
}
|
||||||
|
|
||||||
|
reverseProxy.ModifyResponse = func(resp *http.Response) error {
|
||||||
|
if strings.Contains(strings.ToLower(resp.Header.Get("Content-Type")), "text/event-stream") {
|
||||||
|
resp.Header.Set("X-Accel-Buffering", "no")
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
reverseProxy.ErrorHandler = func(w http.ResponseWriter, r *http.Request, err error) {
|
||||||
|
proxyLogger.Warnf("peer %s: proxy error: %v", peerID, err)
|
||||||
|
errMsg := fmt.Sprintf("peer proxy error: %v", err)
|
||||||
|
if runtime.GOOS == "darwin" && strings.Contains(err.Error(), "connect: no route to host") {
|
||||||
|
errMsg += " (hint: on macOS, check System Settings > Privacy & Security > Local Network permissions)"
|
||||||
|
}
|
||||||
|
http.Error(w, errMsg, http.StatusBadGateway)
|
||||||
|
}
|
||||||
|
|
||||||
|
pp := &peerProxyMember{
|
||||||
|
peerID: peerID,
|
||||||
|
reverseProxy: reverseProxy,
|
||||||
|
apiKey: peer.ApiKey,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Map each model to this peer's proxy
|
||||||
|
for _, modelID := range peer.Models {
|
||||||
|
if _, found := proxyMap[modelID]; found {
|
||||||
|
proxyLogger.Warnf("peer %s: model %s already mapped to another peer, skipping", peerID, modelID)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
proxyMap[modelID] = pp
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return &PeerProxy{
|
||||||
|
peers: peers,
|
||||||
|
proxyMap: proxyMap,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *PeerProxy) HasPeerModel(modelID string) bool {
|
||||||
|
_, found := p.proxyMap[modelID]
|
||||||
|
return found
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetPeerFilters returns the filters for a peer model, or empty filters if not found
|
||||||
|
func (p *PeerProxy) GetPeerFilters(modelID string) config.Filters {
|
||||||
|
pp, found := p.proxyMap[modelID]
|
||||||
|
if !found {
|
||||||
|
return config.Filters{}
|
||||||
|
}
|
||||||
|
// Get the peer config using the peerID
|
||||||
|
peer, found := p.peers[pp.peerID]
|
||||||
|
if !found {
|
||||||
|
return config.Filters{}
|
||||||
|
}
|
||||||
|
return peer.Filters
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *PeerProxy) ListPeers() config.PeerDictionaryConfig {
|
||||||
|
return p.peers
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *PeerProxy) ProxyRequest(model_id string, writer http.ResponseWriter, request *http.Request) error {
|
||||||
|
pp, found := p.proxyMap[model_id]
|
||||||
|
if !found {
|
||||||
|
return fmt.Errorf("no peer proxy found for model %s", model_id)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Inject API key if configured for this peer
|
||||||
|
if pp.apiKey != "" {
|
||||||
|
request.Header.Set("Authorization", "Bearer "+pp.apiKey)
|
||||||
|
request.Header.Set("x-api-key", pp.apiKey)
|
||||||
|
}
|
||||||
|
|
||||||
|
pp.reverseProxy.ServeHTTP(writer, request)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
@@ -0,0 +1,268 @@
|
|||||||
|
package proxy
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"net/url"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/mostlygeek/llama-swap/proxy/config"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestNewPeerProxy_EmptyPeers(t *testing.T) {
|
||||||
|
peers := config.PeerDictionaryConfig{}
|
||||||
|
pm, err := NewPeerProxy(peers, testLogger)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.NotNil(t, pm)
|
||||||
|
assert.Empty(t, pm.proxyMap)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNewPeerProxy_SinglePeer(t *testing.T) {
|
||||||
|
proxyURL, _ := url.Parse("http://peer1.example.com:8080")
|
||||||
|
peers := config.PeerDictionaryConfig{
|
||||||
|
"peer1": config.PeerConfig{
|
||||||
|
Proxy: "http://peer1.example.com:8080",
|
||||||
|
ProxyURL: proxyURL,
|
||||||
|
ApiKey: "test-key",
|
||||||
|
Models: []string{"model-a", "model-b"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
pm, err := NewPeerProxy(peers, testLogger)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Len(t, pm.proxyMap, 2)
|
||||||
|
assert.True(t, pm.HasPeerModel("model-a"))
|
||||||
|
assert.True(t, pm.HasPeerModel("model-b"))
|
||||||
|
assert.False(t, pm.HasPeerModel("model-c"))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNewPeerProxy_MultiplePeers(t *testing.T) {
|
||||||
|
proxyURL1, _ := url.Parse("http://peer1.example.com:8080")
|
||||||
|
proxyURL2, _ := url.Parse("http://peer2.example.com:8080")
|
||||||
|
peers := config.PeerDictionaryConfig{
|
||||||
|
"peer1": config.PeerConfig{
|
||||||
|
Proxy: "http://peer1.example.com:8080",
|
||||||
|
ProxyURL: proxyURL1,
|
||||||
|
Models: []string{"model-a", "model-b"},
|
||||||
|
},
|
||||||
|
"peer2": config.PeerConfig{
|
||||||
|
Proxy: "http://peer2.example.com:8080",
|
||||||
|
ProxyURL: proxyURL2,
|
||||||
|
Models: []string{"model-c", "model-d"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
pm, err := NewPeerProxy(peers, testLogger)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Len(t, pm.proxyMap, 4)
|
||||||
|
assert.True(t, pm.HasPeerModel("model-a"))
|
||||||
|
assert.True(t, pm.HasPeerModel("model-b"))
|
||||||
|
assert.True(t, pm.HasPeerModel("model-c"))
|
||||||
|
assert.True(t, pm.HasPeerModel("model-d"))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNewPeerProxy_DuplicateModelWarning(t *testing.T) {
|
||||||
|
// When the same model is in multiple peers, only the first (lexicographically by peer ID)
|
||||||
|
// should be mapped, and a warning should be logged
|
||||||
|
proxyURL1, _ := url.Parse("http://peer1.example.com:8080")
|
||||||
|
proxyURL2, _ := url.Parse("http://peer2.example.com:8080")
|
||||||
|
peers := config.PeerDictionaryConfig{
|
||||||
|
"alpha-peer": config.PeerConfig{
|
||||||
|
Proxy: "http://peer1.example.com:8080",
|
||||||
|
ProxyURL: proxyURL1,
|
||||||
|
Models: []string{"duplicate-model"},
|
||||||
|
},
|
||||||
|
"beta-peer": config.PeerConfig{
|
||||||
|
Proxy: "http://peer2.example.com:8080",
|
||||||
|
ProxyURL: proxyURL2,
|
||||||
|
Models: []string{"duplicate-model"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
pm, err := NewPeerProxy(peers, testLogger)
|
||||||
|
require.NoError(t, err)
|
||||||
|
// Should only have one entry for the duplicate model
|
||||||
|
assert.Len(t, pm.proxyMap, 1)
|
||||||
|
assert.True(t, pm.HasPeerModel("duplicate-model"))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestHasPeerModel(t *testing.T) {
|
||||||
|
proxyURL, _ := url.Parse("http://peer1.example.com:8080")
|
||||||
|
peers := config.PeerDictionaryConfig{
|
||||||
|
"peer1": config.PeerConfig{
|
||||||
|
Proxy: "http://peer1.example.com:8080",
|
||||||
|
ProxyURL: proxyURL,
|
||||||
|
Models: []string{"existing-model"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
pm, err := NewPeerProxy(peers, testLogger)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
assert.True(t, pm.HasPeerModel("existing-model"))
|
||||||
|
assert.False(t, pm.HasPeerModel("non-existing-model"))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestProxyRequest_ModelNotFound(t *testing.T) {
|
||||||
|
peers := config.PeerDictionaryConfig{}
|
||||||
|
pm, err := NewPeerProxy(peers, testLogger)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
|
||||||
|
w := httptest.NewRecorder()
|
||||||
|
|
||||||
|
err = pm.ProxyRequest("non-existing-model", w, req)
|
||||||
|
assert.Error(t, err)
|
||||||
|
assert.Contains(t, err.Error(), "no peer proxy found for model non-existing-model")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestProxyRequest_Success(t *testing.T) {
|
||||||
|
// Create a test server to act as the peer
|
||||||
|
testServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
w.Write([]byte("response from peer"))
|
||||||
|
}))
|
||||||
|
defer testServer.Close()
|
||||||
|
|
||||||
|
proxyURL, _ := url.Parse(testServer.URL)
|
||||||
|
peers := config.PeerDictionaryConfig{
|
||||||
|
"peer1": config.PeerConfig{
|
||||||
|
Proxy: testServer.URL,
|
||||||
|
ProxyURL: proxyURL,
|
||||||
|
Models: []string{"test-model"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
pm, err := NewPeerProxy(peers, testLogger)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
|
||||||
|
w := httptest.NewRecorder()
|
||||||
|
|
||||||
|
err = pm.ProxyRequest("test-model", w, req)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, http.StatusOK, w.Code)
|
||||||
|
assert.Equal(t, "response from peer", w.Body.String())
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestProxyRequest_ApiKeyInjection(t *testing.T) {
|
||||||
|
// Create a test server that checks for the Authorization header
|
||||||
|
var receivedAuthHeader string
|
||||||
|
testServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
receivedAuthHeader = r.Header.Get("Authorization")
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
}))
|
||||||
|
defer testServer.Close()
|
||||||
|
|
||||||
|
proxyURL, _ := url.Parse(testServer.URL)
|
||||||
|
peers := config.PeerDictionaryConfig{
|
||||||
|
"peer1": config.PeerConfig{
|
||||||
|
Proxy: testServer.URL,
|
||||||
|
ProxyURL: proxyURL,
|
||||||
|
ApiKey: "secret-api-key",
|
||||||
|
Models: []string{"test-model"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
pm, err := NewPeerProxy(peers, testLogger)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
|
||||||
|
w := httptest.NewRecorder()
|
||||||
|
|
||||||
|
err = pm.ProxyRequest("test-model", w, req)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, "Bearer secret-api-key", receivedAuthHeader)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestProxyRequest_NoApiKey(t *testing.T) {
|
||||||
|
// Create a test server that checks for the Authorization header
|
||||||
|
var receivedAuthHeader string
|
||||||
|
testServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
receivedAuthHeader = r.Header.Get("Authorization")
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
}))
|
||||||
|
defer testServer.Close()
|
||||||
|
|
||||||
|
proxyURL, _ := url.Parse(testServer.URL)
|
||||||
|
peers := config.PeerDictionaryConfig{
|
||||||
|
"peer1": config.PeerConfig{
|
||||||
|
Proxy: testServer.URL,
|
||||||
|
ProxyURL: proxyURL,
|
||||||
|
ApiKey: "", // No API key
|
||||||
|
Models: []string{"test-model"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
pm, err := NewPeerProxy(peers, testLogger)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
|
||||||
|
w := httptest.NewRecorder()
|
||||||
|
|
||||||
|
err = pm.ProxyRequest("test-model", w, req)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Empty(t, receivedAuthHeader)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestProxyRequest_HostHeaderSet(t *testing.T) {
|
||||||
|
// Create a test server that checks the Host header
|
||||||
|
var receivedHost string
|
||||||
|
testServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
receivedHost = r.Host
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
}))
|
||||||
|
defer testServer.Close()
|
||||||
|
|
||||||
|
proxyURL, _ := url.Parse(testServer.URL)
|
||||||
|
peers := config.PeerDictionaryConfig{
|
||||||
|
"peer1": config.PeerConfig{
|
||||||
|
Proxy: testServer.URL,
|
||||||
|
ProxyURL: proxyURL,
|
||||||
|
Models: []string{"test-model"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
pm, err := NewPeerProxy(peers, testLogger)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
|
||||||
|
w := httptest.NewRecorder()
|
||||||
|
|
||||||
|
err = pm.ProxyRequest("test-model", w, req)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
// The Host header should be set to the target URL's host
|
||||||
|
assert.True(t, strings.HasPrefix(receivedHost, "127.0.0.1:"))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestProxyRequest_SSEHeaderModification(t *testing.T) {
|
||||||
|
// Create a test server that returns SSE content type
|
||||||
|
testServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.Header().Set("Content-Type", "text/event-stream")
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
}))
|
||||||
|
defer testServer.Close()
|
||||||
|
|
||||||
|
proxyURL, _ := url.Parse(testServer.URL)
|
||||||
|
peers := config.PeerDictionaryConfig{
|
||||||
|
"peer1": config.PeerConfig{
|
||||||
|
Proxy: testServer.URL,
|
||||||
|
ProxyURL: proxyURL,
|
||||||
|
Models: []string{"test-model"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
pm, err := NewPeerProxy(peers, testLogger)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
|
||||||
|
w := httptest.NewRecorder()
|
||||||
|
|
||||||
|
err = pm.ProxyRequest("test-model", w, req)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
// The X-Accel-Buffering header should be set to "no" for SSE
|
||||||
|
assert.Equal(t, "no", w.Header().Get("X-Accel-Buffering"))
|
||||||
|
}
|
||||||
@@ -414,6 +414,9 @@ func (p *Process) stopCommand() {
|
|||||||
stopStartTime := time.Now()
|
stopStartTime := time.Now()
|
||||||
defer func() {
|
defer func() {
|
||||||
p.proxyLogger.Debugf("<%s> stopCommand took %v", p.ID, time.Since(stopStartTime))
|
p.proxyLogger.Debugf("<%s> stopCommand took %v", p.ID, time.Since(stopStartTime))
|
||||||
|
|
||||||
|
// free the buffer in processLogger so the memory can be recovered
|
||||||
|
p.processLogger.Clear()
|
||||||
}()
|
}()
|
||||||
|
|
||||||
p.cmdMutex.RLock()
|
p.cmdMutex.RLock()
|
||||||
@@ -646,6 +649,11 @@ func (p *Process) cmdStopUpstreamProcess() error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Logger returns the logger for this process.
|
||||||
|
func (p *Process) Logger() *LogMonitor {
|
||||||
|
return p.processLogger
|
||||||
|
}
|
||||||
|
|
||||||
var loadingRemarks = []string{
|
var loadingRemarks = []string{
|
||||||
"Still faster than your last standup meeting...",
|
"Still faster than your last standup meeting...",
|
||||||
"Reticulating splines...",
|
"Reticulating splines...",
|
||||||
@@ -864,7 +872,6 @@ func (s *statusResponseWriter) WriteHeader(statusCode int) {
|
|||||||
s.Flush()
|
s.Flush()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add Flush method
|
|
||||||
func (s *statusResponseWriter) Flush() {
|
func (s *statusResponseWriter) Flush() {
|
||||||
if flusher, ok := s.writer.(http.Flusher); ok {
|
if flusher, ok := s.writer.(http.Flusher); ok {
|
||||||
flusher.Flush()
|
flusher.Flush()
|
||||||
|
|||||||
@@ -395,6 +395,10 @@ func TestProcess_StopImmediately(t *testing.T) {
|
|||||||
// Test that SIGKILL is sent when gracefulStopTimeout is reached and properly terminates
|
// Test that SIGKILL is sent when gracefulStopTimeout is reached and properly terminates
|
||||||
// the upstream command
|
// the upstream command
|
||||||
func TestProcess_ForceStopWithKill(t *testing.T) {
|
func TestProcess_ForceStopWithKill(t *testing.T) {
|
||||||
|
if testing.Short() {
|
||||||
|
t.Skip("skipping slow test")
|
||||||
|
}
|
||||||
|
|
||||||
if runtime.GOOS == "windows" {
|
if runtime.GOOS == "windows" {
|
||||||
t.Skip("skipping SIGTERM test on Windows ")
|
t.Skip("skipping SIGTERM test on Windows ")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -46,7 +46,8 @@ func NewProcessGroup(id string, config config.Config, proxyLogger *LogMonitor, u
|
|||||||
// Create a Process for each member in the group
|
// Create a Process for each member in the group
|
||||||
for _, modelID := range groupConfig.Members {
|
for _, modelID := range groupConfig.Members {
|
||||||
modelConfig, modelID, _ := pg.config.FindConfig(modelID)
|
modelConfig, modelID, _ := pg.config.FindConfig(modelID)
|
||||||
process := NewProcess(modelID, pg.config.HealthCheckTimeout, modelConfig, pg.upstreamLogger, pg.proxyLogger)
|
processLogger := NewLogMonitorWriter(upstreamLogger)
|
||||||
|
process := NewProcess(modelID, pg.config.HealthCheckTimeout, modelConfig, processLogger, pg.proxyLogger)
|
||||||
pg.processes[modelID] = process
|
pg.processes[modelID] = process
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -88,6 +89,13 @@ func (pg *ProcessGroup) HasMember(modelName string) bool {
|
|||||||
return slices.Contains(pg.config.Groups[pg.id].Members, modelName)
|
return slices.Contains(pg.config.Groups[pg.id].Members, modelName)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (pg *ProcessGroup) GetMember(modelName string) (*Process, bool) {
|
||||||
|
if pg.HasMember(modelName) {
|
||||||
|
return pg.processes[modelName], true
|
||||||
|
}
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
|
||||||
func (pg *ProcessGroup) StopProcess(modelID string, strategy StopStrategy) error {
|
func (pg *ProcessGroup) StopProcess(modelID string, strategy StopStrategy) error {
|
||||||
pg.Lock()
|
pg.Lock()
|
||||||
|
|
||||||
|
|||||||
@@ -49,6 +49,10 @@ func TestProcessGroup_HasMember(t *testing.T) {
|
|||||||
// TestProcessGroup_ProxyRequestSwapIsTrueParallel tests that when swap is true
|
// TestProcessGroup_ProxyRequestSwapIsTrueParallel tests that when swap is true
|
||||||
// and multiple requests are made in parallel, only one process is running at a time.
|
// and multiple requests are made in parallel, only one process is running at a time.
|
||||||
func TestProcessGroup_ProxyRequestSwapIsTrueParallel(t *testing.T) {
|
func TestProcessGroup_ProxyRequestSwapIsTrueParallel(t *testing.T) {
|
||||||
|
if testing.Short() {
|
||||||
|
t.Skip("skipping slow test")
|
||||||
|
}
|
||||||
|
|
||||||
var processGroupTestConfig = config.AddDefaultGroupToConfig(config.Config{
|
var processGroupTestConfig = config.AddDefaultGroupToConfig(config.Config{
|
||||||
HealthCheckTimeout: 15,
|
HealthCheckTimeout: 15,
|
||||||
Models: map[string]config.ModelConfig{
|
Models: map[string]config.ModelConfig{
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ package proxy
|
|||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
|
"encoding/base64"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"mime/multipart"
|
"mime/multipart"
|
||||||
@@ -50,19 +51,42 @@ type ProxyManager struct {
|
|||||||
buildDate string
|
buildDate string
|
||||||
commit string
|
commit string
|
||||||
version string
|
version string
|
||||||
|
|
||||||
|
// peer proxy see: #296, #433
|
||||||
|
peerProxy *PeerProxy
|
||||||
}
|
}
|
||||||
|
|
||||||
func New(config config.Config) *ProxyManager {
|
func New(proxyConfig config.Config) *ProxyManager {
|
||||||
// set up loggers
|
// set up loggers
|
||||||
stdoutLogger := NewLogMonitorWriter(os.Stdout)
|
|
||||||
upstreamLogger := NewLogMonitorWriter(stdoutLogger)
|
|
||||||
proxyLogger := NewLogMonitorWriter(stdoutLogger)
|
|
||||||
|
|
||||||
if config.LogRequests {
|
var muxLogger, upstreamLogger, proxyLogger *LogMonitor
|
||||||
|
switch proxyConfig.LogToStdout {
|
||||||
|
case config.LogToStdoutNone:
|
||||||
|
muxLogger = NewLogMonitorWriter(io.Discard)
|
||||||
|
upstreamLogger = NewLogMonitorWriter(io.Discard)
|
||||||
|
proxyLogger = NewLogMonitorWriter(io.Discard)
|
||||||
|
case config.LogToStdoutBoth:
|
||||||
|
muxLogger = NewLogMonitorWriter(os.Stdout)
|
||||||
|
upstreamLogger = NewLogMonitorWriter(muxLogger)
|
||||||
|
proxyLogger = NewLogMonitorWriter(muxLogger)
|
||||||
|
case config.LogToStdoutUpstream:
|
||||||
|
muxLogger = NewLogMonitorWriter(os.Stdout)
|
||||||
|
upstreamLogger = NewLogMonitorWriter(muxLogger)
|
||||||
|
proxyLogger = NewLogMonitorWriter(io.Discard)
|
||||||
|
default:
|
||||||
|
// same as config.LogToStdoutProxy
|
||||||
|
// helpful because some old tests create a config.Config directly and it
|
||||||
|
// may not have LogToStdout set explicitly
|
||||||
|
muxLogger = NewLogMonitorWriter(os.Stdout)
|
||||||
|
upstreamLogger = NewLogMonitorWriter(io.Discard)
|
||||||
|
proxyLogger = NewLogMonitorWriter(muxLogger)
|
||||||
|
}
|
||||||
|
|
||||||
|
if proxyConfig.LogRequests {
|
||||||
proxyLogger.Warn("LogRequests configuration is deprecated. Use logLevel instead.")
|
proxyLogger.Warn("LogRequests configuration is deprecated. Use logLevel instead.")
|
||||||
}
|
}
|
||||||
|
|
||||||
switch strings.ToLower(strings.TrimSpace(config.LogLevel)) {
|
switch strings.ToLower(strings.TrimSpace(proxyConfig.LogLevel)) {
|
||||||
case "debug":
|
case "debug":
|
||||||
proxyLogger.SetLogLevel(LevelDebug)
|
proxyLogger.SetLogLevel(LevelDebug)
|
||||||
upstreamLogger.SetLogLevel(LevelDebug)
|
upstreamLogger.SetLogLevel(LevelDebug)
|
||||||
@@ -99,7 +123,7 @@ func New(config config.Config) *ProxyManager {
|
|||||||
"stampnano": time.StampNano,
|
"stampnano": time.StampNano,
|
||||||
}
|
}
|
||||||
|
|
||||||
if timeFormat, ok := timeFormats[strings.ToLower(strings.TrimSpace(config.LogTimeFormat))]; ok {
|
if timeFormat, ok := timeFormats[strings.ToLower(strings.TrimSpace(proxyConfig.LogTimeFormat))]; ok {
|
||||||
proxyLogger.SetLogTimeFormat(timeFormat)
|
proxyLogger.SetLogTimeFormat(timeFormat)
|
||||||
upstreamLogger.SetLogTimeFormat(timeFormat)
|
upstreamLogger.SetLogTimeFormat(timeFormat)
|
||||||
}
|
}
|
||||||
@@ -107,18 +131,24 @@ func New(config config.Config) *ProxyManager {
|
|||||||
shutdownCtx, shutdownCancel := context.WithCancel(context.Background())
|
shutdownCtx, shutdownCancel := context.WithCancel(context.Background())
|
||||||
|
|
||||||
var maxMetrics int
|
var maxMetrics int
|
||||||
if config.MetricsMaxInMemory <= 0 {
|
if proxyConfig.MetricsMaxInMemory <= 0 {
|
||||||
maxMetrics = 1000 // Default fallback
|
maxMetrics = 1000 // Default fallback
|
||||||
} else {
|
} else {
|
||||||
maxMetrics = config.MetricsMaxInMemory
|
maxMetrics = proxyConfig.MetricsMaxInMemory
|
||||||
|
}
|
||||||
|
|
||||||
|
peerProxy, err := NewPeerProxy(proxyConfig.Peers, proxyLogger)
|
||||||
|
if err != nil {
|
||||||
|
proxyLogger.Errorf("Disabling Peering. Failed to create proxy peers: %v", err)
|
||||||
|
peerProxy = nil
|
||||||
}
|
}
|
||||||
|
|
||||||
pm := &ProxyManager{
|
pm := &ProxyManager{
|
||||||
config: config,
|
config: proxyConfig,
|
||||||
ginEngine: gin.New(),
|
ginEngine: gin.New(),
|
||||||
|
|
||||||
proxyLogger: proxyLogger,
|
proxyLogger: proxyLogger,
|
||||||
muxLogger: stdoutLogger,
|
muxLogger: muxLogger,
|
||||||
upstreamLogger: upstreamLogger,
|
upstreamLogger: upstreamLogger,
|
||||||
|
|
||||||
metricsMonitor: newMetricsMonitor(proxyLogger, maxMetrics),
|
metricsMonitor: newMetricsMonitor(proxyLogger, maxMetrics),
|
||||||
@@ -131,37 +161,46 @@ func New(config config.Config) *ProxyManager {
|
|||||||
buildDate: "unknown",
|
buildDate: "unknown",
|
||||||
commit: "abcd1234",
|
commit: "abcd1234",
|
||||||
version: "0",
|
version: "0",
|
||||||
|
|
||||||
|
peerProxy: peerProxy,
|
||||||
}
|
}
|
||||||
|
|
||||||
// create the process groups
|
// create the process groups
|
||||||
for groupID := range config.Groups {
|
for groupID := range proxyConfig.Groups {
|
||||||
processGroup := NewProcessGroup(groupID, config, proxyLogger, upstreamLogger)
|
processGroup := NewProcessGroup(groupID, proxyConfig, proxyLogger, upstreamLogger)
|
||||||
pm.processGroups[groupID] = processGroup
|
pm.processGroups[groupID] = processGroup
|
||||||
}
|
}
|
||||||
|
|
||||||
pm.setupGinEngine()
|
pm.setupGinEngine()
|
||||||
|
|
||||||
// run any startup hooks
|
// run any startup hooks
|
||||||
if len(config.Hooks.OnStartup.Preload) > 0 {
|
if len(proxyConfig.Hooks.OnStartup.Preload) > 0 {
|
||||||
// do it in the background, don't block startup -- not sure if good idea yet
|
// do it in the background, don't block startup -- not sure if good idea yet
|
||||||
go func() {
|
go func() {
|
||||||
discardWriter := &DiscardWriter{}
|
discardWriter := &DiscardWriter{}
|
||||||
for _, realModelName := range config.Hooks.OnStartup.Preload {
|
for _, preloadModelName := range proxyConfig.Hooks.OnStartup.Preload {
|
||||||
proxyLogger.Infof("Preloading model: %s", realModelName)
|
modelID, ok := proxyConfig.RealModelName(preloadModelName)
|
||||||
processGroup, _, err := pm.swapProcessGroup(realModelName)
|
|
||||||
|
if !ok {
|
||||||
|
proxyLogger.Warnf("Preload model %s not found in config", preloadModelName)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
proxyLogger.Infof("Preloading model: %s", modelID)
|
||||||
|
processGroup, err := pm.swapProcessGroup(modelID)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
event.Emit(ModelPreloadedEvent{
|
event.Emit(ModelPreloadedEvent{
|
||||||
ModelName: realModelName,
|
ModelName: modelID,
|
||||||
Success: false,
|
Success: false,
|
||||||
})
|
})
|
||||||
proxyLogger.Errorf("Failed to preload model %s: %v", realModelName, err)
|
proxyLogger.Errorf("Failed to preload model %s: %v", modelID, err)
|
||||||
continue
|
continue
|
||||||
} else {
|
} else {
|
||||||
req, _ := http.NewRequest("GET", "/", nil)
|
req, _ := http.NewRequest("GET", "/", nil)
|
||||||
processGroup.ProxyRequest(realModelName, discardWriter, req)
|
processGroup.ProxyRequest(modelID, discardWriter, req)
|
||||||
event.Emit(ModelPreloadedEvent{
|
event.Emit(ModelPreloadedEvent{
|
||||||
ModelName: realModelName,
|
ModelName: modelID,
|
||||||
Success: true,
|
Success: true,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@@ -236,37 +275,44 @@ func (pm *ProxyManager) setupGinEngine() {
|
|||||||
})
|
})
|
||||||
|
|
||||||
// Set up routes using the Gin engine
|
// Set up routes using the Gin engine
|
||||||
pm.ginEngine.POST("/v1/chat/completions", pm.proxyInferenceHandler)
|
// Protected routes use pm.apiKeyAuth() middleware
|
||||||
|
pm.ginEngine.POST("/v1/chat/completions", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||||
|
pm.ginEngine.POST("/v1/responses", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||||
// Support legacy /v1/completions api, see issue #12
|
// Support legacy /v1/completions api, see issue #12
|
||||||
pm.ginEngine.POST("/v1/completions", pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/v1/completions", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||||
// Support anthropic /v1/messages (added https://github.com/ggml-org/llama.cpp/pull/17570)
|
// Support anthropic /v1/messages (added https://github.com/ggml-org/llama.cpp/pull/17570)
|
||||||
pm.ginEngine.POST("/v1/messages", pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/v1/messages", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||||
|
// Support anthropic count_tokens API (Also added in the above PR)
|
||||||
|
pm.ginEngine.POST("/v1/messages/count_tokens", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||||
|
|
||||||
// Support embeddings and reranking
|
// Support embeddings and reranking
|
||||||
pm.ginEngine.POST("/v1/embeddings", pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/v1/embeddings", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||||
|
|
||||||
// llama-server's /reranking endpoint + aliases
|
// llama-server's /reranking endpoint + aliases
|
||||||
pm.ginEngine.POST("/reranking", pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/reranking", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||||
pm.ginEngine.POST("/rerank", pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/rerank", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||||
pm.ginEngine.POST("/v1/rerank", pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/v1/rerank", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||||
pm.ginEngine.POST("/v1/reranking", pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/v1/reranking", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||||
|
|
||||||
// llama-server's /infill endpoint for code infilling
|
// llama-server's /infill endpoint for code infilling
|
||||||
pm.ginEngine.POST("/infill", pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/infill", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||||
|
|
||||||
// llama-server's /completion endpoint
|
// llama-server's /completion endpoint
|
||||||
pm.ginEngine.POST("/completion", pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/completion", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||||
|
|
||||||
// Support audio/speech endpoint
|
// Support audio/speech endpoint
|
||||||
pm.ginEngine.POST("/v1/audio/speech", pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/v1/audio/speech", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||||
pm.ginEngine.POST("/v1/audio/transcriptions", pm.proxyOAIPostFormHandler)
|
pm.ginEngine.POST("/v1/audio/voices", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||||
|
pm.ginEngine.POST("/v1/audio/transcriptions", pm.apiKeyAuth(), pm.proxyOAIPostFormHandler)
|
||||||
|
pm.ginEngine.POST("/v1/images/generations", pm.apiKeyAuth(), pm.proxyInferenceHandler)
|
||||||
|
pm.ginEngine.POST("/v1/images/edits", pm.apiKeyAuth(), pm.proxyOAIPostFormHandler)
|
||||||
|
|
||||||
pm.ginEngine.GET("/v1/models", pm.listModelsHandler)
|
pm.ginEngine.GET("/v1/models", pm.apiKeyAuth(), pm.listModelsHandler)
|
||||||
|
|
||||||
// in proxymanager_loghandlers.go
|
// in proxymanager_loghandlers.go
|
||||||
pm.ginEngine.GET("/logs", pm.sendLogsHandlers)
|
pm.ginEngine.GET("/logs", pm.apiKeyAuth(), pm.sendLogsHandlers)
|
||||||
pm.ginEngine.GET("/logs/stream", pm.streamLogsHandler)
|
pm.ginEngine.GET("/logs/stream", pm.apiKeyAuth(), pm.streamLogsHandler)
|
||||||
pm.ginEngine.GET("/logs/stream/:logMonitorID", pm.streamLogsHandler)
|
pm.ginEngine.GET("/logs/stream/*logMonitorID", pm.apiKeyAuth(), pm.streamLogsHandler)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* User Interface Endpoints
|
* User Interface Endpoints
|
||||||
@@ -278,9 +324,9 @@ func (pm *ProxyManager) setupGinEngine() {
|
|||||||
pm.ginEngine.GET("/upstream", func(c *gin.Context) {
|
pm.ginEngine.GET("/upstream", func(c *gin.Context) {
|
||||||
c.Redirect(http.StatusFound, "/ui/models")
|
c.Redirect(http.StatusFound, "/ui/models")
|
||||||
})
|
})
|
||||||
pm.ginEngine.Any("/upstream/*upstreamPath", pm.proxyToUpstream)
|
pm.ginEngine.Any("/upstream/*upstreamPath", pm.apiKeyAuth(), pm.proxyToUpstream)
|
||||||
pm.ginEngine.GET("/unload", pm.unloadAllModelsHandler)
|
pm.ginEngine.GET("/unload", pm.apiKeyAuth(), pm.unloadAllModelsHandler)
|
||||||
pm.ginEngine.GET("/running", pm.listRunningProcessesHandler)
|
pm.ginEngine.GET("/running", pm.apiKeyAuth(), pm.listRunningProcessesHandler)
|
||||||
pm.ginEngine.GET("/health", func(c *gin.Context) {
|
pm.ginEngine.GET("/health", func(c *gin.Context) {
|
||||||
c.String(http.StatusOK, "OK")
|
c.String(http.StatusOK, "OK")
|
||||||
})
|
})
|
||||||
@@ -378,16 +424,10 @@ func (pm *ProxyManager) Shutdown() {
|
|||||||
pm.shutdownCancel()
|
pm.shutdownCancel()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (pm *ProxyManager) swapProcessGroup(requestedModel string) (*ProcessGroup, string, error) {
|
func (pm *ProxyManager) swapProcessGroup(realModelName string) (*ProcessGroup, error) {
|
||||||
// de-alias the real model name and get a real one
|
|
||||||
realModelName, found := pm.config.RealModelName(requestedModel)
|
|
||||||
if !found {
|
|
||||||
return nil, realModelName, fmt.Errorf("could not find real modelID for %s", requestedModel)
|
|
||||||
}
|
|
||||||
|
|
||||||
processGroup := pm.findGroupByModelName(realModelName)
|
processGroup := pm.findGroupByModelName(realModelName)
|
||||||
if processGroup == nil {
|
if processGroup == nil {
|
||||||
return nil, realModelName, fmt.Errorf("could not find process group for model %s", requestedModel)
|
return nil, fmt.Errorf("could not find process group for model %s", realModelName)
|
||||||
}
|
}
|
||||||
|
|
||||||
if processGroup.exclusive {
|
if processGroup.exclusive {
|
||||||
@@ -399,54 +439,71 @@ func (pm *ProxyManager) swapProcessGroup(requestedModel string) (*ProcessGroup,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return processGroup, realModelName, nil
|
return processGroup, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (pm *ProxyManager) listModelsHandler(c *gin.Context) {
|
func (pm *ProxyManager) listModelsHandler(c *gin.Context) {
|
||||||
data := make([]gin.H, 0, len(pm.config.Models))
|
data := make([]gin.H, 0, len(pm.config.Models))
|
||||||
createdTime := time.Now().Unix()
|
createdTime := time.Now().Unix()
|
||||||
|
|
||||||
|
newRecord := func(modelId string, modelConfig config.ModelConfig) gin.H {
|
||||||
|
record := gin.H{
|
||||||
|
"id": modelId,
|
||||||
|
"object": "model",
|
||||||
|
"created": createdTime,
|
||||||
|
"owned_by": "llama-swap",
|
||||||
|
}
|
||||||
|
|
||||||
|
if name := strings.TrimSpace(modelConfig.Name); name != "" {
|
||||||
|
record["name"] = name
|
||||||
|
}
|
||||||
|
if desc := strings.TrimSpace(modelConfig.Description); desc != "" {
|
||||||
|
record["description"] = desc
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add metadata if present
|
||||||
|
if len(modelConfig.Metadata) > 0 {
|
||||||
|
record["meta"] = gin.H{
|
||||||
|
"llamaswap": modelConfig.Metadata,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return record
|
||||||
|
}
|
||||||
|
|
||||||
for id, modelConfig := range pm.config.Models {
|
for id, modelConfig := range pm.config.Models {
|
||||||
if modelConfig.Unlisted {
|
if modelConfig.Unlisted {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
newRecord := func(modelId string) gin.H {
|
data = append(data, newRecord(id, modelConfig))
|
||||||
record := gin.H{
|
|
||||||
"id": modelId,
|
|
||||||
"object": "model",
|
|
||||||
"created": createdTime,
|
|
||||||
"owned_by": "llama-swap",
|
|
||||||
}
|
|
||||||
|
|
||||||
if name := strings.TrimSpace(modelConfig.Name); name != "" {
|
|
||||||
record["name"] = name
|
|
||||||
}
|
|
||||||
if desc := strings.TrimSpace(modelConfig.Description); desc != "" {
|
|
||||||
record["description"] = desc
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add metadata if present
|
|
||||||
if len(modelConfig.Metadata) > 0 {
|
|
||||||
record["meta"] = gin.H{
|
|
||||||
"llamaswap": modelConfig.Metadata,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return record
|
|
||||||
}
|
|
||||||
|
|
||||||
data = append(data, newRecord(id))
|
|
||||||
|
|
||||||
// Include aliases
|
// Include aliases
|
||||||
if pm.config.IncludeAliasesInList {
|
if pm.config.IncludeAliasesInList {
|
||||||
for _, alias := range modelConfig.Aliases {
|
for _, alias := range modelConfig.Aliases {
|
||||||
if alias := strings.TrimSpace(alias); alias != "" {
|
if alias := strings.TrimSpace(alias); alias != "" {
|
||||||
data = append(data, newRecord(alias))
|
data = append(data, newRecord(alias, modelConfig))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if pm.peerProxy != nil {
|
||||||
|
for peerID, peer := range pm.peerProxy.ListPeers() {
|
||||||
|
// add peer models
|
||||||
|
for _, modelID := range peer.Models {
|
||||||
|
// Skip unlisted models if not showing them
|
||||||
|
record := newRecord(modelID, config.ModelConfig{
|
||||||
|
Name: fmt.Sprintf("%s: %s", peerID, modelID),
|
||||||
|
Metadata: map[string]any{
|
||||||
|
"peerID": peerID,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
data = append(data, record)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Sort by the "id" key
|
// Sort by the "id" key
|
||||||
sort.Slice(data, func(i, j int) bool {
|
sort.Slice(data, func(i, j int) bool {
|
||||||
si, _ := data[i]["id"].(string)
|
si, _ := data[i]["id"].(string)
|
||||||
@@ -466,62 +523,61 @@ func (pm *ProxyManager) listModelsHandler(c *gin.Context) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func (pm *ProxyManager) proxyToUpstream(c *gin.Context) {
|
// findModelInPath searches for a valid model name in a path with slashes.
|
||||||
upstreamPath := c.Param("upstreamPath")
|
// It iteratively builds up path segments until it finds a matching model.
|
||||||
|
// Returns: (searchModelName, realModelName, remainingPath, found)
|
||||||
// split the upstream path by / and search for the model name
|
// Example: "/author/model/endpoint" with model "author/model" -> ("author/model", "author/model", "/endpoint", true)
|
||||||
parts := strings.Split(strings.TrimSpace(upstreamPath), "/")
|
func (pm *ProxyManager) findModelInPath(path string) (searchName string, realName string, remainingPath string, found bool) {
|
||||||
if len(parts) == 0 {
|
parts := strings.Split(strings.TrimSpace(path), "/")
|
||||||
pm.sendErrorResponse(c, http.StatusBadRequest, "model id required in path")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
modelFound := false
|
|
||||||
searchModelName := ""
|
searchModelName := ""
|
||||||
var modelName, remainingPath string
|
|
||||||
for i, part := range parts {
|
for i, part := range parts {
|
||||||
if parts[i] == "" {
|
if part == "" {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if searchModelName == "" {
|
if searchModelName == "" {
|
||||||
searchModelName = part
|
searchModelName = part
|
||||||
} else {
|
} else {
|
||||||
searchModelName = searchModelName + "/" + parts[i]
|
searchModelName = searchModelName + "/" + part
|
||||||
}
|
}
|
||||||
|
|
||||||
if real, ok := pm.config.RealModelName(searchModelName); ok {
|
if modelID, ok := pm.config.RealModelName(searchModelName); ok {
|
||||||
modelName = real
|
return searchModelName, modelID, "/" + strings.Join(parts[i+1:], "/"), true
|
||||||
remainingPath = "/" + strings.Join(parts[i+1:], "/")
|
|
||||||
modelFound = true
|
|
||||||
|
|
||||||
// Check if this is exactly a model name with no additional path
|
|
||||||
// and doesn't end with a trailing slash
|
|
||||||
if remainingPath == "/" && !strings.HasSuffix(upstreamPath, "/") {
|
|
||||||
// Build new URL with query parameters preserved
|
|
||||||
newPath := "/upstream/" + searchModelName + "/"
|
|
||||||
if c.Request.URL.RawQuery != "" {
|
|
||||||
newPath += "?" + c.Request.URL.RawQuery
|
|
||||||
}
|
|
||||||
|
|
||||||
// Use 308 for non-GET/HEAD requests to preserve method
|
|
||||||
if c.Request.Method == http.MethodGet || c.Request.Method == http.MethodHead {
|
|
||||||
c.Redirect(http.StatusMovedPermanently, newPath)
|
|
||||||
} else {
|
|
||||||
c.Redirect(http.StatusPermanentRedirect, newPath)
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
break
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return "", "", "", false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (pm *ProxyManager) proxyToUpstream(c *gin.Context) {
|
||||||
|
upstreamPath := c.Param("upstreamPath")
|
||||||
|
|
||||||
|
searchModelName, modelID, remainingPath, modelFound := pm.findModelInPath(upstreamPath)
|
||||||
|
|
||||||
if !modelFound {
|
if !modelFound {
|
||||||
pm.sendErrorResponse(c, http.StatusBadRequest, "model id required in path")
|
pm.sendErrorResponse(c, http.StatusBadRequest, "model id required in path")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
processGroup, realModelName, err := pm.swapProcessGroup(modelName)
|
// Redirect /upstream/modelname to /upstream/modelname/ for URL consistency.
|
||||||
|
// This ensures relative URLs in upstream responses resolve correctly and
|
||||||
|
// provides canonical URL form. Uses 308 for POST/PUT/etc to preserve the
|
||||||
|
// HTTP method (301 would downgrade to GET).
|
||||||
|
if remainingPath == "/" && !strings.HasSuffix(upstreamPath, "/") {
|
||||||
|
newPath := "/upstream/" + searchModelName + "/"
|
||||||
|
if c.Request.URL.RawQuery != "" {
|
||||||
|
newPath += "?" + c.Request.URL.RawQuery
|
||||||
|
}
|
||||||
|
if c.Request.Method == http.MethodGet || c.Request.Method == http.MethodHead {
|
||||||
|
c.Redirect(http.StatusMovedPermanently, newPath)
|
||||||
|
} else {
|
||||||
|
c.Redirect(http.StatusPermanentRedirect, newPath)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
processGroup, err := pm.swapProcessGroup(modelID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error swapping process group: %s", err.Error()))
|
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error swapping process group: %s", err.Error()))
|
||||||
return
|
return
|
||||||
@@ -533,15 +589,15 @@ func (pm *ProxyManager) proxyToUpstream(c *gin.Context) {
|
|||||||
|
|
||||||
// attempt to record metrics if it is a POST request
|
// attempt to record metrics if it is a POST request
|
||||||
if pm.metricsMonitor != nil && c.Request.Method == "POST" {
|
if pm.metricsMonitor != nil && c.Request.Method == "POST" {
|
||||||
if err := pm.metricsMonitor.wrapHandler(realModelName, c.Writer, c.Request, processGroup.ProxyRequest); err != nil {
|
if err := pm.metricsMonitor.wrapHandler(modelID, c.Writer, c.Request, processGroup.ProxyRequest); err != nil {
|
||||||
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error proxying metrics wrapped request: %s", err.Error()))
|
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error proxying metrics wrapped request: %s", err.Error()))
|
||||||
pm.proxyLogger.Errorf("Error proxying wrapped upstream request for model %s, path=%s", realModelName, originalPath)
|
pm.proxyLogger.Errorf("Error proxying wrapped upstream request for model %s, path=%s", modelID, originalPath)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if err := processGroup.ProxyRequest(realModelName, c.Writer, c.Request); err != nil {
|
if err := processGroup.ProxyRequest(modelID, c.Writer, c.Request); err != nil {
|
||||||
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error proxying request: %s", err.Error()))
|
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error proxying request: %s", err.Error()))
|
||||||
pm.proxyLogger.Errorf("Error proxying upstream request for model %s, path=%s", realModelName, originalPath)
|
pm.proxyLogger.Errorf("Error proxying upstream request for model %s, path=%s", modelID, originalPath)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -560,41 +616,90 @@ func (pm *ProxyManager) proxyInferenceHandler(c *gin.Context) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
realModelName, found := pm.config.RealModelName(requestedModel)
|
// Look for a matching local model first
|
||||||
if !found {
|
var nextHandler func(modelID string, w http.ResponseWriter, r *http.Request) error
|
||||||
pm.sendErrorResponse(c, http.StatusBadRequest, fmt.Sprintf("could not find real modelID for %s", requestedModel))
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
processGroup, _, err := pm.swapProcessGroup(realModelName)
|
modelID, found := pm.config.RealModelName(requestedModel)
|
||||||
if err != nil {
|
if found {
|
||||||
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error swapping process group: %s", err.Error()))
|
processGroup, err := pm.swapProcessGroup(modelID)
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// issue #69 allow custom model names to be sent to upstream
|
|
||||||
useModelName := pm.config.Models[realModelName].UseModelName
|
|
||||||
if useModelName != "" {
|
|
||||||
bodyBytes, err = sjson.SetBytes(bodyBytes, "model", useModelName)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error rewriting model name in JSON: %s", err.Error()))
|
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error swapping process group: %s", err.Error()))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// issue #174 strip parameters from the JSON body
|
// issue #69 allow custom model names to be sent to upstream
|
||||||
stripParams, err := pm.config.Models[realModelName].Filters.SanitizedStripParams()
|
useModelName := pm.config.Models[modelID].UseModelName
|
||||||
if err != nil { // just log it and continue
|
if useModelName != "" {
|
||||||
pm.proxyLogger.Errorf("Error sanitizing strip params string: %s, %s", pm.config.Models[realModelName].Filters.StripParams, err.Error())
|
bodyBytes, err = sjson.SetBytes(bodyBytes, "model", useModelName)
|
||||||
} else {
|
|
||||||
for _, param := range stripParams {
|
|
||||||
pm.proxyLogger.Debugf("<%s> stripping param: %s", realModelName, param)
|
|
||||||
bodyBytes, err = sjson.DeleteBytes(bodyBytes, param)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error deleting parameter %s from request", param))
|
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error rewriting model name in JSON: %s", err.Error()))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// issue #174 strip parameters from the JSON body
|
||||||
|
stripParams, err := pm.config.Models[modelID].Filters.SanitizedStripParams()
|
||||||
|
if err != nil { // just log it and continue
|
||||||
|
pm.proxyLogger.Errorf("Error sanitizing strip params string: %s, %s", pm.config.Models[modelID].Filters.StripParams, err.Error())
|
||||||
|
} else {
|
||||||
|
for _, param := range stripParams {
|
||||||
|
pm.proxyLogger.Debugf("<%s> stripping param: %s", modelID, param)
|
||||||
|
bodyBytes, err = sjson.DeleteBytes(bodyBytes, param)
|
||||||
|
if err != nil {
|
||||||
|
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error deleting parameter %s from request", param))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// issue #453 set/override parameters in the JSON body
|
||||||
|
setParams, setParamKeys := pm.config.Models[modelID].Filters.SanitizedSetParams()
|
||||||
|
for _, key := range setParamKeys {
|
||||||
|
pm.proxyLogger.Debugf("<%s> setting param: %s", modelID, key)
|
||||||
|
bodyBytes, err = sjson.SetBytes(bodyBytes, key, setParams[key])
|
||||||
|
if err != nil {
|
||||||
|
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error setting parameter %s in request", key))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pm.proxyLogger.Debugf("ProxyManager using local Process for model: %s", requestedModel)
|
||||||
|
nextHandler = processGroup.ProxyRequest
|
||||||
|
} else if pm.peerProxy != nil && pm.peerProxy.HasPeerModel(requestedModel) {
|
||||||
|
pm.proxyLogger.Debugf("ProxyManager using ProxyPeer for model: %s", requestedModel)
|
||||||
|
modelID = requestedModel
|
||||||
|
|
||||||
|
// issue #453 apply filters for peer requests
|
||||||
|
peerFilters := pm.peerProxy.GetPeerFilters(requestedModel)
|
||||||
|
|
||||||
|
// Apply stripParams - remove specified parameters from request
|
||||||
|
stripParams := peerFilters.SanitizedStripParams()
|
||||||
|
for _, param := range stripParams {
|
||||||
|
pm.proxyLogger.Debugf("<%s> stripping param: %s", requestedModel, param)
|
||||||
|
bodyBytes, err = sjson.DeleteBytes(bodyBytes, param)
|
||||||
|
if err != nil {
|
||||||
|
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error stripping parameter %s from request", param))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Apply setParams - set/override specified parameters in request
|
||||||
|
setParams, setParamKeys := peerFilters.SanitizedSetParams()
|
||||||
|
for _, key := range setParamKeys {
|
||||||
|
pm.proxyLogger.Debugf("<%s> setting param: %s", requestedModel, key)
|
||||||
|
bodyBytes, err = sjson.SetBytes(bodyBytes, key, setParams[key])
|
||||||
|
if err != nil {
|
||||||
|
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error setting parameter %s in request", key))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
nextHandler = pm.peerProxy.ProxyRequest
|
||||||
|
}
|
||||||
|
|
||||||
|
if nextHandler == nil {
|
||||||
|
pm.sendErrorResponse(c, http.StatusBadRequest, fmt.Sprintf("could not find suitable inference handler for %s", requestedModel))
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
c.Request.Body = io.NopCloser(bytes.NewBuffer(bodyBytes))
|
c.Request.Body = io.NopCloser(bytes.NewBuffer(bodyBytes))
|
||||||
@@ -607,19 +712,19 @@ func (pm *ProxyManager) proxyInferenceHandler(c *gin.Context) {
|
|||||||
// issue #366 extract values that downstream handlers may need
|
// issue #366 extract values that downstream handlers may need
|
||||||
isStreaming := gjson.GetBytes(bodyBytes, "stream").Bool()
|
isStreaming := gjson.GetBytes(bodyBytes, "stream").Bool()
|
||||||
ctx := context.WithValue(c.Request.Context(), proxyCtxKey("streaming"), isStreaming)
|
ctx := context.WithValue(c.Request.Context(), proxyCtxKey("streaming"), isStreaming)
|
||||||
ctx = context.WithValue(ctx, proxyCtxKey("model"), realModelName)
|
ctx = context.WithValue(ctx, proxyCtxKey("model"), modelID)
|
||||||
c.Request = c.Request.WithContext(ctx)
|
c.Request = c.Request.WithContext(ctx)
|
||||||
|
|
||||||
if pm.metricsMonitor != nil && c.Request.Method == "POST" {
|
if pm.metricsMonitor != nil && c.Request.Method == "POST" {
|
||||||
if err := pm.metricsMonitor.wrapHandler(realModelName, c.Writer, c.Request, processGroup.ProxyRequest); err != nil {
|
if err := pm.metricsMonitor.wrapHandler(modelID, c.Writer, c.Request, nextHandler); err != nil {
|
||||||
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error proxying metrics wrapped request: %s", err.Error()))
|
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error proxying metrics wrapped request: %s", err.Error()))
|
||||||
pm.proxyLogger.Errorf("Error Proxying Metrics Wrapped Request for processGroup %s and model %s", processGroup.id, realModelName)
|
pm.proxyLogger.Errorf("Error Proxying Metrics Wrapped Request model %s", modelID)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if err := processGroup.ProxyRequest(realModelName, c.Writer, c.Request); err != nil {
|
if err := nextHandler(modelID, c.Writer, c.Request); err != nil {
|
||||||
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error proxying request: %s", err.Error()))
|
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error proxying request: %s", err.Error()))
|
||||||
pm.proxyLogger.Errorf("Error Proxying Request for processGroup %s and model %s", processGroup.id, realModelName)
|
pm.proxyLogger.Errorf("Error Proxying Request for model %s", modelID)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -639,7 +744,13 @@ func (pm *ProxyManager) proxyOAIPostFormHandler(c *gin.Context) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
processGroup, realModelName, err := pm.swapProcessGroup(requestedModel)
|
modelID, found := pm.config.RealModelName(requestedModel)
|
||||||
|
if !found {
|
||||||
|
pm.sendErrorResponse(c, http.StatusBadRequest, fmt.Sprintf("could not find real modelID for %s", requestedModel))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
processGroup, err := pm.swapProcessGroup(modelID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error swapping process group: %s", err.Error()))
|
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error swapping process group: %s", err.Error()))
|
||||||
return
|
return
|
||||||
@@ -657,7 +768,7 @@ func (pm *ProxyManager) proxyOAIPostFormHandler(c *gin.Context) {
|
|||||||
// If this is the model field and we have a profile, use just the model name
|
// If this is the model field and we have a profile, use just the model name
|
||||||
if key == "model" {
|
if key == "model" {
|
||||||
// # issue #69 allow custom model names to be sent to upstream
|
// # issue #69 allow custom model names to be sent to upstream
|
||||||
useModelName := pm.config.Models[realModelName].UseModelName
|
useModelName := pm.config.Models[modelID].UseModelName
|
||||||
|
|
||||||
if useModelName != "" {
|
if useModelName != "" {
|
||||||
fieldValue = useModelName
|
fieldValue = useModelName
|
||||||
@@ -728,9 +839,9 @@ func (pm *ProxyManager) proxyOAIPostFormHandler(c *gin.Context) {
|
|||||||
modifiedReq.ContentLength = int64(requestBuffer.Len())
|
modifiedReq.ContentLength = int64(requestBuffer.Len())
|
||||||
|
|
||||||
// Use the modified request for proxying
|
// Use the modified request for proxying
|
||||||
if err := processGroup.ProxyRequest(realModelName, c.Writer, modifiedReq); err != nil {
|
if err := processGroup.ProxyRequest(modelID, c.Writer, modifiedReq); err != nil {
|
||||||
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error proxying request: %s", err.Error()))
|
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error proxying request: %s", err.Error()))
|
||||||
pm.proxyLogger.Errorf("Error Proxying Request for processGroup %s and model %s", processGroup.id, realModelName)
|
pm.proxyLogger.Errorf("Error Proxying Request for processGroup %s and model %s", processGroup.id, modelID)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -745,6 +856,67 @@ func (pm *ProxyManager) sendErrorResponse(c *gin.Context, statusCode int, messag
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// apiKeyAuth returns a middleware that validates API keys if configured.
|
||||||
|
// Returns a pass-through handler if no API keys are configured.
|
||||||
|
func (pm *ProxyManager) apiKeyAuth() gin.HandlerFunc {
|
||||||
|
if len(pm.config.RequiredAPIKeys) == 0 {
|
||||||
|
return func(c *gin.Context) { c.Next() }
|
||||||
|
}
|
||||||
|
|
||||||
|
return func(c *gin.Context) {
|
||||||
|
xApiKey := c.GetHeader("x-api-key")
|
||||||
|
|
||||||
|
var bearerKey string
|
||||||
|
var basicKey string
|
||||||
|
if auth := c.GetHeader("Authorization"); auth != "" {
|
||||||
|
if strings.HasPrefix(auth, "Bearer ") {
|
||||||
|
bearerKey = strings.TrimPrefix(auth, "Bearer ")
|
||||||
|
} else if strings.HasPrefix(auth, "Basic ") {
|
||||||
|
// Basic Auth: base64(username:password), password is the API key
|
||||||
|
encoded := strings.TrimPrefix(auth, "Basic ")
|
||||||
|
if decoded, err := base64.StdEncoding.DecodeString(encoded); err == nil {
|
||||||
|
parts := strings.SplitN(string(decoded), ":", 2)
|
||||||
|
if len(parts) == 2 {
|
||||||
|
basicKey = parts[1] // password is the API key
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use first key found: Basic, then Bearer, then x-api-key
|
||||||
|
var providedKey string
|
||||||
|
if basicKey != "" {
|
||||||
|
providedKey = basicKey
|
||||||
|
} else if bearerKey != "" {
|
||||||
|
providedKey = bearerKey
|
||||||
|
} else {
|
||||||
|
providedKey = xApiKey
|
||||||
|
}
|
||||||
|
|
||||||
|
// Validate key
|
||||||
|
valid := false
|
||||||
|
for _, key := range pm.config.RequiredAPIKeys {
|
||||||
|
if providedKey == key {
|
||||||
|
valid = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !valid {
|
||||||
|
c.Header("WWW-Authenticate", `Basic realm="llama-swap"`)
|
||||||
|
pm.sendErrorResponse(c, http.StatusUnauthorized, "unauthorized: invalid or missing API key")
|
||||||
|
c.Abort()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Strip auth headers to prevent leakage to upstream
|
||||||
|
c.Request.Header.Del("Authorization")
|
||||||
|
c.Request.Header.Del("x-api-key")
|
||||||
|
|
||||||
|
c.Next()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (pm *ProxyManager) unloadAllModelsHandler(c *gin.Context) {
|
func (pm *ProxyManager) unloadAllModelsHandler(c *gin.Context) {
|
||||||
pm.StopProcesses(StopImmediately)
|
pm.StopProcesses(StopImmediately)
|
||||||
c.String(http.StatusOK, "OK")
|
c.String(http.StatusOK, "OK")
|
||||||
@@ -758,8 +930,13 @@ func (pm *ProxyManager) listRunningProcessesHandler(context *gin.Context) {
|
|||||||
for _, process := range processGroup.processes {
|
for _, process := range processGroup.processes {
|
||||||
if process.CurrentState() == StateReady {
|
if process.CurrentState() == StateReady {
|
||||||
runningProcesses = append(runningProcesses, gin.H{
|
runningProcesses = append(runningProcesses, gin.H{
|
||||||
"model": process.ID,
|
"model": process.ID,
|
||||||
"state": process.state,
|
"state": process.state,
|
||||||
|
"cmd": process.config.Cmd,
|
||||||
|
"proxy": process.config.Proxy,
|
||||||
|
"ttl": process.config.UnloadAfter,
|
||||||
|
"name": process.config.Name,
|
||||||
|
"description": process.config.Description,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -18,11 +18,13 @@ type Model struct {
|
|||||||
Description string `json:"description"`
|
Description string `json:"description"`
|
||||||
State string `json:"state"`
|
State string `json:"state"`
|
||||||
Unlisted bool `json:"unlisted"`
|
Unlisted bool `json:"unlisted"`
|
||||||
|
PeerID string `json:"peerID"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func addApiHandlers(pm *ProxyManager) {
|
func addApiHandlers(pm *ProxyManager) {
|
||||||
// Add API endpoints for React to consume
|
// Add API endpoints for React to consume
|
||||||
apiGroup := pm.ginEngine.Group("/api")
|
// Protected with API key authentication
|
||||||
|
apiGroup := pm.ginEngine.Group("/api", pm.apiKeyAuth())
|
||||||
{
|
{
|
||||||
apiGroup.POST("/models/unload", pm.apiUnloadAllModels)
|
apiGroup.POST("/models/unload", pm.apiUnloadAllModels)
|
||||||
apiGroup.POST("/models/unload/*model", pm.apiUnloadSingleModelHandler)
|
apiGroup.POST("/models/unload/*model", pm.apiUnloadSingleModelHandler)
|
||||||
@@ -82,6 +84,18 @@ func (pm *ProxyManager) getModelStatus() []Model {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Iterate over the peer models
|
||||||
|
if pm.peerProxy != nil {
|
||||||
|
for peerID, peer := range pm.peerProxy.ListPeers() {
|
||||||
|
for _, modelID := range peer.Models {
|
||||||
|
models = append(models, Model{
|
||||||
|
Id: modelID,
|
||||||
|
PeerID: peerID,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return models
|
return models
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -31,7 +31,7 @@ func (pm *ProxyManager) streamLogsHandler(c *gin.Context) {
|
|||||||
// prevent nginx from buffering streamed logs
|
// prevent nginx from buffering streamed logs
|
||||||
c.Header("X-Accel-Buffering", "no")
|
c.Header("X-Accel-Buffering", "no")
|
||||||
|
|
||||||
logMonitorId := c.Param("logMonitorID")
|
logMonitorId := strings.TrimPrefix(c.Param("logMonitorID"), "/")
|
||||||
logger, err := pm.getLogger(logMonitorId)
|
logger, err := pm.getLogger(logMonitorId)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
c.String(http.StatusBadRequest, err.Error())
|
c.String(http.StatusBadRequest, err.Error())
|
||||||
@@ -83,18 +83,25 @@ func (pm *ProxyManager) streamLogsHandler(c *gin.Context) {
|
|||||||
|
|
||||||
// getLogger searches for the appropriate logger based on the logMonitorId
|
// getLogger searches for the appropriate logger based on the logMonitorId
|
||||||
func (pm *ProxyManager) getLogger(logMonitorId string) (*LogMonitor, error) {
|
func (pm *ProxyManager) getLogger(logMonitorId string) (*LogMonitor, error) {
|
||||||
var logger *LogMonitor
|
switch logMonitorId {
|
||||||
|
case "":
|
||||||
if logMonitorId == "" {
|
|
||||||
// maintain the default
|
// maintain the default
|
||||||
logger = pm.muxLogger
|
return pm.muxLogger, nil
|
||||||
} else if logMonitorId == "proxy" {
|
case "proxy":
|
||||||
logger = pm.proxyLogger
|
return pm.proxyLogger, nil
|
||||||
} else if logMonitorId == "upstream" {
|
case "upstream":
|
||||||
logger = pm.upstreamLogger
|
return pm.upstreamLogger, nil
|
||||||
} else {
|
default:
|
||||||
return nil, fmt.Errorf("invalid logger. Use 'proxy' or 'upstream'")
|
// search for a models specific logger using findModelInPath
|
||||||
}
|
// to handle model names with slashes (e.g., "author/model")
|
||||||
|
if _, name, _, found := pm.findModelInPath("/" + logMonitorId); found {
|
||||||
|
for _, group := range pm.processGroups {
|
||||||
|
if process, found := group.GetMember(name); found {
|
||||||
|
return process.Logger(), nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return logger, nil
|
return nil, fmt.Errorf("invalid logger. Use 'proxy', 'upstream' or a model's ID")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ package proxy
|
|||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
|
"encoding/base64"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"math/rand"
|
"math/rand"
|
||||||
@@ -36,10 +37,6 @@ func (r *TestResponseRecorder) CloseNotify() <-chan bool {
|
|||||||
return r.closeChannel
|
return r.closeChannel
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *TestResponseRecorder) closeClient() {
|
|
||||||
r.closeChannel <- true
|
|
||||||
}
|
|
||||||
|
|
||||||
func CreateTestResponseRecorder() *TestResponseRecorder {
|
func CreateTestResponseRecorder() *TestResponseRecorder {
|
||||||
return &TestResponseRecorder{
|
return &TestResponseRecorder{
|
||||||
httptest.NewRecorder(),
|
httptest.NewRecorder(),
|
||||||
@@ -223,17 +220,23 @@ func TestProxyManager_ListModelsHandler(t *testing.T) {
|
|||||||
model2Config.Name = " " // empty whitespace only strings will get ignored
|
model2Config.Name = " " // empty whitespace only strings will get ignored
|
||||||
model2Config.Description = " "
|
model2Config.Description = " "
|
||||||
|
|
||||||
config := config.Config{
|
cfg := config.Config{
|
||||||
HealthCheckTimeout: 15,
|
HealthCheckTimeout: 15,
|
||||||
Models: map[string]config.ModelConfig{
|
Models: map[string]config.ModelConfig{
|
||||||
"model1": model1Config,
|
"model1": model1Config,
|
||||||
"model2": model2Config,
|
"model2": model2Config,
|
||||||
"model3": getTestSimpleResponderConfig("model3"),
|
"model3": getTestSimpleResponderConfig("model3"),
|
||||||
},
|
},
|
||||||
|
Peers: map[string]config.PeerConfig{
|
||||||
|
"peer1": {
|
||||||
|
Proxy: "http://peer1:8080",
|
||||||
|
Models: []string{"peer-model-a", "peer-model-b"},
|
||||||
|
},
|
||||||
|
},
|
||||||
LogLevel: "error",
|
LogLevel: "error",
|
||||||
}
|
}
|
||||||
|
|
||||||
proxy := New(config)
|
proxy := New(cfg)
|
||||||
|
|
||||||
// Create a test request
|
// Create a test request
|
||||||
req := httptest.NewRequest("GET", "/v1/models", nil)
|
req := httptest.NewRequest("GET", "/v1/models", nil)
|
||||||
@@ -258,14 +261,16 @@ func TestProxyManager_ListModelsHandler(t *testing.T) {
|
|||||||
t.Fatalf("Failed to parse JSON response: %v", err)
|
t.Fatalf("Failed to parse JSON response: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check the number of models returned
|
// Check the number of models returned (3 local + 2 peer models)
|
||||||
assert.Len(t, response.Data, 3)
|
assert.Len(t, response.Data, 5)
|
||||||
|
|
||||||
// Check the details of each model
|
// Check the details of each model
|
||||||
expectedModels := map[string]struct{}{
|
expectedModels := map[string]struct{}{
|
||||||
"model1": {},
|
"model1": {},
|
||||||
"model2": {},
|
"model2": {},
|
||||||
"model3": {},
|
"model3": {},
|
||||||
|
"peer-model-a": {},
|
||||||
|
"peer-model-b": {},
|
||||||
}
|
}
|
||||||
|
|
||||||
// make all models
|
// make all models
|
||||||
@@ -296,6 +301,19 @@ func TestProxyManager_ListModelsHandler(t *testing.T) {
|
|||||||
description, ok := model["description"].(string)
|
description, ok := model["description"].(string)
|
||||||
assert.True(t, ok, "description should be a string")
|
assert.True(t, ok, "description should be a string")
|
||||||
assert.Equal(t, "Model 1 description is used for testing", description)
|
assert.Equal(t, "Model 1 description is used for testing", description)
|
||||||
|
} else if modelID == "peer-model-a" || modelID == "peer-model-b" {
|
||||||
|
// Peer models should have meta.llamaswap.peerID
|
||||||
|
meta, exists := model["meta"]
|
||||||
|
assert.True(t, exists, "peer model should have meta field")
|
||||||
|
metaMap, ok := meta.(map[string]interface{})
|
||||||
|
assert.True(t, ok, "meta should be a map")
|
||||||
|
llamaswap, exists := metaMap["llamaswap"]
|
||||||
|
assert.True(t, exists, "meta should have llamaswap field")
|
||||||
|
llamaswapMap, ok := llamaswap.(map[string]interface{})
|
||||||
|
assert.True(t, ok, "llamaswap should be a map")
|
||||||
|
peerID, exists := llamaswapMap["peerID"]
|
||||||
|
assert.True(t, exists, "llamaswap should have peerID field")
|
||||||
|
assert.Equal(t, "peer1", peerID)
|
||||||
} else {
|
} else {
|
||||||
_, exists := model["name"]
|
_, exists := model["name"]
|
||||||
assert.False(t, exists, "unexpected name field for model: %s", modelID)
|
assert.False(t, exists, "unexpected name field for model: %s", modelID)
|
||||||
@@ -502,6 +520,10 @@ func TestProxyManager_ListModelsHandler_IncludeAliasesInList(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestProxyManager_Shutdown(t *testing.T) {
|
func TestProxyManager_Shutdown(t *testing.T) {
|
||||||
|
if testing.Short() {
|
||||||
|
t.Skip("skipping slow test")
|
||||||
|
}
|
||||||
|
|
||||||
// make broken model configurations
|
// make broken model configurations
|
||||||
model1Config := getTestSimpleResponderConfigPort("model1", 9991)
|
model1Config := getTestSimpleResponderConfigPort("model1", 9991)
|
||||||
model1Config.Proxy = "http://localhost:10001/"
|
model1Config.Proxy = "http://localhost:10001/"
|
||||||
@@ -650,8 +672,13 @@ func TestProxyManager_RunningEndpoint(t *testing.T) {
|
|||||||
// Define a helper struct to parse the JSON response.
|
// Define a helper struct to parse the JSON response.
|
||||||
type RunningResponse struct {
|
type RunningResponse struct {
|
||||||
Running []struct {
|
Running []struct {
|
||||||
Model string `json:"model"`
|
Model string `json:"model"`
|
||||||
State string `json:"state"`
|
State string `json:"state"`
|
||||||
|
Cmd string `json:"cmd"`
|
||||||
|
Proxy string `json:"proxy"`
|
||||||
|
TTL int `json:"ttl"`
|
||||||
|
Name string `json:"name"`
|
||||||
|
Description string `json:"description"`
|
||||||
} `json:"running"`
|
} `json:"running"`
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -699,6 +726,11 @@ func TestProxyManager_RunningEndpoint(t *testing.T) {
|
|||||||
|
|
||||||
// Is the model loaded?
|
// Is the model loaded?
|
||||||
assert.Equal(t, "ready", response.Running[0].State)
|
assert.Equal(t, "ready", response.Running[0].State)
|
||||||
|
|
||||||
|
// Verify extended fields are present
|
||||||
|
assert.NotEmpty(t, response.Running[0].Cmd, "cmd should be populated")
|
||||||
|
assert.NotEmpty(t, response.Running[0].Proxy, "proxy should be populated")
|
||||||
|
assert.Equal(t, 0, response.Running[0].TTL, "ttl should default to 0")
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -944,7 +976,9 @@ func TestProxyManager_ChatContentLength(t *testing.T) {
|
|||||||
func TestProxyManager_FiltersStripParams(t *testing.T) {
|
func TestProxyManager_FiltersStripParams(t *testing.T) {
|
||||||
modelConfig := getTestSimpleResponderConfig("model1")
|
modelConfig := getTestSimpleResponderConfig("model1")
|
||||||
modelConfig.Filters = config.ModelFilters{
|
modelConfig.Filters = config.ModelFilters{
|
||||||
StripParams: "temperature, model, stream",
|
Filters: config.Filters{
|
||||||
|
StripParams: "temperature, model, stream",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
config := config.AddDefaultGroupToConfig(config.Config{
|
config := config.AddDefaultGroupToConfig(config.Config{
|
||||||
@@ -1078,7 +1112,8 @@ func TestProxyManager_StreamingEndpointsReturnNoBufferingHeader(t *testing.T) {
|
|||||||
config := config.AddDefaultGroupToConfig(config.Config{
|
config := config.AddDefaultGroupToConfig(config.Config{
|
||||||
HealthCheckTimeout: 15,
|
HealthCheckTimeout: 15,
|
||||||
Models: map[string]config.ModelConfig{
|
Models: map[string]config.ModelConfig{
|
||||||
"model1": getTestSimpleResponderConfig("model1"),
|
"model1": getTestSimpleResponderConfig("model1"),
|
||||||
|
"author/model": getTestSimpleResponderConfig("author/model"),
|
||||||
},
|
},
|
||||||
LogLevel: "error",
|
LogLevel: "error",
|
||||||
})
|
})
|
||||||
@@ -1091,6 +1126,7 @@ func TestProxyManager_StreamingEndpointsReturnNoBufferingHeader(t *testing.T) {
|
|||||||
"/logs/stream",
|
"/logs/stream",
|
||||||
"/logs/stream/proxy",
|
"/logs/stream/proxy",
|
||||||
"/logs/stream/upstream",
|
"/logs/stream/upstream",
|
||||||
|
"/logs/stream/author/model",
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, endpoint := range endpoints {
|
for _, endpoint := range endpoints {
|
||||||
@@ -1185,3 +1221,349 @@ func TestProxyManager_ApiGetVersion(t *testing.T) {
|
|||||||
assert.Equal(t, value, response[key], "%s value %s should match response %s", key, value, response[key])
|
assert.Equal(t, value, response[key], "%s value %s should match response %s", key, value, response[key])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestProxyManager_APIKeyAuth(t *testing.T) {
|
||||||
|
testConfig := config.AddDefaultGroupToConfig(config.Config{
|
||||||
|
HealthCheckTimeout: 15,
|
||||||
|
Models: map[string]config.ModelConfig{
|
||||||
|
"model1": getTestSimpleResponderConfig("model1"),
|
||||||
|
},
|
||||||
|
RequiredAPIKeys: []string{"valid-key-1", "valid-key-2"},
|
||||||
|
LogLevel: "error",
|
||||||
|
})
|
||||||
|
|
||||||
|
proxy := New(testConfig)
|
||||||
|
defer proxy.StopProcesses(StopImmediately)
|
||||||
|
|
||||||
|
t.Run("valid key in x-api-key header", func(t *testing.T) {
|
||||||
|
reqBody := `{"model":"model1"}`
|
||||||
|
req := httptest.NewRequest("POST", "/v1/chat/completions", bytes.NewBufferString(reqBody))
|
||||||
|
req.Header.Set("x-api-key", "valid-key-1")
|
||||||
|
w := CreateTestResponseRecorder()
|
||||||
|
|
||||||
|
proxy.ServeHTTP(w, req)
|
||||||
|
assert.Equal(t, http.StatusOK, w.Code)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("valid key in Authorization Bearer header", func(t *testing.T) {
|
||||||
|
reqBody := `{"model":"model1"}`
|
||||||
|
req := httptest.NewRequest("POST", "/v1/chat/completions", bytes.NewBufferString(reqBody))
|
||||||
|
req.Header.Set("Authorization", "Bearer valid-key-2")
|
||||||
|
w := CreateTestResponseRecorder()
|
||||||
|
|
||||||
|
proxy.ServeHTTP(w, req)
|
||||||
|
assert.Equal(t, http.StatusOK, w.Code)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("both headers with matching keys", func(t *testing.T) {
|
||||||
|
reqBody := `{"model":"model1"}`
|
||||||
|
req := httptest.NewRequest("POST", "/v1/chat/completions", bytes.NewBufferString(reqBody))
|
||||||
|
req.Header.Set("x-api-key", "valid-key-1")
|
||||||
|
req.Header.Set("Authorization", "Bearer valid-key-1")
|
||||||
|
w := CreateTestResponseRecorder()
|
||||||
|
|
||||||
|
proxy.ServeHTTP(w, req)
|
||||||
|
assert.Equal(t, http.StatusOK, w.Code)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("invalid key returns 401", func(t *testing.T) {
|
||||||
|
reqBody := `{"model":"model1"}`
|
||||||
|
req := httptest.NewRequest("POST", "/v1/chat/completions", bytes.NewBufferString(reqBody))
|
||||||
|
req.Header.Set("x-api-key", "invalid-key")
|
||||||
|
w := CreateTestResponseRecorder()
|
||||||
|
|
||||||
|
proxy.ServeHTTP(w, req)
|
||||||
|
assert.Equal(t, http.StatusUnauthorized, w.Code)
|
||||||
|
assert.Contains(t, w.Body.String(), "unauthorized")
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("missing key returns 401", func(t *testing.T) {
|
||||||
|
reqBody := `{"model":"model1"}`
|
||||||
|
req := httptest.NewRequest("POST", "/v1/chat/completions", bytes.NewBufferString(reqBody))
|
||||||
|
w := CreateTestResponseRecorder()
|
||||||
|
|
||||||
|
proxy.ServeHTTP(w, req)
|
||||||
|
assert.Equal(t, http.StatusUnauthorized, w.Code)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("valid key in Basic Auth header", func(t *testing.T) {
|
||||||
|
reqBody := `{"model":"model1"}`
|
||||||
|
req := httptest.NewRequest("POST", "/v1/chat/completions", bytes.NewBufferString(reqBody))
|
||||||
|
// Basic Auth: base64("anyuser:valid-key-1")
|
||||||
|
credentials := base64.StdEncoding.EncodeToString([]byte("anyuser:valid-key-1"))
|
||||||
|
req.Header.Set("Authorization", "Basic "+credentials)
|
||||||
|
w := CreateTestResponseRecorder()
|
||||||
|
|
||||||
|
proxy.ServeHTTP(w, req)
|
||||||
|
assert.Equal(t, http.StatusOK, w.Code)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("invalid key in Basic Auth header returns 401", func(t *testing.T) {
|
||||||
|
reqBody := `{"model":"model1"}`
|
||||||
|
req := httptest.NewRequest("POST", "/v1/chat/completions", bytes.NewBufferString(reqBody))
|
||||||
|
credentials := base64.StdEncoding.EncodeToString([]byte("anyuser:wrong-key"))
|
||||||
|
req.Header.Set("Authorization", "Basic "+credentials)
|
||||||
|
w := CreateTestResponseRecorder()
|
||||||
|
|
||||||
|
proxy.ServeHTTP(w, req)
|
||||||
|
assert.Equal(t, http.StatusUnauthorized, w.Code)
|
||||||
|
assert.Contains(t, w.Body.String(), "unauthorized")
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("x-api-key and Basic Auth with matching keys", func(t *testing.T) {
|
||||||
|
reqBody := `{"model":"model1"}`
|
||||||
|
req := httptest.NewRequest("POST", "/v1/chat/completions", bytes.NewBufferString(reqBody))
|
||||||
|
req.Header.Set("x-api-key", "valid-key-1")
|
||||||
|
credentials := base64.StdEncoding.EncodeToString([]byte("user:valid-key-1"))
|
||||||
|
req.Header.Set("Authorization", "Basic "+credentials)
|
||||||
|
w := CreateTestResponseRecorder()
|
||||||
|
|
||||||
|
proxy.ServeHTTP(w, req)
|
||||||
|
assert.Equal(t, http.StatusOK, w.Code)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("401 response includes WWW-Authenticate header", func(t *testing.T) {
|
||||||
|
reqBody := `{"model":"model1"}`
|
||||||
|
req := httptest.NewRequest("POST", "/v1/chat/completions", bytes.NewBufferString(reqBody))
|
||||||
|
w := CreateTestResponseRecorder()
|
||||||
|
|
||||||
|
proxy.ServeHTTP(w, req)
|
||||||
|
assert.Equal(t, http.StatusUnauthorized, w.Code)
|
||||||
|
assert.Equal(t, `Basic realm="llama-swap"`, w.Header().Get("WWW-Authenticate"))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestProxyManager_APIKeyAuth_Disabled(t *testing.T) {
|
||||||
|
// Config without RequiredAPIKeys - auth should be disabled
|
||||||
|
testConfig := config.AddDefaultGroupToConfig(config.Config{
|
||||||
|
HealthCheckTimeout: 15,
|
||||||
|
Models: map[string]config.ModelConfig{
|
||||||
|
"model1": getTestSimpleResponderConfig("model1"),
|
||||||
|
},
|
||||||
|
LogLevel: "error",
|
||||||
|
})
|
||||||
|
|
||||||
|
proxy := New(testConfig)
|
||||||
|
defer proxy.StopProcesses(StopImmediately)
|
||||||
|
|
||||||
|
t.Run("requests pass without API key when not configured", func(t *testing.T) {
|
||||||
|
reqBody := `{"model":"model1"}`
|
||||||
|
req := httptest.NewRequest("POST", "/v1/chat/completions", bytes.NewBufferString(reqBody))
|
||||||
|
w := CreateTestResponseRecorder()
|
||||||
|
|
||||||
|
proxy.ServeHTTP(w, req)
|
||||||
|
assert.Equal(t, http.StatusOK, w.Code)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestProxyManager_PeerProxy_InferenceHandler tests the peerProxy integration
|
||||||
|
// in proxyInferenceHandler for issue #433
|
||||||
|
func TestProxyManager_PeerProxy_InferenceHandler(t *testing.T) {
|
||||||
|
t.Run("requests to peer models are proxied", func(t *testing.T) {
|
||||||
|
// Create a test server to act as the peer
|
||||||
|
peerServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
w.Write([]byte(`{"response":"from-peer","model":"peer-model"}`))
|
||||||
|
}))
|
||||||
|
defer peerServer.Close()
|
||||||
|
|
||||||
|
// Create config with peers but no local model for "peer-model"
|
||||||
|
configStr := fmt.Sprintf(`
|
||||||
|
logLevel: error
|
||||||
|
peers:
|
||||||
|
test-peer:
|
||||||
|
proxy: %s
|
||||||
|
models:
|
||||||
|
- peer-model
|
||||||
|
models:
|
||||||
|
local-model:
|
||||||
|
cmd: %s -port ${PORT} -silent -respond local-model
|
||||||
|
`, peerServer.URL, getSimpleResponderPath())
|
||||||
|
|
||||||
|
testConfig, err := config.LoadConfigFromReader(strings.NewReader(configStr))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
proxy := New(testConfig)
|
||||||
|
defer proxy.StopProcesses(StopImmediately)
|
||||||
|
|
||||||
|
reqBody := `{"model":"peer-model"}`
|
||||||
|
req := httptest.NewRequest("POST", "/v1/chat/completions", bytes.NewBufferString(reqBody))
|
||||||
|
w := CreateTestResponseRecorder()
|
||||||
|
|
||||||
|
proxy.ServeHTTP(w, req)
|
||||||
|
assert.Equal(t, http.StatusOK, w.Code)
|
||||||
|
assert.Contains(t, w.Body.String(), "from-peer")
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("local models take precedence over peer models", func(t *testing.T) {
|
||||||
|
// Create a test server to act as the peer - should NOT be called
|
||||||
|
peerCalled := false
|
||||||
|
peerServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
peerCalled = true
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
w.Write([]byte(`{"response":"from-peer"}`))
|
||||||
|
}))
|
||||||
|
defer peerServer.Close()
|
||||||
|
|
||||||
|
// Create config where "shared-model" exists both locally and on peer
|
||||||
|
configStr := fmt.Sprintf(`
|
||||||
|
logLevel: error
|
||||||
|
peers:
|
||||||
|
test-peer:
|
||||||
|
proxy: %s
|
||||||
|
models:
|
||||||
|
- shared-model
|
||||||
|
models:
|
||||||
|
shared-model:
|
||||||
|
cmd: %s -port ${PORT} -silent -respond local-response
|
||||||
|
`, peerServer.URL, getSimpleResponderPath())
|
||||||
|
|
||||||
|
testConfig, err := config.LoadConfigFromReader(strings.NewReader(configStr))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
proxy := New(testConfig)
|
||||||
|
defer proxy.StopProcesses(StopImmediately)
|
||||||
|
|
||||||
|
reqBody := `{"model":"shared-model"}`
|
||||||
|
req := httptest.NewRequest("POST", "/v1/chat/completions", bytes.NewBufferString(reqBody))
|
||||||
|
w := CreateTestResponseRecorder()
|
||||||
|
|
||||||
|
proxy.ServeHTTP(w, req)
|
||||||
|
assert.Equal(t, http.StatusOK, w.Code)
|
||||||
|
assert.Contains(t, w.Body.String(), "local-response")
|
||||||
|
assert.False(t, peerCalled, "peer should not be called when local model exists")
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("unknown model returns error", func(t *testing.T) {
|
||||||
|
// Create a test server to act as the peer
|
||||||
|
peerServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
}))
|
||||||
|
defer peerServer.Close()
|
||||||
|
|
||||||
|
configStr := fmt.Sprintf(`
|
||||||
|
logLevel: error
|
||||||
|
peers:
|
||||||
|
test-peer:
|
||||||
|
proxy: %s
|
||||||
|
models:
|
||||||
|
- peer-model
|
||||||
|
models:
|
||||||
|
local-model:
|
||||||
|
cmd: %s -port ${PORT} -silent -respond local-model
|
||||||
|
`, peerServer.URL, getSimpleResponderPath())
|
||||||
|
|
||||||
|
testConfig, err := config.LoadConfigFromReader(strings.NewReader(configStr))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
proxy := New(testConfig)
|
||||||
|
defer proxy.StopProcesses(StopImmediately)
|
||||||
|
|
||||||
|
reqBody := `{"model":"unknown-model"}`
|
||||||
|
req := httptest.NewRequest("POST", "/v1/chat/completions", bytes.NewBufferString(reqBody))
|
||||||
|
w := CreateTestResponseRecorder()
|
||||||
|
|
||||||
|
proxy.ServeHTTP(w, req)
|
||||||
|
assert.Equal(t, http.StatusBadRequest, w.Code)
|
||||||
|
assert.Contains(t, w.Body.String(), "could not find suitable inference handler")
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("peer API key is injected into request", func(t *testing.T) {
|
||||||
|
var receivedAuthHeader string
|
||||||
|
peerServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
receivedAuthHeader = r.Header.Get("Authorization")
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
w.Write([]byte(`{"response":"ok"}`))
|
||||||
|
}))
|
||||||
|
defer peerServer.Close()
|
||||||
|
|
||||||
|
configStr := fmt.Sprintf(`
|
||||||
|
logLevel: error
|
||||||
|
peers:
|
||||||
|
test-peer:
|
||||||
|
proxy: %s
|
||||||
|
apiKey: secret-peer-key
|
||||||
|
models:
|
||||||
|
- peer-model
|
||||||
|
models:
|
||||||
|
local-model:
|
||||||
|
cmd: %s -port ${PORT} -silent -respond local-model
|
||||||
|
`, peerServer.URL, getSimpleResponderPath())
|
||||||
|
|
||||||
|
testConfig, err := config.LoadConfigFromReader(strings.NewReader(configStr))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
proxy := New(testConfig)
|
||||||
|
defer proxy.StopProcesses(StopImmediately)
|
||||||
|
|
||||||
|
reqBody := `{"model":"peer-model"}`
|
||||||
|
req := httptest.NewRequest("POST", "/v1/chat/completions", bytes.NewBufferString(reqBody))
|
||||||
|
w := CreateTestResponseRecorder()
|
||||||
|
|
||||||
|
proxy.ServeHTTP(w, req)
|
||||||
|
assert.Equal(t, http.StatusOK, w.Code)
|
||||||
|
assert.Equal(t, "Bearer secret-peer-key", receivedAuthHeader)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("no peers configured - unknown model returns error", func(t *testing.T) {
|
||||||
|
testConfig := config.AddDefaultGroupToConfig(config.Config{
|
||||||
|
HealthCheckTimeout: 15,
|
||||||
|
Models: map[string]config.ModelConfig{
|
||||||
|
"local-model": getTestSimpleResponderConfig("local-model"),
|
||||||
|
},
|
||||||
|
LogLevel: "error",
|
||||||
|
})
|
||||||
|
|
||||||
|
proxy := New(testConfig)
|
||||||
|
defer proxy.StopProcesses(StopImmediately)
|
||||||
|
|
||||||
|
// peerProxy exists but has no peer models configured
|
||||||
|
assert.False(t, proxy.peerProxy.HasPeerModel("unknown-model"))
|
||||||
|
|
||||||
|
reqBody := `{"model":"unknown-model"}`
|
||||||
|
req := httptest.NewRequest("POST", "/v1/chat/completions", bytes.NewBufferString(reqBody))
|
||||||
|
w := CreateTestResponseRecorder()
|
||||||
|
|
||||||
|
proxy.ServeHTTP(w, req)
|
||||||
|
assert.Equal(t, http.StatusBadRequest, w.Code)
|
||||||
|
assert.Contains(t, w.Body.String(), "could not find suitable inference handler")
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("peer streaming response sets X-Accel-Buffering header", func(t *testing.T) {
|
||||||
|
peerServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.Header().Set("Content-Type", "text/event-stream")
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
w.Write([]byte("data: test\n\n"))
|
||||||
|
}))
|
||||||
|
defer peerServer.Close()
|
||||||
|
|
||||||
|
configStr := fmt.Sprintf(`
|
||||||
|
logLevel: error
|
||||||
|
peers:
|
||||||
|
test-peer:
|
||||||
|
proxy: %s
|
||||||
|
models:
|
||||||
|
- peer-model
|
||||||
|
models:
|
||||||
|
local-model:
|
||||||
|
cmd: %s -port ${PORT} -silent -respond local-model
|
||||||
|
`, peerServer.URL, getSimpleResponderPath())
|
||||||
|
|
||||||
|
testConfig, err := config.LoadConfigFromReader(strings.NewReader(configStr))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
proxy := New(testConfig)
|
||||||
|
defer proxy.StopProcesses(StopImmediately)
|
||||||
|
|
||||||
|
reqBody := `{"model":"peer-model"}`
|
||||||
|
req := httptest.NewRequest("POST", "/v1/chat/completions", bytes.NewBufferString(reqBody))
|
||||||
|
w := CreateTestResponseRecorder()
|
||||||
|
|
||||||
|
proxy.ServeHTTP(w, req)
|
||||||
|
assert.Equal(t, http.StatusOK, w.Code)
|
||||||
|
assert.Equal(t, "no", w.Header().Get("X-Accel-Buffering"))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|||||||
@@ -0,0 +1,2 @@
|
|||||||
|
node_modules
|
||||||
|
.vite
|
||||||
@@ -0,0 +1,17 @@
|
|||||||
|
<!doctype html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8" />
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||||
|
<link rel="icon" type="image/png" href="/favicon-96x96.png" sizes="96x96" />
|
||||||
|
<link rel="icon" type="image/svg+xml" href="/favicon.svg" />
|
||||||
|
<link rel="shortcut icon" href="/favicon.ico" />
|
||||||
|
<link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png" />
|
||||||
|
<link rel="manifest" href="/site.webmanifest" />
|
||||||
|
<title>llama-swap</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div id="app"></div>
|
||||||
|
<script type="module" src="/src/main.ts"></script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
{
|
||||||
|
"name": "ui-svelte",
|
||||||
|
"private": true,
|
||||||
|
"version": "0.0.0",
|
||||||
|
"type": "module",
|
||||||
|
"scripts": {
|
||||||
|
"start": "vite",
|
||||||
|
"build": "vite build --emptyOutDir",
|
||||||
|
"preview": "vite preview",
|
||||||
|
"check": "svelte-check --tsconfig ./tsconfig.json"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"@sveltejs/vite-plugin-svelte": "^5.0.3",
|
||||||
|
"@tailwindcss/vite": "^4.1.8",
|
||||||
|
"@tsconfig/svelte": "^5.0.4",
|
||||||
|
"svelte": "^5.19.0",
|
||||||
|
"svelte-check": "^4.1.4",
|
||||||
|
"tailwindcss": "^4.1.8",
|
||||||
|
"typescript": "~5.8.3",
|
||||||
|
"vite": "^6.3.5"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"svelte-spa-router": "^4.0.1"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
After Width: | Height: | Size: 5.9 KiB |
|
After Width: | Height: | Size: 2.2 KiB |
|
After Width: | Height: | Size: 15 KiB |
|
After Width: | Height: | Size: 38 KiB |
@@ -0,0 +1,21 @@
|
|||||||
|
{
|
||||||
|
"name": "llama-swap",
|
||||||
|
"short_name": "llama-swap",
|
||||||
|
"icons": [
|
||||||
|
{
|
||||||
|
"src": "/web-app-manifest-192x192.png",
|
||||||
|
"sizes": "192x192",
|
||||||
|
"type": "image/png",
|
||||||
|
"purpose": "maskable"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"src": "/web-app-manifest-512x512.png",
|
||||||
|
"sizes": "512x512",
|
||||||
|
"type": "image/png",
|
||||||
|
"purpose": "maskable"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"theme_color": "#ffffff",
|
||||||
|
"background_color": "#ffffff",
|
||||||
|
"display": "standalone"
|
||||||
|
}
|
||||||
|
After Width: | Height: | Size: 6.5 KiB |
|
After Width: | Height: | Size: 28 KiB |
@@ -0,0 +1,46 @@
|
|||||||
|
<script lang="ts">
|
||||||
|
import { onMount } from "svelte";
|
||||||
|
import Router from "svelte-spa-router";
|
||||||
|
import Header from "./components/Header.svelte";
|
||||||
|
import LogViewer from "./routes/LogViewer.svelte";
|
||||||
|
import Models from "./routes/Models.svelte";
|
||||||
|
import Activity from "./routes/Activity.svelte";
|
||||||
|
import { enableAPIEvents } from "./stores/api";
|
||||||
|
import { initScreenWidth, isDarkMode, appTitle, connectionState } from "./stores/theme";
|
||||||
|
|
||||||
|
// Route table handed to the router component; "*" is the catch-all entry and
// renders the Models page.
const routes = {
  "/": Models,
  "/logs": LogViewer,
  "/activity": Activity,
  "*": Models,
};

// Mirror the dark-mode store onto the <html> element's data-theme attribute.
$effect(() => {
  const theme = $isDarkMode ? "dark" : "light";
  document.documentElement.setAttribute("data-theme", theme);
});

// Keep the tab title in sync: a status icon for the connection state followed
// by the configurable app title.
$effect(() => {
  let icon: string;
  if ($connectionState === "connecting") {
    icon = "\u{1F7E1}";
  } else if ($connectionState === "connected") {
    icon = "\u{1F7E2}";
  } else {
    icon = "\u{1F534}";
  }
  document.title = `${icon} ${$appTitle}`;
});

// On mount: start screen-width tracking and turn API events on. The returned
// function undoes both when the component is destroyed.
onMount(() => {
  const stopScreenWidth = initScreenWidth();
  enableAPIEvents(true);

  function teardown(): void {
    stopScreenWidth();
    enableAPIEvents(false);
  }

  return teardown;
});
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<div class="flex flex-col h-screen">
|
||||||
|
<Header />
|
||||||
|
|
||||||
|
<main class="flex-1 overflow-auto p-4">
|
||||||
|
<Router {routes} />
|
||||||
|
</main>
|
||||||
|
</div>
|
||||||
|
After Width: | Height: | Size: 12 KiB |
@@ -0,0 +1 @@
|
|||||||
|
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" class="iconify iconify--logos" width="35.93" height="32" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 228"><path fill="#00D8FF" d="M210.483 73.824a171.49 171.49 0 0 0-8.24-2.597c.465-1.9.893-3.777 1.273-5.621c6.238-30.281 2.16-54.676-11.769-62.708c-13.355-7.7-35.196.329-57.254 19.526a171.23 171.23 0 0 0-6.375 5.848a155.866 155.866 0 0 0-4.241-3.917C100.759 3.829 77.587-4.822 63.673 3.233C50.33 10.957 46.379 33.89 51.995 62.588a170.974 170.974 0 0 0 1.892 8.48c-3.28.932-6.445 1.924-9.474 2.98C17.309 83.498 0 98.307 0 113.668c0 15.865 18.582 31.778 46.812 41.427a145.52 145.52 0 0 0 6.921 2.165a167.467 167.467 0 0 0-2.01 9.138c-5.354 28.2-1.173 50.591 12.134 58.266c13.744 7.926 36.812-.22 59.273-19.855a145.567 145.567 0 0 0 5.342-4.923a168.064 168.064 0 0 0 6.92 6.314c21.758 18.722 43.246 26.282 56.54 18.586c13.731-7.949 18.194-32.003 12.4-61.268a145.016 145.016 0 0 0-1.535-6.842c1.62-.48 3.21-.974 4.76-1.488c29.348-9.723 48.443-25.443 48.443-41.52c0-15.417-17.868-30.326-45.517-39.844Zm-6.365 70.984c-1.4.463-2.836.91-4.3 1.345c-3.24-10.257-7.612-21.163-12.963-32.432c5.106-11 9.31-21.767 12.459-31.957c2.619.758 5.16 1.557 7.61 2.4c23.69 8.156 38.14 20.213 38.14 29.504c0 9.896-15.606 22.743-40.946 31.14Zm-10.514 20.834c2.562 12.94 2.927 24.64 1.23 33.787c-1.524 8.219-4.59 13.698-8.382 15.893c-8.067 4.67-25.32-1.4-43.927-17.412a156.726 156.726 0 0 1-6.437-5.87c7.214-7.889 14.423-17.06 21.459-27.246c12.376-1.098 24.068-2.894 34.671-5.345a134.17 134.17 0 0 1 1.386 6.193ZM87.276 214.515c-7.882 2.783-14.16 2.863-17.955.675c-8.075-4.657-11.432-22.636-6.853-46.752a156.923 156.923 0 0 1 1.869-8.499c10.486 2.32 22.093 3.988 34.498 4.994c7.084 9.967 14.501 19.128 21.976 27.15a134.668 134.668 0 0 1-4.877 4.492c-9.933 8.682-19.886 14.842-28.658 17.94ZM50.35 144.747c-12.483-4.267-22.792-9.812-29.858-15.863c-6.35-5.437-9.555-10.836-9.555-15.216c0-9.322 
13.897-21.212 37.076-29.293c2.813-.98 5.757-1.905 8.812-2.773c3.204 10.42 7.406 21.315 12.477 32.332c-5.137 11.18-9.399 22.249-12.634 32.792a134.718 134.718 0 0 1-6.318-1.979Zm12.378-84.26c-4.811-24.587-1.616-43.134 6.425-47.789c8.564-4.958 27.502 2.111 47.463 19.835a144.318 144.318 0 0 1 3.841 3.545c-7.438 7.987-14.787 17.08-21.808 26.988c-12.04 1.116-23.565 2.908-34.161 5.309a160.342 160.342 0 0 1-1.76-7.887Zm110.427 27.268a347.8 347.8 0 0 0-7.785-12.803c8.168 1.033 15.994 2.404 23.343 4.08c-2.206 7.072-4.956 14.465-8.193 22.045a381.151 381.151 0 0 0-7.365-13.322Zm-45.032-43.861c5.044 5.465 10.096 11.566 15.065 18.186a322.04 322.04 0 0 0-30.257-.006c4.974-6.559 10.069-12.652 15.192-18.18ZM82.802 87.83a323.167 323.167 0 0 0-7.227 13.238c-3.184-7.553-5.909-14.98-8.134-22.152c7.304-1.634 15.093-2.97 23.209-3.984a321.524 321.524 0 0 0-7.848 12.897Zm8.081 65.352c-8.385-.936-16.291-2.203-23.593-3.793c2.26-7.3 5.045-14.885 8.298-22.6a321.187 321.187 0 0 0 7.257 13.246c2.594 4.48 5.28 8.868 8.038 13.147Zm37.542 31.03c-5.184-5.592-10.354-11.779-15.403-18.433c4.902.192 9.899.29 14.978.29c5.218 0 10.376-.117 15.453-.343c-4.985 6.774-10.018 12.97-15.028 18.486Zm52.198-57.817c3.422 7.8 6.306 15.345 8.596 22.52c-7.422 1.694-15.436 3.058-23.88 4.071a382.417 382.417 0 0 0 7.859-13.026a347.403 347.403 0 0 0 7.425-13.565Zm-16.898 8.101a358.557 358.557 0 0 1-12.281 19.815a329.4 329.4 0 0 1-23.444.823c-7.967 0-15.716-.248-23.178-.732a310.202 310.202 0 0 1-12.513-19.846h.001a307.41 307.41 0 0 1-10.923-20.627a310.278 310.278 0 0 1 10.89-20.637l-.001.001a307.318 307.318 0 0 1 12.413-19.761c7.613-.576 15.42-.876 23.31-.876H128c7.926 0 15.743.303 23.354.883a329.357 329.357 0 0 1 12.335 19.695a358.489 358.489 0 0 1 11.036 20.54a329.472 329.472 0 0 1-11 20.722Zm22.56-122.124c8.572 4.944 11.906 24.881 6.52 51.026c-.344 1.668-.73 3.367-1.15 5.09c-10.622-2.452-22.155-4.275-34.23-5.408c-7.034-10.017-14.323-19.124-21.64-27.008a160.789 160.789 0 0 1 5.888-5.4c18.9-16.447 36.564-22.941 
44.612-18.3ZM128 90.808c12.625 0 22.86 10.235 22.86 22.86s-10.235 22.86-22.86 22.86s-22.86-10.235-22.86-22.86s10.235-22.86 22.86-22.86Z"></path></svg>
|
||||||
|
After Width: | Height: | Size: 4.0 KiB |
@@ -0,0 +1,24 @@
|
|||||||
|
<script lang="ts">
|
||||||
|
import { connectionState } from "../stores/theme";
|
||||||
|
import { versionInfo } from "../stores/api";
|
||||||
|
|
||||||
|
// Background class for the status dot: emerald when connected, amber while
// connecting, red for "disconnected" and any other value.
let eventStatusColor = $derived.by(() => {
  const state = $connectionState;
  if (state === "connected") {
    return "bg-emerald-500";
  }
  if (state === "connecting") {
    return "bg-amber-500";
  }
  return "bg-red-500";
});
|
||||||
|
|
||||||
|
// Multi-line tooltip: connection state plus version details, one item per
// line; every missing value is shown as "unknown".
let tooltipText = $derived(
  [
    `Event Stream: ${$connectionState ?? "unknown"}`,
    `API Version: ${$versionInfo?.version ?? "unknown"}`,
    `Commit Hash: ${$versionInfo?.commit?.substring(0, 7) ?? "unknown"}`,
    `Build Date: ${$versionInfo?.build_date ?? "unknown"}`,
  ].join("\n")
);
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<div class="flex items-center" title={tooltipText}>
|
||||||
|
<span class="inline-block w-3 h-3 rounded-full {eventStatusColor} mr-2"></span>
|
||||||
|
</div>
|
||||||
@@ -0,0 +1,90 @@
|
|||||||
|
<script lang="ts">
|
||||||
|
import { link, location } from "svelte-spa-router";
|
||||||
|
import { screenWidth, toggleTheme, isDarkMode, appTitle, isNarrow } from "../stores/theme";
|
||||||
|
import ConnectionStatus from "./ConnectionStatus.svelte";
|
||||||
|
|
||||||
|
// Store a cleaned-up app title: newlines removed, surrounding whitespace
// trimmed, capped at 64 characters. An empty result falls back to
// "llama-swap".
function handleTitleChange(newTitle: string): void {
  const singleLine = newTitle.replace(/\n/g, "");
  const clipped = singleLine.trim().substring(0, 64);
  appTitle.set(clipped === "" ? "llama-swap" : clipped);
}
|
||||||
|
|
||||||
|
// Enter commits the edited title instead of inserting a line break, then
// removes focus from the element. All other keys are left alone.
function handleKeyDown(e: KeyboardEvent): void {
  if (e.key !== "Enter") {
    return;
  }

  e.preventDefault();
  const heading = e.currentTarget as HTMLElement;
  handleTitleChange(heading.textContent || "(set title)");
  heading.blur();
}
|
||||||
|
|
||||||
|
// Commit the element's current text as the title when it loses focus; empty
// text is replaced by the "(set title)" placeholder.
function handleBlur(e: FocusEvent): void {
  const { textContent } = e.currentTarget as HTMLElement;
  handleTitleChange(textContent || "(set title)");
}
|
||||||
|
|
||||||
|
// Report whether the nav link for `path` should be shown as active for the
// router's current location.
//
// The root link is active only on exactly "/". Any other link is active when
// the location equals its path or sits below it ("/logs" matches "/logs" and
// "/logs/x"). Matching on a segment boundary keeps a location that merely
// shares a prefix, such as "/logsfoo", from highlighting "/logs".
function isActive(path: string, currentLocation: string): boolean {
  if (path === "/") {
    return currentLocation === "/";
  }
  return currentLocation === path || currentLocation.startsWith(path + "/");
}
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<header
|
||||||
|
class="flex items-center justify-between bg-surface border-b border-border px-4 {$isNarrow
|
||||||
|
? 'py-1 h-[60px]'
|
||||||
|
: 'p-2 h-[75px]'}"
|
||||||
|
>
|
||||||
|
{#if $screenWidth !== "xs" && $screenWidth !== "sm"}
|
||||||
|
<h1
|
||||||
|
contenteditable="true"
|
||||||
|
class="p-0 outline-none hover:bg-gray-100 dark:hover:bg-gray-700 rounded"
|
||||||
|
onblur={handleBlur}
|
||||||
|
onkeydown={handleKeyDown}
|
||||||
|
>
|
||||||
|
{$appTitle}
|
||||||
|
</h1>
|
||||||
|
{/if}
|
||||||
|
|
||||||
|
<menu class="flex items-center gap-4">
|
||||||
|
<a
|
||||||
|
href="/"
|
||||||
|
use:link
|
||||||
|
class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1"
|
||||||
|
class:font-semibold={isActive("/", $location)}
|
||||||
|
>
|
||||||
|
Models
|
||||||
|
</a>
|
||||||
|
<a
|
||||||
|
href="/activity"
|
||||||
|
use:link
|
||||||
|
class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1"
|
||||||
|
class:font-semibold={isActive("/activity", $location)}
|
||||||
|
>
|
||||||
|
Activity
|
||||||
|
</a>
|
||||||
|
<a
|
||||||
|
href="/logs"
|
||||||
|
use:link
|
||||||
|
class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1"
|
||||||
|
class:font-semibold={isActive("/logs", $location)}
|
||||||
|
>
|
||||||
|
Logs
|
||||||
|
</a>
|
||||||
|
<button onclick={toggleTheme} title="Toggle theme">
|
||||||
|
{#if $isDarkMode}
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-5 h-5">
|
||||||
|
<path
|
||||||
|
fill-rule="evenodd"
|
||||||
|
d="M9.528 1.718a.75.75 0 0 1 .162.819A8.97 8.97 0 0 0 9 6a9 9 0 0 0 9 9 8.97 8.97 0 0 0 3.463-.69.75.75 0 0 1 .981.98 10.503 10.503 0 0 1-9.694 6.46c-5.799 0-10.5-4.7-10.5-10.5 0-4.368 2.667-8.112 6.46-9.694a.75.75 0 0 1 .818.162Z"
|
||||||
|
clip-rule="evenodd"
|
||||||
|
/>
|
||||||
|
</svg>
|
||||||
|
{:else}
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-5 h-5">
|
||||||
|
<path
|
||||||
|
d="M12 2.25a.75.75 0 0 1 .75.75v2.25a.75.75 0 0 1-1.5 0V3a.75.75 0 0 1 .75-.75ZM7.5 12a4.5 4.5 0 1 1 9 0 4.5 4.5 0 0 1-9 0ZM18.894 6.166a.75.75 0 0 0-1.06-1.06l-1.591 1.59a.75.75 0 1 0 1.06 1.061l1.591-1.59ZM21.75 12a.75.75 0 0 1-.75.75h-2.25a.75.75 0 0 1 0-1.5H21a.75.75 0 0 1 .75.75ZM17.834 18.894a.75.75 0 0 0 1.06-1.06l-1.59-1.591a.75.75 0 1 0-1.061 1.06l1.591 1.591ZM12 18a.75.75 0 0 1 .75.75V21a.75.75 0 0 1-1.5 0v-2.25A.75.75 0 0 1 12 18ZM7.758 17.303a.75.75 0 0 0-1.061-1.06l-1.591 1.59a.75.75 0 0 0 1.06 1.061l1.591-1.59ZM6 12a.75.75 0 0 1-.75.75H3a.75.75 0 0 1 0-1.5h2.25A.75.75 0 0 1 6 12ZM6.697 7.757a.75.75 0 0 0 1.06-1.06l-1.59-1.591a.75.75 0 0 0-1.061 1.06l1.59 1.591Z"
|
||||||
|
/>
|
||||||
|
</svg>
|
||||||
|
{/if}
|
||||||
|
</button>
|
||||||
|
<ConnectionStatus />
|
||||||
|
</menu>
|
||||||
|
</header>
|
||||||
@@ -0,0 +1,132 @@
|
|||||||
|
<script lang="ts">
|
||||||
|
import { persistentStore } from "../stores/persistent";
|
||||||
|
|
||||||
|
interface Props {
|
||||||
|
id: string;
|
||||||
|
title: string;
|
||||||
|
logData: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
let { id, title, logData }: Props = $props();
|
||||||
|
|
||||||
|
let filterRegex = $state("");
|
||||||
|
|
||||||
|
// Create persistent stores for this panel (id is intentionally captured at init time)
|
||||||
|
// svelte-ignore state_referenced_locally
|
||||||
|
const fontSizeStore = persistentStore<"xxs" | "xs" | "small" | "normal">(`logPanel-${id}-fontSize`, "normal");
|
||||||
|
// svelte-ignore state_referenced_locally
|
||||||
|
const wrapTextStore = persistentStore<boolean>(`logPanel-${id}-wrapText`, false);
|
||||||
|
// svelte-ignore state_referenced_locally
|
||||||
|
const showFilterStore = persistentStore<boolean>(`logPanel-${id}-showFilter`, false);
|
||||||
|
|
||||||
|
let textWrapClass = $derived($wrapTextStore ? "whitespace-pre-wrap" : "whitespace-pre");
|
||||||
|
|
||||||
|
// Advance the persisted log font size one step through the cycle
// xxs -> xs -> small -> normal -> xxs.
function toggleFontSize(): void {
  fontSizeStore.update((prev) => {
    switch (prev) {
      case "xxs":
        return "xs";
      case "xs":
        return "small";
      case "small":
        return "normal";
      case "normal":
        return "xxs";
      default:
        // Without this branch an unexpected value would make the updater
        // return undefined and store that. Presumably the persisted value can
        // be stale or hand-edited - verify against persistentStore. Reset to
        // the store's initial "normal" size instead.
        return "normal";
    }
  });
}
|
||||||
|
|
||||||
|
// Flip the persisted wrap-text setting for this panel.
function toggleWrapText(): void {
  wrapTextStore.update((wrapped) => {
    return !wrapped;
  });
}
|
||||||
|
|
||||||
|
// Show or hide the filter input. Hiding it also clears the current filter
// expression.
function toggleFilter(): void {
  const opening = !$showFilterStore;
  showFilterStore.set(opening);
  if (!opening) {
    filterRegex = "";
  }
}
|
||||||
|
|
||||||
|
// Text-size utility class applied to the log <pre> for the persisted font
// size.
let fontSizeClass = $derived.by(() => {
  switch ($fontSizeStore) {
    case "xxs":
      return "text-[0.5rem]";
    case "xs":
      return "text-[0.75rem]";
    case "small":
      return "text-[0.875rem]";
    case "normal":
      return "text-base";
    default:
      // Without this branch an unexpected store value yields undefined, which
      // is interpolated into the class attribute as the literal "undefined".
      // Fall back to the class used for "normal".
      return "text-base";
  }
});
|
||||||
|
|
||||||
|
// Log text after applying the filter expression. With no filter the log is
// returned unchanged; otherwise only lines matching the case-insensitive
// regex are kept. An invalid regex also returns the full log.
let filteredLogs = $derived.by(() => {
  if (!filterRegex) {
    return logData;
  }

  try {
    const pattern = new RegExp(filterRegex, "i");
    const kept: string[] = [];
    for (const line of logData.split("\n")) {
      if (pattern.test(line)) {
        kept.push(line);
      }
    }
    return kept.join("\n");
  } catch {
    return logData;
  }
});
|
||||||
|
|
||||||
|
let preElement: HTMLPreElement;
|
||||||
|
|
||||||
|
// Whenever the displayed log text changes, jump the <pre> to its bottom.
// Nothing happens until the element is bound and there is text to show.
$effect(() => {
  if (!preElement || !filteredLogs) {
    return;
  }
  preElement.scrollTop = preElement.scrollHeight;
});
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<div class="rounded-lg overflow-hidden flex flex-col bg-gray-950/5 dark:bg-white/10 h-full w-full p-1">
|
||||||
|
<div class="p-4">
|
||||||
|
<div class="flex items-center justify-between">
|
||||||
|
<h3 class="m-0 text-lg p-0">{title}</h3>
|
||||||
|
|
||||||
|
<div class="flex gap-2 items-center">
|
||||||
|
<button class="btn border-0" onclick={toggleFontSize} title="Change font size">
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-4 h-4">
|
||||||
|
<path fill-rule="evenodd" d="M10.5 3.75a6 6 0 0 0-5.98 6.496A5.25 5.25 0 0 0 6.75 20.25H18a4.5 4.5 0 0 0 2.206-8.423 3.75 3.75 0 0 0-4.133-4.303A6.001 6.001 0 0 0 10.5 3.75Zm2.25 6a.75.75 0 0 0-1.5 0v4.94l-1.72-1.72a.75.75 0 0 0-1.06 1.06l3 3a.75.75 0 0 0 1.06 0l3-3a.75.75 0 1 0-1.06-1.06l-1.72 1.72V9.75Z" clip-rule="evenodd" />
|
||||||
|
</svg>
|
||||||
|
</button>
|
||||||
|
<button class="btn border-0" onclick={toggleWrapText} title="Toggle text wrap">
|
||||||
|
{#if $wrapTextStore}
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-4 h-4">
|
||||||
|
<path fill-rule="evenodd" d="M3 6.75A.75.75 0 0 1 3.75 6h16.5a.75.75 0 0 1 0 1.5H3.75A.75.75 0 0 1 3 6.75ZM3 12a.75.75 0 0 1 .75-.75h16.5a.75.75 0 0 1 0 1.5H3.75A.75.75 0 0 1 3 12Zm0 5.25a.75.75 0 0 1 .75-.75h16.5a.75.75 0 0 1 0 1.5H3.75a.75.75 0 0 1-.75-.75Z" clip-rule="evenodd" />
|
||||||
|
</svg>
|
||||||
|
{:else}
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-4 h-4">
|
||||||
|
<path fill-rule="evenodd" d="M3 6.75A.75.75 0 0 1 3.75 6h16.5a.75.75 0 0 1 0 1.5H3.75A.75.75 0 0 1 3 6.75ZM3 12a.75.75 0 0 1 .75-.75h10.5a.75.75 0 0 1 0 1.5H3.75A.75.75 0 0 1 3 12Zm0 5.25a.75.75 0 0 1 .75-.75h16.5a.75.75 0 0 1 0 1.5H3.75a.75.75 0 0 1-.75-.75Z" clip-rule="evenodd" />
|
||||||
|
</svg>
|
||||||
|
{/if}
|
||||||
|
</button>
|
||||||
|
<button class="btn border-0" onclick={toggleFilter} title="Toggle filter">
|
||||||
|
{#if $showFilterStore}
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-4 h-4">
|
||||||
|
<path fill-rule="evenodd" d="M10.5 3.75a6.75 6.75 0 1 0 0 13.5 6.75 6.75 0 0 0 0-13.5ZM2.25 10.5a8.25 8.25 0 1 1 14.59 5.28l4.69 4.69a.75.75 0 1 1-1.06 1.06l-4.69-4.69A8.25 8.25 0 0 1 2.25 10.5Z" clip-rule="evenodd" />
|
||||||
|
</svg>
|
||||||
|
{:else}
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" class="w-4 h-4">
|
||||||
|
<path stroke-linecap="round" stroke-linejoin="round" d="m21 21-5.197-5.197m0 0A7.5 7.5 0 1 0 5.196 5.196a7.5 7.5 0 0 0 10.607 10.607Z" />
|
||||||
|
</svg>
|
||||||
|
{/if}
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{#if $showFilterStore}
|
||||||
|
<div class="mt-2 flex gap-2 items-center w-full">
|
||||||
|
<input
|
||||||
|
type="text"
|
||||||
|
class="w-full text-sm border border-gray-950/10 dark:border-white/5 p-2 rounded outline-none"
|
||||||
|
placeholder="Filter logs (regex)..."
|
||||||
|
bind:value={filterRegex}
|
||||||
|
/>
|
||||||
|
<button class="pl-2" onclick={() => (filterRegex = "")} aria-label="Clear filter">
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-6 h-6">
|
||||||
|
<path fill-rule="evenodd" d="M12 2.25c-5.385 0-9.75 4.365-9.75 9.75s4.365 9.75 9.75 9.75 9.75-4.365 9.75-9.75S17.385 2.25 12 2.25Zm-1.72 6.97a.75.75 0 1 0-1.06 1.06L10.94 12l-1.72 1.72a.75.75 0 1 0 1.06 1.06L12 13.06l1.72 1.72a.75.75 0 1 0 1.06-1.06L13.06 12l1.72-1.72a.75.75 0 1 0-1.06-1.06L12 10.94l-1.72-1.72Z" clip-rule="evenodd" />
|
||||||
|
</svg>
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
|
<div class="rounded-lg bg-background font-mono text-sm flex-1 overflow-hidden">
|
||||||
|
<pre bind:this={preElement} class="{textWrapClass} {fontSizeClass} h-full overflow-auto p-4">{filteredLogs}</pre>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
@@ -0,0 +1,208 @@
|
|||||||
|
<script lang="ts">
|
||||||
|
import { models, loadModel, unloadAllModels, unloadSingleModel } from "../stores/api";
|
||||||
|
import { isNarrow } from "../stores/theme";
|
||||||
|
import { persistentStore } from "../stores/persistent";
|
||||||
|
import type { Model } from "../lib/types";
|
||||||
|
|
||||||
|
let isUnloading = $state(false);
|
||||||
|
let menuOpen = $state(false);
|
||||||
|
|
||||||
|
const showUnlistedStore = persistentStore<boolean>("showUnlisted", true);
|
||||||
|
const showIdorNameStore = persistentStore<"id" | "name">("showIdorName", "id");
|
||||||
|
|
||||||
|
// Models to display, split into local ("regular") models and peer models
// bucketed by peerID. Unlisted models are dropped unless the show-unlisted
// setting is on.
let filteredModels = $derived.by(() => {
  const visible = $models.filter((model) => $showUnlistedStore || !model.unlisted);

  const regularModels: Model[] = [];
  const peerModelsByPeerId = {} as Record<string, Model[]>;

  for (const model of visible) {
    if (!model.peerID) {
      regularModels.push(model);
      continue;
    }

    const peerId = model.peerID || "unknown";
    if (!peerModelsByPeerId[peerId]) peerModelsByPeerId[peerId] = [];
    peerModelsByPeerId[peerId].push(model);
  }

  return { regularModels, peerModelsByPeerId };
});
|
||||||
|
|
||||||
|
// Unload every model. The isUnloading flag is raised for the duration of the
// call and lowered one second after it settles; failures are logged to the
// console rather than rethrown.
async function handleUnloadAllModels(): Promise<void> {
  const clearBusy = (): void => {
    isUnloading = false;
  };

  isUnloading = true;
  try {
    await unloadAllModels();
  } catch (err) {
    console.error(err);
  } finally {
    setTimeout(clearBusy, 1000);
  }
}
|
||||||
|
|
||||||
|
// Switch the persisted display mode between "id" and "name".
function toggleIdorName(): void {
  showIdorNameStore.update((mode) => {
    if (mode === "name") {
      return "id";
    }
    return "name";
  });
}
|
||||||
|
|
||||||
|
// Inverts the persisted "show unlisted models" flag.
function toggleShowUnlisted(): void {
  showUnlistedStore.update((shown) => (shown ? false : true));
}
|
||||||
|
|
||||||
|
// Label shown for a model row: its id in "id" mode, otherwise its name,
// falling back to the id when the name is empty.
function getModelDisplay(model: Model): string {
  if ($showIdorNameStore === "id") {
    return model.id;
  }
  return model.name || model.id;
}
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<div class="card h-full flex flex-col">
|
||||||
|
<div class="shrink-0">
|
||||||
|
<div class="flex justify-between items-baseline">
|
||||||
|
<h2 class={$isNarrow ? "text-xl" : ""}>Models</h2>
|
||||||
|
{#if $isNarrow}
|
||||||
|
<div class="relative">
|
||||||
|
<button class="btn text-base flex items-center gap-2 py-1" onclick={() => (menuOpen = !menuOpen)} aria-label="Toggle menu">
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-5 h-5">
|
||||||
|
<path fill-rule="evenodd" d="M3 6.75A.75.75 0 0 1 3.75 6h16.5a.75.75 0 0 1 0 1.5H3.75A.75.75 0 0 1 3 6.75ZM3 12a.75.75 0 0 1 .75-.75h16.5a.75.75 0 0 1 0 1.5H3.75A.75.75 0 0 1 3 12Zm0 5.25a.75.75 0 0 1 .75-.75h16.5a.75.75 0 0 1 0 1.5H3.75a.75.75 0 0 1-.75-.75Z" clip-rule="evenodd" />
|
||||||
|
</svg>
|
||||||
|
</button>
|
||||||
|
{#if menuOpen}
|
||||||
|
<div class="absolute right-0 mt-2 w-48 bg-surface border border-gray-200 dark:border-white/10 rounded shadow-lg z-20">
|
||||||
|
<button
|
||||||
|
class="w-full text-left px-4 py-2 hover:bg-secondary-hover flex items-center gap-2"
|
||||||
|
onclick={() => { toggleIdorName(); menuOpen = false; }}
|
||||||
|
>
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-5 h-5">
|
||||||
|
<path fill-rule="evenodd" d="M15.97 2.47a.75.75 0 0 1 1.06 0l4.5 4.5a.75.75 0 0 1 0 1.06l-4.5 4.5a.75.75 0 1 1-1.06-1.06l3.22-3.22H7.5a.75.75 0 0 1 0-1.5h11.69l-3.22-3.22a.75.75 0 0 1 0-1.06Zm-7.94 9a.75.75 0 0 1 0 1.06l-3.22 3.22H16.5a.75.75 0 0 1 0 1.5H4.81l3.22 3.22a.75.75 0 1 1-1.06 1.06l-4.5-4.5a.75.75 0 0 1 0-1.06l4.5-4.5a.75.75 0 0 1 1.06 0Z" clip-rule="evenodd" />
|
||||||
|
</svg>
|
||||||
|
{$showIdorNameStore === "id" ? "Show Name" : "Show ID"}
|
||||||
|
</button>
|
||||||
|
<button
|
||||||
|
class="w-full text-left px-4 py-2 hover:bg-secondary-hover flex items-center gap-2"
|
||||||
|
onclick={() => { toggleShowUnlisted(); menuOpen = false; }}
|
||||||
|
>
|
||||||
|
{#if $showUnlistedStore}
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-5 h-5">
|
||||||
|
<path d="M3.53 2.47a.75.75 0 0 0-1.06 1.06l18 18a.75.75 0 1 0 1.06-1.06l-18-18ZM22.676 12.553a11.249 11.249 0 0 1-2.631 4.31l-3.099-3.099a5.25 5.25 0 0 0-6.71-6.71L7.759 4.577a11.217 11.217 0 0 1 4.242-.827c4.97 0 9.185 3.223 10.675 7.69.12.362.12.752 0 1.113Z" />
|
||||||
|
<path d="M15.75 12c0 .18-.013.357-.037.53l-4.244-4.243A3.75 3.75 0 0 1 15.75 12ZM12.53 15.713l-4.243-4.244a3.75 3.75 0 0 0 4.244 4.243Z" />
|
||||||
|
<path d="M6.75 12c0-.619.107-1.213.304-1.764l-3.1-3.1a11.25 11.25 0 0 0-2.63 4.31c-.12.362-.12.752 0 1.114 1.489 4.467 5.704 7.69 10.675 7.69 1.5 0 2.933-.294 4.242-.827l-2.477-2.477A5.25 5.25 0 0 1 6.75 12Z" />
|
||||||
|
</svg>
|
||||||
|
{:else}
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-5 h-5">
|
||||||
|
<path d="M12 15a3 3 0 1 0 0-6 3 3 0 0 0 0 6Z" />
|
||||||
|
<path fill-rule="evenodd" d="M1.323 11.447C2.811 6.976 7.028 3.75 12.001 3.75c4.97 0 9.185 3.223 10.675 7.69.12.362.12.752 0 1.113-1.487 4.471-5.705 7.697-10.677 7.697-4.97 0-9.186-3.223-10.675-7.69a1.762 1.762 0 0 1 0-1.113ZM17.25 12a5.25 5.25 0 1 1-10.5 0 5.25 5.25 0 0 1 10.5 0Z" clip-rule="evenodd" />
|
||||||
|
</svg>
|
||||||
|
{/if}
|
||||||
|
{$showUnlistedStore ? "Hide Unlisted" : "Show Unlisted"}
|
||||||
|
</button>
|
||||||
|
<button
|
||||||
|
class="w-full text-left px-4 py-2 hover:bg-secondary-hover flex items-center gap-2"
|
||||||
|
onclick={() => { handleUnloadAllModels(); menuOpen = false; }}
|
||||||
|
disabled={isUnloading}
|
||||||
|
>
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-6 h-6">
|
||||||
|
<path fill-rule="evenodd" d="M12 2.25c-5.385 0-9.75 4.365-9.75 9.75s4.365 9.75 9.75 9.75 9.75-4.365 9.75-9.75S17.385 2.25 12 2.25Zm.53 5.47a.75.75 0 0 0-1.06 0l-3 3a.75.75 0 1 0 1.06 1.06l1.72-1.72v5.69a.75.75 0 0 0 1.5 0v-5.69l1.72 1.72a.75.75 0 1 0 1.06-1.06l-3-3Z" clip-rule="evenodd" />
|
||||||
|
</svg>
|
||||||
|
{isUnloading ? "Unloading..." : "Unload All"}
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
|
{#if !$isNarrow}
|
||||||
|
<div class="flex justify-between">
|
||||||
|
<div class="flex gap-2">
|
||||||
|
<button class="btn text-base flex items-center gap-2" onclick={toggleIdorName} style="line-height: 1.2">
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-5 h-5">
|
||||||
|
<path fill-rule="evenodd" d="M15.97 2.47a.75.75 0 0 1 1.06 0l4.5 4.5a.75.75 0 0 1 0 1.06l-4.5 4.5a.75.75 0 1 1-1.06-1.06l3.22-3.22H7.5a.75.75 0 0 1 0-1.5h11.69l-3.22-3.22a.75.75 0 0 1 0-1.06Zm-7.94 9a.75.75 0 0 1 0 1.06l-3.22 3.22H16.5a.75.75 0 0 1 0 1.5H4.81l3.22 3.22a.75.75 0 1 1-1.06 1.06l-4.5-4.5a.75.75 0 0 1 0-1.06l4.5-4.5a.75.75 0 0 1 1.06 0Z" clip-rule="evenodd" />
|
||||||
|
</svg>
|
||||||
|
{$showIdorNameStore === "id" ? "ID" : "Name"}
|
||||||
|
</button>
|
||||||
|
|
||||||
|
<button class="btn text-base flex items-center gap-2" onclick={toggleShowUnlisted} style="line-height: 1.2">
|
||||||
|
{#if $showUnlistedStore}
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-5 h-5">
|
||||||
|
<path d="M12 15a3 3 0 1 0 0-6 3 3 0 0 0 0 6Z" />
|
||||||
|
<path fill-rule="evenodd" d="M1.323 11.447C2.811 6.976 7.028 3.75 12.001 3.75c4.97 0 9.185 3.223 10.675 7.69.12.362.12.752 0 1.113-1.487 4.471-5.705 7.697-10.677 7.697-4.97 0-9.186-3.223-10.675-7.69a1.762 1.762 0 0 1 0-1.113ZM17.25 12a5.25 5.25 0 1 1-10.5 0 5.25 5.25 0 0 1 10.5 0Z" clip-rule="evenodd" />
|
||||||
|
</svg>
|
||||||
|
{:else}
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-5 h-5">
|
||||||
|
<path d="M3.53 2.47a.75.75 0 0 0-1.06 1.06l18 18a.75.75 0 1 0 1.06-1.06l-18-18ZM22.676 12.553a11.249 11.249 0 0 1-2.631 4.31l-3.099-3.099a5.25 5.25 0 0 0-6.71-6.71L7.759 4.577a11.217 11.217 0 0 1 4.242-.827c4.97 0 9.185 3.223 10.675 7.69.12.362.12.752 0 1.113Z" />
|
||||||
|
<path d="M15.75 12c0 .18-.013.357-.037.53l-4.244-4.243A3.75 3.75 0 0 1 15.75 12ZM12.53 15.713l-4.243-4.244a3.75 3.75 0 0 0 4.244 4.243Z" />
|
||||||
|
<path d="M6.75 12c0-.619.107-1.213.304-1.764l-3.1-3.1a11.25 11.25 0 0 0-2.63 4.31c-.12.362-.12.752 0 1.114 1.489 4.467 5.704 7.69 10.675 7.69 1.5 0 2.933-.294 4.242-.827l-2.477-2.477A5.25 5.25 0 0 1 6.75 12Z" />
|
||||||
|
</svg>
|
||||||
|
{/if}
|
||||||
|
unlisted
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
<button class="btn text-base flex items-center gap-2" onclick={handleUnloadAllModels} disabled={isUnloading}>
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-6 h-6">
|
||||||
|
<path fill-rule="evenodd" d="M12 2.25c-5.385 0-9.75 4.365-9.75 9.75s4.365 9.75 9.75 9.75 9.75-4.365 9.75-9.75S17.385 2.25 12 2.25Zm.53 5.47a.75.75 0 0 0-1.06 0l-3 3a.75.75 0 1 0 1.06 1.06l1.72-1.72v5.69a.75.75 0 0 0 1.5 0v-5.69l1.72 1.72a.75.75 0 1 0 1.06-1.06l-3-3Z" clip-rule="evenodd" />
|
||||||
|
</svg>
|
||||||
|
{isUnloading ? "Unloading..." : "Unload All"}
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="flex-1 overflow-y-auto">
|
||||||
|
<table class="w-full">
|
||||||
|
<thead class="sticky top-0 bg-card z-10">
|
||||||
|
<tr class="text-left border-b border-gray-200 dark:border-white/10 bg-surface">
|
||||||
|
<th>{$showIdorNameStore === "id" ? "Model ID" : "Name"}</th>
|
||||||
|
<th></th>
|
||||||
|
<th>State</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{#each filteredModels.regularModels as model (model.id)}
|
||||||
|
<tr class="border-b hover:bg-secondary-hover border-gray-200">
|
||||||
|
<td class={model.unlisted ? "text-txtsecondary" : ""}>
|
||||||
|
<a href="/upstream/{model.id}/" class="font-semibold" target="_blank">
|
||||||
|
{getModelDisplay(model)}
|
||||||
|
</a>
|
||||||
|
{#if model.description}
|
||||||
|
<p class={model.unlisted ? "text-opacity-70" : ""}><em>{model.description}</em></p>
|
||||||
|
{/if}
|
||||||
|
</td>
|
||||||
|
<td class="w-12">
|
||||||
|
{#if model.state === "stopped"}
|
||||||
|
<button class="btn btn--sm" onclick={() => loadModel(model.id)}>Load</button>
|
||||||
|
{:else}
|
||||||
|
<button class="btn btn--sm" onclick={() => unloadSingleModel(model.id)} disabled={model.state !== "ready"}>Unload</button>
|
||||||
|
{/if}
|
||||||
|
</td>
|
||||||
|
<td class="w-20">
|
||||||
|
<span class="w-16 text-center status status--{model.state}">{model.state}</span>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{/each}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
{#if Object.keys(filteredModels.peerModelsByPeerId).length > 0}
|
||||||
|
<h3 class="mt-8 mb-2">Peer Models</h3>
|
||||||
|
{#each Object.entries(filteredModels.peerModelsByPeerId).sort(([a], [b]) => a.localeCompare(b)) as [peerId, peerModels] (peerId)}
|
||||||
|
<div class="mb-4">
|
||||||
|
<table class="w-full">
|
||||||
|
<thead class="sticky top-0 bg-card z-10">
|
||||||
|
<tr class="text-left border-b border-gray-200 dark:border-white/10 bg-surface">
|
||||||
|
<th class="font-semibold">{peerId}</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{#each peerModels as model (model.id)}
|
||||||
|
<tr class="border-b hover:bg-secondary-hover border-gray-200">
|
||||||
|
<td class="pl-8 {model.unlisted ? 'text-txtsecondary' : ''}">
|
||||||
|
<span>{model.id}</span>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{/each}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
{/each}
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
@@ -0,0 +1,152 @@
|
|||||||
|
<script lang="ts">
|
||||||
|
import type { Snippet } from "svelte";
|
||||||
|
import { onMount } from "svelte";
|
||||||
|
|
||||||
|
interface Props {
|
||||||
|
direction: "horizontal" | "vertical";
|
||||||
|
storageKey: string;
|
||||||
|
leftPanel: Snippet;
|
||||||
|
rightPanel: Snippet;
|
||||||
|
defaultSize?: number;
|
||||||
|
minSize?: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
let { direction, storageKey, leftPanel, rightPanel, defaultSize = 50, minSize = 5 }: Props = $props();
|
||||||
|
|
||||||
|
let containerRef: HTMLDivElement;
|
||||||
|
let isDragging = $state(false);
|
||||||
|
// svelte-ignore state_referenced_locally
|
||||||
|
let leftSize = $state(defaultSize);
|
||||||
|
|
||||||
|
// Restore the split position saved under this panel's storage key, ignoring
// values that are missing, non-numeric, or outside [minSize, 100 - minSize].
onMount(() => {
  const stored = localStorage.getItem(`panel-size-${storageKey}`);
  if (!stored) return;

  const value = parseFloat(stored);
  if (isNaN(value)) return;

  const withinBounds = value >= minSize && value <= 100 - minSize;
  if (withinBounds) {
    leftSize = value;
  }
});
|
||||||
|
|
||||||
|
// Persists the current split percentage under this panel's storage key.
function saveSize(): void {
  const key = `panel-size-${storageKey}`;
  localStorage.setItem(key, `${leftSize}`);
}
|
||||||
|
|
||||||
|
// Starts a mouse drag: suppresses the default action and tracks the pointer
// on the whole document until the button is released.
function handleMouseDown(e: MouseEvent): void {
  e.preventDefault();
  document.addEventListener("mousemove", handleMouseMove);
  document.addEventListener("mouseup", handleMouseUp);
  isDragging = true;
}
|
||||||
|
|
||||||
|
// Starts a touch drag by tracking touch movement on the whole document.
function handleTouchStart(_e: TouchEvent): void {
  document.addEventListener("touchmove", handleTouchMove);
  document.addEventListener("touchend", handleTouchEnd);
  isDragging = true;
}
|
||||||
|
|
||||||
|
// Resizes from the pointer position while a drag is in progress.
function handleMouseMove(e: MouseEvent): void {
  if (isDragging && containerRef) {
    updateSize(e.clientX, e.clientY);
  }
}
|
||||||
|
|
||||||
|
// Resizes from the first active touch point while a drag is in progress.
function handleTouchMove(e: TouchEvent): void {
  const canTrack = isDragging && containerRef && e.touches.length > 0;
  if (!canTrack) return;

  const touch = e.touches[0];
  updateSize(touch.clientX, touch.clientY);
}
|
||||||
|
|
||||||
|
// Converts a viewport coordinate into the first panel's size, expressed as a
// percentage of the container along the split axis, clamped to
// [minSize, 100 - minSize].
//
// clientX / clientY: pointer or touch position in viewport coordinates.
function updateSize(clientX: number, clientY: number): void {
  const rect = containerRef.getBoundingClientRect();
  const isHorizontal = direction === "horizontal";

  const extent = isHorizontal ? rect.width : rect.height;
  // A container with no extent along the split axis would make the division
  // below produce NaN/Infinity; NaN survives the clamp and would be stored
  // as the panel size, so skip the update instead.
  if (!(extent > 0)) return;

  const offset = isHorizontal ? clientX - rect.left : clientY - rect.top;
  const percent = (offset / extent) * 100;

  // Clamp size
  leftSize = Math.max(minSize, Math.min(100 - minSize, percent));
}
|
||||||
|
|
||||||
|
// Ends a mouse drag: detaches the document listeners and persists the size.
function handleMouseUp(): void {
  document.removeEventListener("mousemove", handleMouseMove);
  document.removeEventListener("mouseup", handleMouseUp);
  isDragging = false;
  saveSize();
}
|
||||||
|
|
||||||
|
// Ends a touch drag: detaches the document listeners and persists the size.
function handleTouchEnd(): void {
  document.removeEventListener("touchmove", handleTouchMove);
  document.removeEventListener("touchend", handleTouchEnd);
  isDragging = false;
  saveSize();
}
|
||||||
|
|
||||||
|
// Keyboard resizing for the separator. Left/Right apply to a horizontal
// split and Up/Down to a vertical one; any other key is left untouched.
function handleKeyDown(e: KeyboardEvent): void {
  const step = 2; // 2% increment for keyboard navigation
  let delta: number;

  switch (direction) {
    case "horizontal":
      if (e.key === "ArrowLeft") delta = -step;
      else if (e.key === "ArrowRight") delta = step;
      else return;
      break;
    case "vertical":
      if (e.key === "ArrowUp") delta = -step;
      else if (e.key === "ArrowDown") delta = step;
      else return;
      break;
    default:
      return;
  }

  e.preventDefault();
  leftSize = Math.max(minSize, Math.min(100 - minSize, leftSize + delta));
  saveSize();
}
|
||||||
|
|
||||||
|
// Flex direction of the outer container.
let containerClass = $derived.by(() => {
  if (direction === "horizontal") return "flex-row";
  return "flex-col";
});

// Size and cursor classes of the drag handle.
let handleClass = $derived.by(() => {
  if (direction === "horizontal") return "w-2 h-full cursor-col-resize";
  return "w-full h-2 cursor-row-resize";
});

// Inline style of the first panel: its share of the split axis plus the minimum.
let leftStyle = $derived.by(() => {
  const dimension = direction === "horizontal" ? "width" : "height";
  return `${dimension}: ${leftSize}%; min-${dimension}: ${minSize}%`;
});

// Inline style of the second panel: the remaining share plus the minimum.
let rightStyle = $derived.by(() => {
  const dimension = direction === "horizontal" ? "width" : "height";
  return `${dimension}: ${100 - leftSize}%; min-${dimension}: ${minSize}%`;
});
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<div bind:this={containerRef} class="flex {containerClass} h-full w-full gap-2">
|
||||||
|
<div style={leftStyle} class="overflow-hidden">
|
||||||
|
{@render leftPanel()}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- svelte-ignore a11y_no_noninteractive_tabindex -->
|
||||||
|
<!-- svelte-ignore a11y_no_noninteractive_element_interactions -->
|
||||||
|
<div
|
||||||
|
role="separator"
|
||||||
|
tabindex="0"
|
||||||
|
class="{handleClass} bg-primary hover:bg-success transition-colors rounded flex-shrink-0"
|
||||||
|
onmousedown={handleMouseDown}
|
||||||
|
ontouchstart={handleTouchStart}
|
||||||
|
onkeydown={handleKeyDown}
|
||||||
|
aria-label="Resize panels"
|
||||||
|
aria-orientation={direction}
|
||||||
|
aria-valuenow={Math.round(leftSize)}
|
||||||
|
aria-valuemin={minSize}
|
||||||
|
aria-valuemax={100 - minSize}
|
||||||
|
></div>
|
||||||
|
|
||||||
|
<div style={rightStyle} class="overflow-hidden">
|
||||||
|
{@render rightPanel()}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
@@ -0,0 +1,147 @@
|
|||||||
|
<script lang="ts">
|
||||||
|
import { metrics } from "../stores/api";
|
||||||
|
import TokenHistogram from "./TokenHistogram.svelte";
|
||||||
|
|
||||||
|
interface HistogramData {
|
||||||
|
bins: number[];
|
||||||
|
min: number;
|
||||||
|
max: number;
|
||||||
|
binSize: number;
|
||||||
|
p99: number;
|
||||||
|
p95: number;
|
||||||
|
p50: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Aggregates the recorded request metrics into request/token totals,
// tokens-per-second percentiles (formatted to two decimals) and the bin
// counts passed to TokenHistogram.
//
// `histogramData` is null when there is nothing meaningful to plot: no
// metrics, no metric with a positive duration and output, or every
// throughput sample identical (a zero-width range cannot be binned).
let stats = $derived.by(() => {
  const noTokenStats = { p99: "0", p95: "0", p50: "0" };

  const totalRequests = $metrics.length;
  if (totalRequests === 0) {
    return { totalRequests: 0, totalInputTokens: 0, totalOutputTokens: 0, tokenStats: noTokenStats, histogramData: null };
  }

  const totalInputTokens = $metrics.reduce((sum, m) => sum + m.input_tokens, 0);
  const totalOutputTokens = $metrics.reduce((sum, m) => sum + m.output_tokens, 0);

  // Calculate token statistics using output_tokens and duration_ms
  const validMetrics = $metrics.filter((m) => m.duration_ms > 0 && m.output_tokens > 0);
  if (validMetrics.length === 0) {
    return { totalRequests, totalInputTokens, totalOutputTokens, tokenStats: noTokenStats, histogramData: null };
  }

  // Calculate tokens/second for each valid metric
  const tokensPerSecond = validMetrics.map((m) => m.output_tokens / (m.duration_ms / 1000));

  // Sort once; percentiles and the min/max bounds are all read from this copy.
  const sorted = [...tokensPerSecond].sort((a, b) => a - b);
  const percentile = (fraction: number): number => sorted[Math.floor(sorted.length * fraction)];

  const p99 = percentile(0.99);
  const p95 = percentile(0.95);
  const p50 = percentile(0.5);

  // Reading the bounds from the sorted copy avoids spreading an unbounded
  // array into Math.min/Math.max argument lists.
  const min = sorted[0];
  const max = sorted[sorted.length - 1];

  let histogramData: HistogramData | null = null;
  // With max === min (e.g. a single request) the bin width would be 0 and
  // every bin index would evaluate to NaN, so no histogram is produced.
  if (max > min) {
    const binCount = Math.min(30, Math.max(10, Math.floor(tokensPerSecond.length / 5)));
    const binSize = (max - min) / binCount;

    const bins: number[] = new Array(binCount).fill(0);
    for (const value of tokensPerSecond) {
      // The maximum value lands exactly on the upper edge; keep it in the last bin.
      const binIndex = Math.min(Math.floor((value - min) / binSize), binCount - 1);
      bins[binIndex]++;
    }

    histogramData = { bins, min, max, binSize, p99, p95, p50 };
  }

  return {
    totalRequests,
    totalInputTokens,
    totalOutputTokens,
    tokenStats: {
      p99: p99.toFixed(2),
      p95: p95.toFixed(2),
      p50: p50.toFixed(2),
    },
    histogramData,
  };
});
|
||||||
|
|
||||||
|
const nf = new Intl.NumberFormat();
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<div class="card">
|
||||||
|
<div class="rounded-lg overflow-hidden border border-card-border-inner">
|
||||||
|
<table class="min-w-full divide-y divide-card-border-inner">
|
||||||
|
<thead class="bg-secondary">
|
||||||
|
<tr>
|
||||||
|
<th class="px-4 py-3 text-left text-xs font-semibold uppercase tracking-wider text-txtmain">Requests</th>
|
||||||
|
<th class="px-4 py-3 text-left text-xs font-semibold uppercase tracking-wider text-txtmain border-l border-card-border-inner">
|
||||||
|
Processed
|
||||||
|
</th>
|
||||||
|
<th class="px-4 py-3 text-left text-xs font-semibold uppercase tracking-wider text-txtmain border-l border-card-border-inner">
|
||||||
|
Generated
|
||||||
|
</th>
|
||||||
|
<th class="px-4 py-3 text-left text-xs font-semibold uppercase tracking-wider text-txtmain border-l border-card-border-inner">
|
||||||
|
Token Stats (tokens/sec)
|
||||||
|
</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
|
||||||
|
<tbody class="bg-surface divide-y divide-card-border-inner">
|
||||||
|
<tr class="hover:bg-secondary">
|
||||||
|
<td class="px-4 py-4 text-sm font-semibold text-gray-900 dark:text-white">{stats.totalRequests}</td>
|
||||||
|
|
||||||
|
<td class="px-4 py-4 text-sm text-gray-700 dark:text-gray-300 border-l border-gray-200 dark:border-white/10">
|
||||||
|
<div class="flex items-center gap-2">
|
||||||
|
<span class="text-sm font-medium">{nf.format(stats.totalInputTokens)}</span>
|
||||||
|
<span class="text-xs text-gray-500 dark:text-gray-400">tokens</span>
|
||||||
|
</div>
|
||||||
|
</td>
|
||||||
|
|
||||||
|
<td class="px-4 py-4 text-sm text-gray-700 dark:text-gray-300 border-l border-gray-200 dark:border-white/10">
|
||||||
|
<div class="flex items-center gap-2">
|
||||||
|
<span class="text-sm font-medium">{nf.format(stats.totalOutputTokens)}</span>
|
||||||
|
<span class="text-xs text-gray-500 dark:text-gray-400">tokens</span>
|
||||||
|
</div>
|
||||||
|
</td>
|
||||||
|
|
||||||
|
<td class="px-4 py-4 border-l border-gray-200 dark:border-white/10">
|
||||||
|
<div class="space-y-3">
|
||||||
|
<div class="grid grid-cols-3 gap-2 items-center">
|
||||||
|
<div class="text-center">
|
||||||
|
<div class="text-xs text-gray-500 dark:text-gray-400">P50</div>
|
||||||
|
<div class="mt-1 inline-block rounded-full bg-gray-100 dark:bg-white/5 px-3 py-1 text-sm font-semibold text-gray-800 dark:text-white">
|
||||||
|
{stats.tokenStats.p50}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="text-center">
|
||||||
|
<div class="text-xs text-gray-500 dark:text-gray-400">P95</div>
|
||||||
|
<div class="mt-1 inline-block rounded-full bg-gray-100 dark:bg-white/5 px-3 py-1 text-sm font-semibold text-gray-800 dark:text-white">
|
||||||
|
{stats.tokenStats.p95}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="text-center">
|
||||||
|
<div class="text-xs text-gray-500 dark:text-gray-400">P99</div>
|
||||||
|
<div class="mt-1 inline-block rounded-full bg-gray-100 dark:bg-white/5 px-3 py-1 text-sm font-semibold text-gray-800 dark:text-white">
|
||||||
|
{stats.tokenStats.p99}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{#if stats.histogramData}
|
||||||
|
<TokenHistogram data={stats.histogramData} />
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
@@ -0,0 +1,129 @@
|
|||||||
|
<script lang="ts">
|
||||||
|
interface HistogramData {
|
||||||
|
bins: number[];
|
||||||
|
min: number;
|
||||||
|
max: number;
|
||||||
|
binSize: number;
|
||||||
|
p99: number;
|
||||||
|
p95: number;
|
||||||
|
p50: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface Props {
|
||||||
|
data: HistogramData;
|
||||||
|
}
|
||||||
|
|
||||||
|
let { data }: Props = $props();
|
||||||
|
|
||||||
|
const height = 120;
|
||||||
|
const padding = { top: 10, right: 15, bottom: 25, left: 45 };
|
||||||
|
const viewBoxWidth = 600;
|
||||||
|
const chartWidth = viewBoxWidth - padding.left - padding.right;
|
||||||
|
const chartHeight = height - padding.top - padding.bottom;
|
||||||
|
|
||||||
|
let maxCount = $derived(Math.max(...data.bins));
|
||||||
|
let barWidth = $derived(chartWidth / data.bins.length);
|
||||||
|
let range = $derived(data.max - data.min);
|
||||||
|
|
||||||
|
// Maps a tokens/sec value to an x coordinate inside the chart area, with
// data.min at the left edge and data.max at the right edge.
//
// When the data has no spread (range is 0) the division would produce NaN,
// so the position falls back to the horizontal middle of the chart.
function getXPosition(value: number): number {
  if (!(range > 0)) {
    return padding.left + chartWidth / 2;
  }
  return padding.left + ((value - data.min) / range) * chartWidth;
}
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<div class="mt-2 w-full">
|
||||||
|
<svg viewBox="0 0 {viewBoxWidth} {height}" class="w-full h-auto" preserveAspectRatio="xMidYMid meet">
|
||||||
|
<!-- Y-axis -->
|
||||||
|
<line
|
||||||
|
x1={padding.left}
|
||||||
|
y1={padding.top}
|
||||||
|
x2={padding.left}
|
||||||
|
y2={height - padding.bottom}
|
||||||
|
stroke="currentColor"
|
||||||
|
stroke-width="1"
|
||||||
|
opacity="0.3"
|
||||||
|
/>
|
||||||
|
|
||||||
|
<!-- X-axis -->
|
||||||
|
<line
|
||||||
|
x1={padding.left}
|
||||||
|
y1={height - padding.bottom}
|
||||||
|
x2={viewBoxWidth - padding.right}
|
||||||
|
y2={height - padding.bottom}
|
||||||
|
stroke="currentColor"
|
||||||
|
stroke-width="1"
|
||||||
|
opacity="0.3"
|
||||||
|
/>
|
||||||
|
|
||||||
|
<!-- Histogram bars -->
|
||||||
|
{#each data.bins as count, i}
|
||||||
|
{@const barHeight = maxCount > 0 ? (count / maxCount) * chartHeight : 0}
|
||||||
|
{@const x = padding.left + i * barWidth}
|
||||||
|
{@const y = height - padding.bottom - barHeight}
|
||||||
|
{@const binStart = data.min + i * data.binSize}
|
||||||
|
{@const binEnd = binStart + data.binSize}
|
||||||
|
<g>
|
||||||
|
<rect
|
||||||
|
{x}
|
||||||
|
{y}
|
||||||
|
width={Math.max(barWidth - 1, 1)}
|
||||||
|
height={barHeight}
|
||||||
|
fill="currentColor"
|
||||||
|
opacity="0.6"
|
||||||
|
class="text-blue-500 dark:text-blue-400 hover:opacity-90 transition-opacity cursor-pointer"
|
||||||
|
/>
|
||||||
|
<title>{`${binStart.toFixed(1)} - ${binEnd.toFixed(1)} tokens/sec\nCount: ${count}`}</title>
|
||||||
|
</g>
|
||||||
|
{/each}
|
||||||
|
|
||||||
|
<!-- Percentile lines -->
|
||||||
|
<line
|
||||||
|
x1={getXPosition(data.p50)}
|
||||||
|
y1={padding.top}
|
||||||
|
x2={getXPosition(data.p50)}
|
||||||
|
y2={height - padding.bottom}
|
||||||
|
stroke="currentColor"
|
||||||
|
stroke-width="2"
|
||||||
|
stroke-dasharray="4 2"
|
||||||
|
opacity="0.7"
|
||||||
|
class="text-gray-600 dark:text-gray-400"
|
||||||
|
/>
|
||||||
|
|
||||||
|
<line
|
||||||
|
x1={getXPosition(data.p95)}
|
||||||
|
y1={padding.top}
|
||||||
|
x2={getXPosition(data.p95)}
|
||||||
|
y2={height - padding.bottom}
|
||||||
|
stroke="currentColor"
|
||||||
|
stroke-width="2"
|
||||||
|
stroke-dasharray="4 2"
|
||||||
|
opacity="0.7"
|
||||||
|
class="text-orange-500 dark:text-orange-400"
|
||||||
|
/>
|
||||||
|
|
||||||
|
<line
|
||||||
|
x1={getXPosition(data.p99)}
|
||||||
|
y1={padding.top}
|
||||||
|
x2={getXPosition(data.p99)}
|
||||||
|
y2={height - padding.bottom}
|
||||||
|
stroke="currentColor"
|
||||||
|
stroke-width="2"
|
||||||
|
stroke-dasharray="4 2"
|
||||||
|
opacity="0.7"
|
||||||
|
class="text-green-500 dark:text-green-400"
|
||||||
|
/>
|
||||||
|
|
||||||
|
<!-- X-axis labels -->
|
||||||
|
<text x={padding.left} y={height - 5} font-size="10" fill="currentColor" opacity="0.6" text-anchor="start">
|
||||||
|
{data.min.toFixed(1)}
|
||||||
|
</text>
|
||||||
|
|
||||||
|
<text x={viewBoxWidth - padding.right} y={height - 5} font-size="10" fill="currentColor" opacity="0.6" text-anchor="end">
|
||||||
|
{data.max.toFixed(1)}
|
||||||
|
</text>
|
||||||
|
|
||||||
|
<!-- X-axis label -->
|
||||||
|
<text x={padding.left + chartWidth / 2} y={height - 2} font-size="10" fill="currentColor" opacity="0.6" text-anchor="middle">
|
||||||
|
Tokens/Second Distribution
|
||||||
|
</text>
|
||||||
|
</svg>
|
||||||
|
</div>
|
||||||
@@ -0,0 +1,20 @@
|
|||||||
|
<script lang="ts">
|
||||||
|
interface Props {
|
||||||
|
content: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
let { content }: Props = $props();
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<div class="relative group inline-block">
|
||||||
|
<span class="cursor-help">ⓘ</span>
|
||||||
|
<div
|
||||||
|
class="absolute top-full left-1/2 transform -translate-x-1/2 mt-2
|
||||||
|
px-3 py-2 bg-gray-900 text-white text-sm rounded-md
|
||||||
|
opacity-0 group-hover:opacity-100 transition-opacity
|
||||||
|
duration-200 pointer-events-none whitespace-nowrap z-50 normal-case"
|
||||||
|
>
|
||||||
|
{content}
|
||||||
|
<div class="absolute bottom-full left-1/2 transform -translate-x-1/2 border-4 border-transparent border-b-gray-900"></div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
@@ -0,0 +1,176 @@
|
|||||||
|
@import "tailwindcss";
|
||||||
|
@custom-variant dark (&:where([data-theme=dark], [data-theme=dark] *));
|
||||||
|
|
||||||
|
@theme {
|
||||||
|
--color-background: rgba(252, 252, 249, 1);
|
||||||
|
--color-surface: rgba(255, 255, 253, 1);
|
||||||
|
|
||||||
|
/* text colors */
|
||||||
|
--color-txtmain: rgba(19, 52, 59, 1);
|
||||||
|
--color-txtsecondary: rgba(98, 108, 113, 1);
|
||||||
|
--color-navlink-active: rgba(245, 245, 245, 1);
|
||||||
|
|
||||||
|
--color-primary: rgba(50, 184, 198, 1);
|
||||||
|
|
||||||
|
--color-primary-hover: rgba(29, 116, 128, 1);
|
||||||
|
--color-primary-active: rgba(26, 104, 115, 1);
|
||||||
|
--color-secondary: rgba(94, 82, 64, 0.12);
|
||||||
|
--color-secondary-hover: rgba(94, 82, 64, 0.2);
|
||||||
|
--color-secondary-active: rgba(94, 82, 64, 0.25);
|
||||||
|
--color-border: rgba(94, 82, 64, 0.3);
|
||||||
|
--color-btn-primary-text: rgba(252, 252, 249, 1);
|
||||||
|
--color-card-border: rgba(94, 82, 64, 0.12);
|
||||||
|
--color-card-border-inner: rgba(94, 82, 64, 0.12);
|
||||||
|
--color-error: rgba(192, 21, 47, 1);
|
||||||
|
--color-success: rgba(33, 128, 141, 1);
|
||||||
|
--color-warning: rgb(244, 155, 0);
|
||||||
|
--color-info: rgba(98, 108, 113, 1);
|
||||||
|
--color-focus-ring: rgba(33, 128, 141, 0.4);
|
||||||
|
--color-select-caret: rgba(19, 52, 59, 0.8);
|
||||||
|
--color-btn-border: rgba(94, 82, 64, 0.7);
|
||||||
|
}
|
||||||
|
|
||||||
|
@layer theme {
|
||||||
|
/* override theme colors for dark mode */
|
||||||
|
[data-theme="dark"] {
|
||||||
|
--color-background: rgba(31, 33, 33, 1);
|
||||||
|
--color-surface: rgba(38, 40, 40, 1);
|
||||||
|
/* text colors */
|
||||||
|
--color-txtmain: rgba(245, 245, 245, 1);
|
||||||
|
--color-txtsecondary: rgba(167, 169, 169, 0.7);
|
||||||
|
|
||||||
|
--color-navlink-active: rgba(245, 245, 245, 1);
|
||||||
|
|
||||||
|
--color-primary: rgba(33, 128, 141, 1);
|
||||||
|
--color-primary-hover: rgba(45, 166, 178, 1);
|
||||||
|
--color-primary-active: rgba(41, 150, 161, 1);
|
||||||
|
--color-secondary: rgba(119, 124, 124, 0.15);
|
||||||
|
--color-secondary-hover: rgba(119, 124, 124, 0.25);
|
||||||
|
--color-secondary-active: rgba(119, 124, 124, 0.3);
|
||||||
|
--color-border: rgba(119, 124, 124, 0.3);
|
||||||
|
--color-error: rgba(255, 84, 89, 1);
|
||||||
|
--color-success: rgba(50, 184, 198, 1);
|
||||||
|
--color-warning: rgb(244, 155, 0);
|
||||||
|
--color-info: rgba(167, 169, 169, 1);
|
||||||
|
--color-focus-ring: rgba(50, 184, 198, 0.4);
|
||||||
|
--color-btn-primary-text: rgba(19, 52, 59, 1);
|
||||||
|
--color-card-border: rgba(119, 124, 124, 0.2);
|
||||||
|
--color-card-border-inner: rgba(119, 124, 124, 0.15);
|
||||||
|
--shadow-inset-sm: inset 0 1px 0 rgba(255, 255, 255, 0.1), inset 0 -1px 0 rgba(0, 0, 0, 0.15);
|
||||||
|
--button-border-secondary: rgba(119, 124, 124, 0.2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@layer base {
|
||||||
|
body {
|
||||||
|
/* example of applying theme colors via utilities */
|
||||||
|
@apply bg-background text-txtmain;
|
||||||
|
}
|
||||||
|
|
||||||
|
h1 {
|
||||||
|
@apply text-4xl text-txtmain font-bold pb-4;
|
||||||
|
}
|
||||||
|
h2 {
|
||||||
|
@apply text-3xl text-txtmain font-bold pb-4;
|
||||||
|
}
|
||||||
|
h3 {
|
||||||
|
@apply text-2xl text-txtmain font-bold pb-4;
|
||||||
|
}
|
||||||
|
h4 {
|
||||||
|
@apply text-xl text-txtmain font-bold pb-4;
|
||||||
|
}
|
||||||
|
h5 {
|
||||||
|
@apply text-lg text-txtmain font-bold pb-4;
|
||||||
|
}
|
||||||
|
h6 {
|
||||||
|
@apply text-base text-txtmain font-bold pb-4;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Component classes: named building blocks composed from utility classes. */
@layer components {
  .container {
    @apply px-4;
  }

  /* ---- Tables ---- */
  table th,
  table td {
    @apply p-2;
  }
  table th {
    @apply font-semibold;
  }

  /* ---- Navigation header ---- */
  .navlink {
    @apply text-txtsecondary hover:bg-secondary hover:text-txtmain rounded-lg p-2;
  }
  .navlink.active {
    @apply bg-primary text-navlink-active;
  }

  /* ---- Card ---- */
  .card {
    @apply bg-surface rounded-lg border border-card-border shadow-sm overflow-hidden p-4;
  }
  .card:hover {
    @apply shadow-md;
  }
  .card__body {
    @apply p-4;
  }
  .card__header,
  .card__footer {
    @apply p-4 border-b border-card-border-inner;
  }

  /* ---- Status badges ---- */
  .status {
    @apply inline-block px-2 py-1 text-xs font-medium rounded-lg;
  }
  .status--ready {
    @apply bg-success/10 text-success;
  }
  /* Both transitional states share the warning palette. */
  .status--starting,
  .status--stopping {
    @apply bg-warning/10 text-warning;
  }
  .status--stopped {
    @apply bg-error/10 text-error;
  }

  /* ---- Buttons ---- */
  /* Rule order matters here: :disabled comes after :hover so its cursor wins. */
  .btn {
    @apply bg-surface py-2 px-4 text-sm rounded-md border transition-colors duration-200 border-btn-border;
  }
  .btn:hover {
    @apply cursor-pointer;
  }
  .btn--sm {
    @apply px-2 py-0.5 text-xs;
  }
  .btn:disabled {
    @apply opacity-50 cursor-not-allowed;
  }
}
|
||||||
|
|
||||||
|
@layer utilities {
  /* Left margin of 0.5rem. */
  .ml-2 {
    margin-left: 0.5rem;
  }

  /* Vertical margin of 2rem (top and bottom only). */
  .my-8 {
    margin-top: 2rem;
    margin-bottom: 2rem;
  }
}
|
||||||
@@ -0,0 +1,42 @@
|
|||||||
|
/** State of the connection to the server's event stream. */
export type ConnectionState = "connected" | "connecting" | "disconnected";

/** Lifecycle state reported for a model. */
export type ModelStatus = "ready" | "starting" | "stopping" | "stopped" | "shutdown" | "unknown";

/** One entry of the model list delivered in a `modelStatus` event. */
export interface Model {
  id: string;
  state: ModelStatus;
  name: string;
  description: string;
  unlisted: boolean;
  peerID: string;
}

/** One request record delivered in a `metrics` event. */
export interface Metrics {
  /** Numeric id; the activity table sorts on it, highest first. */
  id: number;
  /** Date string; parsed with `new Date(timestamp)` for display. */
  timestamp: string;
  model: string;
  /** Prompt tokens served from cache. */
  cache_tokens: number;
  /** New prompt tokens processed. */
  input_tokens: number;
  /** Generated tokens. */
  output_tokens: number;
  /** Prompt processing speed in tokens/second; a negative value is displayed as "unknown". */
  prompt_per_second: number;
  /** Generation speed in tokens/second; a negative value is displayed as "unknown". */
  tokens_per_second: number;
  /** Request duration in milliseconds. */
  duration_ms: number;
}

/** A chunk of log text together with the log stream it belongs to. */
export interface LogData {
  source: "upstream" | "proxy";
  data: string;
}

/** Wrapper around every message received on the event stream. */
export interface APIEventEnvelope {
  type: "modelStatus" | "logData" | "metrics";
  /** JSON-encoded payload; its shape depends on `type`. */
  data: string;
}

/** Build metadata returned by `/api/version`. */
export interface VersionInfo {
  build_date: string;
  commit: string;
  version: string;
}

/** Named viewport width bucket. */
export type ScreenWidth = "xs" | "sm" | "md" | "lg" | "xl" | "2xl";
|
||||||
@@ -0,0 +1,9 @@
|
|||||||
|
import "./index.css";
|
||||||
|
import App from "./App.svelte";
|
||||||
|
import { mount } from "svelte";
|
||||||
|
|
||||||
|
// The host page is expected to provide an element with id "app".
const target = document.getElementById("app")!;

// Mount the root component into that element.
const app = mount(App, { target });

export default app;
|
||||||
@@ -0,0 +1,88 @@
|
|||||||
|
<script lang="ts">
|
||||||
|
import { metrics } from "../stores/api";
|
||||||
|
import Tooltip from "../components/Tooltip.svelte";
|
||||||
|
|
||||||
|
// Render a tokens-per-second value with two decimals; negative values mean "unknown".
function formatSpeed(speed: number): string {
  if (speed < 0) {
    return "unknown";
  }
  return `${speed.toFixed(2)} t/s`;
}
|
||||||
|
|
||||||
|
// Convert milliseconds to seconds, shown with two decimals and an "s" suffix.
function formatDuration(ms: number): string {
  const seconds = ms / 1000;
  return `${seconds.toFixed(2)}s`;
}
|
||||||
|
|
||||||
|
// Describe how long ago `timestamp` was, using the coarsest unit that fits:
// "now", "<n>s ago", "<n>m ago", "<n>h ago", or "a while ago" from 24 hours on.
function formatRelativeTime(timestamp: string): string {
  const elapsedMs = Date.now() - new Date(timestamp).getTime();
  const seconds = Math.floor(elapsedMs / 1000);

  // Anything under five seconds, including timestamps in the future, reads as "now"
  if (seconds < 5) {
    return "now";
  }
  if (seconds < 60) {
    return `${seconds}s ago`;
  }

  const minutes = Math.floor(seconds / 60);
  if (minutes < 60) {
    return `${minutes}m ago`;
  }

  const hours = Math.floor(minutes / 60);
  return hours < 24 ? `${hours}h ago` : "a while ago";
}
|
||||||
|
|
||||||
|
// Copy of the metrics list ordered by id, highest first
let sortedMetrics = $derived([...$metrics].sort((left, right) => right.id - left.id));
</script>

<div class="p-2">
  <h1 class="text-2xl font-bold">Activity</h1>

  {#if $metrics.length === 0}
    <!-- Empty state -->
    <div class="text-center py-8">
      <p class="text-gray-600">No metrics data available</p>
    </div>
  {:else}
    <!-- One table row per metrics record -->
    <div class="card overflow-auto">
      <table class="min-w-full divide-y">
        <thead class="border-gray-200 dark:border-white/10">
          <tr class="text-left text-xs uppercase tracking-wider">
            <th class="px-6 py-3">ID</th>
            <th class="px-6 py-3">Time</th>
            <th class="px-6 py-3">Model</th>
            <th class="px-6 py-3">
              Cached <Tooltip content="prompt tokens from cache" />
            </th>
            <th class="px-6 py-3">
              Prompt <Tooltip content="new prompt tokens processed" />
            </th>
            <th class="px-6 py-3">Generated</th>
            <th class="px-6 py-3">Prompt Processing</th>
            <th class="px-6 py-3">Generation Speed</th>
            <th class="px-6 py-3">Duration</th>
          </tr>
        </thead>
        <tbody class="divide-y">
          {#each sortedMetrics as metric (metric.id)}
            <tr class="whitespace-nowrap text-sm border-gray-200 dark:border-white/10">
              <!-- The displayed id is the record id plus one -->
              <td class="px-4 py-4">{metric.id + 1}</td>
              <td class="px-6 py-4">{formatRelativeTime(metric.timestamp)}</td>
              <td class="px-6 py-4">{metric.model}</td>
              <!-- A cache count of zero or less is shown as a dash -->
              <td class="px-6 py-4">{metric.cache_tokens > 0 ? metric.cache_tokens.toLocaleString() : "-"}</td>
              <td class="px-6 py-4">{metric.input_tokens.toLocaleString()}</td>
              <td class="px-6 py-4">{metric.output_tokens.toLocaleString()}</td>
              <td class="px-6 py-4">{formatSpeed(metric.prompt_per_second)}</td>
              <td class="px-6 py-4">{formatSpeed(metric.tokens_per_second)}</td>
              <td class="px-6 py-4">{formatDuration(metric.duration_ms)}</td>
            </tr>
          {/each}
        </tbody>
      </table>
    </div>
  {/if}
</div>
|
||||||
@@ -0,0 +1,75 @@
|
|||||||
|
<script lang="ts">
|
||||||
|
import { proxyLogs, upstreamLogs } from "../stores/api";
|
||||||
|
import { screenWidth } from "../stores/theme";
|
||||||
|
import { persistentStore } from "../stores/persistent";
|
||||||
|
import LogPanel from "../components/LogPanel.svelte";
|
||||||
|
import ResizablePanels from "../components/ResizablePanels.svelte";
|
||||||
|
|
||||||
|
// Which log pane(s) to show: a single source, or both in resizable panels
type ViewMode = "proxy" | "upstream" | "panels";

// Selected view mode, kept under the "logviewer-view-mode" key; starts as "panels"
const viewModeStore = persistentStore<ViewMode>("logviewer-view-mode", "panels");

// Stack the panels vertically on the two smallest screen widths
let direction = $derived<"horizontal" | "vertical">(
  ["xs", "sm"].includes($screenWidth) ? "vertical" : "horizontal"
);
|
||||||
|
|
||||||
|
// Advance the view mode: panels -> proxy -> upstream -> panels.
function cycleViewMode(): void {
  const order: ViewMode[] = ["panels", "proxy", "upstream"];
  // An unrecognized current value yields -1, so the next mode becomes the first entry
  const position = order.indexOf($viewModeStore);
  const upcoming = order[(position + 1) % order.length];
  viewModeStore.set(upcoming);
}
|
||||||
|
|
||||||
|
// Short glyph shown on the toggle button for each view mode.
function getViewModeIcon(mode: ViewMode): string {
  const icons = new Map<ViewMode, string>([
    ["proxy", "P"],
    ["upstream", "U"],
    ["panels", "⊞"],
  ]);
  return icons.get(mode) as string;
}
|
||||||
|
|
||||||
|
// Human-readable name of each view mode, used for the button text and aria-label.
function getViewModeLabel(mode: ViewMode): string {
  const labels = new Map<ViewMode, string>([
    ["proxy", "Proxy"],
    ["upstream", "Upstream"],
    ["panels", "Panels"],
  ]);
  return labels.get(mode) as string;
}
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<div class="flex flex-col h-full w-full gap-2">
  <!-- Toolbar: one button that cycles through the view modes -->
  <div class="flex items-center gap-2">
    <button
      class="btn flex items-center gap-2 text-sm"
      title="Toggle view mode"
      aria-label="Toggle view mode: {getViewModeLabel($viewModeStore)}"
      onclick={cycleViewMode}
    >
      <span class="font-mono font-bold">{getViewModeIcon($viewModeStore)}</span>
      <span>{getViewModeLabel($viewModeStore)}</span>
    </button>
  </div>

  <!-- Log area: both streams in resizable panels, or a single stream -->
  <div class="flex-1 w-full overflow-hidden">
    {#if $viewModeStore === "panels"}
      <ResizablePanels {direction} storageKey="logviewer-panel-group">
        {#snippet leftPanel()}
          <LogPanel id="proxy" title="Proxy Logs" logData={$proxyLogs} />
        {/snippet}
        {#snippet rightPanel()}
          <LogPanel id="upstream" title="Upstream Logs" logData={$upstreamLogs} />
        {/snippet}
      </ResizablePanels>
    {:else if $viewModeStore === "proxy"}
      <LogPanel id="proxy" title="Proxy Logs" logData={$proxyLogs} />
    {:else}
      <LogPanel id="upstream" title="Upstream Logs" logData={$upstreamLogs} />
    {/if}
  </div>
</div>
|
||||||
@@ -0,0 +1,26 @@
|
|||||||
|
<script lang="ts">
|
||||||
|
import { isNarrow } from "../stores/theme";
|
||||||
|
import { upstreamLogs } from "../stores/api";
|
||||||
|
import ModelsPanel from "../components/ModelsPanel.svelte";
|
||||||
|
import StatsPanel from "../components/StatsPanel.svelte";
|
||||||
|
import LogPanel from "../components/LogPanel.svelte";
|
||||||
|
import ResizablePanels from "../components/ResizablePanels.svelte";
|
||||||
|
|
||||||
|
// Stack the two panels vertically whenever the screen counts as narrow
let direction = $derived<"horizontal" | "vertical">($isNarrow ? "vertical" : "horizontal");
</script>

<ResizablePanels {direction} storageKey="models-panel-group">
  {#snippet leftPanel()}
    <ModelsPanel />
  {/snippet}

  {#snippet rightPanel()}
    <div class="flex flex-col h-full space-y-4">
      <!-- The stats panel is only rendered in the horizontal layout -->
      {#if direction === "horizontal"}
        <StatsPanel />
      {/if}
      <div class="flex-1 min-h-0">
        <LogPanel id="modelsupstream" title="Upstream Logs" logData={$upstreamLogs} />
      </div>
    </div>
  {/snippet}
</ResizablePanels>
|
||||||
@@ -0,0 +1,174 @@
|
|||||||
|
import { writable } from "svelte/store";
|
||||||
|
import type { Model, Metrics, VersionInfo, LogData, APIEventEnvelope } from "../lib/types";
|
||||||
|
import { connectionState } from "./theme";
|
||||||
|
|
||||||
|
// Maximum number of characters kept per log stream (100 * 1024)
const LOG_LENGTH_LIMIT = 100 * 1024;

// Stores
export const models = writable<Model[]>([]);
export const proxyLogs = writable<string>("");
export const upstreamLogs = writable<string>("");
export const metrics = writable<Metrics[]>([]);

// Placeholder values until /api/version has been fetched
export const versionInfo = writable<VersionInfo>({
  build_date: "unknown",
  commit: "unknown",
  version: "unknown",
});

// The currently open event stream, or null when there is none
let apiEventSource: EventSource | null = null;
|
||||||
|
|
||||||
|
// Append text to a log store, keeping only the newest LOG_LENGTH_LIMIT characters.
function appendLog(newData: string, store: typeof proxyLogs | typeof upstreamLogs): void {
  store.update((current) => {
    const combined = current + newData;
    if (combined.length <= LOG_LENGTH_LIMIT) {
      return combined;
    }
    // Over the limit: drop the oldest text from the front
    return combined.slice(-LOG_LENGTH_LIMIT);
  });
}
|
||||||
|
|
||||||
|
/**
 * Open or close the server-sent event stream at `/api/events`.
 *
 * With `enabled === false` the current stream (if any) is closed and the
 * metrics store is cleared. With `enabled === true` a new stream is opened;
 * incoming messages update the `models`, `proxyLogs`, `upstreamLogs` and
 * `metrics` stores, and `connectionState` tracks the connection. After an
 * error the stream is reopened with exponential backoff (1s, 2s, 4s, capped
 * at 5s).
 *
 * A pending reconnect is tied to the stream that scheduled it. If the stream
 * has been disabled or replaced in the meantime, the reconnect does nothing.
 */
export function enableAPIEvents(enabled: boolean): void {
  if (!enabled) {
    apiEventSource?.close();
    // Clearing the reference also invalidates any reconnect that is still pending
    apiEventSource = null;
    metrics.set([]);
    return;
  }

  let retryCount = 0;
  const initialDelay = 1000; // 1 second
  const maxDelay = 5000; // 5 seconds

  const connect = () => {
    apiEventSource?.close();
    const source = new EventSource("/api/events");
    apiEventSource = source;

    connectionState.set("connecting");

    source.onopen = () => {
      // Clear everything on connect to keep things in sync
      proxyLogs.set("");
      upstreamLogs.set("");
      metrics.set([]);
      models.set([]);
      retryCount = 0;
      connectionState.set("connected");
    };

    source.onmessage = (e: MessageEvent) => {
      try {
        // The envelope and its `data` payload are both JSON-encoded
        const message = JSON.parse(e.data) as APIEventEnvelope;
        switch (message.type) {
          case "modelStatus": {
            const newModels = JSON.parse(message.data) as Model[];
            // Sort models by name and id
            newModels.sort((a, b) => {
              return (a.name + a.id).localeCompare(b.name + b.id);
            });
            models.set(newModels);
            break;
          }

          case "logData": {
            const logData = JSON.parse(message.data) as LogData;
            switch (logData.source) {
              case "proxy":
                appendLog(logData.data, proxyLogs);
                break;
              case "upstream":
                appendLog(logData.data, upstreamLogs);
                break;
            }
            break;
          }

          case "metrics": {
            const newMetrics = JSON.parse(message.data) as Metrics[];
            // New records go in front of the ones already stored
            metrics.update((prevMetrics) => [...newMetrics, ...prevMetrics]);
            break;
          }
        }
      } catch (err) {
        console.error(e.data, err);
      }
    };

    source.onerror = () => {
      source.close();
      if (apiEventSource !== source) {
        // This stream was already disabled or replaced; leave the current one alone
        return;
      }

      retryCount++;
      const delay = Math.min(initialDelay * Math.pow(2, retryCount - 1), maxDelay);
      connectionState.set("disconnected");

      setTimeout(() => {
        // Only reconnect if nothing disabled or replaced this stream while waiting
        if (apiEventSource === source) {
          connect();
        }
      }, delay);
    };
  };

  connect();
}
|
||||||
|
|
||||||
|
// Load the version info from /api/version each time the state is "connected"
connectionState.subscribe(async (status) => {
  if (status !== "connected") {
    return;
  }

  try {
    const response = await fetch("/api/version");
    if (!response.ok) {
      throw new Error(`HTTP error! status: ${response.status}`);
    }
    const info = (await response.json()) as VersionInfo;
    versionInfo.set(info);
  } catch (error) {
    // Failures are only logged; the store keeps its previous value
    console.error(error);
  }
});
|
||||||
|
|
||||||
|
/**
 * Fetch the model list from `/api/models/`.
 *
 * @returns The parsed response body, or an empty array when the body is falsy
 *          or the request fails (the failure is logged, not thrown).
 */
export async function listModels(): Promise<Model[]> {
  try {
    const response = await fetch("/api/models/");
    if (!response.ok) {
      throw new Error(`HTTP error! status: ${response.status}`);
    }
    const payload = await response.json();
    return payload ? payload : [];
  } catch (error) {
    console.error("Failed to fetch models:", error);
    return [];
  }
}
|
||||||
|
|
||||||
|
/**
 * Ask the server to unload every model (POST `/api/models/unload`).
 *
 * @throws Rethrows any network error, and throws an Error on a non-OK status;
 *         either way the failure is logged first.
 */
export async function unloadAllModels(): Promise<void> {
  try {
    const response = await fetch(`/api/models/unload`, { method: "POST" });
    if (response.ok) {
      return;
    }
    throw new Error(`Failed to unload models: ${response.status}`);
  } catch (error) {
    console.error("Failed to unload models:", error);
    throw error;
  }
}
|
||||||
|
|
||||||
|
/**
 * Ask the server to unload one model (POST `/api/models/unload/<model>`).
 *
 * @param model Identifier inserted into the request path as-is.
 * @throws Rethrows any network error, and throws an Error on a non-OK status;
 *         either way the failure is logged first.
 */
export async function unloadSingleModel(model: string): Promise<void> {
  try {
    const response = await fetch(`/api/models/unload/${model}`, { method: "POST" });
    if (response.ok) {
      return;
    }
    throw new Error(`Failed to unload model: ${response.status}`);
  } catch (error) {
    console.error("Failed to unload model", model, error);
    throw error;
  }
}
|
||||||
|
|
||||||
|
/**
 * Request `/upstream/<model>/` with GET, which is how this UI triggers a model load.
 *
 * @param model Identifier inserted into the request path as-is.
 * @throws Rethrows any network error, and throws an Error on a non-OK status;
 *         either way the failure is logged first.
 */
export async function loadModel(model: string): Promise<void> {
  try {
    const response = await fetch(`/upstream/${model}/`, { method: "GET" });
    if (response.ok) {
      return;
    }
    throw new Error(`Failed to load model: ${response.status}`);
  } catch (error) {
    console.error("Failed to load model:", error);
    throw error;
  }
}
|
||||||
@@ -0,0 +1,31 @@
|
|||||||
|
import { writable, type Writable } from "svelte/store";
|
||||||
|
|
||||||
|
/**
 * Create a writable store whose value is mirrored to `localStorage` as JSON.
 *
 * @param key          localStorage key to read from and write to.
 * @param initialValue Value used when nothing is stored under `key` or the
 *                     stored text cannot be read or parsed.
 * @returns A writable store; every value it emits is saved under `key`.
 */
export function persistentStore<T>(key: string, initialValue: T): Writable<T> {
  // localStorage is only touched when a window object exists
  const inBrowser = (): boolean => typeof window !== "undefined";

  let startValue = initialValue;
  if (inBrowser()) {
    try {
      const raw = localStorage.getItem(key);
      if (raw !== null) {
        startValue = JSON.parse(raw);
      }
    } catch (e) {
      console.error(`Error parsing stored value for ${key}`, e);
    }
  }

  const store = writable<T>(startValue);

  // Runs for the current value right away and again on every change
  store.subscribe((value) => {
    if (!inBrowser()) {
      return;
    }
    try {
      localStorage.setItem(key, JSON.stringify(value));
    } catch (e) {
      console.error(`Error saving value for ${key}`, e);
    }
  });

  return store;
}
|
||||||
@@ -0,0 +1,53 @@
|
|||||||
|
import { writable, derived } from "svelte/store";
|
||||||
|
import { persistentStore } from "./persistent";
|
||||||
|
import type { ScreenWidth } from "../lib/types";
|
||||||
|
|
||||||
|
// Persistent stores (created through persistentStore)
export const isDarkMode = persistentStore<boolean>("theme", false);
export const appTitle = persistentStore<string>("app-title", "llama-swap");

// Non-persistent stores
export const screenWidth = writable<ScreenWidth>("md");
export const connectionState = writable<"connected" | "connecting" | "disconnected">("disconnected");

// Screen widths that count as narrow
const NARROW_WIDTHS: ScreenWidth[] = ["xs", "sm", "md"];

// Derived store: true while the current screen width is one of the narrow ones
export const isNarrow = derived(screenWidth, ($screenWidth) => NARROW_WIDTHS.includes($screenWidth));
|
||||||
|
|
||||||
|
// Flip the dark-mode flag
export function toggleTheme(): void {
  isDarkMode.update((dark) => !dark);
}
|
||||||
|
|
||||||
|
// Classify window.innerWidth into a ScreenWidth bucket and store the result
export function checkScreenWidth(): void {
  const width = window.innerWidth;

  // Exclusive upper bound of each bucket, ascending; anything wider is "2xl"
  const upperBounds: [number, ScreenWidth][] = [
    [640, "xs"],
    [768, "sm"],
    [1024, "md"],
    [1280, "lg"],
    [1536, "xl"],
  ];

  const bucket = upperBounds.find(([limit]) => width < limit);
  screenWidth.set(bucket ? bucket[1] : "2xl");
}
|
||||||
|
|
||||||
|
// Record the current screen width, keep it updated on window resize,
// and return a function that removes the resize listener again
export function initScreenWidth(): () => void {
  checkScreenWidth();
  window.addEventListener("resize", checkScreenWidth);

  const stopListening = (): void => {
    window.removeEventListener("resize", checkScreenWidth);
  };
  return stopListening;
}
|
||||||
@@ -0,0 +1,5 @@
|
|||||||
|
import { vitePreprocess } from "@sveltejs/vite-plugin-svelte";
|
||||||
|
|
||||||
|
// Svelte configuration: preprocess components with vitePreprocess
const config = {
  preprocess: vitePreprocess(),
};

export default config;
|
||||||
@@ -0,0 +1,20 @@
|
|||||||
|
{
|
||||||
|
"extends": "@tsconfig/svelte/tsconfig.json",
|
||||||
|
"compilerOptions": {
|
||||||
|
"target": "ESNext",
|
||||||
|
"useDefineForClassFields": true,
|
||||||
|
"module": "ESNext",
|
||||||
|
"resolveJsonModule": true,
|
||||||
|
"allowJs": true,
|
||||||
|
"checkJs": true,
|
||||||
|
"isolatedModules": true,
|
||||||
|
"moduleDetection": "force",
|
||||||
|
"noEmit": true,
|
||||||
|
"strict": true,
|
||||||
|
"noUnusedLocals": true,
|
||||||
|
"noUnusedParameters": true,
|
||||||
|
"noFallthroughCasesInSwitch": true,
|
||||||
|
"verbatimModuleSyntax": true
|
||||||
|
},
|
||||||
|
"include": ["src/**/*.ts", "src/**/*.svelte"]
|
||||||
|
}
|
||||||
@@ -0,0 +1,21 @@
|
|||||||
|
import { defineConfig } from "vite";
|
||||||
|
import { svelte } from "@sveltejs/vite-plugin-svelte";
|
||||||
|
import tailwindcss from "@tailwindcss/vite";
|
||||||
|
|
||||||
|
// Go backend that the dev server forwards requests to during development
const backend = "http://localhost:8080";

// https://vite.dev/config/
export default defineConfig({
  plugins: [svelte(), tailwindcss()],
  // The app is served under /ui/
  base: "/ui/",
  build: {
    outDir: "../proxy/ui_dist",
    assetsDir: "assets",
  },
  server: {
    // Paths forwarded to the backend
    proxy: {
      "/api": backend,
      "/logs": backend,
      "/upstream": backend,
      "/unload": backend,
    },
  },
});
|
||||||
@@ -12,7 +12,7 @@
|
|||||||
"react-dom": "^19.1.0",
|
"react-dom": "^19.1.0",
|
||||||
"react-icons": "^5.5.0",
|
"react-icons": "^5.5.0",
|
||||||
"react-resizable-panels": "^3.0.4",
|
"react-resizable-panels": "^3.0.4",
|
||||||
"react-router-dom": "^7.6.2"
|
"react-router-dom": "^7.12.0"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@eslint/js": "^9.25.0",
|
"@eslint/js": "^9.25.0",
|
||||||
@@ -75,6 +75,7 @@
|
|||||||
"integrity": "sha512-bXYxrXFubeYdvB0NhD/NBB3Qi6aZeV20GOWVI47t2dkecCEoneR4NPVcb7abpXDEvejgrUfFtG6vG/zxAKmg+g==",
|
"integrity": "sha512-bXYxrXFubeYdvB0NhD/NBB3Qi6aZeV20GOWVI47t2dkecCEoneR4NPVcb7abpXDEvejgrUfFtG6vG/zxAKmg+g==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
"peer": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@ampproject/remapping": "^2.2.0",
|
"@ampproject/remapping": "^2.2.0",
|
||||||
"@babel/code-frame": "^7.27.1",
|
"@babel/code-frame": "^7.27.1",
|
||||||
@@ -1593,6 +1594,66 @@
|
|||||||
"node": ">=14.0.0"
|
"node": ">=14.0.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/core": {
|
||||||
|
"version": "1.4.3",
|
||||||
|
"dev": true,
|
||||||
|
"inBundle": true,
|
||||||
|
"license": "MIT",
|
||||||
|
"optional": true,
|
||||||
|
"dependencies": {
|
||||||
|
"@emnapi/wasi-threads": "1.0.2",
|
||||||
|
"tslib": "^2.4.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/runtime": {
|
||||||
|
"version": "1.4.3",
|
||||||
|
"dev": true,
|
||||||
|
"inBundle": true,
|
||||||
|
"license": "MIT",
|
||||||
|
"optional": true,
|
||||||
|
"dependencies": {
|
||||||
|
"tslib": "^2.4.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/wasi-threads": {
|
||||||
|
"version": "1.0.2",
|
||||||
|
"dev": true,
|
||||||
|
"inBundle": true,
|
||||||
|
"license": "MIT",
|
||||||
|
"optional": true,
|
||||||
|
"dependencies": {
|
||||||
|
"tslib": "^2.4.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@napi-rs/wasm-runtime": {
|
||||||
|
"version": "0.2.10",
|
||||||
|
"dev": true,
|
||||||
|
"inBundle": true,
|
||||||
|
"license": "MIT",
|
||||||
|
"optional": true,
|
||||||
|
"dependencies": {
|
||||||
|
"@emnapi/core": "^1.4.3",
|
||||||
|
"@emnapi/runtime": "^1.4.3",
|
||||||
|
"@tybys/wasm-util": "^0.9.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@tybys/wasm-util": {
|
||||||
|
"version": "0.9.0",
|
||||||
|
"dev": true,
|
||||||
|
"inBundle": true,
|
||||||
|
"license": "MIT",
|
||||||
|
"optional": true,
|
||||||
|
"dependencies": {
|
||||||
|
"tslib": "^2.4.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/tslib": {
|
||||||
|
"version": "2.8.0",
|
||||||
|
"dev": true,
|
||||||
|
"inBundle": true,
|
||||||
|
"license": "0BSD",
|
||||||
|
"optional": true
|
||||||
|
},
|
||||||
"node_modules/@tailwindcss/oxide-win32-arm64-msvc": {
|
"node_modules/@tailwindcss/oxide-win32-arm64-msvc": {
|
||||||
"version": "4.1.8",
|
"version": "4.1.8",
|
||||||
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-arm64-msvc/-/oxide-win32-arm64-msvc-4.1.8.tgz",
|
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-arm64-msvc/-/oxide-win32-arm64-msvc-4.1.8.tgz",
|
||||||
@@ -1707,6 +1768,7 @@
|
|||||||
"integrity": "sha512-JeG0rEWak0N6Itr6QUx+X60uQmN+5t3j9r/OVDtWzFXKaj6kD1BwJzOksD0FF6iWxZlbE1kB0q9vtnU2ekqa1Q==",
|
"integrity": "sha512-JeG0rEWak0N6Itr6QUx+X60uQmN+5t3j9r/OVDtWzFXKaj6kD1BwJzOksD0FF6iWxZlbE1kB0q9vtnU2ekqa1Q==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
"peer": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"csstype": "^3.0.2"
|
"csstype": "^3.0.2"
|
||||||
}
|
}
|
||||||
@@ -1767,6 +1829,7 @@
|
|||||||
"integrity": "sha512-qwxv6dq682yVvgKKp2qWwLgRbscDAYktPptK4JPojCwwi3R9cwrvIxS4lvBpzmcqzR4bdn54Z0IG1uHFskW4dA==",
|
"integrity": "sha512-qwxv6dq682yVvgKKp2qWwLgRbscDAYktPptK4JPojCwwi3R9cwrvIxS4lvBpzmcqzR4bdn54Z0IG1uHFskW4dA==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
"peer": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@typescript-eslint/scope-manager": "8.33.1",
|
"@typescript-eslint/scope-manager": "8.33.1",
|
||||||
"@typescript-eslint/types": "8.33.1",
|
"@typescript-eslint/types": "8.33.1",
|
||||||
@@ -2018,6 +2081,7 @@
|
|||||||
"integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
|
"integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
"peer": true,
|
||||||
"bin": {
|
"bin": {
|
||||||
"acorn": "bin/acorn"
|
"acorn": "bin/acorn"
|
||||||
},
|
},
|
||||||
@@ -2126,6 +2190,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
"peer": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"caniuse-lite": "^1.0.30001718",
|
"caniuse-lite": "^1.0.30001718",
|
||||||
"electron-to-chromium": "^1.5.160",
|
"electron-to-chromium": "^1.5.160",
|
||||||
@@ -2232,12 +2297,16 @@
|
|||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
},
|
},
|
||||||
"node_modules/cookie": {
|
"node_modules/cookie": {
|
||||||
"version": "1.0.2",
|
"version": "1.1.1",
|
||||||
"resolved": "https://registry.npmjs.org/cookie/-/cookie-1.0.2.tgz",
|
"resolved": "https://registry.npmjs.org/cookie/-/cookie-1.1.1.tgz",
|
||||||
"integrity": "sha512-9Kr/j4O16ISv8zBBhJoi4bXOYNTkFLOqSL3UDB0njXxCXNezjeyVrJyGOWtgfs/q2km1gwBcfH8q1yEGoMYunA==",
|
"integrity": "sha512-ei8Aos7ja0weRpFzJnEA9UHJ/7XQmqglbRwnf2ATjcB9Wq874VKH9kfjjirM6UhU2/E5fFYadylyhFldcqSidQ==",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=18"
|
"node": ">=18"
|
||||||
|
},
|
||||||
|
"funding": {
|
||||||
|
"type": "opencollective",
|
||||||
|
"url": "https://opencollective.com/express"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/cross-spawn": {
|
"node_modules/cross-spawn": {
|
||||||
@@ -2388,6 +2457,7 @@
|
|||||||
"integrity": "sha512-BhHmn2yNOFA9H9JmmIVKJmd288g9hrVRDkdoIgRCRuSySRUHH7r/DI6aAXW9T1WwUuY3DFgrcaqB+deURBLR5g==",
|
"integrity": "sha512-BhHmn2yNOFA9H9JmmIVKJmd288g9hrVRDkdoIgRCRuSySRUHH7r/DI6aAXW9T1WwUuY3DFgrcaqB+deURBLR5g==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
"peer": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@eslint-community/eslint-utils": "^4.8.0",
|
"@eslint-community/eslint-utils": "^4.8.0",
|
||||||
"@eslint-community/regexpp": "^4.12.1",
|
"@eslint-community/regexpp": "^4.12.1",
|
||||||
@@ -3267,9 +3337,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/minizlib": {
|
"node_modules/minizlib": {
|
||||||
"version": "3.0.2",
|
"version": "3.1.0",
|
||||||
"resolved": "https://registry.npmjs.org/minizlib/-/minizlib-3.0.2.tgz",
|
"resolved": "https://registry.npmjs.org/minizlib/-/minizlib-3.1.0.tgz",
|
||||||
"integrity": "sha512-oG62iEk+CYt5Xj2YqI5Xi9xWUeZhDI8jjQmC5oThVH5JGCTgIjr7ciJDzC7MBzYd//WvR1OTmP5Q38Q8ShQtVA==",
|
"integrity": "sha512-KZxYo1BUkWD2TVFLr0MQoM8vUUigWD3LlD83a/75BqC+4qE0Hb1Vo5v1FgcfaNXvfXzr+5EhQ6ing/CaBijTlw==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
@@ -3279,22 +3349,6 @@
|
|||||||
"node": ">= 18"
|
"node": ">= 18"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/mkdirp": {
|
|
||||||
"version": "3.0.1",
|
|
||||||
"resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-3.0.1.tgz",
|
|
||||||
"integrity": "sha512-+NsyUUAZDmo6YVHzL/stxSu3t9YS1iljliy3BSDrXJ/dkn1KYdmtZODGGjLcc9XLgVVpH4KshHB8XmZgMhaBXg==",
|
|
||||||
"dev": true,
|
|
||||||
"license": "MIT",
|
|
||||||
"bin": {
|
|
||||||
"mkdirp": "dist/cjs/src/bin.js"
|
|
||||||
},
|
|
||||||
"engines": {
|
|
||||||
"node": ">=10"
|
|
||||||
},
|
|
||||||
"funding": {
|
|
||||||
"url": "https://github.com/sponsors/isaacs"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/ms": {
|
"node_modules/ms": {
|
||||||
"version": "2.1.3",
|
"version": "2.1.3",
|
||||||
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
|
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
|
||||||
@@ -3513,6 +3567,7 @@
|
|||||||
"resolved": "https://registry.npmjs.org/react/-/react-19.1.0.tgz",
|
"resolved": "https://registry.npmjs.org/react/-/react-19.1.0.tgz",
|
||||||
"integrity": "sha512-FS+XFBNvn3GTAWq26joslQgWNoFu08F4kl0J4CgdNKADkdSGXQyTCnKteIAJy96Br6YbpEU1LSzV5dYtjMkMDg==",
|
"integrity": "sha512-FS+XFBNvn3GTAWq26joslQgWNoFu08F4kl0J4CgdNKADkdSGXQyTCnKteIAJy96Br6YbpEU1LSzV5dYtjMkMDg==",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
"peer": true,
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=0.10.0"
|
"node": ">=0.10.0"
|
||||||
}
|
}
|
||||||
@@ -3522,6 +3577,7 @@
|
|||||||
"resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.1.0.tgz",
|
"resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.1.0.tgz",
|
||||||
"integrity": "sha512-Xs1hdnE+DyKgeHJeJznQmYMIBG3TKIHJJT95Q58nHLSrElKlGQqDTR2HQ9fx5CN/Gk6Vh/kupBTDLU11/nDk/g==",
|
"integrity": "sha512-Xs1hdnE+DyKgeHJeJznQmYMIBG3TKIHJJT95Q58nHLSrElKlGQqDTR2HQ9fx5CN/Gk6Vh/kupBTDLU11/nDk/g==",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
"peer": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"scheduler": "^0.26.0"
|
"scheduler": "^0.26.0"
|
||||||
},
|
},
|
||||||
@@ -3559,9 +3615,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/react-router": {
|
"node_modules/react-router": {
|
||||||
"version": "7.6.2",
|
"version": "7.12.0",
|
||||||
"resolved": "https://registry.npmjs.org/react-router/-/react-router-7.6.2.tgz",
|
"resolved": "https://registry.npmjs.org/react-router/-/react-router-7.12.0.tgz",
|
||||||
"integrity": "sha512-U7Nv3y+bMimgWjhlT5CRdzHPu2/KVmqPwKUCChW8en5P3znxUqwlYFlbmyj8Rgp1SF6zs5X4+77kBVknkg6a0w==",
|
"integrity": "sha512-kTPDYPFzDVGIIGNLS5VJykK0HfHLY5MF3b+xj0/tTyNYL1gF1qs7u67Z9jEhQk2sQ98SUaHxlG31g1JtF7IfVw==",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"cookie": "^1.0.1",
|
"cookie": "^1.0.1",
|
||||||
@@ -3581,12 +3637,12 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/react-router-dom": {
|
"node_modules/react-router-dom": {
|
||||||
"version": "7.6.2",
|
"version": "7.12.0",
|
||||||
"resolved": "https://registry.npmjs.org/react-router-dom/-/react-router-dom-7.6.2.tgz",
|
"resolved": "https://registry.npmjs.org/react-router-dom/-/react-router-dom-7.12.0.tgz",
|
||||||
"integrity": "sha512-Q8zb6VlTbdYKK5JJBLQEN06oTUa/RAbG/oQS1auK1I0TbJOXktqm+QENEVJU6QvWynlXPRBXI3fiOQcSEA78rA==",
|
"integrity": "sha512-pfO9fiBcpEfX4Tx+iTYKDtPbrSLLCbwJ5EqP+SPYQu1VYCXdy79GSj0wttR0U4cikVdlImZuEZ/9ZNCgoaxwBA==",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"react-router": "7.6.2"
|
"react-router": "7.12.0"
|
||||||
},
|
},
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=20.0.0"
|
"node": ">=20.0.0"
|
||||||
@@ -3705,9 +3761,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/set-cookie-parser": {
|
"node_modules/set-cookie-parser": {
|
||||||
"version": "2.7.1",
|
"version": "2.7.2",
|
||||||
"resolved": "https://registry.npmjs.org/set-cookie-parser/-/set-cookie-parser-2.7.1.tgz",
|
"resolved": "https://registry.npmjs.org/set-cookie-parser/-/set-cookie-parser-2.7.2.tgz",
|
||||||
"integrity": "sha512-IOc8uWeOZgnb3ptbCURJWNjWUPcO3ZnTTdzsurqERrP6nPyv+paC55vJM0LpOlT2ne+Ix+9+CRG1MNLlyZ4GjQ==",
|
"integrity": "sha512-oeM1lpU/UvhTxw+g3cIfxXHyJRc/uidd3yK1P242gzHds0udQBYzs3y8j4gCCW+ZJ7ad0yctld8RYO+bdurlvw==",
|
||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
},
|
},
|
||||||
"node_modules/shebang-command": {
|
"node_modules/shebang-command": {
|
||||||
@@ -3787,17 +3843,16 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/tar": {
|
"node_modules/tar": {
|
||||||
"version": "7.4.3",
|
"version": "7.5.7",
|
||||||
"resolved": "https://registry.npmjs.org/tar/-/tar-7.4.3.tgz",
|
"resolved": "https://registry.npmjs.org/tar/-/tar-7.5.7.tgz",
|
||||||
"integrity": "sha512-5S7Va8hKfV7W5U6g3aYxXmlPoZVAwUMy9AOKyF2fVuZa2UD3qZjg578OrLRt8PcNN1PleVaL/5/yYATNL0ICUw==",
|
"integrity": "sha512-fov56fJiRuThVFXD6o6/Q354S7pnWMJIVlDBYijsTNx6jKSE4pvrDTs6lUnmGvNyfJwFQQwWy3owKz1ucIhveQ==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "ISC",
|
"license": "BlueOak-1.0.0",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@isaacs/fs-minipass": "^4.0.0",
|
"@isaacs/fs-minipass": "^4.0.0",
|
||||||
"chownr": "^3.0.0",
|
"chownr": "^3.0.0",
|
||||||
"minipass": "^7.1.2",
|
"minipass": "^7.1.2",
|
||||||
"minizlib": "^3.0.1",
|
"minizlib": "^3.1.0",
|
||||||
"mkdirp": "^3.0.1",
|
|
||||||
"yallist": "^5.0.0"
|
"yallist": "^5.0.0"
|
||||||
},
|
},
|
||||||
"engines": {
|
"engines": {
|
||||||
@@ -3852,6 +3907,7 @@
|
|||||||
"integrity": "sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==",
|
"integrity": "sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
"peer": true,
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=12"
|
"node": ">=12"
|
||||||
},
|
},
|
||||||
@@ -3904,6 +3960,7 @@
|
|||||||
"integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==",
|
"integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "Apache-2.0",
|
"license": "Apache-2.0",
|
||||||
|
"peer": true,
|
||||||
"bin": {
|
"bin": {
|
||||||
"tsc": "bin/tsc",
|
"tsc": "bin/tsc",
|
||||||
"tsserver": "bin/tsserver"
|
"tsserver": "bin/tsserver"
|
||||||
@@ -3982,6 +4039,7 @@
|
|||||||
"integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==",
|
"integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
"peer": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"esbuild": "^0.25.0",
|
"esbuild": "^0.25.0",
|
||||||
"fdir": "^6.4.4",
|
"fdir": "^6.4.4",
|
||||||
@@ -4072,6 +4130,7 @@
|
|||||||
"integrity": "sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==",
|
"integrity": "sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
"peer": true,
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=12"
|
"node": ">=12"
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -14,7 +14,7 @@
|
|||||||
"react-dom": "^19.1.0",
|
"react-dom": "^19.1.0",
|
||||||
"react-icons": "^5.5.0",
|
"react-icons": "^5.5.0",
|
||||||
"react-resizable-panels": "^3.0.4",
|
"react-resizable-panels": "^3.0.4",
|
||||||
"react-router-dom": "^7.6.2"
|
"react-router-dom": "^7.12.0"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@eslint/js": "^9.25.0",
|
"@eslint/js": "^9.25.0",
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ export interface Model {
|
|||||||
name: string;
|
name: string;
|
||||||
description: string;
|
description: string;
|
||||||
unlisted: boolean;
|
unlisted: boolean;
|
||||||
|
peerID: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
interface APIProviderType {
|
interface APIProviderType {
|
||||||
@@ -70,7 +71,7 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
|
|||||||
const [versionInfo, setVersionInfo] = useState<VersionInfo>({
|
const [versionInfo, setVersionInfo] = useState<VersionInfo>({
|
||||||
build_date: "unknown",
|
build_date: "unknown",
|
||||||
commit: "unknown",
|
commit: "unknown",
|
||||||
version: "unknown"
|
version: "unknown",
|
||||||
});
|
});
|
||||||
//const apiEventSource = useRef<EventSource | null>(null);
|
//const apiEventSource = useRef<EventSource | null>(null);
|
||||||
|
|
||||||
@@ -166,7 +167,7 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
|
|||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
// fetch version
|
// fetch version
|
||||||
const fetchVersion = async () => {
|
const fetchVersion = async () => {
|
||||||
try {
|
try {
|
||||||
const response = await fetch("/api/version");
|
const response = await fetch("/api/version");
|
||||||
@@ -180,7 +181,7 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
if (connectionStatus === 'connected') {
|
if (connectionStatus === "connected") {
|
||||||
fetchVersion();
|
fetchVersion();
|
||||||
}
|
}
|
||||||
}, [connectionStatus]);
|
}, [connectionStatus]);
|
||||||
@@ -265,7 +266,19 @@ export function APIProvider({ children, autoStartAPIEvents = true }: APIProvider
|
|||||||
connectionStatus,
|
connectionStatus,
|
||||||
versionInfo,
|
versionInfo,
|
||||||
}),
|
}),
|
||||||
[models, listModels, unloadAllModels, unloadSingleModel, loadModel, enableAPIEvents, proxyLogs, upstreamLogs, metrics, connectionStatus, versionInfo]
|
[
|
||||||
|
models,
|
||||||
|
listModels,
|
||||||
|
unloadAllModels,
|
||||||
|
unloadSingleModel,
|
||||||
|
loadModel,
|
||||||
|
enableAPIEvents,
|
||||||
|
proxyLogs,
|
||||||
|
upstreamLogs,
|
||||||
|
metrics,
|
||||||
|
connectionStatus,
|
||||||
|
versionInfo,
|
||||||
|
]
|
||||||
);
|
);
|
||||||
|
|
||||||
return <APIContext.Provider value={value}>{children}</APIContext.Provider>;
|
return <APIContext.Provider value={value}>{children}</APIContext.Provider>;
|
||||||
|
|||||||
@@ -44,8 +44,24 @@ function ModelsPanel() {
|
|||||||
const [showIdorName, setShowIdorName] = usePersistentState<"id" | "name">("showIdorName", "id"); // true = show ID, false = show name
|
const [showIdorName, setShowIdorName] = usePersistentState<"id" | "name">("showIdorName", "id"); // true = show ID, false = show name
|
||||||
const [menuOpen, setMenuOpen] = useState(false);
|
const [menuOpen, setMenuOpen] = useState(false);
|
||||||
|
|
||||||
const filteredModels = useMemo(() => {
|
const { regularModels, peerModelsByPeerId } = useMemo(() => {
|
||||||
return models.filter((model) => showUnlisted || !model.unlisted);
|
const filtered = models.filter((model) => showUnlisted || !model.unlisted);
|
||||||
|
const peerModels = filtered.filter((m) => m.peerID);
|
||||||
|
|
||||||
|
// Group peer models by peerID
|
||||||
|
const grouped = peerModels.reduce((acc, model) => {
|
||||||
|
const peerId = model.peerID || "unknown";
|
||||||
|
if (!acc[peerId]) {
|
||||||
|
acc[peerId] = [];
|
||||||
|
}
|
||||||
|
acc[peerId].push(model);
|
||||||
|
return acc;
|
||||||
|
}, {} as Record<string, typeof peerModels>);
|
||||||
|
|
||||||
|
return {
|
||||||
|
regularModels: filtered.filter((m) => !m.peerID),
|
||||||
|
peerModelsByPeerId: grouped,
|
||||||
|
};
|
||||||
}, [models, showUnlisted]);
|
}, [models, showUnlisted]);
|
||||||
|
|
||||||
const handleUnloadAllModels = useCallback(async () => {
|
const handleUnloadAllModels = useCallback(async () => {
|
||||||
@@ -151,7 +167,7 @@ function ModelsPanel() {
|
|||||||
</tr>
|
</tr>
|
||||||
</thead>
|
</thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
{filteredModels.map((model) => (
|
{regularModels.map((model) => (
|
||||||
<tr key={model.id} className="border-b hover:bg-secondary-hover border-gray-200">
|
<tr key={model.id} className="border-b hover:bg-secondary-hover border-gray-200">
|
||||||
<td className={`${model.unlisted ? "text-txtsecondary" : ""}`}>
|
<td className={`${model.unlisted ? "text-txtsecondary" : ""}`}>
|
||||||
<a href={`/upstream/${model.id}/`} className="font-semibold" target="_blank">
|
<a href={`/upstream/${model.id}/`} className="font-semibold" target="_blank">
|
||||||
@@ -186,6 +202,34 @@ function ModelsPanel() {
|
|||||||
))}
|
))}
|
||||||
</tbody>
|
</tbody>
|
||||||
</table>
|
</table>
|
||||||
|
|
||||||
|
{Object.keys(peerModelsByPeerId).length > 0 && (
|
||||||
|
<>
|
||||||
|
<h3 className="mt-8 mb-2">Peer Models</h3>
|
||||||
|
{Object.entries(peerModelsByPeerId)
|
||||||
|
.sort(([a], [b]) => a.localeCompare(b))
|
||||||
|
.map(([peerId, models]) => (
|
||||||
|
<div key={peerId} className="mb-4">
|
||||||
|
<table className="w-full">
|
||||||
|
<thead className="sticky top-0 bg-card z-10">
|
||||||
|
<tr className="text-left border-b border-gray-200 dark:border-white/10 bg-surface">
|
||||||
|
<th className="font-semibold">{peerId}</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{models.map((model) => (
|
||||||
|
<tr key={model.id} className="border-b hover:bg-secondary-hover border-gray-200">
|
||||||
|
<td className={`pl-8 ${model.unlisted ? "text-txtsecondary" : ""}`}>
|
||||||
|
<span>{model.id}</span>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
))}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
</>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
@@ -223,11 +267,7 @@ function TokenHistogram({ data }: { data: HistogramData }) {
|
|||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="mt-2 w-full">
|
<div className="mt-2 w-full">
|
||||||
<svg
|
<svg viewBox={`0 0 ${viewBoxWidth} ${height}`} className="w-full h-auto" preserveAspectRatio="xMidYMid meet">
|
||||||
viewBox={`0 0 ${viewBoxWidth} ${height}`}
|
|
||||||
className="w-full h-auto"
|
|
||||||
preserveAspectRatio="xMidYMid meet"
|
|
||||||
>
|
|
||||||
{/* Y-axis */}
|
{/* Y-axis */}
|
||||||
<line
|
<line
|
||||||
x1={padding.left}
|
x1={padding.left}
|
||||||
@@ -312,14 +352,7 @@ function TokenHistogram({ data }: { data: HistogramData }) {
|
|||||||
/>
|
/>
|
||||||
|
|
||||||
{/* X-axis labels */}
|
{/* X-axis labels */}
|
||||||
<text
|
<text x={padding.left} y={height - 5} fontSize="10" fill="currentColor" opacity="0.6" textAnchor="start">
|
||||||
x={padding.left}
|
|
||||||
y={height - 5}
|
|
||||||
fontSize="10"
|
|
||||||
fill="currentColor"
|
|
||||||
opacity="0.6"
|
|
||||||
textAnchor="start"
|
|
||||||
>
|
|
||||||
{min.toFixed(1)}
|
{min.toFixed(1)}
|
||||||
</text>
|
</text>
|
||||||
|
|
||||||
|
|||||||