Compare commits
27 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 0c813e44d1 | |||
| fe71e8a6ea | |||
| aac7b8745a | |||
| 4e606feff0 | |||
| a4b91e08cf | |||
| 3e3646f9f9 | |||
| a01afe261b | |||
| 174e8562aa | |||
| 085b54bc88 | |||
| 2be3416baa | |||
| 7e3e94a08a | |||
| e261745c66 | |||
| 11b7913287 | |||
| c79114d40a | |||
| 430166d5eb | |||
| 5b4beaceef | |||
| fd3c28ffc5 | |||
| a846c4f18c | |||
| 5bae33a769 | |||
| 8f4ff01f93 | |||
| e8d4384cd2 | |||
| ce28485be2 | |||
| 3cd7837b1f | |||
| 0b31ccacc1 | |||
| 5938dbee8f | |||
| 66639e83f7 | |||
| 625b296720 |
@@ -11,7 +11,7 @@ jobs:
|
|||||||
issues: write
|
issues: write
|
||||||
pull-requests: write
|
pull-requests: write
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/stale@v9
|
- uses: actions/stale@b5d41d4e1d5dceea10e7104786b73624c18a190f #v10.2.0
|
||||||
with:
|
with:
|
||||||
days-before-issue-stale: 14
|
days-before-issue-stale: 14
|
||||||
days-before-issue-close: 14
|
days-before-issue-close: 14
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2
|
||||||
|
|
||||||
- name: Validate JSON Schema
|
- name: Validate JSON Schema
|
||||||
run: |
|
run: |
|
||||||
@@ -45,7 +45,7 @@ jobs:
|
|||||||
echo "✓ config-schema.json is valid"
|
echo "✓ config-schema.json is valid"
|
||||||
|
|
||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
uses: actions/setup-python@v5
|
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 #v6.2.0
|
||||||
with:
|
with:
|
||||||
python-version: "3.x"
|
python-version: "3.x"
|
||||||
|
|
||||||
|
|||||||
@@ -9,6 +9,11 @@ on:
|
|||||||
|
|
||||||
# Allows manual triggering of the workflow
|
# Allows manual triggering of the workflow
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
inputs:
|
||||||
|
dryrun:
|
||||||
|
description: "Run cleanup step in dry-run mode (log what would be deleted, delete nothing)"
|
||||||
|
type: boolean
|
||||||
|
default: false
|
||||||
|
|
||||||
# Run on workflow file changes (without pushing)
|
# Run on workflow file changes (without pushing)
|
||||||
push:
|
push:
|
||||||
@@ -33,7 +38,7 @@ jobs:
|
|||||||
fail-fast: false
|
fail-fast: false
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2
|
||||||
|
|
||||||
- name: Free up disk space
|
- name: Free up disk space
|
||||||
if: matrix.platform == 'rocm'
|
if: matrix.platform == 'rocm'
|
||||||
@@ -48,8 +53,18 @@ jobs:
|
|||||||
echo "After cleanup:"
|
echo "After cleanup:"
|
||||||
df -h
|
df -h
|
||||||
|
|
||||||
|
# QEMU enables arm64 cross-builds on the amd64 GitHub runner.
|
||||||
|
# Currently only the cpu backend goes multi-arch; the action is a
|
||||||
|
# no-op for amd64-only builds, so leaving it on for every matrix
|
||||||
|
# entry keeps the workflow simple.
|
||||||
|
- name: Set up QEMU
|
||||||
|
uses: docker/setup-qemu-action@ce360397dd3f832beb865e1373c09c0e9f86d70a #v4.0.0
|
||||||
|
|
||||||
|
- name: Set up Docker Buildx
|
||||||
|
uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd #v4.0.0
|
||||||
|
|
||||||
- name: Log in to GitHub Container Registry
|
- name: Log in to GitHub Container Registry
|
||||||
uses: docker/login-action@v2
|
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 #v4.1.0
|
||||||
with:
|
with:
|
||||||
registry: ghcr.io
|
registry: ghcr.io
|
||||||
username: ${{ github.actor }}
|
username: ${{ github.actor }}
|
||||||
@@ -60,14 +75,23 @@ jobs:
|
|||||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
run: ./docker/build-container.sh ${{ matrix.platform }} ${{ github.event_name != 'push' }}
|
run: ./docker/build-container.sh ${{ matrix.platform }} ${{ github.event_name != 'push' }}
|
||||||
|
|
||||||
# note make sure mostlygeek/llama-swap has admin rights to the llama-swap package
|
# actions/delete-package-versions can't see manifest lists: pushing
|
||||||
# see: https://github.com/actions/delete-package-versions/issues/74
|
# a multi-arch image with `docker buildx --push` creates a tagged OCI
|
||||||
|
# index plus one untagged per-platform manifest per arch, and
|
||||||
|
# `delete-only-untagged-versions: true` then nukes the per-platform
|
||||||
|
# children, leaving the index dangling — `docker pull :cpu` 404s on
|
||||||
|
# the referenced digest. dataaxiom/ghcr-cleanup-action walks tagged
|
||||||
|
# manifest lists and excludes their children from deletion.
|
||||||
delete-untagged-containers:
|
delete-untagged-containers:
|
||||||
needs: build-and-push
|
needs: build-and-push
|
||||||
|
# Skip on forks — the delete API requires package-admin on the
|
||||||
|
# upstream account and would otherwise red-x every fork CI run.
|
||||||
|
if: github.repository == 'mostlygeek/llama-swap'
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/delete-package-versions@v5
|
- uses: dataaxiom/ghcr-cleanup-action@cd0cdb900b5dbf3a6f2cc869f0dbb0b8211f50c4 # v1.0.16
|
||||||
with:
|
with:
|
||||||
package-name: 'llama-swap'
|
token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
package-type: 'container'
|
package: llama-swap
|
||||||
delete-only-untagged-versions: 'true'
|
delete-untagged: true
|
||||||
|
dry-run: ${{ inputs.dryrun || false }}
|
||||||
|
|||||||
@@ -31,17 +31,17 @@ jobs:
|
|||||||
run-tests:
|
run-tests:
|
||||||
runs-on: windows-latest
|
runs-on: windows-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2
|
||||||
|
|
||||||
- name: Set up Go
|
- name: Set up Go
|
||||||
uses: actions/setup-go@v4
|
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c #6.4.0
|
||||||
with:
|
with:
|
||||||
go-version: '1.23'
|
go-version-file: go.mod
|
||||||
|
|
||||||
# cache simple-responder to save the build time
|
# cache simple-responder to save the build time
|
||||||
- name: Restore Simple Responder
|
- name: Restore Simple Responder
|
||||||
id: restore-simple-responder
|
id: restore-simple-responder
|
||||||
uses: actions/cache/restore@v4
|
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
|
||||||
with:
|
with:
|
||||||
path: ./build
|
path: ./build
|
||||||
key: ${{ runner.os }}-simple-responder-${{ hashFiles('cmd/simple-responder/simple-responder.go') }}
|
key: ${{ runner.os }}-simple-responder-${{ hashFiles('cmd/simple-responder/simple-responder.go') }}
|
||||||
@@ -56,11 +56,11 @@ jobs:
|
|||||||
# nothing new to save ... skip this step
|
# nothing new to save ... skip this step
|
||||||
if: steps.restore-simple-responder.outputs.cache-hit != 'true'
|
if: steps.restore-simple-responder.outputs.cache-hit != 'true'
|
||||||
id: save-simple-responder
|
id: save-simple-responder
|
||||||
uses: actions/cache/save@v4
|
uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
|
||||||
with:
|
with:
|
||||||
path: ./build
|
path: ./build
|
||||||
key: ${{ runner.os }}-simple-responder-${{ hashFiles('cmd/simple-responder/simple-responder.go') }}
|
key: ${{ runner.os }}-simple-responder-${{ hashFiles('cmd/simple-responder/simple-responder.go') }}
|
||||||
|
|
||||||
- name: Test all
|
- name: Test all
|
||||||
shell: bash
|
shell: bash
|
||||||
run: make test-all
|
run: make test-all
|
||||||
|
|||||||
@@ -30,37 +30,38 @@ jobs:
|
|||||||
run-tests:
|
run-tests:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2
|
||||||
|
|
||||||
- name: Set up Go
|
- name: Set up Go
|
||||||
uses: actions/setup-go@v4
|
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c #6.4.0
|
||||||
with:
|
with:
|
||||||
go-version-file: go.mod
|
go-version-file: go.mod
|
||||||
|
|
||||||
# Only run in this linux based runner
|
# Only run in this linux based runner
|
||||||
- name: Check Formatting
|
- name: Check Formatting
|
||||||
run: |
|
run: |
|
||||||
if [ "$(gofmt -l . | grep -v 'event/.*_test.go' | wc -l)" -gt 0 ]; then
|
if [ "$(gofmt -l . | wc -l)" -gt 0 ]; then
|
||||||
gofmt -l . | grep -v 'event/.*_test.go'
|
gofmt -l .
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
# cache simple-responder to save the build time
|
# cache simple-responder to save the build time
|
||||||
- name: Restore Simple Responder
|
- name: Restore Simple Responder
|
||||||
id: restore-simple-responder
|
id: restore-simple-responder
|
||||||
uses: actions/cache/restore@v4
|
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
|
||||||
with:
|
with:
|
||||||
path: ./build
|
path: ./build
|
||||||
key: ${{ runner.os }}-simple-responder-${{ hashFiles('cmd/simple-responder/simple-responder.go') }}
|
key: ${{ runner.os }}-simple-responder-${{ hashFiles('cmd/simple-responder/simple-responder.go') }}
|
||||||
|
|
||||||
# necessary for testing proxy/Process swapping
|
# necessary for testing proxy/Process swapping
|
||||||
- name: Create simple-responder
|
- name: Create simple-responder
|
||||||
|
if: steps.restore-simple-responder.outputs.cache-hit != 'true'
|
||||||
run: make simple-responder
|
run: make simple-responder
|
||||||
|
|
||||||
- name: Save Simple Responder
|
- name: Save Simple Responder
|
||||||
# nothing new to save ... skip this step
|
# nothing new to save ... skip this step
|
||||||
if: steps.restore-simple-responder.outputs.cache-hit != 'true'
|
if: steps.restore-simple-responder.outputs.cache-hit != 'true'
|
||||||
id: save-simple-responder
|
id: save-simple-responder
|
||||||
uses: actions/cache/save@v4
|
uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
|
||||||
with:
|
with:
|
||||||
path: ./build
|
path: ./build
|
||||||
key: ${{ runner.os }}-simple-responder-${{ hashFiles('cmd/simple-responder/simple-responder.go') }}
|
key: ${{ runner.os }}-simple-responder-${{ hashFiles('cmd/simple-responder/simple-responder.go') }}
|
||||||
|
|||||||
@@ -20,14 +20,16 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
ref: ${{ github.event.inputs.tag || github.ref }}
|
ref: ${{ github.event.inputs.tag || github.ref }}
|
||||||
- name: Set up Go
|
- name: Set up Go
|
||||||
uses: actions/setup-go@v5
|
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c #6.4.0
|
||||||
|
with:
|
||||||
|
go-version-file: go.mod
|
||||||
- name: Set up Node.js
|
- name: Set up Node.js
|
||||||
uses: actions/setup-node@v4
|
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # 6.4.0
|
||||||
with:
|
with:
|
||||||
node-version: "24"
|
node-version: "24"
|
||||||
- name: Install dependencies and build UI
|
- name: Install dependencies and build UI
|
||||||
@@ -37,7 +39,7 @@ jobs:
|
|||||||
npm run build
|
npm run build
|
||||||
|
|
||||||
- name: Run GoReleaser
|
- name: Run GoReleaser
|
||||||
uses: goreleaser/goreleaser-action@v6
|
uses: goreleaser/goreleaser-action@1a80836c5c9d9e5755a25cb59ec6f45a3b5f41a8 #7.2.1
|
||||||
with:
|
with:
|
||||||
# either 'goreleaser' (default) or 'goreleaser-pro'
|
# either 'goreleaser' (default) or 'goreleaser-pro'
|
||||||
distribution: goreleaser
|
distribution: goreleaser
|
||||||
@@ -61,7 +63,7 @@ jobs:
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
- name: "Trigger tap repository update"
|
- name: "Trigger tap repository update"
|
||||||
uses: peter-evans/repository-dispatch@v2
|
uses: peter-evans/repository-dispatch@28959ce8df70de7be546dd1250a005dd32156697 #4.0.1
|
||||||
with:
|
with:
|
||||||
token: ${{ secrets.TAP_REPO_PAT }}
|
token: ${{ secrets.TAP_REPO_PAT }}
|
||||||
repository: mostlygeek/homebrew-llama-swap
|
repository: mostlygeek/homebrew-llama-swap
|
||||||
|
|||||||
@@ -19,24 +19,15 @@ jobs:
|
|||||||
|
|
||||||
run-tests:
|
run-tests:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
defaults:
|
|
||||||
run:
|
|
||||||
working-directory: ui-svelte
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2
|
||||||
|
|
||||||
- name: Set up Node.js
|
- name: Set up Node.js
|
||||||
uses: actions/setup-node@v4
|
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # 6.4.0
|
||||||
with:
|
with:
|
||||||
node-version: '24'
|
node-version: '24'
|
||||||
cache: 'npm'
|
cache: 'npm'
|
||||||
cache-dependency-path: ui-svelte/package-lock.json
|
cache-dependency-path: ui-svelte/package-lock.json
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Run UI tests
|
||||||
run: npm ci
|
run: make test-ui
|
||||||
|
|
||||||
- name: Type check
|
|
||||||
run: npm run check
|
|
||||||
|
|
||||||
- name: Run tests
|
|
||||||
run: npm test
|
|
||||||
|
|||||||
@@ -75,7 +75,7 @@ jobs:
|
|||||||
backend: ${{ fromJSON(needs.setup.outputs.matrix) }}
|
backend: ${{ fromJSON(needs.setup.outputs.matrix) }}
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2
|
||||||
|
|
||||||
- name: Free up disk space
|
- name: Free up disk space
|
||||||
run: |
|
run: |
|
||||||
@@ -94,11 +94,11 @@ jobs:
|
|||||||
# llama-swap-builder (which has ccache warm) to avoid exhausting disk.
|
# llama-swap-builder (which has ccache warm) to avoid exhausting disk.
|
||||||
- name: Set up Docker Buildx
|
- name: Set up Docker Buildx
|
||||||
if: ${{ !env.ACT }}
|
if: ${{ !env.ACT }}
|
||||||
uses: docker/setup-buildx-action@v3
|
uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd #v4.0.0
|
||||||
|
|
||||||
- name: Log in to GitHub Container Registry
|
- name: Log in to GitHub Container Registry
|
||||||
if: ${{ !env.ACT }}
|
if: ${{ !env.ACT }}
|
||||||
uses: docker/login-action@v3
|
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 #v4.1.0
|
||||||
with:
|
with:
|
||||||
registry: ghcr.io
|
registry: ghcr.io
|
||||||
username: ${{ github.actor }}
|
username: ${{ github.actor }}
|
||||||
@@ -121,7 +121,7 @@ jobs:
|
|||||||
docker/unified/build-image.sh --${{ matrix.backend }}
|
docker/unified/build-image.sh --${{ matrix.backend }}
|
||||||
|
|
||||||
- name: Push to GitHub Container Registry
|
- name: Push to GitHub Container Registry
|
||||||
if: ${{ !env.ACT && inputs.push_to_ghcr == true }}
|
if: ${{ !env.ACT && (github.event_name == 'schedule' || inputs.push_to_ghcr == true) }}
|
||||||
run: |
|
run: |
|
||||||
BASE_TAG="ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }}"
|
BASE_TAG="ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }}"
|
||||||
DATE_TAG=$(date -u +%Y-%m-%d)
|
DATE_TAG=$(date -u +%Y-%m-%d)
|
||||||
|
|||||||
@@ -24,6 +24,7 @@ llama-swap is a light weight, transparent proxy server that provides automatic m
|
|||||||
- Run `gofmt -l .` before committing to verify formatting. Fix any reported files with `gofmt -w <file>`.
|
- Run `gofmt -l .` before committing to verify formatting. Fix any reported files with `gofmt -w <file>`.
|
||||||
- Use `make test-dev` after running new tests for a quick over all test run. This runs `go test` and `staticcheck`. Fix any static checking errors. Use this only when changes are made to any code under the `proxy/` directory
|
- Use `make test-dev` after running new tests for a quick over all test run. This runs `go test` and `staticcheck`. Fix any static checking errors. Use this only when changes are made to any code under the `proxy/` directory
|
||||||
- Use `make test-all` before completing work. This includes long running concurrency tests.
|
- Use `make test-all` before completing work. This includes long running concurrency tests.
|
||||||
|
- Use `make test-ui` after making changes to the UI in ui-svelte/
|
||||||
|
|
||||||
### Commit message example format:
|
### Commit message example format:
|
||||||
|
|
||||||
|
|||||||
@@ -25,15 +25,15 @@ proxy/ui_dist/placeholder.txt:
|
|||||||
|
|
||||||
# use cached test results while developing
|
# use cached test results while developing
|
||||||
test-dev: proxy/ui_dist/placeholder.txt
|
test-dev: proxy/ui_dist/placeholder.txt
|
||||||
go test -short ./proxy/...
|
go test -short ./proxy/... ./internal/...
|
||||||
staticcheck ./proxy/... || true
|
staticcheck ./proxy/... ./internal/... || true
|
||||||
|
|
||||||
test: proxy/ui_dist/placeholder.txt
|
test: proxy/ui_dist/placeholder.txt
|
||||||
go test -short -count=1 ./proxy/...
|
go test -short -count=1 ./proxy/... ./internal/...
|
||||||
|
|
||||||
# for CI - full test (takes longer)
|
# for CI - full test (takes longer)
|
||||||
test-all: proxy/ui_dist/placeholder.txt
|
test-all: proxy/ui_dist/placeholder.txt
|
||||||
go test -race -count=1 ./proxy/...
|
go test -race -count=1 ./proxy/... ./internal/...
|
||||||
|
|
||||||
ui/node_modules:
|
ui/node_modules:
|
||||||
cd ui-svelte && npm install
|
cd ui-svelte && npm install
|
||||||
@@ -97,6 +97,9 @@ wol-proxy: $(BUILD_DIR)
|
|||||||
@echo "Building wol-proxy"
|
@echo "Building wol-proxy"
|
||||||
go build -o $(BUILD_DIR)/wol-proxy-$(GOOS)-$(GOARCH)-$(shell date +%Y-%m-%d) cmd/wol-proxy/wol-proxy.go
|
go build -o $(BUILD_DIR)/wol-proxy-$(GOOS)-$(GOARCH)-$(shell date +%Y-%m-%d) cmd/wol-proxy/wol-proxy.go
|
||||||
|
|
||||||
|
test-ui:
|
||||||
|
cd ui-svelte && npm ci && npm run check && npm test
|
||||||
|
|
||||||
# Phony targets
|
# Phony targets
|
||||||
.PHONY: all clean ui mac windows simple-responder simple-responder-windows test test-all test-dev wol-proxy
|
.PHONY: all clean ui mac windows simple-responder simple-responder-windows test test-all test-dev test-ui wol-proxy
|
||||||
.PHONE: linux linux-arm64 linux-amd64
|
.PHONE: linux linux-arm64 linux-amd64
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ Built in Go for performance and simplicity, llama-swap has zero dependencies and
|
|||||||
- `v1/chat/completions`
|
- `v1/chat/completions`
|
||||||
- `v1/responses`
|
- `v1/responses`
|
||||||
- `v1/embeddings`
|
- `v1/embeddings`
|
||||||
|
- `v1/models` - list available models
|
||||||
- `v1/audio/speech` ([#36](https://github.com/mostlygeek/llama-swap/issues/36))
|
- `v1/audio/speech` ([#36](https://github.com/mostlygeek/llama-swap/issues/36))
|
||||||
- `v1/audio/transcriptions` ([docs](https://github.com/mostlygeek/llama-swap/issues/41#issuecomment-2722637867))
|
- `v1/audio/transcriptions` ([docs](https://github.com/mostlygeek/llama-swap/issues/41#issuecomment-2722637867))
|
||||||
- `v1/audio/voices`
|
- `v1/audio/voices`
|
||||||
@@ -39,16 +40,26 @@ Built in Go for performance and simplicity, llama-swap has zero dependencies and
|
|||||||
- ✅ llama-swap API
|
- ✅ llama-swap API
|
||||||
- `/ui` - web UI
|
- `/ui` - web UI
|
||||||
- `/upstream/:model_id` - direct access to upstream server ([demo](https://github.com/mostlygeek/llama-swap/pull/31))
|
- `/upstream/:model_id` - direct access to upstream server ([demo](https://github.com/mostlygeek/llama-swap/pull/31))
|
||||||
- `/models/unload` - manually unload running models ([#58](https://github.com/mostlygeek/llama-swap/issues/58))
|
|
||||||
- `/running` - list currently running models ([#61](https://github.com/mostlygeek/llama-swap/issues/61))
|
- `/running` - list currently running models ([#61](https://github.com/mostlygeek/llama-swap/issues/61))
|
||||||
- `/log` - remote log monitoring
|
- `POST /api/models/unload` - manually unload all running models ([#58](https://github.com/mostlygeek/llama-swap/issues/58))
|
||||||
|
- `POST /api/models/unload/:model_id` - unload a specific model
|
||||||
|
- `/logs` - remote log monitoring
|
||||||
|
- `GET /logs` returns buffered plain text logs.
|
||||||
|
- If `Accept: text/html` is sent, `/logs` redirects to `/ui/`.
|
||||||
|
- `GET /logs/stream` keeps the connection open for live log streaming.
|
||||||
|
- Stream endpoints send buffered history first by default; add `?no-history` to stream only new lines.
|
||||||
|
- `GET /logs/stream/proxy` streams proxy logs only.
|
||||||
|
- `GET /logs/stream/upstream` streams upstream process logs only.
|
||||||
|
- `GET /logs/stream/{model_id}` streams logs for one model (including IDs with slashes, like `author/model`).
|
||||||
- `/health` - just returns "OK"
|
- `/health` - just returns "OK"
|
||||||
|
- `/metrics` - system and GPU metrics for prometheus
|
||||||
- ✅ API Key support - define keys to restrict access to API endpoints
|
- ✅ API Key support - define keys to restrict access to API endpoints
|
||||||
- ✅ Customizable
|
- ✅ Customizable
|
||||||
- Run concurrent models with a custom DSL swap matrix ([#643](https://github.com/mostlygeek/llama-swap/issues/643))
|
- Run concurrent models with a custom DSL swap matrix ([#643](https://github.com/mostlygeek/llama-swap/issues/643))
|
||||||
- Automatic unloading of models after timeout by setting a `ttl`
|
- Automatic unloading of models after timeout by setting a `ttl`
|
||||||
- Reliable Docker and Podman support using `cmd` and `cmdStop` together
|
- Docker and Podman support using `cmd` and `cmdStop` together
|
||||||
- Preload models on startup with `hooks` ([#235](https://github.com/mostlygeek/llama-swap/pull/235))
|
- Preload models on startup with `hooks` ([#235](https://github.com/mostlygeek/llama-swap/pull/235))
|
||||||
|
- Apply filters to requests to control inference with `stripParams`, `setParams` and `setParamsByID`
|
||||||
|
|
||||||
### Web UI
|
### Web UI
|
||||||
|
|
||||||
@@ -84,8 +95,24 @@ llama-swap can be installed in multiple ways
|
|||||||
|
|
||||||
### Docker Install ([download images](https://github.com/mostlygeek/llama-swap/pkgs/container/llama-swap))
|
### Docker Install ([download images](https://github.com/mostlygeek/llama-swap/pkgs/container/llama-swap))
|
||||||
|
|
||||||
Nightly container images with llama-swap and llama-server are built for multiple platforms (cuda, vulkan, intel, etc.) including [non-root variants with improved security](docs/container-security.md).
|
Two types of container images are built nightly for llama-swap:
|
||||||
The stable-diffusion.cpp server is also included for the musa and vulkan platforms.
|
|
||||||
|
1. A unified container with llama-server, ik-llama-server, stable-diffusion.cpp, whisper.cpp and llama-swap built from source. This is only available for cuda and vulkan but has more capabilities. This one is recommended for use.
|
||||||
|
2. A legacy image based on llama.cpp's images, with llama-swap copied into the container. Use this one if you prefer to stay close to llama.cpp's container images.
|
||||||
|
|
||||||
|
#### Unified container (Recommended)
|
||||||
|
|
||||||
|
```shell
|
||||||
|
$ docker pull ghcr.io/mostlygeek/llama-swap:unified-cuda
|
||||||
|
|
||||||
|
# run with a custom configuration and models directory
|
||||||
|
$ docker run -it --rm --runtime nvidia -p 9292:8080 \
|
||||||
|
-v /path/to/models:/models \
|
||||||
|
-v /path/to/custom/config.yaml:/etc/llama-swap/config/config.yaml \
|
||||||
|
ghcr.io/mostlygeek/llama-swap:unified-cuda
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Legacy container
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
$ docker pull ghcr.io/mostlygeek/llama-swap:cuda
|
$ docker pull ghcr.io/mostlygeek/llama-swap:cuda
|
||||||
@@ -95,14 +122,6 @@ $ docker run -it --rm --runtime nvidia -p 9292:8080 \
|
|||||||
-v /path/to/models:/models \
|
-v /path/to/models:/models \
|
||||||
-v /path/to/custom/config.yaml:/app/config.yaml \
|
-v /path/to/custom/config.yaml:/app/config.yaml \
|
||||||
ghcr.io/mostlygeek/llama-swap:cuda
|
ghcr.io/mostlygeek/llama-swap:cuda
|
||||||
|
|
||||||
# configuration hot reload supported with a
|
|
||||||
# directory volume mount
|
|
||||||
$ docker run -it --rm --runtime nvidia -p 9292:8080 \
|
|
||||||
-v /path/to/models:/models \
|
|
||||||
-v /path/to/custom/config.yaml:/app/config.yaml \
|
|
||||||
-v /path/to/config:/config \
|
|
||||||
ghcr.io/mostlygeek/llama-swap:cuda -config /config/config.yaml -watch-config
|
|
||||||
```
|
```
|
||||||
|
|
||||||
<details>
|
<details>
|
||||||
@@ -258,6 +277,6 @@ For Python based inference servers like vllm or tabbyAPI it is recommended to ru
|
|||||||
## Star History
|
## Star History
|
||||||
|
|
||||||
> [!NOTE]
|
> [!NOTE]
|
||||||
> ⭐️ Star this project to help others discover it!
|
> Thank you to everyone who has given this project a ⭐️!
|
||||||
|
|
||||||
[](https://www.star-history.com/#mostlygeek/llama-swap&Date)
|
[](https://www.star-history.com/#mostlygeek/llama-swap&Date)
|
||||||
|
|||||||
@@ -0,0 +1,92 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||||
|
"github.com/mostlygeek/llama-swap/internal/perf"
|
||||||
|
"github.com/mostlygeek/llama-swap/proxy/config"
|
||||||
|
)
|
||||||
|
|
||||||
|
func printSysStat(s perf.SysStat) {
|
||||||
|
cores := make([]string, len(s.CpuUtilPerCore))
|
||||||
|
for i, v := range s.CpuUtilPerCore {
|
||||||
|
cores[i] = fmt.Sprintf("%.1f%%", v)
|
||||||
|
}
|
||||||
|
fmt.Printf("[SYS %s]\n", s.Timestamp.Format("15:04:05"))
|
||||||
|
fmt.Printf(" CPU: %s\n", strings.Join(cores, " "))
|
||||||
|
fmt.Printf(" Mem: %d MB used / %d MB total (%d MB free)\n", s.MemUsedMB, s.MemTotalMB, s.MemFreeMB)
|
||||||
|
fmt.Printf(" Swap: %d MB used / %d MB total\n", s.SwapUsedMB, s.SwapTotalMB)
|
||||||
|
fmt.Printf(" Load: %.2f %.2f %.2f (1m 5m 15m)\n", s.LoadAvg1, s.LoadAvg5, s.LoadAvg15)
|
||||||
|
}
|
||||||
|
|
||||||
|
func printGpuStats(gpus []perf.GpuStat) {
|
||||||
|
for _, g := range gpus {
|
||||||
|
fmt.Printf("[GPU %d %s]\n", g.ID, g.Name)
|
||||||
|
fmt.Printf(" Util: GPU %.1f%% Mem %.1f%%\n", g.GpuUtilPct, g.MemUtilPct)
|
||||||
|
fmt.Printf(" Mem: %d MB used / %d MB total\n", g.MemUsedMB, g.MemTotalMB)
|
||||||
|
fmt.Printf(" Temp: %d°C Fan: %.1f%% Power: %.1f W\n", g.TempC, g.FanSpeedPct, g.PowerDrawW)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
stream := flag.Bool("stream", false, "stream stats")
|
||||||
|
interval := flag.Duration("t", time.Second, "polling interval (clamped to 1s–1h)")
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
every := *interval
|
||||||
|
if every < time.Second {
|
||||||
|
every = time.Second
|
||||||
|
} else if every > time.Hour {
|
||||||
|
every = time.Hour
|
||||||
|
}
|
||||||
|
|
||||||
|
l := logmon.New()
|
||||||
|
l.SetLogLevel(logmon.LevelDebug)
|
||||||
|
|
||||||
|
s, err := perf.ReadSysStats()
|
||||||
|
if err != nil && err != perf.ErrNotImplemented {
|
||||||
|
fmt.Println("Sys Error:", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
printSysStat(s)
|
||||||
|
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
gpuCh, err := perf.GetGpuStats(ctx, every, l)
|
||||||
|
if err != nil && !errors.Is(err, perf.ErrNotImplemented) && !errors.Is(err, perf.ErrNoGpuTool) {
|
||||||
|
fmt.Println("GPU Init Error:", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if gpuCh != nil {
|
||||||
|
select {
|
||||||
|
case g := <-gpuCh:
|
||||||
|
printGpuStats(g)
|
||||||
|
case <-ctx.Done():
|
||||||
|
fmt.Println("GPU: timed out waiting for stats")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if *stream {
|
||||||
|
m, _ := perf.New(config.PerformanceConfig{Every: every}, l)
|
||||||
|
m.Start()
|
||||||
|
defer m.Stop()
|
||||||
|
sysCh, gpuCh, unsub := m.Subscribe()
|
||||||
|
defer unsub()
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case s := <-sysCh:
|
||||||
|
printSysStat(s)
|
||||||
|
case g := <-gpuCh:
|
||||||
|
printGpuStats(g)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
+20
-1
@@ -142,6 +142,25 @@
|
|||||||
"default": 5,
|
"default": 5,
|
||||||
"description": "Size in megabytes of the buffer for storing request/response captures. Set to 0 to disable captures."
|
"description": "Size in megabytes of the buffer for storing request/response captures. Set to 0 to disable captures."
|
||||||
},
|
},
|
||||||
|
"performance": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"disabled": {
|
||||||
|
"type": "boolean",
|
||||||
|
"default": false,
|
||||||
|
"description": "Disable system performance monitoring."
|
||||||
|
},
|
||||||
|
"every": {
|
||||||
|
"type": "string",
|
||||||
|
"pattern": "^[-+]?(\\d+(\\.\\d+)?(ns|us|ms|s|m|h))+$",
|
||||||
|
"default": "15s",
|
||||||
|
"description": "Delay between polling for new performance statistics. Minimum duration is 1s. Lower values use more RAM as stats are kept in memory."
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"default": {},
|
||||||
|
"description": "Configuration for CPU, RAM and GPU monitoring statistics."
|
||||||
|
},
|
||||||
"startPort": {
|
"startPort": {
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"default": 5800,
|
"default": 5800,
|
||||||
@@ -517,4 +536,4 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
+16
-4
@@ -55,6 +55,18 @@ metricsMaxInMemory: 1000
|
|||||||
# - set to 0 to disable
|
# - set to 0 to disable
|
||||||
captureBuffer: 15
|
captureBuffer: 15
|
||||||
|
|
||||||
|
# performance: configuration for system monitoring statistics
|
||||||
|
# - timing values are duration strings like 1s, 1h30m, 90m, 2h10s, etc.
|
||||||
|
performance:
|
||||||
|
# disabled: boolean
|
||||||
|
# - default: false
|
||||||
|
disabled: false
|
||||||
|
|
||||||
|
# every: delay between polling for new performance statistics
|
||||||
|
# - default: 15s
|
||||||
|
# - minimum duration 1s
|
||||||
|
every: 15s
|
||||||
|
|
||||||
# startPort: sets the starting port number for the automatic ${PORT} macro.
|
# startPort: sets the starting port number for the automatic ${PORT} macro.
|
||||||
# - optional, default: 5800
|
# - optional, default: 5800
|
||||||
# - the ${PORT} macro can be used in model.cmd and model.proxy settings
|
# - the ${PORT} macro can be used in model.cmd and model.proxy settings
|
||||||
@@ -96,8 +108,7 @@ globalTTL: 0
|
|||||||
macros:
|
macros:
|
||||||
# Example of a multi-line macro
|
# Example of a multi-line macro
|
||||||
"latest-llama": >
|
"latest-llama": >
|
||||||
/path/to/llama-server/llama-server-ec9e0301
|
/path/to/llama-server/llama-server-ec9e0301 --port ${PORT}
|
||||||
--port ${PORT}
|
|
||||||
|
|
||||||
"default_ctx": 4096
|
"default_ctx": 4096
|
||||||
|
|
||||||
@@ -257,7 +268,8 @@ models:
|
|||||||
|
|
||||||
# the ${temp} macro will remain a float
|
# the ${temp} macro will remain a float
|
||||||
temperature: ${temp}
|
temperature: ${temp}
|
||||||
note: "The ${MODEL_ID} is running on port ${PORT} temp=${temp}, context=${default_ctx}"
|
note: "The ${MODEL_ID} is running on port ${PORT} temp=${temp},
|
||||||
|
context=${default_ctx}"
|
||||||
|
|
||||||
a_list:
|
a_list:
|
||||||
- 1
|
- 1
|
||||||
@@ -269,7 +281,7 @@ models:
|
|||||||
b: 2
|
b: 2
|
||||||
# objects can contain complex types with macro substitution
|
# objects can contain complex types with macro substitution
|
||||||
# becomes: c: [0.7, false, "model: llama"]
|
# becomes: c: [0.7, false, "model: llama"]
|
||||||
c: ["${temp}", false, "model: ${MODEL_ID}"]
|
c: [ "${temp}", false, "model: ${MODEL_ID}" ]
|
||||||
|
|
||||||
# concurrencyLimit: overrides the allowed number of active parallel requests to a model
|
# concurrencyLimit: overrides the allowed number of active parallel requests to a model
|
||||||
# - optional, default: 0
|
# - optional, default: 0
|
||||||
|
|||||||
@@ -46,13 +46,31 @@ fi
|
|||||||
BASE_IMAGE=${BASE_LLAMACPP_IMAGE:-ghcr.io/ggml-org/llama.cpp}
|
BASE_IMAGE=${BASE_LLAMACPP_IMAGE:-ghcr.io/ggml-org/llama.cpp}
|
||||||
SD_IMAGE=${BASE_SDCPP_IMAGE:-ghcr.io/leejet/stable-diffusion.cpp}
|
SD_IMAGE=${BASE_SDCPP_IMAGE:-ghcr.io/leejet/stable-diffusion.cpp}
|
||||||
|
|
||||||
# Set llama-swap repository, automatically uses GITHUB_REPOSITORY variable
|
# LS_REPO is the destination of the built container image — defaults to the
|
||||||
# to enable easy container builds on forked repos
|
# current GitHub repository so forked CI builds publish to the fork's own
|
||||||
|
# ghcr.io namespace without code changes.
|
||||||
LS_REPO=${GITHUB_REPOSITORY:-mostlygeek/llama-swap}
|
LS_REPO=${GITHUB_REPOSITORY:-mostlygeek/llama-swap}
|
||||||
|
|
||||||
|
# LS_BINARY_REPO is where the llama-swap release tarball is downloaded
|
||||||
|
# from. Decoupled from LS_REPO so forks (which usually have no releases of
|
||||||
|
# their own) can still build a container by pulling the canonical binary
|
||||||
|
# from upstream. Override via the LS_BINARY_REPO env var when you maintain
|
||||||
|
# fork-side releases.
|
||||||
|
LS_BINARY_REPO=${LS_BINARY_REPO:-mostlygeek/llama-swap}
|
||||||
|
|
||||||
# the most recent llama-swap tag
|
# the most recent llama-swap tag
|
||||||
# have to strip out the 'v' due to .tar.gz file naming
|
# have to strip out the 'v' due to .tar.gz file naming.
|
||||||
LS_VER=$(curl -s https://api.github.com/repos/${LS_REPO}/releases/latest | jq -r .tag_name | sed 's/v//')
|
# Authenticated request — unauth'd github.com API is 60/hr per IP and GHA
|
||||||
|
# runners share IPs, so the call regularly returns rate-limit JSON and
|
||||||
|
# `.tag_name` then resolves to "null", producing a bogus `vnull` URL below.
|
||||||
|
LS_VER=$(curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \
|
||||||
|
"https://api.github.com/repos/${LS_BINARY_REPO}/releases/latest" \
|
||||||
|
| jq -r .tag_name | sed 's/v//')
|
||||||
|
|
||||||
|
if [[ -z "$LS_VER" || "$LS_VER" == "null" ]]; then
|
||||||
|
log_info "Error: could not resolve latest llama-swap release tag from ${LS_BINARY_REPO}"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
# Fetches the most recent llama.cpp tag matching the given prefix
|
# Fetches the most recent llama.cpp tag matching the given prefix
|
||||||
# Handles pagination to search beyond the first 100 results
|
# Handles pagination to search beyond the first 100 results
|
||||||
@@ -126,6 +144,25 @@ if [[ ! -z "$DEBUG_ABORT_BUILD" ]]; then
|
|||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# cpu is the only backend with a multi-arch upstream base
|
||||||
|
# (ghcr.io/ggml-org/llama.cpp:server-bXXXX ships amd64+arm64); GPU backends
|
||||||
|
# are amd64-only and stay on the original `docker build` path so the
|
||||||
|
# sd-server layer can still FROM the just-built image via the local
|
||||||
|
# dockerd image store (buildx's container driver has a separate store
|
||||||
|
# that doesn't share with dockerd, which breaks the sd build).
|
||||||
|
if [ "$ARCH" == "cpu" ]; then
|
||||||
|
if [ "$PUSH_IMAGES" == "true" ]; then
|
||||||
|
BUILDX_FLAGS="--push --platform linux/amd64,linux/arm64"
|
||||||
|
else
|
||||||
|
# Smoke build: validate both platforms but emit no output. buildx
|
||||||
|
# on the docker-container driver defaults to cacheonly when
|
||||||
|
# neither --push nor --load is given, so each arch fully builds
|
||||||
|
# and a regression in either fails CI — without materializing the
|
||||||
|
# image or needing to --load (which is multi-arch-incompatible).
|
||||||
|
BUILDX_FLAGS="--platform linux/amd64,linux/arm64"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
for CONTAINER_TYPE in non-root root; do
|
for CONTAINER_TYPE in non-root root; do
|
||||||
CONTAINER_TAG="ghcr.io/${LS_REPO}:v${LS_VER}-${ARCH}-${LCPP_TAG}"
|
CONTAINER_TAG="ghcr.io/${LS_REPO}:v${LS_VER}-${ARCH}-${LCPP_TAG}"
|
||||||
CONTAINER_LATEST="ghcr.io/${LS_REPO}:${ARCH}"
|
CONTAINER_LATEST="ghcr.io/${LS_REPO}:${ARCH}"
|
||||||
@@ -142,11 +179,23 @@ for CONTAINER_TYPE in non-root root; do
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
log_info "Building $CONTAINER_TYPE $CONTAINER_TAG $LS_VER"
|
log_info "Building $CONTAINER_TYPE $CONTAINER_TAG $LS_VER"
|
||||||
docker build --provenance=false -f llama-swap.Containerfile --build-arg BASE_TAG=${BASE_TAG} --build-arg LS_VER=${LS_VER} --build-arg UID=${USER_UID} \
|
if [ "$ARCH" == "cpu" ]; then
|
||||||
--build-arg LS_REPO=${LS_REPO} --build-arg GID=${USER_GID} --build-arg USER_HOME=${USER_HOME} -t ${CONTAINER_TAG} -t ${CONTAINER_LATEST} \
|
docker buildx build $BUILDX_FLAGS --provenance=false \
|
||||||
--build-arg BASE_IMAGE=${BASE_IMAGE} .
|
-f llama-swap.Containerfile \
|
||||||
|
--build-arg BASE_TAG=${BASE_TAG} --build-arg LS_VER=${LS_VER} --build-arg UID=${USER_UID} \
|
||||||
|
--build-arg LS_REPO=${LS_BINARY_REPO} --build-arg GID=${USER_GID} --build-arg USER_HOME=${USER_HOME} \
|
||||||
|
--build-arg BASE_IMAGE=${BASE_IMAGE} \
|
||||||
|
-t ${CONTAINER_TAG} -t ${CONTAINER_LATEST} .
|
||||||
|
else
|
||||||
|
docker build --provenance=false -f llama-swap.Containerfile \
|
||||||
|
--build-arg BASE_TAG=${BASE_TAG} --build-arg LS_VER=${LS_VER} --build-arg UID=${USER_UID} \
|
||||||
|
--build-arg LS_REPO=${LS_BINARY_REPO} --build-arg GID=${USER_GID} --build-arg USER_HOME=${USER_HOME} \
|
||||||
|
-t ${CONTAINER_TAG} -t ${CONTAINER_LATEST} \
|
||||||
|
--build-arg BASE_IMAGE=${BASE_IMAGE} .
|
||||||
|
fi
|
||||||
|
|
||||||
# For architectures with stable-diffusion.cpp support, layer sd-server on top
|
# For architectures with stable-diffusion.cpp support, layer sd-server on top.
|
||||||
|
# Stays on `docker build` so the base resolves from local dockerd.
|
||||||
case "$ARCH" in
|
case "$ARCH" in
|
||||||
"musa" | "vulkan")
|
"musa" | "vulkan")
|
||||||
log_info "Adding sd-server to $CONTAINER_TAG"
|
log_info "Adding sd-server to $CONTAINER_TAG"
|
||||||
@@ -157,7 +206,8 @@ for CONTAINER_TYPE in non-root root; do
|
|||||||
-t ${CONTAINER_TAG} -t ${CONTAINER_LATEST} . ;;
|
-t ${CONTAINER_TAG} -t ${CONTAINER_LATEST} . ;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
if [ "$PUSH_IMAGES" == "true" ]; then
|
# cpu builds push inline via buildx --push; all other archs push here.
|
||||||
|
if [ "$ARCH" != "cpu" ] && [ "$PUSH_IMAGES" == "true" ]; then
|
||||||
docker push ${CONTAINER_TAG}
|
docker push ${CONTAINER_TAG}
|
||||||
docker push ${CONTAINER_LATEST}
|
docker push ${CONTAINER_LATEST}
|
||||||
fi
|
fi
|
||||||
|
|||||||
@@ -3,6 +3,9 @@ ARG BASE_TAG=server-cuda
|
|||||||
FROM ${BASE_IMAGE}:${BASE_TAG}
|
FROM ${BASE_IMAGE}:${BASE_TAG}
|
||||||
|
|
||||||
# has to be after the FROM
|
# has to be after the FROM
|
||||||
|
# TARGETARCH is auto-set by `docker buildx build --platform …` (amd64/arm64);
|
||||||
|
# falls back to amd64 when an older `docker build` runs without buildx.
|
||||||
|
ARG TARGETARCH=amd64
|
||||||
ARG LS_VER=170
|
ARG LS_VER=170
|
||||||
ARG LS_REPO=mostlygeek/llama-swap
|
ARG LS_REPO=mostlygeek/llama-swap
|
||||||
|
|
||||||
@@ -34,9 +37,9 @@ WORKDIR /app
|
|||||||
ENV PATH="/app:${PATH}"
|
ENV PATH="/app:${PATH}"
|
||||||
|
|
||||||
RUN \
|
RUN \
|
||||||
curl -LO "https://github.com/${LS_REPO}/releases/download/v${LS_VER}/llama-swap_${LS_VER}_linux_amd64.tar.gz" && \
|
curl -LO "https://github.com/${LS_REPO}/releases/download/v${LS_VER}/llama-swap_${LS_VER}_linux_${TARGETARCH}.tar.gz" && \
|
||||||
tar -zxf "llama-swap_${LS_VER}_linux_amd64.tar.gz" && \
|
tar -zxf "llama-swap_${LS_VER}_linux_${TARGETARCH}.tar.gz" && \
|
||||||
rm "llama-swap_${LS_VER}_linux_amd64.tar.gz"
|
rm "llama-swap_${LS_VER}_linux_${TARGETARCH}.tar.gz"
|
||||||
|
|
||||||
COPY --chown=$UID:$GID config.example.yaml /app/config.yaml
|
COPY --chown=$UID:$GID config.example.yaml /app/config.yaml
|
||||||
|
|
||||||
|
|||||||
@@ -149,7 +149,7 @@ ARG IK_LLAMA_COMMIT_HASH=unknown
|
|||||||
ARG RUN_UID=0
|
ARG RUN_UID=0
|
||||||
|
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
python3-numpy python3-sentencepiece \
|
python3-numpy python3-sentencepiece python3-pip \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
# Create non-root user when RUN_UID != 0
|
# Create non-root user when RUN_UID != 0
|
||||||
@@ -180,6 +180,9 @@ COPY --from=llama-build /install/bin/llama-cli /usr/local/bin/
|
|||||||
# Copy ik-llama-server (CUDA only; empty copy for vulkan)
|
# Copy ik-llama-server (CUDA only; empty copy for vulkan)
|
||||||
COPY --from=ik-llama-build /install/bin/ /usr/local/bin/
|
COPY --from=ik-llama-build /install/bin/ /usr/local/bin/
|
||||||
|
|
||||||
|
# Install uv
|
||||||
|
RUN pip install uv --break-system-packages
|
||||||
|
|
||||||
# Copy llama-swap binary
|
# Copy llama-swap binary
|
||||||
COPY --from=llama-swap-download /install/bin/llama-swap /usr/local/bin/
|
COPY --from=llama-swap-download /install/bin/llama-swap /usr/local/bin/
|
||||||
COPY --from=llama-swap-download /install/llama-swap-version /tmp/
|
COPY --from=llama-swap-download /install/llama-swap-version /tmp/
|
||||||
|
|||||||
@@ -38,8 +38,16 @@ if [ "$VERSION" = "latest" ]; then
|
|||||||
echo "Latest version: ${VERSION}"
|
echo "Latest version: ${VERSION}"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
ARCH=$(uname -m)
|
||||||
|
case "$ARCH" in
|
||||||
|
x86_64) ARCH="amd64" ;;
|
||||||
|
aarch64|arm64) ARCH="arm64" ;;
|
||||||
|
*) echo "FATAL: Unsupported architecture: $ARCH" >&2; exit 1 ;;
|
||||||
|
esac
|
||||||
|
|
||||||
# Download and extract
|
# Download and extract
|
||||||
URL="https://github.com/${REPO}/releases/download/v${VERSION}/llama-swap_${VERSION}_linux_amd64.tar.gz"
|
URL="https://github.com/${REPO}/releases/download/v${VERSION}/llama-swap_${VERSION}_linux_${ARCH}.tar.gz"
|
||||||
echo "=== Downloading llama-swap v${VERSION} ==="
|
echo "=== Downloading llama-swap v${VERSION} ==="
|
||||||
echo "URL: $URL"
|
echo "URL: $URL"
|
||||||
curl -fSL -o /tmp/llama-swap.tar.gz "$URL"
|
curl -fSL -o /tmp/llama-swap.tar.gz "$URL"
|
||||||
@@ -56,4 +64,4 @@ fi
|
|||||||
echo "$VERSION" > /install/llama-swap-version
|
echo "$VERSION" > /install/llama-swap-version
|
||||||
|
|
||||||
echo "=== llama-swap v${VERSION} installed ==="
|
echo "=== llama-swap v${VERSION} installed ==="
|
||||||
ls -la /install/bin/llama-swap
|
ls -la /install/bin/llama-swap
|
||||||
+15
-3
@@ -146,6 +146,18 @@ metricsMaxInMemory: 1000
|
|||||||
# - set to 0 to disable
|
# - set to 0 to disable
|
||||||
captureBuffer: 15
|
captureBuffer: 15
|
||||||
|
|
||||||
|
# performance: configuration for system monitoring statistics
|
||||||
|
# - timing values are duration strings like 1s, 1h30m, 90m, 2h10s, etc.
|
||||||
|
performance:
|
||||||
|
# enable: boolean
|
||||||
|
# - default: false
|
||||||
|
enable: true
|
||||||
|
|
||||||
|
# every: delay between polling for new performance statistics
|
||||||
|
# - default: 5s
|
||||||
|
# - minimum duration 5s
|
||||||
|
every: 5s
|
||||||
|
|
||||||
# startPort: sets the starting port number for the automatic ${PORT} macro.
|
# startPort: sets the starting port number for the automatic ${PORT} macro.
|
||||||
# - optional, default: 5800
|
# - optional, default: 5800
|
||||||
# - the ${PORT} macro can be used in model.cmd and model.proxy settings
|
# - the ${PORT} macro can be used in model.cmd and model.proxy settings
|
||||||
@@ -187,8 +199,7 @@ globalTTL: 0
|
|||||||
macros:
|
macros:
|
||||||
# Example of a multi-line macro
|
# Example of a multi-line macro
|
||||||
"latest-llama": >
|
"latest-llama": >
|
||||||
/path/to/llama-server/llama-server-ec9e0301
|
/path/to/llama-server/llama-server-ec9e0301 --port ${PORT}
|
||||||
--port ${PORT}
|
|
||||||
|
|
||||||
"default_ctx": 4096
|
"default_ctx": 4096
|
||||||
|
|
||||||
@@ -348,7 +359,8 @@ models:
|
|||||||
|
|
||||||
# the ${temp} macro will remain a float
|
# the ${temp} macro will remain a float
|
||||||
temperature: ${temp}
|
temperature: ${temp}
|
||||||
note: "The ${MODEL_ID} is running on port ${PORT} temp=${temp}, context=${default_ctx}"
|
note: "The ${MODEL_ID} is running on port ${PORT} temp=${temp},
|
||||||
|
context=${default_ctx}"
|
||||||
|
|
||||||
a_list:
|
a_list:
|
||||||
- 1
|
- 1
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
+54
-54
@@ -1,54 +1,54 @@
|
|||||||
// Copyright (c) Roman Atachiants and contributors. All rights reserved.
|
// Copyright (c) Roman Atachiants and contributors. All rights reserved.
|
||||||
// Licensed under the MIT license. See LICENSE file in the project root for details.
|
// Licensed under the MIT license. See LICENSE file in the project root for details.
|
||||||
|
|
||||||
package event
|
package event
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"sync"
|
"sync"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
)
|
)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
cpu: 13th Gen Intel(R) Core(TM) i7-13700K
|
cpu: 13th Gen Intel(R) Core(TM) i7-13700K
|
||||||
BenchmarkSubscribeConcurrent-24 1826686 606.3 ns/op 1648 B/op 5 allocs/op
|
BenchmarkSubscribeConcurrent-24 1826686 606.3 ns/op 1648 B/op 5 allocs/op
|
||||||
*/
|
*/
|
||||||
func BenchmarkSubscribeConcurrent(b *testing.B) {
|
func BenchmarkSubscribeConcurrent(b *testing.B) {
|
||||||
d := NewDispatcher()
|
d := NewDispatcher()
|
||||||
b.ReportAllocs()
|
b.ReportAllocs()
|
||||||
b.ResetTimer()
|
b.ResetTimer()
|
||||||
|
|
||||||
b.RunParallel(func(pb *testing.PB) {
|
b.RunParallel(func(pb *testing.PB) {
|
||||||
for pb.Next() {
|
for pb.Next() {
|
||||||
unsub := Subscribe(d, func(ev MyEvent1) {})
|
unsub := Subscribe(d, func(ev MyEvent1) {})
|
||||||
unsub()
|
unsub()
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestDefaultPublish(t *testing.T) {
|
func TestDefaultPublish(t *testing.T) {
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
|
|
||||||
// Subscribe
|
// Subscribe
|
||||||
var count int64
|
var count int64
|
||||||
defer On(func(ev MyEvent1) {
|
defer On(func(ev MyEvent1) {
|
||||||
atomic.AddInt64(&count, 1)
|
atomic.AddInt64(&count, 1)
|
||||||
wg.Done()
|
wg.Done()
|
||||||
})()
|
})()
|
||||||
|
|
||||||
defer OnType(TypeEvent1, func(ev MyEvent1) {
|
defer OnType(TypeEvent1, func(ev MyEvent1) {
|
||||||
atomic.AddInt64(&count, 1)
|
atomic.AddInt64(&count, 1)
|
||||||
wg.Done()
|
wg.Done()
|
||||||
})()
|
})()
|
||||||
|
|
||||||
// Publish
|
// Publish
|
||||||
wg.Add(4)
|
wg.Add(4)
|
||||||
Emit(MyEvent1{})
|
Emit(MyEvent1{})
|
||||||
Emit(MyEvent1{})
|
Emit(MyEvent1{})
|
||||||
|
|
||||||
// Wait and check
|
// Wait and check
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
assert.Equal(t, int64(4), count)
|
assert.Equal(t, int64(4), count)
|
||||||
}
|
}
|
||||||
|
|||||||
+324
-324
@@ -1,324 +1,324 @@
|
|||||||
// Copyright (c) Roman Atachiants and contributors. All rights reserved.
|
// Copyright (c) Roman Atachiants and contributors. All rights reserved.
|
||||||
// Licensed under the MIT license. See LICENSE file in the project root for details.
|
// Licensed under the MIT license. See LICENSE file in the project root for details.
|
||||||
|
|
||||||
package event
|
package event
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"sync"
|
"sync"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestPublish(t *testing.T) {
|
func TestPublish(t *testing.T) {
|
||||||
d := NewDispatcher()
|
d := NewDispatcher()
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
|
|
||||||
// Subscribe, must be received in order
|
// Subscribe, must be received in order
|
||||||
var count int64
|
var count int64
|
||||||
defer Subscribe(d, func(ev MyEvent1) {
|
defer Subscribe(d, func(ev MyEvent1) {
|
||||||
assert.Equal(t, int(atomic.AddInt64(&count, 1)), ev.Number)
|
assert.Equal(t, int(atomic.AddInt64(&count, 1)), ev.Number)
|
||||||
wg.Done()
|
wg.Done()
|
||||||
})()
|
})()
|
||||||
|
|
||||||
// Publish
|
// Publish
|
||||||
wg.Add(3)
|
wg.Add(3)
|
||||||
Publish(d, MyEvent1{Number: 1})
|
Publish(d, MyEvent1{Number: 1})
|
||||||
Publish(d, MyEvent1{Number: 2})
|
Publish(d, MyEvent1{Number: 2})
|
||||||
Publish(d, MyEvent1{Number: 3})
|
Publish(d, MyEvent1{Number: 3})
|
||||||
|
|
||||||
// Wait and check
|
// Wait and check
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
assert.Equal(t, int64(3), count)
|
assert.Equal(t, int64(3), count)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestUnsubscribe(t *testing.T) {
|
func TestUnsubscribe(t *testing.T) {
|
||||||
d := NewDispatcher()
|
d := NewDispatcher()
|
||||||
assert.Equal(t, 0, d.count(TypeEvent1))
|
assert.Equal(t, 0, d.count(TypeEvent1))
|
||||||
unsubscribe := Subscribe(d, func(ev MyEvent1) {
|
unsubscribe := Subscribe(d, func(ev MyEvent1) {
|
||||||
// Nothing
|
// Nothing
|
||||||
})
|
})
|
||||||
|
|
||||||
assert.Equal(t, 1, d.count(TypeEvent1))
|
assert.Equal(t, 1, d.count(TypeEvent1))
|
||||||
unsubscribe()
|
unsubscribe()
|
||||||
assert.Equal(t, 0, d.count(TypeEvent1))
|
assert.Equal(t, 0, d.count(TypeEvent1))
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestConcurrent(t *testing.T) {
|
func TestConcurrent(t *testing.T) {
|
||||||
const max = 1000000
|
const max = 1000000
|
||||||
var count int64
|
var count int64
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
|
|
||||||
d := NewDispatcher()
|
d := NewDispatcher()
|
||||||
defer Subscribe(d, func(ev MyEvent1) {
|
defer Subscribe(d, func(ev MyEvent1) {
|
||||||
if current := atomic.AddInt64(&count, 1); current == max {
|
if current := atomic.AddInt64(&count, 1); current == max {
|
||||||
wg.Done()
|
wg.Done()
|
||||||
}
|
}
|
||||||
})()
|
})()
|
||||||
|
|
||||||
// Asynchronously publish
|
// Asynchronously publish
|
||||||
go func() {
|
go func() {
|
||||||
for i := 0; i < max; i++ {
|
for i := 0; i < max; i++ {
|
||||||
Publish(d, MyEvent1{})
|
Publish(d, MyEvent1{})
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
defer Subscribe(d, func(ev MyEvent1) {
|
defer Subscribe(d, func(ev MyEvent1) {
|
||||||
// Subscriber that does nothing
|
// Subscriber that does nothing
|
||||||
})()
|
})()
|
||||||
|
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
assert.Equal(t, max, int(count))
|
assert.Equal(t, max, int(count))
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestSubscribeDifferentType(t *testing.T) {
|
func TestSubscribeDifferentType(t *testing.T) {
|
||||||
d := NewDispatcher()
|
d := NewDispatcher()
|
||||||
assert.Panics(t, func() {
|
assert.Panics(t, func() {
|
||||||
SubscribeTo(d, TypeEvent1, func(ev MyEvent1) {})
|
SubscribeTo(d, TypeEvent1, func(ev MyEvent1) {})
|
||||||
SubscribeTo(d, TypeEvent1, func(ev MyEvent2) {})
|
SubscribeTo(d, TypeEvent1, func(ev MyEvent2) {})
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestPublishDifferentType(t *testing.T) {
|
func TestPublishDifferentType(t *testing.T) {
|
||||||
d := NewDispatcher()
|
d := NewDispatcher()
|
||||||
assert.Panics(t, func() {
|
assert.Panics(t, func() {
|
||||||
SubscribeTo(d, TypeEvent1, func(ev MyEvent2) {})
|
SubscribeTo(d, TypeEvent1, func(ev MyEvent2) {})
|
||||||
Publish(d, MyEvent1{})
|
Publish(d, MyEvent1{})
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestCloseDispatcher(t *testing.T) {
|
func TestCloseDispatcher(t *testing.T) {
|
||||||
d := NewDispatcher()
|
d := NewDispatcher()
|
||||||
defer SubscribeTo(d, TypeEvent1, func(ev MyEvent2) {})()
|
defer SubscribeTo(d, TypeEvent1, func(ev MyEvent2) {})()
|
||||||
|
|
||||||
assert.NoError(t, d.Close())
|
assert.NoError(t, d.Close())
|
||||||
assert.Panics(t, func() {
|
assert.Panics(t, func() {
|
||||||
SubscribeTo(d, TypeEvent1, func(ev MyEvent2) {})
|
SubscribeTo(d, TypeEvent1, func(ev MyEvent2) {})
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestMatrix(t *testing.T) {
|
func TestMatrix(t *testing.T) {
|
||||||
const amount = 1000
|
const amount = 1000
|
||||||
for _, subs := range []int{1, 10, 100} {
|
for _, subs := range []int{1, 10, 100} {
|
||||||
for _, topics := range []int{1, 10} {
|
for _, topics := range []int{1, 10} {
|
||||||
expected := subs * topics * amount
|
expected := subs * topics * amount
|
||||||
t.Run(fmt.Sprintf("%dx%d", topics, subs), func(t *testing.T) {
|
t.Run(fmt.Sprintf("%dx%d", topics, subs), func(t *testing.T) {
|
||||||
var count atomic.Int64
|
var count atomic.Int64
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
wg.Add(expected)
|
wg.Add(expected)
|
||||||
|
|
||||||
d := NewDispatcher()
|
d := NewDispatcher()
|
||||||
for i := 0; i < subs; i++ {
|
for i := 0; i < subs; i++ {
|
||||||
for id := 0; id < topics; id++ {
|
for id := 0; id < topics; id++ {
|
||||||
defer SubscribeTo(d, uint32(id), func(ev MyEvent3) {
|
defer SubscribeTo(d, uint32(id), func(ev MyEvent3) {
|
||||||
count.Add(1)
|
count.Add(1)
|
||||||
wg.Done()
|
wg.Done()
|
||||||
})()
|
})()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for n := 0; n < amount; n++ {
|
for n := 0; n < amount; n++ {
|
||||||
for id := 0; id < topics; id++ {
|
for id := 0; id < topics; id++ {
|
||||||
go Publish(d, MyEvent3{ID: id})
|
go Publish(d, MyEvent3{ID: id})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
assert.Equal(t, expected, int(count.Load()))
|
assert.Equal(t, expected, int(count.Load()))
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestConcurrentSubscriptionRace(t *testing.T) {
|
func TestConcurrentSubscriptionRace(t *testing.T) {
|
||||||
// This test specifically targets the race condition that occurs when multiple
|
// This test specifically targets the race condition that occurs when multiple
|
||||||
// goroutines try to subscribe to different event types simultaneously.
|
// goroutines try to subscribe to different event types simultaneously.
|
||||||
// Without the CAS loop, subscriptions could be lost due to registry corruption.
|
// Without the CAS loop, subscriptions could be lost due to registry corruption.
|
||||||
|
|
||||||
const numGoroutines = 100
|
const numGoroutines = 100
|
||||||
const numEventTypes = 50
|
const numEventTypes = 50
|
||||||
|
|
||||||
d := NewDispatcher()
|
d := NewDispatcher()
|
||||||
defer d.Close()
|
defer d.Close()
|
||||||
|
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
var receivedCount int64
|
var receivedCount int64
|
||||||
var subscribedTypes sync.Map // Thread-safe map
|
var subscribedTypes sync.Map // Thread-safe map
|
||||||
|
|
||||||
wg.Add(numGoroutines)
|
wg.Add(numGoroutines)
|
||||||
|
|
||||||
// Start multiple goroutines that subscribe to different event types concurrently
|
// Start multiple goroutines that subscribe to different event types concurrently
|
||||||
for i := 0; i < numGoroutines; i++ {
|
for i := 0; i < numGoroutines; i++ {
|
||||||
go func(goroutineID int) {
|
go func(goroutineID int) {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
|
|
||||||
// Each goroutine subscribes to one of numEventTypes event types (IDs wrap via modulo, so several goroutines share a type)
|
// Each goroutine subscribes to one of numEventTypes event types (IDs wrap via modulo, so several goroutines share a type)
|
||||||
eventType := uint32(goroutineID%numEventTypes + 1000) // Offset to avoid collision with other tests
|
eventType := uint32(goroutineID%numEventTypes + 1000) // Offset to avoid collision with other tests
|
||||||
|
|
||||||
// Subscribe to the event type
|
// Subscribe to the event type
|
||||||
SubscribeTo(d, eventType, func(ev MyEvent3) {
|
SubscribeTo(d, eventType, func(ev MyEvent3) {
|
||||||
atomic.AddInt64(&receivedCount, 1)
|
atomic.AddInt64(&receivedCount, 1)
|
||||||
})
|
})
|
||||||
|
|
||||||
// Record that this type was subscribed
|
// Record that this type was subscribed
|
||||||
subscribedTypes.Store(eventType, true)
|
subscribedTypes.Store(eventType, true)
|
||||||
}(i)
|
}(i)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wait for all subscriptions to complete
|
// Wait for all subscriptions to complete
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
|
|
||||||
// Count the number of unique event types subscribed
|
// Count the number of unique event types subscribed
|
||||||
expectedTypes := 0
|
expectedTypes := 0
|
||||||
subscribedTypes.Range(func(key, value interface{}) bool {
|
subscribedTypes.Range(func(key, value interface{}) bool {
|
||||||
expectedTypes++
|
expectedTypes++
|
||||||
return true
|
return true
|
||||||
})
|
})
|
||||||
|
|
||||||
// Small delay to ensure all subscriptions are fully processed
|
// Small delay to ensure all subscriptions are fully processed
|
||||||
time.Sleep(10 * time.Millisecond)
|
time.Sleep(10 * time.Millisecond)
|
||||||
|
|
||||||
// Publish events to each subscribed type
|
// Publish events to each subscribed type
|
||||||
subscribedTypes.Range(func(key, value interface{}) bool {
|
subscribedTypes.Range(func(key, value interface{}) bool {
|
||||||
eventType := key.(uint32)
|
eventType := key.(uint32)
|
||||||
Publish(d, MyEvent3{ID: int(eventType)})
|
Publish(d, MyEvent3{ID: int(eventType)})
|
||||||
return true
|
return true
|
||||||
})
|
})
|
||||||
|
|
||||||
// Wait for all events to be processed
|
// Wait for all events to be processed
|
||||||
time.Sleep(50 * time.Millisecond)
|
time.Sleep(50 * time.Millisecond)
|
||||||
|
|
||||||
// Verify that we received at least the expected number of events
|
// Verify that we received at least the expected number of events
|
||||||
// (there might be more if multiple goroutines subscribed to the same event type)
|
// (there might be more if multiple goroutines subscribed to the same event type)
|
||||||
received := atomic.LoadInt64(&receivedCount)
|
received := atomic.LoadInt64(&receivedCount)
|
||||||
assert.GreaterOrEqual(t, int(received), expectedTypes,
|
assert.GreaterOrEqual(t, int(received), expectedTypes,
|
||||||
"Should have received at least %d events, got %d", expectedTypes, received)
|
"Should have received at least %d events, got %d", expectedTypes, received)
|
||||||
|
|
||||||
// Verify that we have the expected number of unique event types
|
// Verify that we have the expected number of unique event types
|
||||||
assert.Equal(t, numEventTypes, expectedTypes,
|
assert.Equal(t, numEventTypes, expectedTypes,
|
||||||
"Should have exactly %d unique event types", numEventTypes)
|
"Should have exactly %d unique event types", numEventTypes)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestConcurrentHandlerRegistration(t *testing.T) {
|
func TestConcurrentHandlerRegistration(t *testing.T) {
|
||||||
const numGoroutines = 100
|
const numGoroutines = 100
|
||||||
|
|
||||||
// Test concurrent subscriptions to the same event type
|
// Test concurrent subscriptions to the same event type
|
||||||
t.Run("SameEventType", func(t *testing.T) {
|
t.Run("SameEventType", func(t *testing.T) {
|
||||||
d := NewDispatcher()
|
d := NewDispatcher()
|
||||||
var handlerCount int64
|
var handlerCount int64
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
|
|
||||||
// Start multiple goroutines subscribing to the same event type (0x1)
|
// Start multiple goroutines subscribing to the same event type (0x1)
|
||||||
for i := 0; i < numGoroutines; i++ {
|
for i := 0; i < numGoroutines; i++ {
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
go func() {
|
go func() {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
SubscribeTo(d, uint32(0x1), func(ev MyEvent1) {
|
SubscribeTo(d, uint32(0x1), func(ev MyEvent1) {
|
||||||
atomic.AddInt64(&handlerCount, 1)
|
atomic.AddInt64(&handlerCount, 1)
|
||||||
})
|
})
|
||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
|
|
||||||
// Verify all handlers were registered by publishing an event
|
// Verify all handlers were registered by publishing an event
|
||||||
atomic.StoreInt64(&handlerCount, 0)
|
atomic.StoreInt64(&handlerCount, 0)
|
||||||
Publish(d, MyEvent1{})
|
Publish(d, MyEvent1{})
|
||||||
|
|
||||||
// Small delay to ensure all handlers have executed
|
// Small delay to ensure all handlers have executed
|
||||||
time.Sleep(10 * time.Millisecond)
|
time.Sleep(10 * time.Millisecond)
|
||||||
|
|
||||||
assert.Equal(t, int64(numGoroutines), atomic.LoadInt64(&handlerCount),
|
assert.Equal(t, int64(numGoroutines), atomic.LoadInt64(&handlerCount),
|
||||||
"Not all handlers were registered due to race condition")
|
"Not all handlers were registered due to race condition")
|
||||||
})
|
})
|
||||||
|
|
||||||
// Test concurrent subscriptions to different event types
|
// Test concurrent subscriptions to different event types
|
||||||
t.Run("DifferentEventTypes", func(t *testing.T) {
|
t.Run("DifferentEventTypes", func(t *testing.T) {
|
||||||
d := NewDispatcher()
|
d := NewDispatcher()
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
receivedEvents := make(map[uint32]*int64)
|
receivedEvents := make(map[uint32]*int64)
|
||||||
|
|
||||||
// Create multiple event types and subscribe concurrently
|
// Create multiple event types and subscribe concurrently
|
||||||
for i := 0; i < numGoroutines; i++ {
|
for i := 0; i < numGoroutines; i++ {
|
||||||
eventType := uint32(100 + i)
|
eventType := uint32(100 + i)
|
||||||
counter := new(int64)
|
counter := new(int64)
|
||||||
receivedEvents[eventType] = counter
|
receivedEvents[eventType] = counter
|
||||||
|
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
go func(et uint32, cnt *int64) {
|
go func(et uint32, cnt *int64) {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
SubscribeTo(d, et, func(ev MyEvent3) {
|
SubscribeTo(d, et, func(ev MyEvent3) {
|
||||||
atomic.AddInt64(cnt, 1)
|
atomic.AddInt64(cnt, 1)
|
||||||
})
|
})
|
||||||
}(eventType, counter)
|
}(eventType, counter)
|
||||||
}
|
}
|
||||||
|
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
|
|
||||||
// Publish events to all types
|
// Publish events to all types
|
||||||
for eventType := uint32(100); eventType < uint32(100+numGoroutines); eventType++ {
|
for eventType := uint32(100); eventType < uint32(100+numGoroutines); eventType++ {
|
||||||
Publish(d, MyEvent3{ID: int(eventType)})
|
Publish(d, MyEvent3{ID: int(eventType)})
|
||||||
}
|
}
|
||||||
|
|
||||||
// Small delay to ensure all handlers have executed
|
// Small delay to ensure all handlers have executed
|
||||||
time.Sleep(10 * time.Millisecond)
|
time.Sleep(10 * time.Millisecond)
|
||||||
|
|
||||||
// Verify all event types received their events
|
// Verify all event types received their events
|
||||||
for eventType, counter := range receivedEvents {
|
for eventType, counter := range receivedEvents {
|
||||||
assert.Equal(t, int64(1), atomic.LoadInt64(counter),
|
assert.Equal(t, int64(1), atomic.LoadInt64(counter),
|
||||||
"Event type %d did not receive its event", eventType)
|
"Event type %d did not receive its event", eventType)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestBackpressure(t *testing.T) {
|
func TestBackpressure(t *testing.T) {
|
||||||
d := NewDispatcher()
|
d := NewDispatcher()
|
||||||
d.maxQueue = 10
|
d.maxQueue = 10
|
||||||
|
|
||||||
var processedCount int64
|
var processedCount int64
|
||||||
unsub := SubscribeTo(d, uint32(0x200), func(ev MyEvent3) {
|
unsub := SubscribeTo(d, uint32(0x200), func(ev MyEvent3) {
|
||||||
atomic.AddInt64(&processedCount, 1)
|
atomic.AddInt64(&processedCount, 1)
|
||||||
})
|
})
|
||||||
defer unsub()
|
defer unsub()
|
||||||
|
|
||||||
const eventsToPublish = 1000
|
const eventsToPublish = 1000
|
||||||
for i := 0; i < eventsToPublish; i++ {
|
for i := 0; i < eventsToPublish; i++ {
|
||||||
Publish(d, MyEvent3{ID: 0x200})
|
Publish(d, MyEvent3{ID: 0x200})
|
||||||
}
|
}
|
||||||
|
|
||||||
time.Sleep(100 * time.Millisecond)
|
time.Sleep(100 * time.Millisecond)
|
||||||
|
|
||||||
// Verify all events were eventually processed
|
// Verify all events were eventually processed
|
||||||
finalProcessed := atomic.LoadInt64(&processedCount)
|
finalProcessed := atomic.LoadInt64(&processedCount)
|
||||||
assert.Equal(t, int64(eventsToPublish), finalProcessed)
|
assert.Equal(t, int64(eventsToPublish), finalProcessed)
|
||||||
t.Logf("Events processed: %d/%d", finalProcessed, eventsToPublish)
|
t.Logf("Events processed: %d/%d", finalProcessed, eventsToPublish)
|
||||||
}
|
}
|
||||||
|
|
||||||
// ------------------------------------- Test Events -------------------------------------
|
// ------------------------------------- Test Events -------------------------------------
|
||||||
|
|
||||||
const (
|
const (
|
||||||
TypeEvent1 = 0x1
|
TypeEvent1 = 0x1
|
||||||
TypeEvent2 = 0x2
|
TypeEvent2 = 0x2
|
||||||
)
|
)
|
||||||
|
|
||||||
type MyEvent1 struct {
|
type MyEvent1 struct {
|
||||||
Number int
|
Number int
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t MyEvent1) Type() uint32 { return TypeEvent1 }
|
func (t MyEvent1) Type() uint32 { return TypeEvent1 }
|
||||||
|
|
||||||
type MyEvent2 struct {
|
type MyEvent2 struct {
|
||||||
Text string
|
Text string
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t MyEvent2) Type() uint32 { return TypeEvent2 }
|
func (t MyEvent2) Type() uint32 { return TypeEvent2 }
|
||||||
|
|
||||||
type MyEvent3 struct {
|
type MyEvent3 struct {
|
||||||
ID int
|
ID int
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t MyEvent3) Type() uint32 { return uint32(t.ID) }
|
func (t MyEvent3) Type() uint32 { return uint32(t.ID) }
|
||||||
|
|||||||
@@ -4,10 +4,11 @@ go 1.26.1
|
|||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/billziss-gh/golib v0.2.0
|
github.com/billziss-gh/golib v0.2.0
|
||||||
github.com/fsnotify/fsnotify v1.9.0
|
github.com/fxamacker/cbor/v2 v2.9.1
|
||||||
github.com/gin-gonic/gin v1.10.0
|
github.com/gin-gonic/gin v1.10.0
|
||||||
github.com/klauspost/compress v1.18.5
|
github.com/klauspost/compress v1.18.5
|
||||||
github.com/stretchr/testify v1.9.0
|
github.com/shirou/gopsutil/v4 v4.26.4
|
||||||
|
github.com/stretchr/testify v1.11.1
|
||||||
github.com/tidwall/gjson v1.18.0
|
github.com/tidwall/gjson v1.18.0
|
||||||
github.com/tidwall/sjson v1.2.5
|
github.com/tidwall/sjson v1.2.5
|
||||||
gopkg.in/yaml.v3 v3.0.1
|
gopkg.in/yaml.v3 v3.0.1
|
||||||
@@ -19,8 +20,10 @@ require (
|
|||||||
github.com/cloudwego/base64x v0.1.4 // indirect
|
github.com/cloudwego/base64x v0.1.4 // indirect
|
||||||
github.com/cloudwego/iasm v0.2.0 // indirect
|
github.com/cloudwego/iasm v0.2.0 // indirect
|
||||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||||
|
github.com/ebitengine/purego v0.10.0 // indirect
|
||||||
github.com/gabriel-vasile/mimetype v1.4.3 // indirect
|
github.com/gabriel-vasile/mimetype v1.4.3 // indirect
|
||||||
github.com/gin-contrib/sse v0.1.0 // indirect
|
github.com/gin-contrib/sse v0.1.0 // indirect
|
||||||
|
github.com/go-ole/go-ole v1.2.6 // indirect
|
||||||
github.com/go-playground/locales v0.14.1 // indirect
|
github.com/go-playground/locales v0.14.1 // indirect
|
||||||
github.com/go-playground/universal-translator v0.18.1 // indirect
|
github.com/go-playground/universal-translator v0.18.1 // indirect
|
||||||
github.com/go-playground/validator/v10 v10.20.0 // indirect
|
github.com/go-playground/validator/v10 v10.20.0 // indirect
|
||||||
@@ -28,19 +31,25 @@ require (
|
|||||||
github.com/json-iterator/go v1.1.12 // indirect
|
github.com/json-iterator/go v1.1.12 // indirect
|
||||||
github.com/klauspost/cpuid/v2 v2.2.7 // indirect
|
github.com/klauspost/cpuid/v2 v2.2.7 // indirect
|
||||||
github.com/leodido/go-urn v1.4.0 // indirect
|
github.com/leodido/go-urn v1.4.0 // indirect
|
||||||
|
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
|
||||||
github.com/mattn/go-isatty v0.0.20 // indirect
|
github.com/mattn/go-isatty v0.0.20 // indirect
|
||||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
||||||
github.com/modern-go/reflect2 v1.0.2 // indirect
|
github.com/modern-go/reflect2 v1.0.2 // indirect
|
||||||
github.com/pelletier/go-toml/v2 v2.2.2 // indirect
|
github.com/pelletier/go-toml/v2 v2.2.2 // indirect
|
||||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||||
|
github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect
|
||||||
github.com/tidwall/match v1.1.1 // indirect
|
github.com/tidwall/match v1.1.1 // indirect
|
||||||
github.com/tidwall/pretty v1.2.1 // indirect
|
github.com/tidwall/pretty v1.2.1 // indirect
|
||||||
|
github.com/tklauser/go-sysconf v0.3.16 // indirect
|
||||||
|
github.com/tklauser/numcpus v0.11.0 // indirect
|
||||||
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
|
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
|
||||||
github.com/ugorji/go/codec v1.2.12 // indirect
|
github.com/ugorji/go/codec v1.2.12 // indirect
|
||||||
|
github.com/x448/float16 v0.8.4 // indirect
|
||||||
|
github.com/yusufpapurcu/wmi v1.2.4 // indirect
|
||||||
golang.org/x/arch v0.8.0 // indirect
|
golang.org/x/arch v0.8.0 // indirect
|
||||||
golang.org/x/crypto v0.45.0 // indirect
|
golang.org/x/crypto v0.45.0 // indirect
|
||||||
golang.org/x/net v0.47.0 // indirect
|
golang.org/x/net v0.47.0 // indirect
|
||||||
golang.org/x/sys v0.38.0 // indirect
|
golang.org/x/sys v0.41.0 // indirect
|
||||||
golang.org/x/text v0.31.0 // indirect
|
golang.org/x/text v0.31.0 // indirect
|
||||||
google.golang.org/protobuf v1.34.1 // indirect
|
google.golang.org/protobuf v1.34.1 // indirect
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -11,14 +11,18 @@ github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQ
|
|||||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k=
|
github.com/ebitengine/purego v0.10.0 h1:QIw4xfpWT6GWTzaW5XEKy3HXoqrJGx1ijYHzTF0/ISU=
|
||||||
github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
|
github.com/ebitengine/purego v0.10.0/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ=
|
||||||
|
github.com/fxamacker/cbor/v2 v2.9.1 h1:2rWm8B193Ll4VdjsJY28jxs70IdDsHRWgQYAI80+rMQ=
|
||||||
|
github.com/fxamacker/cbor/v2 v2.9.1/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ=
|
||||||
github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
|
github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
|
||||||
github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
|
github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
|
||||||
github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
|
github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
|
||||||
github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
|
github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
|
||||||
github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU=
|
github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU=
|
||||||
github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y=
|
github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y=
|
||||||
|
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
|
||||||
|
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
|
||||||
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
|
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
|
||||||
github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
|
github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
|
||||||
github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
|
github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
|
||||||
@@ -29,8 +33,9 @@ github.com/go-playground/validator/v10 v10.20.0 h1:K9ISHbSaI0lyB2eWMPJo+kOS/FBEx
|
|||||||
github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
|
github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
|
||||||
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
|
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
|
||||||
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
|
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
|
||||||
github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU=
|
github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||||
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
|
||||||
|
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
|
||||||
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
||||||
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
|
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
|
||||||
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
|
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
|
||||||
@@ -42,6 +47,8 @@ github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZY
|
|||||||
github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
|
github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
|
||||||
github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
|
github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
|
||||||
github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
|
github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
|
||||||
|
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4=
|
||||||
|
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I=
|
||||||
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
|
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
|
||||||
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
||||||
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||||
@@ -53,6 +60,10 @@ github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6
|
|||||||
github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs=
|
github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs=
|
||||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
|
github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 h1:o4JXh1EVt9k/+g42oCprj/FisM4qX9L3sZB3upGN2ZU=
|
||||||
|
github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE=
|
||||||
|
github.com/shirou/gopsutil/v4 v4.26.4 h1:B4SXVbcwTyrocPHEmWBC4uCYr4Xcu3MK1TXqbprAOWY=
|
||||||
|
github.com/shirou/gopsutil/v4 v4.26.4/go.mod h1:LZ6ewCSkBqUpvSOf+LsTGnRinC6iaNUNMGBtDkJBaLQ=
|
||||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||||
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
|
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
|
||||||
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
|
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
|
||||||
@@ -63,8 +74,9 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
|
|||||||
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
||||||
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
|
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
|
||||||
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
|
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
|
||||||
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
|
|
||||||
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||||
|
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
||||||
|
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
||||||
github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
|
github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
|
||||||
github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY=
|
github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY=
|
||||||
github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
|
github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
|
||||||
@@ -75,10 +87,18 @@ github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4=
|
|||||||
github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
|
github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
|
||||||
github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY=
|
github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY=
|
||||||
github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28=
|
github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28=
|
||||||
|
github.com/tklauser/go-sysconf v0.3.16 h1:frioLaCQSsF5Cy1jgRBrzr6t502KIIwQ0MArYICU0nA=
|
||||||
|
github.com/tklauser/go-sysconf v0.3.16/go.mod h1:/qNL9xxDhc7tx3HSRsLWNnuzbVfh3e7gh/BmM179nYI=
|
||||||
|
github.com/tklauser/numcpus v0.11.0 h1:nSTwhKH5e1dMNsCdVBukSZrURJRoHbSEQjdEbY+9RXw=
|
||||||
|
github.com/tklauser/numcpus v0.11.0/go.mod h1:z+LwcLq54uWZTX0u/bGobaV34u6V7KNlTZejzM6/3MQ=
|
||||||
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
|
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
|
||||||
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
|
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
|
||||||
github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
|
github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
|
||||||
github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
|
github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
|
||||||
|
github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
|
||||||
|
github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
|
||||||
|
github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0=
|
||||||
|
github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
|
||||||
golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
|
golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
|
||||||
golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc=
|
golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc=
|
||||||
golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
|
golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
|
||||||
@@ -86,13 +106,14 @@ golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q=
|
|||||||
golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4=
|
golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4=
|
||||||
golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY=
|
golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY=
|
||||||
golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU=
|
golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU=
|
||||||
|
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
|
golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
|
golang.org/x/sys v0.41.0 h1:Ivj+2Cp/ylzLiEU89QhWblYnOE9zerudt9Ftecq2C6k=
|
||||||
golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
|
golang.org/x/sys v0.41.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
|
||||||
golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM=
|
golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM=
|
||||||
golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM=
|
golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM=
|
||||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
|
|
||||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||||
google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg=
|
google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg=
|
||||||
google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
|
google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
package proxy
|
package logmon
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
@@ -11,12 +11,22 @@ import (
|
|||||||
"github.com/mostlygeek/llama-swap/event"
|
"github.com/mostlygeek/llama-swap/event"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const DataEventID = 0x04
|
||||||
|
|
||||||
|
type DataEvent struct {
|
||||||
|
Data []byte
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e DataEvent) Type() uint32 {
|
||||||
|
return DataEventID
|
||||||
|
}
|
||||||
|
|
||||||
// circularBuffer is a fixed-size circular byte buffer that overwrites
|
// circularBuffer is a fixed-size circular byte buffer that overwrites
|
||||||
// oldest data when full. It provides O(1) writes and O(n) reads.
|
// oldest data when full. It provides O(1) writes and O(n) reads.
|
||||||
type circularBuffer struct {
|
type circularBuffer struct {
|
||||||
data []byte // pre-allocated capacity
|
data []byte
|
||||||
head int // next write position
|
head int
|
||||||
size int // current number of bytes stored (0 to cap)
|
size int
|
||||||
}
|
}
|
||||||
|
|
||||||
func newCircularBuffer(capacity int) *circularBuffer {
|
func newCircularBuffer(capacity int) *circularBuffer {
|
||||||
@@ -27,8 +37,6 @@ func newCircularBuffer(capacity int) *circularBuffer {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write appends bytes to the buffer, overwriting oldest data when full.
|
|
||||||
// Data is copied into the internal buffer (not stored by reference).
|
|
||||||
func (cb *circularBuffer) Write(p []byte) {
|
func (cb *circularBuffer) Write(p []byte) {
|
||||||
if len(p) == 0 {
|
if len(p) == 0 {
|
||||||
return
|
return
|
||||||
@@ -36,7 +44,6 @@ func (cb *circularBuffer) Write(p []byte) {
|
|||||||
|
|
||||||
cap := len(cb.data)
|
cap := len(cb.data)
|
||||||
|
|
||||||
// If input is larger than capacity, only keep the last cap bytes
|
|
||||||
if len(p) >= cap {
|
if len(p) >= cap {
|
||||||
copy(cb.data, p[len(p)-cap:])
|
copy(cb.data, p[len(p)-cap:])
|
||||||
cb.head = 0
|
cb.head = 0
|
||||||
@@ -44,28 +51,22 @@ func (cb *circularBuffer) Write(p []byte) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Calculate how much space is available from head to end of buffer
|
|
||||||
firstPart := cap - cb.head
|
firstPart := cap - cb.head
|
||||||
if firstPart >= len(p) {
|
if firstPart >= len(p) {
|
||||||
// All data fits without wrapping
|
|
||||||
copy(cb.data[cb.head:], p)
|
copy(cb.data[cb.head:], p)
|
||||||
cb.head = (cb.head + len(p)) % cap
|
cb.head = (cb.head + len(p)) % cap
|
||||||
} else {
|
} else {
|
||||||
// Data wraps around
|
|
||||||
copy(cb.data[cb.head:], p[:firstPart])
|
copy(cb.data[cb.head:], p[:firstPart])
|
||||||
copy(cb.data[:len(p)-firstPart], p[firstPart:])
|
copy(cb.data[:len(p)-firstPart], p[firstPart:])
|
||||||
cb.head = len(p) - firstPart
|
cb.head = len(p) - firstPart
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update size
|
|
||||||
cb.size += len(p)
|
cb.size += len(p)
|
||||||
if cb.size > cap {
|
if cb.size > cap {
|
||||||
cb.size = cap
|
cb.size = cap
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetHistory returns all buffered data in correct order (oldest to newest).
|
|
||||||
// Returns a new slice (copy), not a view into internal buffer.
|
|
||||||
func (cb *circularBuffer) GetHistory() []byte {
|
func (cb *circularBuffer) GetHistory() []byte {
|
||||||
if cb.size == 0 {
|
if cb.size == 0 {
|
||||||
return nil
|
return nil
|
||||||
@@ -74,14 +75,11 @@ func (cb *circularBuffer) GetHistory() []byte {
|
|||||||
result := make([]byte, cb.size)
|
result := make([]byte, cb.size)
|
||||||
cap := len(cb.data)
|
cap := len(cb.data)
|
||||||
|
|
||||||
// Calculate start position (oldest data)
|
|
||||||
start := (cb.head - cb.size + cap) % cap
|
start := (cb.head - cb.size + cap) % cap
|
||||||
|
|
||||||
if start+cb.size <= cap {
|
if start+cb.size <= cap {
|
||||||
// Data is contiguous, single copy
|
|
||||||
copy(result, cb.data[start:start+cb.size])
|
copy(result, cb.data[start:start+cb.size])
|
||||||
} else {
|
} else {
|
||||||
// Data wraps around, two copies
|
|
||||||
firstPart := cap - start
|
firstPart := cap - start
|
||||||
copy(result[:firstPart], cb.data[start:])
|
copy(result[:firstPart], cb.data[start:])
|
||||||
copy(result[firstPart:], cb.data[:cb.size-firstPart])
|
copy(result[firstPart:], cb.data[:cb.size-firstPart])
|
||||||
@@ -90,42 +88,38 @@ func (cb *circularBuffer) GetHistory() []byte {
|
|||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
type LogLevel int
|
type Level int
|
||||||
|
|
||||||
const (
|
const (
|
||||||
LevelDebug LogLevel = iota
|
LevelDebug Level = iota
|
||||||
LevelInfo
|
LevelInfo
|
||||||
LevelWarn
|
LevelWarn
|
||||||
LevelError
|
LevelError
|
||||||
|
|
||||||
LogBufferSize = 100 * 1024
|
BufferSize = 100 * 1024
|
||||||
)
|
)
|
||||||
|
|
||||||
type LogMonitor struct {
|
type Monitor struct {
|
||||||
eventbus *event.Dispatcher
|
eventbus *event.Dispatcher
|
||||||
mu sync.RWMutex
|
mu sync.RWMutex
|
||||||
buffer *circularBuffer
|
buffer *circularBuffer
|
||||||
bufferMu sync.RWMutex
|
bufferMu sync.RWMutex
|
||||||
|
|
||||||
// typically this can be os.Stdout
|
|
||||||
stdout io.Writer
|
stdout io.Writer
|
||||||
|
|
||||||
// logging levels
|
level Level
|
||||||
level LogLevel
|
prefix string
|
||||||
prefix string
|
|
||||||
|
|
||||||
// timestamps
|
|
||||||
timeFormat string
|
timeFormat string
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewLogMonitor() *LogMonitor {
|
func New() *Monitor {
|
||||||
return NewLogMonitorWriter(os.Stdout)
|
return NewWriter(os.Stdout)
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewLogMonitorWriter(stdout io.Writer) *LogMonitor {
|
func NewWriter(stdout io.Writer) *Monitor {
|
||||||
return &LogMonitor{
|
return &Monitor{
|
||||||
eventbus: event.NewDispatcherConfig(1000),
|
eventbus: event.NewDispatcherConfig(1000),
|
||||||
buffer: nil, // lazy initialized on first Write
|
buffer: nil,
|
||||||
stdout: stdout,
|
stdout: stdout,
|
||||||
level: LevelInfo,
|
level: LevelInfo,
|
||||||
prefix: "",
|
prefix: "",
|
||||||
@@ -133,7 +127,7 @@ func NewLogMonitorWriter(stdout io.Writer) *LogMonitor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (w *LogMonitor) Write(p []byte) (n int, err error) {
|
func (w *Monitor) Write(p []byte) (n int, err error) {
|
||||||
if len(p) == 0 {
|
if len(p) == 0 {
|
||||||
return 0, nil
|
return 0, nil
|
||||||
}
|
}
|
||||||
@@ -145,19 +139,18 @@ func (w *LogMonitor) Write(p []byte) (n int, err error) {
|
|||||||
|
|
||||||
w.bufferMu.Lock()
|
w.bufferMu.Lock()
|
||||||
if w.buffer == nil {
|
if w.buffer == nil {
|
||||||
w.buffer = newCircularBuffer(LogBufferSize)
|
w.buffer = newCircularBuffer(BufferSize)
|
||||||
}
|
}
|
||||||
w.buffer.Write(p)
|
w.buffer.Write(p)
|
||||||
w.bufferMu.Unlock()
|
w.bufferMu.Unlock()
|
||||||
|
|
||||||
// Make a copy for broadcast to preserve immutability
|
|
||||||
bufferCopy := make([]byte, len(p))
|
bufferCopy := make([]byte, len(p))
|
||||||
copy(bufferCopy, p)
|
copy(bufferCopy, p)
|
||||||
w.broadcast(bufferCopy)
|
w.broadcast(bufferCopy)
|
||||||
return n, nil
|
return n, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (w *LogMonitor) GetHistory() []byte {
|
func (w *Monitor) GetHistory() []byte {
|
||||||
w.bufferMu.RLock()
|
w.bufferMu.RLock()
|
||||||
defer w.bufferMu.RUnlock()
|
defer w.bufferMu.RUnlock()
|
||||||
if w.buffer == nil {
|
if w.buffer == nil {
|
||||||
@@ -168,41 +161,41 @@ func (w *LogMonitor) GetHistory() []byte {
|
|||||||
|
|
||||||
// Clear releases the buffer memory, making it eligible for GC.
|
// Clear releases the buffer memory, making it eligible for GC.
|
||||||
// The buffer will be lazily re-allocated on the next Write.
|
// The buffer will be lazily re-allocated on the next Write.
|
||||||
func (w *LogMonitor) Clear() {
|
func (w *Monitor) Clear() {
|
||||||
w.bufferMu.Lock()
|
w.bufferMu.Lock()
|
||||||
w.buffer = nil
|
w.buffer = nil
|
||||||
w.bufferMu.Unlock()
|
w.bufferMu.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (w *LogMonitor) OnLogData(callback func(data []byte)) context.CancelFunc {
|
func (w *Monitor) OnLogData(callback func(data []byte)) context.CancelFunc {
|
||||||
return event.Subscribe(w.eventbus, func(e LogDataEvent) {
|
return event.Subscribe(w.eventbus, func(e DataEvent) {
|
||||||
callback(e.Data)
|
callback(e.Data)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func (w *LogMonitor) broadcast(msg []byte) {
|
func (w *Monitor) broadcast(msg []byte) {
|
||||||
event.Publish(w.eventbus, LogDataEvent{Data: msg})
|
event.Publish(w.eventbus, DataEvent{Data: msg})
|
||||||
}
|
}
|
||||||
|
|
||||||
func (w *LogMonitor) SetPrefix(prefix string) {
|
func (w *Monitor) SetPrefix(prefix string) {
|
||||||
w.mu.Lock()
|
w.mu.Lock()
|
||||||
defer w.mu.Unlock()
|
defer w.mu.Unlock()
|
||||||
w.prefix = prefix
|
w.prefix = prefix
|
||||||
}
|
}
|
||||||
|
|
||||||
func (w *LogMonitor) SetLogLevel(level LogLevel) {
|
func (w *Monitor) SetLogLevel(level Level) {
|
||||||
w.mu.Lock()
|
w.mu.Lock()
|
||||||
defer w.mu.Unlock()
|
defer w.mu.Unlock()
|
||||||
w.level = level
|
w.level = level
|
||||||
}
|
}
|
||||||
|
|
||||||
func (w *LogMonitor) SetLogTimeFormat(timeFormat string) {
|
func (w *Monitor) SetLogTimeFormat(timeFormat string) {
|
||||||
w.mu.Lock()
|
w.mu.Lock()
|
||||||
defer w.mu.Unlock()
|
defer w.mu.Unlock()
|
||||||
w.timeFormat = timeFormat
|
w.timeFormat = timeFormat
|
||||||
}
|
}
|
||||||
|
|
||||||
func (w *LogMonitor) formatMessage(level string, msg string) []byte {
|
func (w *Monitor) formatMessage(level string, msg string) []byte {
|
||||||
prefix := ""
|
prefix := ""
|
||||||
if w.prefix != "" {
|
if w.prefix != "" {
|
||||||
prefix = fmt.Sprintf("[%s] ", w.prefix)
|
prefix = fmt.Sprintf("[%s] ", w.prefix)
|
||||||
@@ -211,49 +204,38 @@ func (w *LogMonitor) formatMessage(level string, msg string) []byte {
|
|||||||
if w.timeFormat != "" {
|
if w.timeFormat != "" {
|
||||||
timestamp = fmt.Sprintf("%s ", time.Now().Format(w.timeFormat))
|
timestamp = fmt.Sprintf("%s ", time.Now().Format(w.timeFormat))
|
||||||
}
|
}
|
||||||
return []byte(fmt.Sprintf("%s%s[%s] %s\n", timestamp, prefix, level, msg))
|
return fmt.Appendf(nil, "%s%s[%s] %s\n", timestamp, prefix, level, msg)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (w *LogMonitor) log(level LogLevel, msg string) {
|
func (w *Monitor) log(level Level, msg string) {
|
||||||
if level < w.level {
|
if level < w.level {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
w.Write(w.formatMessage(level.String(), msg))
|
w.Write(w.formatMessage(level.String(), msg))
|
||||||
}
|
}
|
||||||
|
|
||||||
func (w *LogMonitor) Debug(msg string) {
|
func (w *Monitor) Debug(msg string) { w.log(LevelDebug, msg) }
|
||||||
w.log(LevelDebug, msg)
|
func (w *Monitor) Info(msg string) { w.log(LevelInfo, msg) }
|
||||||
}
|
func (w *Monitor) Warn(msg string) { w.log(LevelWarn, msg) }
|
||||||
|
func (w *Monitor) Error(msg string) { w.log(LevelError, msg) }
|
||||||
|
|
||||||
func (w *LogMonitor) Info(msg string) {
|
func (w *Monitor) Debugf(format string, args ...any) {
|
||||||
w.log(LevelInfo, msg)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (w *LogMonitor) Warn(msg string) {
|
|
||||||
w.log(LevelWarn, msg)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (w *LogMonitor) Error(msg string) {
|
|
||||||
w.log(LevelError, msg)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (w *LogMonitor) Debugf(format string, args ...interface{}) {
|
|
||||||
w.log(LevelDebug, fmt.Sprintf(format, args...))
|
w.log(LevelDebug, fmt.Sprintf(format, args...))
|
||||||
}
|
}
|
||||||
|
|
||||||
func (w *LogMonitor) Infof(format string, args ...interface{}) {
|
func (w *Monitor) Infof(format string, args ...any) {
|
||||||
w.log(LevelInfo, fmt.Sprintf(format, args...))
|
w.log(LevelInfo, fmt.Sprintf(format, args...))
|
||||||
}
|
}
|
||||||
|
|
||||||
func (w *LogMonitor) Warnf(format string, args ...interface{}) {
|
func (w *Monitor) Warnf(format string, args ...any) {
|
||||||
w.log(LevelWarn, fmt.Sprintf(format, args...))
|
w.log(LevelWarn, fmt.Sprintf(format, args...))
|
||||||
}
|
}
|
||||||
|
|
||||||
func (w *LogMonitor) Errorf(format string, args ...interface{}) {
|
func (w *Monitor) Errorf(format string, args ...any) {
|
||||||
w.log(LevelError, fmt.Sprintf(format, args...))
|
w.log(LevelError, fmt.Sprintf(format, args...))
|
||||||
}
|
}
|
||||||
|
|
||||||
func (l LogLevel) String() string {
|
func (l Level) String() string {
|
||||||
switch l {
|
switch l {
|
||||||
case LevelDebug:
|
case LevelDebug:
|
||||||
return "DEBUG"
|
return "DEBUG"
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package proxy
|
package logmon
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
@@ -10,9 +10,8 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func TestLogMonitor(t *testing.T) {
|
func TestLogMonitor(t *testing.T) {
|
||||||
logMonitor := NewLogMonitorWriter(io.Discard)
|
logMonitor := NewWriter(io.Discard)
|
||||||
|
|
||||||
// A WaitGroup is used to wait for all the expected writes to complete
|
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
|
|
||||||
client1Messages := make([]byte, 0)
|
client1Messages := make([]byte, 0)
|
||||||
@@ -34,10 +33,8 @@ func TestLogMonitor(t *testing.T) {
|
|||||||
logMonitor.Write([]byte("2"))
|
logMonitor.Write([]byte("2"))
|
||||||
logMonitor.Write([]byte("3"))
|
logMonitor.Write([]byte("3"))
|
||||||
|
|
||||||
// wait for all writes to complete
|
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
|
|
||||||
// Check the buffer
|
|
||||||
expectedHistory := "123"
|
expectedHistory := "123"
|
||||||
history := string(logMonitor.GetHistory())
|
history := string(logMonitor.GetHistory())
|
||||||
|
|
||||||
@@ -57,14 +54,11 @@ func TestLogMonitor(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestWrite_ImmutableBuffer(t *testing.T) {
|
func TestWrite_ImmutableBuffer(t *testing.T) {
|
||||||
// Create a new LogMonitor instance
|
lm := NewWriter(io.Discard)
|
||||||
lm := NewLogMonitorWriter(io.Discard)
|
|
||||||
|
|
||||||
// Prepare a message to write
|
|
||||||
msg := []byte("Hello, World!")
|
msg := []byte("Hello, World!")
|
||||||
lenmsg := len(msg)
|
lenmsg := len(msg)
|
||||||
|
|
||||||
// Write the message to the LogMonitor
|
|
||||||
n, err := lm.Write(msg)
|
n, err := lm.Write(msg)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Write failed: %v", err)
|
t.Fatalf("Write failed: %v", err)
|
||||||
@@ -74,13 +68,10 @@ func TestWrite_ImmutableBuffer(t *testing.T) {
|
|||||||
t.Errorf("Expected %d bytes written but got %d", lenmsg, n)
|
t.Errorf("Expected %d bytes written but got %d", lenmsg, n)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Change the original message
|
msg[0] = 'B'
|
||||||
msg[0] = 'B' // This should not affect the buffer
|
|
||||||
|
|
||||||
// Get the history from the LogMonitor
|
|
||||||
history := lm.GetHistory()
|
history := lm.GetHistory()
|
||||||
|
|
||||||
// Check that the history contains the original message, not the modified one
|
|
||||||
expected := []byte("Hello, World!")
|
expected := []byte("Hello, World!")
|
||||||
if !bytes.Equal(history, expected) {
|
if !bytes.Equal(history, expected) {
|
||||||
t.Errorf("Expected history to be %q, got %q", expected, history)
|
t.Errorf("Expected history to be %q, got %q", expected, history)
|
||||||
@@ -88,16 +79,12 @@ func TestWrite_ImmutableBuffer(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestWrite_LogTimeFormat(t *testing.T) {
|
func TestWrite_LogTimeFormat(t *testing.T) {
|
||||||
// Create a new LogMonitor instance
|
lm := NewWriter(io.Discard)
|
||||||
lm := NewLogMonitorWriter(io.Discard)
|
|
||||||
|
|
||||||
// Enable timestamps
|
|
||||||
lm.timeFormat = time.RFC3339
|
lm.timeFormat = time.RFC3339
|
||||||
|
|
||||||
// Write the message to the LogMonitor
|
|
||||||
lm.Info("Hello, World!")
|
lm.Info("Hello, World!")
|
||||||
|
|
||||||
// Get the history from the LogMonitor
|
|
||||||
history := lm.GetHistory()
|
history := lm.GetHistory()
|
||||||
|
|
||||||
timestamp := ""
|
timestamp := ""
|
||||||
@@ -115,48 +102,40 @@ func TestWrite_LogTimeFormat(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestCircularBuffer_WrapAround(t *testing.T) {
|
func TestCircularBuffer_WrapAround(t *testing.T) {
|
||||||
// Create a small buffer to test wrap-around
|
|
||||||
cb := newCircularBuffer(10)
|
cb := newCircularBuffer(10)
|
||||||
|
|
||||||
// Write "hello" (5 bytes)
|
|
||||||
cb.Write([]byte("hello"))
|
cb.Write([]byte("hello"))
|
||||||
if got := string(cb.GetHistory()); got != "hello" {
|
if got := string(cb.GetHistory()); got != "hello" {
|
||||||
t.Errorf("Expected 'hello', got %q", got)
|
t.Errorf("Expected 'hello', got %q", got)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write "world" (5 bytes) - buffer now full
|
|
||||||
cb.Write([]byte("world"))
|
cb.Write([]byte("world"))
|
||||||
if got := string(cb.GetHistory()); got != "helloworld" {
|
if got := string(cb.GetHistory()); got != "helloworld" {
|
||||||
t.Errorf("Expected 'helloworld', got %q", got)
|
t.Errorf("Expected 'helloworld', got %q", got)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write "12345" (5 bytes) - should overwrite "hello"
|
|
||||||
cb.Write([]byte("12345"))
|
cb.Write([]byte("12345"))
|
||||||
if got := string(cb.GetHistory()); got != "world12345" {
|
if got := string(cb.GetHistory()); got != "world12345" {
|
||||||
t.Errorf("Expected 'world12345', got %q", got)
|
t.Errorf("Expected 'world12345', got %q", got)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write data larger than buffer capacity
|
cb.Write([]byte("abcdefghijklmnop"))
|
||||||
cb.Write([]byte("abcdefghijklmnop")) // 16 bytes, only last 10 kept
|
|
||||||
if got := string(cb.GetHistory()); got != "ghijklmnop" {
|
if got := string(cb.GetHistory()); got != "ghijklmnop" {
|
||||||
t.Errorf("Expected 'ghijklmnop', got %q", got)
|
t.Errorf("Expected 'ghijklmnop', got %q", got)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestCircularBuffer_BoundaryConditions(t *testing.T) {
|
func TestCircularBuffer_BoundaryConditions(t *testing.T) {
|
||||||
// Test empty buffer
|
|
||||||
cb := newCircularBuffer(10)
|
cb := newCircularBuffer(10)
|
||||||
if got := cb.GetHistory(); got != nil {
|
if got := cb.GetHistory(); got != nil {
|
||||||
t.Errorf("Expected nil for empty buffer, got %q", got)
|
t.Errorf("Expected nil for empty buffer, got %q", got)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test exact capacity
|
|
||||||
cb.Write([]byte("1234567890"))
|
cb.Write([]byte("1234567890"))
|
||||||
if got := string(cb.GetHistory()); got != "1234567890" {
|
if got := string(cb.GetHistory()); got != "1234567890" {
|
||||||
t.Errorf("Expected '1234567890', got %q", got)
|
t.Errorf("Expected '1234567890', got %q", got)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test write exactly at capacity boundary
|
|
||||||
cb = newCircularBuffer(10)
|
cb = newCircularBuffer(10)
|
||||||
cb.Write([]byte("12345"))
|
cb.Write([]byte("12345"))
|
||||||
cb.Write([]byte("67890"))
|
cb.Write([]byte("67890"))
|
||||||
@@ -166,19 +145,16 @@ func TestCircularBuffer_BoundaryConditions(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestLogMonitor_LazyInit(t *testing.T) {
|
func TestLogMonitor_LazyInit(t *testing.T) {
|
||||||
lm := NewLogMonitorWriter(io.Discard)
|
lm := NewWriter(io.Discard)
|
||||||
|
|
||||||
// Buffer should be nil before any writes
|
|
||||||
if lm.buffer != nil {
|
if lm.buffer != nil {
|
||||||
t.Error("Expected buffer to be nil before first write")
|
t.Error("Expected buffer to be nil before first write")
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetHistory should return nil when buffer is nil
|
|
||||||
if got := lm.GetHistory(); got != nil {
|
if got := lm.GetHistory(); got != nil {
|
||||||
t.Errorf("Expected nil history before first write, got %q", got)
|
t.Errorf("Expected nil history before first write, got %q", got)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write should lazily initialize the buffer
|
|
||||||
lm.Write([]byte("test"))
|
lm.Write([]byte("test"))
|
||||||
|
|
||||||
if lm.buffer == nil {
|
if lm.buffer == nil {
|
||||||
@@ -191,15 +167,13 @@ func TestLogMonitor_LazyInit(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestLogMonitor_Clear(t *testing.T) {
|
func TestLogMonitor_Clear(t *testing.T) {
|
||||||
lm := NewLogMonitorWriter(io.Discard)
|
lm := NewWriter(io.Discard)
|
||||||
|
|
||||||
// Write some data
|
|
||||||
lm.Write([]byte("hello"))
|
lm.Write([]byte("hello"))
|
||||||
if got := string(lm.GetHistory()); got != "hello" {
|
if got := string(lm.GetHistory()); got != "hello" {
|
||||||
t.Errorf("Expected 'hello', got %q", got)
|
t.Errorf("Expected 'hello', got %q", got)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Clear should release the buffer
|
|
||||||
lm.Clear()
|
lm.Clear()
|
||||||
|
|
||||||
if lm.buffer != nil {
|
if lm.buffer != nil {
|
||||||
@@ -212,9 +186,8 @@ func TestLogMonitor_Clear(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestLogMonitor_ClearAndReuse(t *testing.T) {
|
func TestLogMonitor_ClearAndReuse(t *testing.T) {
|
||||||
lm := NewLogMonitorWriter(io.Discard)
|
lm := NewWriter(io.Discard)
|
||||||
|
|
||||||
// Write, clear, then write again
|
|
||||||
lm.Write([]byte("first"))
|
lm.Write([]byte("first"))
|
||||||
lm.Clear()
|
lm.Clear()
|
||||||
lm.Write([]byte("second"))
|
lm.Write([]byte("second"))
|
||||||
@@ -225,13 +198,12 @@ func TestLogMonitor_ClearAndReuse(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkLogMonitorWrite(b *testing.B) {
|
func BenchmarkLogMonitorWrite(b *testing.B) {
|
||||||
// Test data of varying sizes
|
|
||||||
smallMsg := []byte("small message\n")
|
smallMsg := []byte("small message\n")
|
||||||
mediumMsg := []byte(strings.Repeat("medium message content ", 10) + "\n")
|
mediumMsg := []byte(strings.Repeat("medium message content ", 10) + "\n")
|
||||||
largeMsg := []byte(strings.Repeat("large message content for benchmarking ", 100) + "\n")
|
largeMsg := []byte(strings.Repeat("large message content for benchmarking ", 100) + "\n")
|
||||||
|
|
||||||
b.Run("SmallWrite", func(b *testing.B) {
|
b.Run("SmallWrite", func(b *testing.B) {
|
||||||
lm := NewLogMonitorWriter(io.Discard)
|
lm := NewWriter(io.Discard)
|
||||||
b.ResetTimer()
|
b.ResetTimer()
|
||||||
for i := 0; i < b.N; i++ {
|
for i := 0; i < b.N; i++ {
|
||||||
lm.Write(smallMsg)
|
lm.Write(smallMsg)
|
||||||
@@ -239,7 +211,7 @@ func BenchmarkLogMonitorWrite(b *testing.B) {
|
|||||||
})
|
})
|
||||||
|
|
||||||
b.Run("MediumWrite", func(b *testing.B) {
|
b.Run("MediumWrite", func(b *testing.B) {
|
||||||
lm := NewLogMonitorWriter(io.Discard)
|
lm := NewWriter(io.Discard)
|
||||||
b.ResetTimer()
|
b.ResetTimer()
|
||||||
for i := 0; i < b.N; i++ {
|
for i := 0; i < b.N; i++ {
|
||||||
lm.Write(mediumMsg)
|
lm.Write(mediumMsg)
|
||||||
@@ -247,7 +219,7 @@ func BenchmarkLogMonitorWrite(b *testing.B) {
|
|||||||
})
|
})
|
||||||
|
|
||||||
b.Run("LargeWrite", func(b *testing.B) {
|
b.Run("LargeWrite", func(b *testing.B) {
|
||||||
lm := NewLogMonitorWriter(io.Discard)
|
lm := NewWriter(io.Discard)
|
||||||
b.ResetTimer()
|
b.ResetTimer()
|
||||||
for i := 0; i < b.N; i++ {
|
for i := 0; i < b.N; i++ {
|
||||||
lm.Write(largeMsg)
|
lm.Write(largeMsg)
|
||||||
@@ -255,8 +227,7 @@ func BenchmarkLogMonitorWrite(b *testing.B) {
|
|||||||
})
|
})
|
||||||
|
|
||||||
b.Run("WithSubscribers", func(b *testing.B) {
|
b.Run("WithSubscribers", func(b *testing.B) {
|
||||||
lm := NewLogMonitorWriter(io.Discard)
|
lm := NewWriter(io.Discard)
|
||||||
// Add some subscribers
|
|
||||||
for i := 0; i < 5; i++ {
|
for i := 0; i < 5; i++ {
|
||||||
lm.OnLogData(func(data []byte) {})
|
lm.OnLogData(func(data []byte) {})
|
||||||
}
|
}
|
||||||
@@ -267,8 +238,7 @@ func BenchmarkLogMonitorWrite(b *testing.B) {
|
|||||||
})
|
})
|
||||||
|
|
||||||
b.Run("GetHistory", func(b *testing.B) {
|
b.Run("GetHistory", func(b *testing.B) {
|
||||||
lm := NewLogMonitorWriter(io.Discard)
|
lm := NewWriter(io.Discard)
|
||||||
// Pre-populate with data
|
|
||||||
for i := 0; i < 1000; i++ {
|
for i := 0; i < 1000; i++ {
|
||||||
lm.Write(mediumMsg)
|
lm.Write(mediumMsg)
|
||||||
}
|
}
|
||||||
@@ -278,39 +248,3 @@ func BenchmarkLogMonitorWrite(b *testing.B) {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
Benchmark Results - MBP M1 Pro
|
|
||||||
|
|
||||||
Before (ring.Ring):
|
|
||||||
| Benchmark | ns/op | bytes/op | allocs/op |
|
|
||||||
|---------------------------------|------------|----------|-----------|
|
|
||||||
| SmallWrite (14B) | 43 ns | 40 B | 2 |
|
|
||||||
| MediumWrite (241B) | 76 ns | 264 B | 2 |
|
|
||||||
| LargeWrite (4KB) | 504 ns | 4,120 B | 2 |
|
|
||||||
| WithSubscribers (5 subs) | 355 ns | 264 B | 2 |
|
|
||||||
| GetHistory (after 1000 writes) | 145,000 ns | 1.2 MB | 22 |
|
|
||||||
|
|
||||||
After (circularBuffer 10KB):
|
|
||||||
| Benchmark | ns/op | bytes/op | allocs/op |
|
|
||||||
|---------------------------------|------------|----------|-----------|
|
|
||||||
| SmallWrite (14B) | 26 ns | 16 B | 1 |
|
|
||||||
| MediumWrite (241B) | 67 ns | 240 B | 1 |
|
|
||||||
| LargeWrite (4KB) | 774 ns | 4,096 B | 1 |
|
|
||||||
| WithSubscribers (5 subs) | 325 ns | 240 B | 1 |
|
|
||||||
| GetHistory (after 1000 writes) | 1,042 ns | 10,240 B | 1 |
|
|
||||||
|
|
||||||
After (circularBuffer 100KB):
|
|
||||||
| Benchmark | ns/op | bytes/op | allocs/op |
|
|
||||||
|---------------------------------|------------|-----------|-----------|
|
|
||||||
| SmallWrite (14B) | 26 ns | 16 B | 1 |
|
|
||||||
| MediumWrite (241B) | 66 ns | 240 B | 1 |
|
|
||||||
| LargeWrite (4KB) | 753 ns | 4,096 B | 1 |
|
|
||||||
| WithSubscribers (5 subs) | 309 ns | 240 B | 1 |
|
|
||||||
| GetHistory (after 1000 writes) | 7,788 ns | 106,496 B | 1 |
|
|
||||||
|
|
||||||
Summary:
|
|
||||||
- GetHistory: 139x faster (10KB), 18x faster (100KB)
|
|
||||||
- Allocations: reduced from 2 to 1 across all operations
|
|
||||||
- Small/medium writes: ~1.1-1.6x faster
|
|
||||||
*/
|
|
||||||
@@ -0,0 +1,206 @@
|
|||||||
|
package perf
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||||
|
"github.com/mostlygeek/llama-swap/internal/ring"
|
||||||
|
"github.com/mostlygeek/llama-swap/proxy/config"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ErrNotImplemented is the sentinel returned by stats sources that have no
// implementation in the current build.
var ErrNotImplemented = errors.New("Not Implemented")

// ErrNoGpuTool is the sentinel returned when none of the GPU monitoring
// back-ends could be used.
var ErrNoGpuTool = errors.New("no GPU monitoring tool available")
|
||||||
|
|
||||||
|
type Monitor struct {
|
||||||
|
mutex sync.RWMutex
|
||||||
|
log *logmon.Monitor
|
||||||
|
conf config.PerformanceConfig
|
||||||
|
sysRing ring.Buffer[SysStat]
|
||||||
|
gpuRing ring.Buffer[[]GpuStat]
|
||||||
|
|
||||||
|
stopCtx context.Context
|
||||||
|
stopCancel context.CancelFunc
|
||||||
|
|
||||||
|
sysListeners map[chan SysStat]struct{}
|
||||||
|
gpuListeners map[chan []GpuStat]struct{}
|
||||||
|
}
|
||||||
|
|
||||||
|
func ringCapacity(c config.PerformanceConfig) int {
|
||||||
|
n := int(time.Hour / c.Every)
|
||||||
|
if n < 1 {
|
||||||
|
n = 1
|
||||||
|
}
|
||||||
|
return n
|
||||||
|
}
|
||||||
|
|
||||||
|
func New(c config.PerformanceConfig, logger *logmon.Monitor) (*Monitor, error) {
|
||||||
|
|
||||||
|
if c.Every < 100*time.Millisecond {
|
||||||
|
c.Every = 100 * time.Millisecond
|
||||||
|
}
|
||||||
|
|
||||||
|
if logger == nil {
|
||||||
|
return nil, errors.New("logger is required")
|
||||||
|
}
|
||||||
|
|
||||||
|
capacity := ringCapacity(c)
|
||||||
|
return &Monitor{
|
||||||
|
conf: c,
|
||||||
|
log: logger,
|
||||||
|
sysRing: ring.NewBuffer[SysStat](capacity),
|
||||||
|
gpuRing: ring.NewBuffer[[]GpuStat](capacity),
|
||||||
|
sysListeners: make(map[chan SysStat]struct{}),
|
||||||
|
gpuListeners: make(map[chan []GpuStat]struct{}),
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *Monitor) Stop() {
|
||||||
|
m.mutex.Lock()
|
||||||
|
defer m.mutex.Unlock()
|
||||||
|
if m.stopCancel == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
m.stopCancel()
|
||||||
|
m.stopCancel = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// UpdateConfig updates the monitor configuration and restarts if changed.
|
||||||
|
func (m *Monitor) UpdateConfig(newConf config.PerformanceConfig) {
|
||||||
|
m.mutex.RLock()
|
||||||
|
changed := m.conf != newConf
|
||||||
|
m.mutex.RUnlock()
|
||||||
|
|
||||||
|
if !changed {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
m.Stop()
|
||||||
|
m.mutex.Lock()
|
||||||
|
m.conf = newConf
|
||||||
|
capacity := ringCapacity(newConf)
|
||||||
|
m.sysRing = ring.NewBuffer[SysStat](capacity)
|
||||||
|
m.gpuRing = ring.NewBuffer[[]GpuStat](capacity)
|
||||||
|
m.mutex.Unlock()
|
||||||
|
if !newConf.Disabled {
|
||||||
|
m.Start()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Subscribe returns channels to listen to system and GPU stats.
|
||||||
|
func (m *Monitor) Subscribe() (chan SysStat, chan []GpuStat, func()) {
|
||||||
|
m.mutex.Lock()
|
||||||
|
defer m.mutex.Unlock()
|
||||||
|
sysChan := make(chan SysStat, 1)
|
||||||
|
gpuChan := make(chan []GpuStat, 1)
|
||||||
|
|
||||||
|
m.sysListeners[sysChan] = struct{}{}
|
||||||
|
m.gpuListeners[gpuChan] = struct{}{}
|
||||||
|
|
||||||
|
unsub := func() {
|
||||||
|
m.mutex.Lock()
|
||||||
|
defer m.mutex.Unlock()
|
||||||
|
delete(m.sysListeners, sysChan)
|
||||||
|
delete(m.gpuListeners, gpuChan)
|
||||||
|
}
|
||||||
|
|
||||||
|
return sysChan, gpuChan, unsub
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *Monitor) Start() {
|
||||||
|
m.mutex.Lock()
|
||||||
|
defer m.mutex.Unlock()
|
||||||
|
if m.stopCancel != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
m.stopCtx, m.stopCancel = context.WithCancel(context.Background())
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
tick := time.NewTicker(m.conf.Every)
|
||||||
|
defer tick.Stop()
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-m.stopCtx.Done():
|
||||||
|
return
|
||||||
|
case <-tick.C:
|
||||||
|
s, err := ReadSysStats()
|
||||||
|
if err != nil {
|
||||||
|
if err != ErrNotImplemented {
|
||||||
|
m.log.Errorf("failed to read sys stats: %s", err.Error())
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
m.mutex.Lock()
|
||||||
|
m.sysRing.Push(s)
|
||||||
|
for l := range m.sysListeners {
|
||||||
|
select {
|
||||||
|
case l <- s:
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
}
|
||||||
|
m.mutex.Unlock()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
gpuCh, err := getGpuStats(m.stopCtx, m.conf.Every, m.log)
|
||||||
|
if err != nil {
|
||||||
|
if errors.Is(err, ErrNotImplemented) || errors.Is(err, ErrNoGpuTool) {
|
||||||
|
m.log.Infof("GPU monitoring not available: %s", err.Error())
|
||||||
|
} else {
|
||||||
|
m.log.Errorf("failed to initialize GPU monitoring: %s", err.Error())
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-m.stopCtx.Done():
|
||||||
|
return
|
||||||
|
case g, ok := <-gpuCh:
|
||||||
|
if !ok {
|
||||||
|
m.log.Errorf("failed reading from gpuCh - stopping read goroutine")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
m.mutex.Lock()
|
||||||
|
m.gpuRing.Push(g)
|
||||||
|
for l := range m.gpuListeners {
|
||||||
|
select {
|
||||||
|
case l <- g:
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
}
|
||||||
|
m.mutex.Unlock()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Current returns a copy of the current log of system and GPU stats.
|
||||||
|
func (m *Monitor) Current() ([]SysStat, []GpuStat) {
|
||||||
|
m.mutex.RLock()
|
||||||
|
defer m.mutex.RUnlock()
|
||||||
|
|
||||||
|
sysStats := m.sysRing.Slice()
|
||||||
|
|
||||||
|
snapshots := m.gpuRing.Slice()
|
||||||
|
var gpuStats []GpuStat
|
||||||
|
for _, snapshot := range snapshots {
|
||||||
|
gpuStats = append(gpuStats, snapshot...)
|
||||||
|
}
|
||||||
|
return sysStats, gpuStats
|
||||||
|
}
|
||||||
|
|
||||||
|
func ReadSysStats() (SysStat, error) {
|
||||||
|
return readSysStats()
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetGpuStats(ctx context.Context, every time.Duration, logger *logmon.Monitor) (chan []GpuStat, error) {
|
||||||
|
return getGpuStats(ctx, every, logger)
|
||||||
|
}
|
||||||
@@ -0,0 +1,55 @@
|
|||||||
|
package perf
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||||
|
"github.com/shirou/gopsutil/v4/cpu"
|
||||||
|
"github.com/shirou/gopsutil/v4/load"
|
||||||
|
"github.com/shirou/gopsutil/v4/mem"
|
||||||
|
)
|
||||||
|
|
||||||
|
func getGpuStats(ctx context.Context, every time.Duration, logger *logmon.Monitor) (chan []GpuStat, error) {
|
||||||
|
return nil, ErrNotImplemented
|
||||||
|
}
|
||||||
|
|
||||||
|
func readSysStats() (SysStat, error) {
|
||||||
|
cpuPcts, err := cpu.Percent(0, true)
|
||||||
|
if err != nil {
|
||||||
|
return SysStat{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
vmStat, err := mem.VirtualMemory()
|
||||||
|
if err != nil {
|
||||||
|
return SysStat{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
const toMB = 1024 * 1024
|
||||||
|
|
||||||
|
var swapTotalMB, swapUsedMB int
|
||||||
|
if swapStat, err := mem.SwapMemory(); err == nil {
|
||||||
|
swapTotalMB = int(swapStat.Total / toMB)
|
||||||
|
swapUsedMB = int(swapStat.Used / toMB)
|
||||||
|
}
|
||||||
|
|
||||||
|
var loadAvg1, loadAvg5, loadAvg15 float64
|
||||||
|
if loadStat, err := load.Avg(); err == nil {
|
||||||
|
loadAvg1 = loadStat.Load1
|
||||||
|
loadAvg5 = loadStat.Load5
|
||||||
|
loadAvg15 = loadStat.Load15
|
||||||
|
}
|
||||||
|
|
||||||
|
return SysStat{
|
||||||
|
Timestamp: time.Now(),
|
||||||
|
CpuUtilPerCore: cpuPcts,
|
||||||
|
MemTotalMB: int(vmStat.Total / toMB),
|
||||||
|
MemUsedMB: int(vmStat.Used / toMB),
|
||||||
|
MemFreeMB: int(vmStat.Free / toMB),
|
||||||
|
SwapTotalMB: swapTotalMB,
|
||||||
|
SwapUsedMB: swapUsedMB,
|
||||||
|
LoadAvg1: loadAvg1,
|
||||||
|
LoadAvg5: loadAvg5,
|
||||||
|
LoadAvg15: loadAvg15,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
@@ -0,0 +1,226 @@
|
|||||||
|
package perf
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io"
|
||||||
|
"sync"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||||
|
"github.com/mostlygeek/llama-swap/proxy/config"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func newTestLogger() *logmon.Monitor {
|
||||||
|
return logmon.NewWriter(io.Discard)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNew_DefaultConfig(t *testing.T) {
|
||||||
|
logger := newTestLogger()
|
||||||
|
|
||||||
|
m, err := New(config.PerformanceConfig{}, logger)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NotNil(t, m)
|
||||||
|
|
||||||
|
assert.Equal(t, 100*time.Millisecond, m.conf.Every)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNew_CustomConfig(t *testing.T) {
|
||||||
|
logger := newTestLogger()
|
||||||
|
|
||||||
|
cfg := config.PerformanceConfig{
|
||||||
|
Every: 500 * time.Millisecond,
|
||||||
|
}
|
||||||
|
|
||||||
|
m, err := New(cfg, logger)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
assert.Equal(t, 500*time.Millisecond, m.conf.Every)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNew_NilLogger(t *testing.T) {
|
||||||
|
m, err := New(config.PerformanceConfig{}, nil)
|
||||||
|
assert.Error(t, err)
|
||||||
|
assert.Nil(t, m)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNew_BelowMinimumConfig(t *testing.T) {
|
||||||
|
logger := newTestLogger()
|
||||||
|
|
||||||
|
cfg := config.PerformanceConfig{
|
||||||
|
Every: 1 * time.Millisecond,
|
||||||
|
}
|
||||||
|
|
||||||
|
m, err := New(cfg, logger)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
assert.Equal(t, 100*time.Millisecond, m.conf.Every)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSubscribe_ReturnsChannels(t *testing.T) {
|
||||||
|
m, err := New(config.PerformanceConfig{}, newTestLogger())
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
sysCh, gpuCh, unsub := m.Subscribe()
|
||||||
|
defer unsub()
|
||||||
|
|
||||||
|
assert.NotNil(t, sysCh)
|
||||||
|
assert.NotNil(t, gpuCh)
|
||||||
|
assert.NotNil(t, unsub)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSubscribe_UnsubscribeRemovesListeners(t *testing.T) {
|
||||||
|
m, err := New(config.PerformanceConfig{}, newTestLogger())
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
_, _, unsub := m.Subscribe()
|
||||||
|
|
||||||
|
m.mutex.RLock()
|
||||||
|
assert.Len(t, m.sysListeners, 1)
|
||||||
|
assert.Len(t, m.gpuListeners, 1)
|
||||||
|
m.mutex.RUnlock()
|
||||||
|
|
||||||
|
unsub()
|
||||||
|
|
||||||
|
m.mutex.RLock()
|
||||||
|
assert.Len(t, m.sysListeners, 0)
|
||||||
|
assert.Len(t, m.gpuListeners, 0)
|
||||||
|
m.mutex.RUnlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSubscribe_MultipleSubscriptions(t *testing.T) {
|
||||||
|
m, err := New(config.PerformanceConfig{}, newTestLogger())
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
sysCh1, gpuCh1, unsub1 := m.Subscribe()
|
||||||
|
sysCh2, gpuCh2, unsub2 := m.Subscribe()
|
||||||
|
defer unsub1()
|
||||||
|
defer unsub2()
|
||||||
|
|
||||||
|
assert.NotEqual(t, sysCh1, sysCh2)
|
||||||
|
assert.NotEqual(t, gpuCh1, gpuCh2)
|
||||||
|
|
||||||
|
m.mutex.RLock()
|
||||||
|
assert.Len(t, m.sysListeners, 2)
|
||||||
|
assert.Len(t, m.gpuListeners, 2)
|
||||||
|
m.mutex.RUnlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCurrent_EmptyByDefault(t *testing.T) {
|
||||||
|
m, err := New(config.PerformanceConfig{}, newTestLogger())
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
sysStats, gpuStats := m.Current()
|
||||||
|
assert.Empty(t, sysStats)
|
||||||
|
assert.Empty(t, gpuStats)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCurrent_ReturnsCopies(t *testing.T) {
|
||||||
|
m, err := New(config.PerformanceConfig{}, newTestLogger())
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
now := time.Now()
|
||||||
|
m.sysRing.Push(SysStat{Timestamp: now, MemTotalMB: 1024})
|
||||||
|
m.gpuRing.Push([]GpuStat{{Timestamp: now, ID: 0, Name: "gpu0"}})
|
||||||
|
|
||||||
|
sysStats, gpuStats := m.Current()
|
||||||
|
|
||||||
|
assert.Len(t, sysStats, 1)
|
||||||
|
assert.Len(t, gpuStats, 1)
|
||||||
|
assert.Equal(t, 1024, sysStats[0].MemTotalMB)
|
||||||
|
assert.Equal(t, "gpu0", gpuStats[0].Name)
|
||||||
|
|
||||||
|
// modifying the returned slice should not affect the original
|
||||||
|
sysStats[0].MemTotalMB = 999
|
||||||
|
original, _ := m.Current()
|
||||||
|
assert.Equal(t, 1024, original[0].MemTotalMB)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStart_CollectsSysStats(t *testing.T) {
|
||||||
|
if testing.Short() {
|
||||||
|
t.Skip("skipping slow test")
|
||||||
|
}
|
||||||
|
|
||||||
|
m, err := New(config.PerformanceConfig{Every: 100 * time.Millisecond}, newTestLogger())
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
m.Start()
|
||||||
|
|
||||||
|
time.Sleep(350 * time.Millisecond)
|
||||||
|
m.Stop()
|
||||||
|
|
||||||
|
sysStats, _ := m.Current()
|
||||||
|
assert.NotEmpty(t, sysStats, "expected sys stats to be collected")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStart_StopStopsGoroutines(t *testing.T) {
|
||||||
|
if testing.Short() {
|
||||||
|
t.Skip("skipping slow test")
|
||||||
|
}
|
||||||
|
|
||||||
|
m, err := New(config.PerformanceConfig{Every: 100 * time.Millisecond}, newTestLogger())
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
m.Start()
|
||||||
|
if m.stopCancel == nil {
|
||||||
|
t.Error("stopCancel should not be nil after Start()")
|
||||||
|
}
|
||||||
|
|
||||||
|
m.Stop()
|
||||||
|
if m.stopCancel != nil {
|
||||||
|
t.Error("stopCancel should be nil after Stop()")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStart_SubscriberReceivesStats(t *testing.T) {
|
||||||
|
if testing.Short() {
|
||||||
|
t.Skip("skipping slow test")
|
||||||
|
}
|
||||||
|
|
||||||
|
m, err := New(config.PerformanceConfig{Every: 100 * time.Millisecond}, newTestLogger())
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
sysCh, _, unsub := m.Subscribe()
|
||||||
|
defer unsub()
|
||||||
|
|
||||||
|
m.Start()
|
||||||
|
defer m.Stop()
|
||||||
|
|
||||||
|
select {
|
||||||
|
case s := <-sysCh:
|
||||||
|
assert.False(t, s.Timestamp.IsZero())
|
||||||
|
assert.NotEmpty(t, s.CpuUtilPerCore)
|
||||||
|
case <-time.After(500 * time.Millisecond):
|
||||||
|
t.Fatal("timed out waiting for sys stats")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestReadSysStats(t *testing.T) {
|
||||||
|
s, err := ReadSysStats()
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
assert.False(t, s.Timestamp.IsZero())
|
||||||
|
assert.NotEmpty(t, s.CpuUtilPerCore)
|
||||||
|
assert.Greater(t, s.MemTotalMB, 0)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCurrent_ConcurrentAccess(t *testing.T) {
|
||||||
|
m, err := New(config.PerformanceConfig{}, newTestLogger())
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
m.sysRing.Push(SysStat{Timestamp: time.Now(), MemTotalMB: 1024})
|
||||||
|
m.gpuRing.Push([]GpuStat{{Timestamp: time.Now(), ID: 0}})
|
||||||
|
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
for i := 0; i < 10; i++ {
|
||||||
|
wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
sys, gpu := m.Current()
|
||||||
|
assert.Len(t, sys, 1)
|
||||||
|
assert.Len(t, gpu, 1)
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
wg.Wait()
|
||||||
|
}
|
||||||
@@ -0,0 +1,464 @@
|
|||||||
|
//go:build unix && !darwin
|
||||||
|
|
||||||
|
package perf
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"net"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"os/user"
|
||||||
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||||
|
"github.com/shirou/gopsutil/v4/cpu"
|
||||||
|
"github.com/shirou/gopsutil/v4/load"
|
||||||
|
"github.com/shirou/gopsutil/v4/mem"
|
||||||
|
psnet "github.com/shirou/gopsutil/v4/net"
|
||||||
|
)
|
||||||
|
|
||||||
|
func getGpuStats(ctx context.Context, every time.Duration, logger *logmon.Monitor) (chan []GpuStat, error) {
|
||||||
|
if ch, err := tryLACT(ctx, every, logger); err == nil {
|
||||||
|
logger.Info("using LACT for GPU monitoring")
|
||||||
|
return ch, nil
|
||||||
|
} else {
|
||||||
|
logger.Debugf("LACT: %s", err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
if ch, err := tryNvidiaSmi(ctx, every, logger); err == nil {
|
||||||
|
logger.Info("using nvidia-smi for GPU monitoring")
|
||||||
|
return ch, nil
|
||||||
|
} else {
|
||||||
|
logger.Debugf("nvidia-smi: %s", err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
if ch, err := trySysfs(ctx, every, logger); err == nil {
|
||||||
|
logger.Info("using sysfs for GPU monitoring")
|
||||||
|
return ch, nil
|
||||||
|
} else {
|
||||||
|
logger.Debugf("sysfs: %s", err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil, ErrNoGpuTool
|
||||||
|
}
|
||||||
|
|
||||||
|
func tryLACT(ctx context.Context, every time.Duration, logger *logmon.Monitor) (chan []GpuStat, error) {
|
||||||
|
socketPath := lactSocketPath()
|
||||||
|
if socketPath == "" {
|
||||||
|
return nil, ErrNoGpuTool
|
||||||
|
}
|
||||||
|
|
||||||
|
conn, err := net.DialTimeout("unix", socketPath, 2*time.Second)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("cannot connect to LACT socket: %w", err)
|
||||||
|
}
|
||||||
|
defer conn.Close()
|
||||||
|
|
||||||
|
conn.SetDeadline(time.Now().Add(5 * time.Second))
|
||||||
|
|
||||||
|
devices, err := lactListDevices(conn)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("LACT ListDevices failed: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(devices) == 0 {
|
||||||
|
return nil, fmt.Errorf("LACT returned no devices")
|
||||||
|
}
|
||||||
|
|
||||||
|
ch := make(chan []GpuStat, 1)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
defer close(ch)
|
||||||
|
ticker := time.NewTicker(every)
|
||||||
|
defer ticker.Stop()
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
case <-ticker.C:
|
||||||
|
socketPath := lactSocketPath()
|
||||||
|
if socketPath == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
conn, err := net.DialTimeout("unix", socketPath, 2*time.Second)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
conn.SetDeadline(time.Now().Add(5 * time.Second))
|
||||||
|
|
||||||
|
devices, err := lactListDevices(conn)
|
||||||
|
if err != nil {
|
||||||
|
conn.Close()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
stats := make([]GpuStat, 0, len(devices))
|
||||||
|
for i, d := range devices {
|
||||||
|
stat, err := lactGetDeviceStats(conn, d.ID, d.Name, i)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if stat.MemTotalMB == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
stats = append(stats, stat)
|
||||||
|
}
|
||||||
|
conn.Close()
|
||||||
|
|
||||||
|
if len(stats) > 0 {
|
||||||
|
select {
|
||||||
|
case ch <- stats:
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
return ch, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func tryNvidiaSmi(ctx context.Context, every time.Duration, logger *logmon.Monitor) (chan []GpuStat, error) {
|
||||||
|
if _, err := exec.LookPath("nvidia-smi"); err != nil {
|
||||||
|
return nil, ErrNoGpuTool
|
||||||
|
}
|
||||||
|
|
||||||
|
sec := int(every.Seconds())
|
||||||
|
if sec < 1 {
|
||||||
|
sec = 1
|
||||||
|
}
|
||||||
|
|
||||||
|
cmd := exec.CommandContext(ctx, "nvidia-smi",
|
||||||
|
"--query-gpu=index,name,uuid,temperature.gpu,utilization.gpu,memory.used,memory.total,fan.speed,power.draw",
|
||||||
|
"--format=csv,noheader,nounits",
|
||||||
|
"-loop", fmt.Sprintf("%d", sec),
|
||||||
|
)
|
||||||
|
|
||||||
|
stdout, err := cmd.StdoutPipe()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("nvidia-smi stdout pipe failed: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := cmd.Start(); err != nil {
|
||||||
|
return nil, fmt.Errorf("nvidia-smi start failed: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
ch := make(chan []GpuStat, 1)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
defer close(ch)
|
||||||
|
|
||||||
|
scanner := bufio.NewScanner(stdout)
|
||||||
|
for scanner.Scan() {
|
||||||
|
line := strings.TrimSpace(scanner.Text())
|
||||||
|
if line == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
stat := parseNvidiaSmiLine(line)
|
||||||
|
if stat != nil {
|
||||||
|
select {
|
||||||
|
case ch <- []GpuStat{*stat}:
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cmd.Wait()
|
||||||
|
}()
|
||||||
|
|
||||||
|
return ch, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseNvidiaSmiLine(line string) *GpuStat {
|
||||||
|
fields := strings.Split(line, ", ")
|
||||||
|
if len(fields) < 9 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
id, _ := strconv.Atoi(strings.TrimSpace(fields[0]))
|
||||||
|
name := strings.TrimSpace(fields[1])
|
||||||
|
uuid := strings.TrimSpace(fields[2])
|
||||||
|
tempC, _ := strconv.Atoi(strings.TrimSpace(fields[3]))
|
||||||
|
gpuUtil, _ := strconv.ParseFloat(strings.TrimSpace(fields[4]), 64)
|
||||||
|
memUsed, _ := strconv.Atoi(strings.TrimSpace(fields[5]))
|
||||||
|
memTotal, _ := strconv.Atoi(strings.TrimSpace(fields[6]))
|
||||||
|
fanSpeed, _ := strconv.ParseFloat(strings.TrimSpace(fields[7]), 64)
|
||||||
|
powerDraw, _ := strconv.ParseFloat(strings.TrimSpace(fields[8]), 64)
|
||||||
|
|
||||||
|
var memUtil float64
|
||||||
|
if memTotal > 0 {
|
||||||
|
memUtil = float64(memUsed) / float64(memTotal) * 100
|
||||||
|
}
|
||||||
|
|
||||||
|
return &GpuStat{
|
||||||
|
Timestamp: time.Now(),
|
||||||
|
ID: id,
|
||||||
|
Name: name,
|
||||||
|
UUID: uuid,
|
||||||
|
TempC: tempC,
|
||||||
|
GpuUtilPct: gpuUtil,
|
||||||
|
MemUtilPct: memUtil,
|
||||||
|
MemUsedMB: memUsed,
|
||||||
|
MemTotalMB: memTotal,
|
||||||
|
FanSpeedPct: fanSpeed,
|
||||||
|
PowerDrawW: powerDraw,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func trySysfs(ctx context.Context, every time.Duration, logger *logmon.Monitor) (chan []GpuStat, error) {
|
||||||
|
return nil, ErrNotImplemented
|
||||||
|
}
|
||||||
|
|
||||||
|
func lactSocketPath() string {
|
||||||
|
if p := os.Getenv("LACT_DAEMON_SOCKET_PATH"); p != "" {
|
||||||
|
if _, err := os.Stat(p); err == nil {
|
||||||
|
return p
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
rootPath := "/run/lactd.sock"
|
||||||
|
if _, err := os.Stat(rootPath); err == nil {
|
||||||
|
return rootPath
|
||||||
|
}
|
||||||
|
|
||||||
|
u, err := user.Current()
|
||||||
|
if err != nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
userPath := filepath.Join("/run/user", u.Uid, "lactd.sock")
|
||||||
|
if _, err := os.Stat(userPath); err == nil {
|
||||||
|
return userPath
|
||||||
|
}
|
||||||
|
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// lactRequest is the JSON envelope written to the LACT socket by
// lactSendRequest. Args is omitted from the encoding when it is nil.
type lactRequest struct {
	// Command names the operation, e.g. "list_devices" or "device_stats".
	Command string `json:"command"`
	// Args carries the optional, command-specific arguments.
	Args interface{} `json:"args,omitempty"`
}
|
||||||
|
|
||||||
|
type lactResponse struct {
|
||||||
|
Status string `json:"status"`
|
||||||
|
Data json.RawMessage `json:"data"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// lactDeviceEntry is one element of the "list_devices" response payload.
type lactDeviceEntry struct {
	// ID is the identifier passed back to LACT in "device_stats" requests.
	ID string `json:"id"`
	// Name is the device name as reported by LACT.
	Name string `json:"name"`
}
|
||||||
|
|
||||||
|
type lactDeviceStats struct {
|
||||||
|
Fan struct {
|
||||||
|
PwmCurrent *uint8 `json:"pwm_current"`
|
||||||
|
} `json:"fan"`
|
||||||
|
Vram struct {
|
||||||
|
Total *uint64 `json:"total"`
|
||||||
|
Used *uint64 `json:"used"`
|
||||||
|
} `json:"vram"`
|
||||||
|
Power struct {
|
||||||
|
Average *float64 `json:"average"`
|
||||||
|
Current *float64 `json:"current"`
|
||||||
|
} `json:"power"`
|
||||||
|
Temps map[string]lactTempEntry `json:"temps"`
|
||||||
|
BusyPercent *uint8 `json:"busy_percent"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// lactTempEntry is a single temperature sensor reading in lactDeviceStats.Temps.
type lactTempEntry struct {
	// Current is the present reading; nil when the JSON value is absent or null.
	Current *float64 `json:"current"`
}
|
||||||
|
|
||||||
|
func lactSendRequest(conn net.Conn, req lactRequest) (json.RawMessage, error) {
|
||||||
|
data, err := json.Marshal(req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
data = append(data, '\n')
|
||||||
|
|
||||||
|
if _, err := conn.Write(data); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
reader := bufio.NewReader(conn)
|
||||||
|
line, err := reader.ReadBytes('\n')
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var resp lactResponse
|
||||||
|
if err := json.Unmarshal(line, &resp); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if resp.Status != "ok" {
|
||||||
|
return nil, fmt.Errorf("LACT error: %s", string(resp.Data))
|
||||||
|
}
|
||||||
|
|
||||||
|
return resp.Data, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func lactListDevices(conn net.Conn) ([]lactDeviceEntry, error) {
|
||||||
|
data, err := lactSendRequest(conn, lactRequest{Command: "list_devices"})
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var devices []lactDeviceEntry
|
||||||
|
if err := json.Unmarshal(data, &devices); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return devices, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func lactGetDeviceStats(conn net.Conn, id string, name string, index int) (GpuStat, error) {
|
||||||
|
data, err := lactSendRequest(conn, lactRequest{
|
||||||
|
Command: "device_stats",
|
||||||
|
Args: struct {
|
||||||
|
ID string `json:"id"`
|
||||||
|
}{ID: id},
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return GpuStat{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var stats lactDeviceStats
|
||||||
|
if err := json.Unmarshal(data, &stats); err != nil {
|
||||||
|
return GpuStat{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var memUsedMB, memTotalMB int
|
||||||
|
if stats.Vram.Used != nil {
|
||||||
|
memUsedMB = int(*stats.Vram.Used / 1024 / 1024)
|
||||||
|
}
|
||||||
|
if stats.Vram.Total != nil {
|
||||||
|
memTotalMB = int(*stats.Vram.Total / 1024 / 1024)
|
||||||
|
}
|
||||||
|
|
||||||
|
var memUtil float64
|
||||||
|
if memTotalMB > 0 {
|
||||||
|
memUtil = float64(memUsedMB) / float64(memTotalMB) * 100
|
||||||
|
}
|
||||||
|
|
||||||
|
var gpuUtil float64
|
||||||
|
if stats.BusyPercent != nil {
|
||||||
|
gpuUtil = float64(*stats.BusyPercent)
|
||||||
|
}
|
||||||
|
|
||||||
|
var fanSpeed float64
|
||||||
|
if stats.Fan.PwmCurrent != nil {
|
||||||
|
fanSpeed = float64(*stats.Fan.PwmCurrent) / 255.0 * 100.0
|
||||||
|
}
|
||||||
|
|
||||||
|
var powerDraw float64
|
||||||
|
if stats.Power.Average != nil && *stats.Power.Average > 0 {
|
||||||
|
powerDraw = *stats.Power.Average
|
||||||
|
} else if stats.Power.Current != nil {
|
||||||
|
powerDraw = *stats.Power.Current
|
||||||
|
}
|
||||||
|
|
||||||
|
var tempC int
|
||||||
|
if t, ok := stats.Temps["edge"]; ok && t.Current != nil {
|
||||||
|
tempC = int(*t.Current)
|
||||||
|
} else if t, ok := stats.Temps["junction"]; ok && t.Current != nil {
|
||||||
|
tempC = int(*t.Current)
|
||||||
|
} else {
|
||||||
|
for _, t := range stats.Temps {
|
||||||
|
if t.Current != nil {
|
||||||
|
tempC = int(*t.Current)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var vramTempC int
|
||||||
|
// nvidia uses "VRAM", amd "mem"
|
||||||
|
for _, key := range []string{"mem", "VRAM"} {
|
||||||
|
if t, ok := stats.Temps[key]; ok && t.Current != nil && *t.Current > 0 {
|
||||||
|
vramTempC = int(*t.Current)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return GpuStat{
|
||||||
|
Timestamp: time.Now(),
|
||||||
|
ID: index,
|
||||||
|
Name: name,
|
||||||
|
UUID: id,
|
||||||
|
TempC: tempC,
|
||||||
|
VramTempC: vramTempC,
|
||||||
|
GpuUtilPct: gpuUtil,
|
||||||
|
MemUtilPct: memUtil,
|
||||||
|
MemUsedMB: memUsedMB,
|
||||||
|
MemTotalMB: memTotalMB,
|
||||||
|
FanSpeedPct: fanSpeed,
|
||||||
|
PowerDrawW: powerDraw,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func readSysfs() ([]GpuStat, error) {
|
||||||
|
return nil, ErrNotImplemented
|
||||||
|
}
|
||||||
|
|
||||||
|
func readSysStats() (SysStat, error) {
|
||||||
|
cpuPcts, err := cpu.Percent(0, true)
|
||||||
|
if err != nil {
|
||||||
|
return SysStat{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
vmStat, err := mem.VirtualMemory()
|
||||||
|
if err != nil {
|
||||||
|
return SysStat{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
const toMB = 1024 * 1024
|
||||||
|
|
||||||
|
var swapTotalMB, swapUsedMB int
|
||||||
|
if swapStat, err := mem.SwapMemory(); err == nil {
|
||||||
|
swapTotalMB = int(swapStat.Total / toMB)
|
||||||
|
swapUsedMB = int(swapStat.Used / toMB)
|
||||||
|
}
|
||||||
|
|
||||||
|
var loadAvg1, loadAvg5, loadAvg15 float64
|
||||||
|
if loadStat, err := load.Avg(); err == nil {
|
||||||
|
loadAvg1 = loadStat.Load1
|
||||||
|
loadAvg5 = loadStat.Load5
|
||||||
|
loadAvg15 = loadStat.Load15
|
||||||
|
}
|
||||||
|
|
||||||
|
netIO := make([]NetIOStat, 0)
|
||||||
|
if ioCounters, err := psnet.IOCounters(true); err == nil {
|
||||||
|
for _, ioc := range ioCounters {
|
||||||
|
if ioc.Name == "lo" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
netIO = append(netIO, NetIOStat{
|
||||||
|
Name: ioc.Name,
|
||||||
|
BytesRecv: ioc.BytesRecv,
|
||||||
|
BytesSent: ioc.BytesSent,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return SysStat{
|
||||||
|
Timestamp: time.Now(),
|
||||||
|
CpuUtilPerCore: cpuPcts,
|
||||||
|
MemTotalMB: int(vmStat.Total / toMB),
|
||||||
|
MemUsedMB: int(vmStat.Used / toMB),
|
||||||
|
MemFreeMB: int(vmStat.Free / toMB),
|
||||||
|
SwapTotalMB: swapTotalMB,
|
||||||
|
SwapUsedMB: swapUsedMB,
|
||||||
|
LoadAvg1: loadAvg1,
|
||||||
|
LoadAvg5: loadAvg5,
|
||||||
|
LoadAvg15: loadAvg15,
|
||||||
|
NetIO: netIO,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
@@ -0,0 +1,49 @@
|
|||||||
|
package perf
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||||
|
"github.com/shirou/gopsutil/v4/cpu"
|
||||||
|
"github.com/shirou/gopsutil/v4/mem"
|
||||||
|
"github.com/shirou/gopsutil/v4/net"
|
||||||
|
)
|
||||||
|
|
||||||
|
func getGpuStats(ctx context.Context, every time.Duration, logger *logmon.Monitor) (chan []GpuStat, error) {
|
||||||
|
return nil, ErrNotImplemented
|
||||||
|
}
|
||||||
|
|
||||||
|
func readSysStats() (SysStat, error) {
|
||||||
|
cpuPcts, err := cpu.Percent(0, true)
|
||||||
|
if err != nil {
|
||||||
|
return SysStat{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
vmStat, err := mem.VirtualMemory()
|
||||||
|
if err != nil {
|
||||||
|
return SysStat{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
const toMB = 1024 * 1024
|
||||||
|
|
||||||
|
netIO := make([]NetIOStat, 0)
|
||||||
|
if ioCounters, err := net.IOCounters(true); err == nil {
|
||||||
|
for _, ioc := range ioCounters {
|
||||||
|
netIO = append(netIO, NetIOStat{
|
||||||
|
Name: ioc.Name,
|
||||||
|
BytesRecv: ioc.BytesRecv,
|
||||||
|
BytesSent: ioc.BytesSent,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return SysStat{
|
||||||
|
Timestamp: time.Now(),
|
||||||
|
CpuUtilPerCore: cpuPcts,
|
||||||
|
MemTotalMB: int(vmStat.Total / toMB),
|
||||||
|
MemUsedMB: int(vmStat.Used / toMB),
|
||||||
|
MemFreeMB: int(vmStat.Free / toMB),
|
||||||
|
NetIO: netIO,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
@@ -0,0 +1,129 @@
|
|||||||
|
package perf
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// mbToBytes is the number of bytes in one megabyte (1024 * 1024), typed as
// int64 so that MB-to-byte conversions are carried out in 64 bits.
const mbToBytes int64 = 1024 * 1024
|
||||||
|
|
||||||
|
// MetricsHandler returns an http.HandlerFunc serving Prometheus text format metrics
|
||||||
|
// with the most recent system and GPU stats.
|
||||||
|
func (m *Monitor) MetricsHandler() http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
sysStats, gpuStats := m.Current()
|
||||||
|
w.Header().Set("Content-Type", "text/plain; version=0.0.4; charset=utf-8")
|
||||||
|
|
||||||
|
if len(sysStats) > 0 {
|
||||||
|
writeSysMetrics(w, sysStats[len(sysStats)-1])
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(gpuStats) > 0 {
|
||||||
|
writeGpuMetrics(w, latestPerGPU(gpuStats))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeSysMetrics(w http.ResponseWriter, s SysStat) {
|
||||||
|
fmt.Fprintf(w, "# HELP llamaswap_cpu_util_percent CPU utilization per core (0-100)\n")
|
||||||
|
fmt.Fprintf(w, "# TYPE llamaswap_cpu_util_percent gauge\n")
|
||||||
|
for i, pct := range s.CpuUtilPerCore {
|
||||||
|
fmt.Fprintf(w, "llamaswap_cpu_util_percent{core=\"%d\"} %g\n", i, pct)
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Fprintf(w, "# HELP llamaswap_memory_total_bytes Total memory in bytes\n")
|
||||||
|
fmt.Fprintf(w, "# TYPE llamaswap_memory_total_bytes gauge\n")
|
||||||
|
fmt.Fprintf(w, "llamaswap_memory_total_bytes %d\n", int64(s.MemTotalMB)*mbToBytes)
|
||||||
|
|
||||||
|
fmt.Fprintf(w, "# HELP llamaswap_memory_used_bytes Used memory in bytes\n")
|
||||||
|
fmt.Fprintf(w, "# TYPE llamaswap_memory_used_bytes gauge\n")
|
||||||
|
fmt.Fprintf(w, "llamaswap_memory_used_bytes %d\n", int64(s.MemUsedMB)*mbToBytes)
|
||||||
|
|
||||||
|
fmt.Fprintf(w, "# HELP llamaswap_memory_free_bytes Free memory in bytes\n")
|
||||||
|
fmt.Fprintf(w, "# TYPE llamaswap_memory_free_bytes gauge\n")
|
||||||
|
fmt.Fprintf(w, "llamaswap_memory_free_bytes %d\n", int64(s.MemFreeMB)*mbToBytes)
|
||||||
|
|
||||||
|
fmt.Fprintf(w, "# HELP llamaswap_swap_total_bytes Total swap in bytes\n")
|
||||||
|
fmt.Fprintf(w, "# TYPE llamaswap_swap_total_bytes gauge\n")
|
||||||
|
fmt.Fprintf(w, "llamaswap_swap_total_bytes %d\n", int64(s.SwapTotalMB)*mbToBytes)
|
||||||
|
|
||||||
|
fmt.Fprintf(w, "# HELP llamaswap_swap_used_bytes Used swap in bytes\n")
|
||||||
|
fmt.Fprintf(w, "# TYPE llamaswap_swap_used_bytes gauge\n")
|
||||||
|
fmt.Fprintf(w, "llamaswap_swap_used_bytes %d\n", int64(s.SwapUsedMB)*mbToBytes)
|
||||||
|
|
||||||
|
fmt.Fprintf(w, "# HELP llamaswap_load_average Load average\n")
|
||||||
|
fmt.Fprintf(w, "# TYPE llamaswap_load_average gauge\n")
|
||||||
|
fmt.Fprintf(w, "llamaswap_load_average{interval=\"1m\"} %g\n", s.LoadAvg1)
|
||||||
|
fmt.Fprintf(w, "llamaswap_load_average{interval=\"5m\"} %g\n", s.LoadAvg5)
|
||||||
|
fmt.Fprintf(w, "llamaswap_load_average{interval=\"15m\"} %g\n", s.LoadAvg15)
|
||||||
|
|
||||||
|
if len(s.NetIO) > 0 {
|
||||||
|
fmt.Fprintf(w, "# HELP llamaswap_network_bytes_total Total network bytes transferred\n")
|
||||||
|
fmt.Fprintf(w, "# TYPE llamaswap_network_bytes_total counter\n")
|
||||||
|
for _, io := range s.NetIO {
|
||||||
|
iface := sanitizeLabel(io.Name)
|
||||||
|
fmt.Fprintf(w, "llamaswap_network_bytes_total{interface=\"%s\",direction=\"recv\"} %d\n", iface, io.BytesRecv)
|
||||||
|
fmt.Fprintf(w, "llamaswap_network_bytes_total{interface=\"%s\",direction=\"sent\"} %d\n", iface, io.BytesSent)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeGpuMetrics(w http.ResponseWriter, gpus []GpuStat) {
|
||||||
|
if len(gpus) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
type gpuMetric struct {
|
||||||
|
help string
|
||||||
|
name string
|
||||||
|
value func(GpuStat) float64
|
||||||
|
}
|
||||||
|
|
||||||
|
metrics := []gpuMetric{
|
||||||
|
{"GPU temperature in Celsius", "llamaswap_gpu_temperature_celsius", func(g GpuStat) float64 { return float64(g.TempC) }},
|
||||||
|
{"GPU VRAM temperature in Celsius", "llamaswap_gpu_vram_temperature_celsius", func(g GpuStat) float64 { return float64(g.VramTempC) }},
|
||||||
|
{"GPU utilization percent (0-100)", "llamaswap_gpu_util_percent", func(g GpuStat) float64 { return g.GpuUtilPct }},
|
||||||
|
{"GPU memory utilization percent (0-100)", "llamaswap_gpu_memory_util_percent", func(g GpuStat) float64 { return g.MemUtilPct }},
|
||||||
|
{"GPU memory used in bytes", "llamaswap_gpu_memory_used_bytes", func(g GpuStat) float64 { return float64(g.MemUsedMB) * float64(mbToBytes) }},
|
||||||
|
{"GPU memory total in bytes", "llamaswap_gpu_memory_total_bytes", func(g GpuStat) float64 { return float64(g.MemTotalMB) * float64(mbToBytes) }},
|
||||||
|
{"GPU fan speed percent (0-100)", "llamaswap_gpu_fan_speed_percent", func(g GpuStat) float64 { return g.FanSpeedPct }},
|
||||||
|
{"GPU power draw in watts", "llamaswap_gpu_power_draw_watts", func(g GpuStat) float64 { return g.PowerDrawW }},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, m := range metrics {
|
||||||
|
fmt.Fprintf(w, "# HELP %s %s\n", m.name, m.help)
|
||||||
|
fmt.Fprintf(w, "# TYPE %s gauge\n", m.name)
|
||||||
|
for _, g := range gpus {
|
||||||
|
if g.UUID != "" {
|
||||||
|
fmt.Fprintf(w, "%s{id=\"%d\",name=\"%s\",uuid=\"%s\"} %g\n",
|
||||||
|
m.name, g.ID, sanitizeLabel(g.Name), sanitizeLabel(g.UUID), m.value(g))
|
||||||
|
} else {
|
||||||
|
fmt.Fprintf(w, "%s{id=\"%d\",name=\"%s\"} %g\n",
|
||||||
|
m.name, g.ID, sanitizeLabel(g.Name), m.value(g))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// latestPerGPU returns the most recent GpuStat for each GPU ID, sorted by ID.
|
||||||
|
func latestPerGPU(stats []GpuStat) []GpuStat {
|
||||||
|
latest := make(map[int]GpuStat)
|
||||||
|
for _, g := range stats {
|
||||||
|
if prev, ok := latest[g.ID]; !ok || g.Timestamp.After(prev.Timestamp) {
|
||||||
|
latest[g.ID] = g
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result := make([]GpuStat, 0, len(latest))
|
||||||
|
for _, g := range latest {
|
||||||
|
result = append(result, g)
|
||||||
|
}
|
||||||
|
sort.Slice(result, func(i, j int) bool { return result[i].ID < result[j].ID })
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
// labelEscaper escapes the three characters that must be escaped inside a
// Prometheus label value: double quote, backslash and line feed. It is built
// once and shared; a strings.Replacer is safe for concurrent use.
var labelEscaper = strings.NewReplacer(`"`, `\"`, `\`, `\\`, "\n", `\n`)

// sanitizeLabel returns s with characters that are invalid in a Prometheus
// label value escaped: `"` becomes `\"`, `\` becomes `\\` and a line feed
// becomes the two characters `\n`. All other characters pass through
// unchanged. Replacement is a single pass, so inserted backslashes are not
// escaped again.
func sanitizeLabel(s string) string {
	return labelEscaper.Replace(s)
}
|
||||||
@@ -0,0 +1,248 @@
|
|||||||
|
package perf
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/mostlygeek/llama-swap/proxy/config"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestSanitizeLabel(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
input string
|
||||||
|
want string
|
||||||
|
}{
|
||||||
|
{"normal", "normal"},
|
||||||
|
{"", ""},
|
||||||
|
{`with"quote`, `with\"quote`},
|
||||||
|
{`with\backslash`, `with\\backslash`},
|
||||||
|
{"with\nnewline", `with\nnewline`},
|
||||||
|
{`"both\n"`, `\"both\\n\"`},
|
||||||
|
}
|
||||||
|
for _, tc := range tests {
|
||||||
|
assert.Equal(t, tc.want, sanitizeLabel(tc.input), "input: %q", tc.input)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLatestPerGPU_Empty(t *testing.T) {
|
||||||
|
result := latestPerGPU(nil)
|
||||||
|
assert.Empty(t, result)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLatestPerGPU_Single(t *testing.T) {
|
||||||
|
now := time.Now()
|
||||||
|
stats := []GpuStat{{ID: 0, Name: "gpu0", Timestamp: now}}
|
||||||
|
result := latestPerGPU(stats)
|
||||||
|
require.Len(t, result, 1)
|
||||||
|
assert.Equal(t, "gpu0", result[0].Name)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLatestPerGPU_PicksLatest(t *testing.T) {
|
||||||
|
earlier := time.Now().Add(-time.Second)
|
||||||
|
later := time.Now()
|
||||||
|
stats := []GpuStat{
|
||||||
|
{ID: 0, Name: "old", TempC: 50, Timestamp: earlier},
|
||||||
|
{ID: 0, Name: "new", TempC: 70, Timestamp: later},
|
||||||
|
}
|
||||||
|
result := latestPerGPU(stats)
|
||||||
|
require.Len(t, result, 1)
|
||||||
|
assert.Equal(t, "new", result[0].Name)
|
||||||
|
assert.Equal(t, 70, result[0].TempC)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLatestPerGPU_MultipleGPUsSortedByID(t *testing.T) {
|
||||||
|
now := time.Now()
|
||||||
|
stats := []GpuStat{
|
||||||
|
{ID: 2, Name: "gpu2", Timestamp: now},
|
||||||
|
{ID: 0, Name: "gpu0", Timestamp: now},
|
||||||
|
{ID: 1, Name: "gpu1", Timestamp: now},
|
||||||
|
}
|
||||||
|
result := latestPerGPU(stats)
|
||||||
|
require.Len(t, result, 3)
|
||||||
|
assert.Equal(t, 0, result[0].ID)
|
||||||
|
assert.Equal(t, 1, result[1].ID)
|
||||||
|
assert.Equal(t, 2, result[2].ID)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWriteSysMetrics(t *testing.T) {
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
s := SysStat{
|
||||||
|
CpuUtilPerCore: []float64{10.5, 20.0},
|
||||||
|
MemTotalMB: 8192,
|
||||||
|
MemUsedMB: 4096,
|
||||||
|
MemFreeMB: 4096,
|
||||||
|
SwapTotalMB: 2048,
|
||||||
|
SwapUsedMB: 512,
|
||||||
|
LoadAvg1: 1.5,
|
||||||
|
LoadAvg5: 1.2,
|
||||||
|
LoadAvg15: 0.9,
|
||||||
|
NetIO: []NetIOStat{
|
||||||
|
{Name: "eth0", BytesRecv: 1000, BytesSent: 2000},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
writeSysMetrics(rec, s)
|
||||||
|
body := rec.Body.String()
|
||||||
|
|
||||||
|
assert.Contains(t, body, `llamaswap_cpu_util_percent{core="0"} 10.5`)
|
||||||
|
assert.Contains(t, body, `llamaswap_cpu_util_percent{core="1"} 20`)
|
||||||
|
assert.Contains(t, body, "llamaswap_memory_total_bytes 8589934592")
|
||||||
|
assert.Contains(t, body, "llamaswap_memory_used_bytes 4294967296")
|
||||||
|
assert.Contains(t, body, "llamaswap_memory_free_bytes 4294967296")
|
||||||
|
assert.Contains(t, body, "llamaswap_swap_total_bytes 2147483648")
|
||||||
|
assert.Contains(t, body, "llamaswap_swap_used_bytes 536870912")
|
||||||
|
assert.Contains(t, body, `llamaswap_load_average{interval="1m"} 1.5`)
|
||||||
|
assert.Contains(t, body, `llamaswap_load_average{interval="5m"} 1.2`)
|
||||||
|
assert.Contains(t, body, `llamaswap_load_average{interval="15m"} 0.9`)
|
||||||
|
assert.Contains(t, body, `llamaswap_network_bytes_total{interface="eth0",direction="recv"} 1000`)
|
||||||
|
assert.Contains(t, body, `llamaswap_network_bytes_total{interface="eth0",direction="sent"} 2000`)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWriteSysMetrics_NoNetIO(t *testing.T) {
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
writeSysMetrics(rec, SysStat{CpuUtilPerCore: []float64{5.0}})
|
||||||
|
body := rec.Body.String()
|
||||||
|
assert.NotContains(t, body, "llamaswap_network_bytes_total")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWriteGpuMetrics_Empty(t *testing.T) {
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
writeGpuMetrics(rec, nil)
|
||||||
|
assert.Empty(t, rec.Body.String())
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWriteGpuMetrics(t *testing.T) {
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
gpus := []GpuStat{
|
||||||
|
{
|
||||||
|
ID: 0,
|
||||||
|
Name: "NVIDIA RTX 4090",
|
||||||
|
UUID: "GPU-1234",
|
||||||
|
TempC: 75,
|
||||||
|
GpuUtilPct: 85.5,
|
||||||
|
MemUtilPct: 60.0,
|
||||||
|
MemUsedMB: 8192,
|
||||||
|
MemTotalMB: 24576,
|
||||||
|
FanSpeedPct: 55.0,
|
||||||
|
PowerDrawW: 300.5,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
writeGpuMetrics(rec, gpus)
|
||||||
|
body := rec.Body.String()
|
||||||
|
|
||||||
|
assert.Contains(t, body, `llamaswap_gpu_temperature_celsius{id="0",name="NVIDIA RTX 4090",uuid="GPU-1234"} 75`)
|
||||||
|
assert.Contains(t, body, `llamaswap_gpu_vram_temperature_celsius{id="0",name="NVIDIA RTX 4090",uuid="GPU-1234"} 0`)
|
||||||
|
assert.Contains(t, body, `llamaswap_gpu_util_percent{id="0",name="NVIDIA RTX 4090",uuid="GPU-1234"} 85.5`)
|
||||||
|
assert.Contains(t, body, `llamaswap_gpu_memory_util_percent{id="0",name="NVIDIA RTX 4090",uuid="GPU-1234"} 60`)
|
||||||
|
assert.Contains(t, body, `llamaswap_gpu_memory_used_bytes{id="0",name="NVIDIA RTX 4090",uuid="GPU-1234"}`)
|
||||||
|
assert.Contains(t, body, `llamaswap_gpu_memory_total_bytes{id="0",name="NVIDIA RTX 4090",uuid="GPU-1234"}`)
|
||||||
|
assert.Contains(t, body, `llamaswap_gpu_fan_speed_percent{id="0",name="NVIDIA RTX 4090",uuid="GPU-1234"} 55`)
|
||||||
|
assert.Contains(t, body, `llamaswap_gpu_power_draw_watts{id="0",name="NVIDIA RTX 4090",uuid="GPU-1234"} 300.5`)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWriteGpuMetrics_VramTemp(t *testing.T) {
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
gpus := []GpuStat{
|
||||||
|
{ID: 0, Name: "AMD RX 7900", UUID: "GPU-5678", TempC: 70, VramTempC: 85},
|
||||||
|
}
|
||||||
|
writeGpuMetrics(rec, gpus)
|
||||||
|
body := rec.Body.String()
|
||||||
|
assert.Contains(t, body, `llamaswap_gpu_temperature_celsius{id="0",name="AMD RX 7900",uuid="GPU-5678"} 70`)
|
||||||
|
assert.Contains(t, body, `llamaswap_gpu_vram_temperature_celsius{id="0",name="AMD RX 7900",uuid="GPU-5678"} 85`)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWriteGpuMetrics_EmptyUUID(t *testing.T) {
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
gpus := []GpuStat{{ID: 3, Name: "AMD RX 7900", UUID: ""}}
|
||||||
|
writeGpuMetrics(rec, gpus)
|
||||||
|
body := rec.Body.String()
|
||||||
|
assert.NotContains(t, body, "uuid=")
|
||||||
|
assert.Contains(t, body, `name="AMD RX 7900"`)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWriteGpuMetrics_LabelSanitization(t *testing.T) {
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
gpus := []GpuStat{
|
||||||
|
{ID: 0, Name: `GPU "special"`, UUID: "uuid\nline"},
|
||||||
|
}
|
||||||
|
writeGpuMetrics(rec, gpus)
|
||||||
|
body := rec.Body.String()
|
||||||
|
assert.Contains(t, body, `name="GPU \"special\""`)
|
||||||
|
assert.Contains(t, body, `uuid="uuid\nline"`)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMetricsHandler_ContentType(t *testing.T) {
|
||||||
|
m, err := New(config.PerformanceConfig{}, newTestLogger())
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
req := httptest.NewRequest(http.MethodGet, "/metrics", nil)
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
m.MetricsHandler()(rec, req)
|
||||||
|
|
||||||
|
assert.Equal(t, "text/plain; version=0.0.4; charset=utf-8", rec.Header().Get("Content-Type"))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMetricsHandler_EmptyStats(t *testing.T) {
|
||||||
|
m, err := New(config.PerformanceConfig{}, newTestLogger())
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
req := httptest.NewRequest(http.MethodGet, "/metrics", nil)
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
m.MetricsHandler()(rec, req)
|
||||||
|
|
||||||
|
assert.Equal(t, http.StatusOK, rec.Code)
|
||||||
|
assert.Empty(t, strings.TrimSpace(rec.Body.String()))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMetricsHandler_WithSysStats(t *testing.T) {
|
||||||
|
m, err := New(config.PerformanceConfig{}, newTestLogger())
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
m.sysRing.Push(SysStat{Timestamp: time.Now(), CpuUtilPerCore: []float64{25.0}, MemTotalMB: 4096, MemUsedMB: 2048, MemFreeMB: 2048})
|
||||||
|
|
||||||
|
req := httptest.NewRequest(http.MethodGet, "/metrics", nil)
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
m.MetricsHandler()(rec, req)
|
||||||
|
|
||||||
|
body := rec.Body.String()
|
||||||
|
assert.Contains(t, body, "llamaswap_cpu_util_percent")
|
||||||
|
assert.Contains(t, body, "llamaswap_memory_total_bytes")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMetricsHandler_UsesLatestSysStat(t *testing.T) {
|
||||||
|
m, err := New(config.PerformanceConfig{}, newTestLogger())
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
now := time.Now()
|
||||||
|
m.sysRing.Push(SysStat{Timestamp: now.Add(-time.Second), MemTotalMB: 1000})
|
||||||
|
m.sysRing.Push(SysStat{Timestamp: now, MemTotalMB: 8192})
|
||||||
|
|
||||||
|
req := httptest.NewRequest(http.MethodGet, "/metrics", nil)
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
m.MetricsHandler()(rec, req)
|
||||||
|
|
||||||
|
body := rec.Body.String()
|
||||||
|
// 8192 MB = 8589934592 bytes
|
||||||
|
assert.Contains(t, body, "llamaswap_memory_total_bytes 8589934592")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMetricsHandler_WithGpuStats(t *testing.T) {
|
||||||
|
m, err := New(config.PerformanceConfig{}, newTestLogger())
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
m.gpuRing.Push([]GpuStat{{ID: 0, Name: "TestGPU", UUID: "uuid-0", TempC: 65, Timestamp: time.Now()}})
|
||||||
|
|
||||||
|
req := httptest.NewRequest(http.MethodGet, "/metrics", nil)
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
m.MetricsHandler()(rec, req)
|
||||||
|
|
||||||
|
body := rec.Body.String()
|
||||||
|
assert.Contains(t, body, "llamaswap_gpu_temperature_celsius")
|
||||||
|
assert.Contains(t, body, `name="TestGPU"`)
|
||||||
|
}
|
||||||
@@ -0,0 +1,40 @@
|
|||||||
|
package perf
|
||||||
|
|
||||||
|
import "time"
|
||||||
|
|
||||||
|
// GpuStat is one sample of a single GPU's telemetry.
type GpuStat struct {
	// Timestamp is when the sample was created.
	Timestamp time.Time `json:"timestamp"`

	// Identity of the device the sample belongs to.
	ID   int    `json:"id"`
	Name string `json:"name"`
	UUID string `json:"uuid"`

	// Temperatures, exposed as Celsius by the metrics endpoint.
	TempC     int `json:"temp_c"`
	VramTempC int `json:"vram_temp_c"`

	// Utilisation figures as percentages (0-100).
	GpuUtilPct float64 `json:"gpu_util_pct"`
	MemUtilPct float64 `json:"mem_util_pct"`

	// Memory sizes in megabytes.
	MemUsedMB  int `json:"mem_used_mb"`
	MemTotalMB int `json:"mem_total_mb"`

	// Fan speed as a percentage and power draw in watts.
	FanSpeedPct float64 `json:"fan_speed_pct"`
	PowerDrawW  float64 `json:"power_draw_w"`
}
|
||||||
|
|
||||||
|
// NetIOStat holds received/sent byte counters under a name.
// NOTE(review): Name is presumably the network interface name — confirm
// against the code that populates SysStat.NetIO.
type NetIOStat struct {
	Name      string `json:"name"`
	BytesRecv uint64 `json:"bytes_recv"`
	BytesSent uint64 `json:"bytes_sent"`
}
|
||||||
|
|
||||||
|
type SysStat struct {
|
||||||
|
Timestamp time.Time `json:"timestamp"`
|
||||||
|
|
||||||
|
CpuUtilPerCore []float64 `json:"cpu_util_per_core"`
|
||||||
|
MemTotalMB int `json:"mem_total_mb"`
|
||||||
|
MemUsedMB int `json:"mem_used_mb"`
|
||||||
|
MemFreeMB int `json:"mem_free_mb"`
|
||||||
|
SwapTotalMB int `json:"swap_total_mb"`
|
||||||
|
SwapUsedMB int `json:"swap_used_mb"`
|
||||||
|
LoadAvg1 float64 `json:"load_avg_1"`
|
||||||
|
LoadAvg5 float64 `json:"load_avg_5"`
|
||||||
|
LoadAvg15 float64 `json:"load_avg_15"`
|
||||||
|
NetIO []NetIOStat `json:"net_io"`
|
||||||
|
}
|
||||||
@@ -0,0 +1,39 @@
|
|||||||
|
package ring
|
||||||
|
|
||||||
|
// Buffer is a fixed-capacity FIFO ring buffer. Once it is full, every Push
// overwrites the oldest stored element. Buffer does no locking of its own.
type Buffer[T any] struct {
	buf  []T // backing storage; len(buf) is the capacity
	head int // index in buf of the oldest stored element
	size int // number of elements currently stored
}

// NewBuffer returns an empty Buffer able to hold capacity elements.
// A capacity below 1 is raised to 1.
func NewBuffer[T any](capacity int) Buffer[T] {
	if capacity < 1 {
		capacity = 1
	}
	return Buffer[T]{buf: make([]T, capacity)}
}

// Push adds v, overwriting the oldest entry when the buffer is full.
//
// A zero-value Buffer (one not built by NewBuffer) has no backing storage.
// Rather than panicking on the first write, Push lazily gives it a capacity
// of 1, the same minimum NewBuffer enforces.
func (r *Buffer[T]) Push(v T) {
	if len(r.buf) == 0 {
		r.buf = make([]T, 1)
		r.head, r.size = 0, 0
	}

	capacity := len(r.buf)
	if r.size < capacity {
		// Still filling: append after the newest element.
		r.buf[(r.head+r.size)%capacity] = v
		r.size++
		return
	}

	// Full: the slot of the oldest element becomes the newest, and the
	// following slot becomes the oldest.
	r.buf[r.head] = v
	r.head = (r.head + 1) % capacity
}

// Slice returns all entries in insertion order as a new slice.
// It returns nil when the buffer is empty.
func (r *Buffer[T]) Slice() []T {
	if r.size == 0 {
		return nil
	}

	out := make([]T, r.size)
	// First the run from head to the end of the storage (capped by len(out)),
	// then whatever wrapped around to the front.
	n := copy(out, r.buf[r.head:])
	copy(out[n:], r.buf[:r.size-n])
	return out
}
|
||||||
@@ -0,0 +1,44 @@
|
|||||||
|
package ring
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
const benchCap = 600 // matches default MaxAge/Every (1min / 100ms)
|
||||||
|
|
||||||
|
func BenchmarkBuffer_PushNoWrap(b *testing.B) {
|
||||||
|
for b.Loop() {
|
||||||
|
buf := NewBuffer[int](b.N + 1)
|
||||||
|
for i := range b.N {
|
||||||
|
buf.Push(i)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkBuffer_PushWrap(b *testing.B) {
|
||||||
|
buf := NewBuffer[int](benchCap)
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := range b.N {
|
||||||
|
buf.Push(i)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkBuffer_Slice(b *testing.B) {
|
||||||
|
buf := NewBuffer[int](benchCap)
|
||||||
|
for i := range benchCap {
|
||||||
|
buf.Push(i)
|
||||||
|
}
|
||||||
|
b.ResetTimer()
|
||||||
|
for range b.N {
|
||||||
|
_ = buf.Slice()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkBuffer_PushAndSlice(b *testing.B) {
|
||||||
|
buf := NewBuffer[int](benchCap)
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := range b.N {
|
||||||
|
buf.Push(i)
|
||||||
|
if i%benchCap == 0 {
|
||||||
|
_ = buf.Slice()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,65 @@
|
|||||||
|
package ring
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestBuffer_EmptySliceIsNil(t *testing.T) {
|
||||||
|
b := NewBuffer[int](4)
|
||||||
|
assert.Nil(t, b.Slice())
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuffer_PushBelowCapacity(t *testing.T) {
|
||||||
|
b := NewBuffer[int](4)
|
||||||
|
b.Push(1)
|
||||||
|
b.Push(2)
|
||||||
|
assert.Equal(t, []int{1, 2}, b.Slice())
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuffer_PushAtCapacity(t *testing.T) {
|
||||||
|
b := NewBuffer[int](3)
|
||||||
|
b.Push(1)
|
||||||
|
b.Push(2)
|
||||||
|
b.Push(3)
|
||||||
|
assert.Equal(t, []int{1, 2, 3}, b.Slice())
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuffer_PushOverCapacityEvictsOldest(t *testing.T) {
|
||||||
|
b := NewBuffer[int](3)
|
||||||
|
b.Push(1)
|
||||||
|
b.Push(2)
|
||||||
|
b.Push(3)
|
||||||
|
b.Push(4)
|
||||||
|
assert.Equal(t, []int{2, 3, 4}, b.Slice())
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuffer_CapacityOne(t *testing.T) {
|
||||||
|
b := NewBuffer[int](1)
|
||||||
|
b.Push(1)
|
||||||
|
b.Push(2)
|
||||||
|
assert.Equal(t, []int{2}, b.Slice())
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuffer_ZeroCapacityDefaultsToOne(t *testing.T) {
|
||||||
|
b := NewBuffer[int](0)
|
||||||
|
b.Push(42)
|
||||||
|
assert.Equal(t, []int{42}, b.Slice())
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuffer_SliceReturnsCopy(t *testing.T) {
|
||||||
|
b := NewBuffer[int](4)
|
||||||
|
b.Push(10)
|
||||||
|
s := b.Slice()
|
||||||
|
s[0] = 99
|
||||||
|
assert.Equal(t, []int{10}, b.Slice())
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuffer_InsertionOrderPreservedAfterWrap(t *testing.T) {
|
||||||
|
b := NewBuffer[int](4)
|
||||||
|
for i := 1; i <= 8; i++ {
|
||||||
|
b.Push(i)
|
||||||
|
}
|
||||||
|
assert.Equal(t, []int{5, 6, 7, 8}, b.Slice())
|
||||||
|
}
|
||||||
+108
-78
@@ -4,19 +4,22 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
"os/signal"
|
"os/signal"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
"syscall"
|
"syscall"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/fsnotify/fsnotify"
|
|
||||||
"github.com/gin-gonic/gin"
|
"github.com/gin-gonic/gin"
|
||||||
"github.com/mostlygeek/llama-swap/event"
|
"github.com/mostlygeek/llama-swap/event"
|
||||||
|
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||||
|
"github.com/mostlygeek/llama-swap/internal/perf"
|
||||||
"github.com/mostlygeek/llama-swap/proxy"
|
"github.com/mostlygeek/llama-swap/proxy"
|
||||||
"github.com/mostlygeek/llama-swap/proxy/config"
|
"github.com/mostlygeek/llama-swap/proxy/config"
|
||||||
|
"github.com/mostlygeek/llama-swap/proxy/configwatcher"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
@@ -33,24 +36,40 @@ func main() {
|
|||||||
keyFile := flag.String("tls-key-file", "", "TLS key file")
|
keyFile := flag.String("tls-key-file", "", "TLS key file")
|
||||||
showVersion := flag.Bool("version", false, "show version of build")
|
showVersion := flag.Bool("version", false, "show version of build")
|
||||||
watchConfig := flag.Bool("watch-config", false, "Automatically reload config file on change")
|
watchConfig := flag.Bool("watch-config", false, "Automatically reload config file on change")
|
||||||
|
mainLogger := logmon.New()
|
||||||
|
|
||||||
flag.Parse() // Parse the command-line flags
|
flag.Parse() // Parse the command-line flags
|
||||||
|
|
||||||
if *showVersion {
|
if *showVersion {
|
||||||
fmt.Printf("version: %s (%s), built at %s\n", version, commit, date)
|
fmt.Printf("version: %s (%s), built at %s", version, commit, date)
|
||||||
os.Exit(0)
|
os.Exit(0)
|
||||||
}
|
}
|
||||||
|
|
||||||
conf, err := config.LoadConfig(*configPath)
|
conf, err := config.LoadConfig(*configPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Printf("Error loading config: %v\n", err)
|
mainLogger.Errorf("Error loading config: %v", err)
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(conf.Profiles) > 0 {
|
if len(conf.Profiles) > 0 {
|
||||||
fmt.Println("WARNING: Profile functionality has been removed in favor of Groups. See the README for more information.")
|
mainLogger.Warn("Profile functionality has been removed in favor of Groups. See the README for more information.")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
switch strings.ToLower(strings.TrimSpace(conf.LogLevel)) {
|
||||||
|
case "debug":
|
||||||
|
mainLogger.SetLogLevel(logmon.LevelDebug)
|
||||||
|
case "info":
|
||||||
|
mainLogger.SetLogLevel(logmon.LevelInfo)
|
||||||
|
case "warn":
|
||||||
|
mainLogger.SetLogLevel(logmon.LevelWarn)
|
||||||
|
case "error":
|
||||||
|
mainLogger.SetLogLevel(logmon.LevelError)
|
||||||
|
default:
|
||||||
|
mainLogger.SetLogLevel(logmon.LevelInfo)
|
||||||
|
}
|
||||||
|
|
||||||
|
mainLogger.Debugf("PID: %d", os.Getpid())
|
||||||
|
|
||||||
if mode := os.Getenv("GIN_MODE"); mode != "" {
|
if mode := os.Getenv("GIN_MODE"); mode != "" {
|
||||||
gin.SetMode(mode)
|
gin.SetMode(mode)
|
||||||
} else {
|
} else {
|
||||||
@@ -74,31 +93,66 @@ func main() {
|
|||||||
listenStr = &defaultPort
|
listenStr = &defaultPort
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var mon *perf.Monitor
|
||||||
|
if !conf.Performance.Disabled {
|
||||||
|
mon, err = perf.New(conf.Performance, mainLogger)
|
||||||
|
if err != nil {
|
||||||
|
mainLogger.Errorf("failed to create monitor: %s", err.Error())
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
mon.Start()
|
||||||
|
} else {
|
||||||
|
mainLogger.Info("performance monitoring is disabled")
|
||||||
|
}
|
||||||
|
|
||||||
// Setup channels for server management
|
// Setup channels for server management
|
||||||
exitChan := make(chan struct{})
|
exitChan := make(chan struct{})
|
||||||
sigChan := make(chan os.Signal, 1)
|
sigChan := make(chan os.Signal, 1)
|
||||||
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
|
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM, syscall.SIGHUP)
|
||||||
|
|
||||||
// Create server with initial handler
|
// Context that bounds the lifetime of background watcher goroutines.
|
||||||
|
watcherCtx, watcherCancel := context.WithCancel(context.Background())
|
||||||
|
|
||||||
|
// Create server with initial handler
|
||||||
srv := &http.Server{
|
srv := &http.Server{
|
||||||
Addr: *listenStr,
|
Addr: *listenStr,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Support for watching config and reloading when it changes
|
// Support for watching config and reloading when it changes
|
||||||
|
reloading := false
|
||||||
|
var reloadMutex sync.Mutex
|
||||||
reloadProxyManager := func() {
|
reloadProxyManager := func() {
|
||||||
|
reloadMutex.Lock()
|
||||||
|
if reloading {
|
||||||
|
reloadMutex.Unlock()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
reloading = true
|
||||||
|
reloadMutex.Unlock()
|
||||||
|
defer func() {
|
||||||
|
reloadMutex.Lock()
|
||||||
|
reloading = false
|
||||||
|
reloadMutex.Unlock()
|
||||||
|
}()
|
||||||
|
|
||||||
if currentPM, ok := srv.Handler.(*proxy.ProxyManager); ok {
|
if currentPM, ok := srv.Handler.(*proxy.ProxyManager); ok {
|
||||||
|
mainLogger.Info("Reloading Configuration")
|
||||||
conf, err = config.LoadConfig(*configPath)
|
conf, err = config.LoadConfig(*configPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Printf("Warning, unable to reload configuration: %v\n", err)
|
mainLogger.Warnf("Unable to reload configuration: %v", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
fmt.Println("Configuration Changed")
|
mainLogger.Debug("Configuration Changed")
|
||||||
currentPM.Shutdown()
|
currentPM.Shutdown()
|
||||||
|
if mon != nil {
|
||||||
|
mon.UpdateConfig(conf.Performance)
|
||||||
|
}
|
||||||
newPM := proxy.New(conf)
|
newPM := proxy.New(conf)
|
||||||
newPM.SetVersion(date, commit, version)
|
newPM.SetVersion(date, commit, version)
|
||||||
|
newPM.SetPerfMonitor(mon)
|
||||||
srv.Handler = newPM
|
srv.Handler = newPM
|
||||||
fmt.Println("Configuration Reloaded")
|
mainLogger.Debug("Configuration Reloaded")
|
||||||
|
|
||||||
// wait a few seconds and tell any UI to reload
|
// wait a few seconds and tell any UI to reload
|
||||||
time.AfterFunc(3*time.Second, func() {
|
time.AfterFunc(3*time.Second, func() {
|
||||||
@@ -109,111 +163,87 @@ func main() {
|
|||||||
} else {
|
} else {
|
||||||
conf, err = config.LoadConfig(*configPath)
|
conf, err = config.LoadConfig(*configPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Printf("Error, unable to load configuration: %v\n", err)
|
mainLogger.Errorf("Unable to load configuration: %v", err)
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
newPM := proxy.New(conf)
|
newPM := proxy.New(conf)
|
||||||
newPM.SetVersion(date, commit, version)
|
newPM.SetVersion(date, commit, version)
|
||||||
|
newPM.SetPerfMonitor(mon)
|
||||||
srv.Handler = newPM
|
srv.Handler = newPM
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// load the initial proxy manager
|
// load the initial proxy manager
|
||||||
reloadProxyManager()
|
reloadProxyManager()
|
||||||
debouncedReload := debounce(time.Second, reloadProxyManager)
|
|
||||||
if *watchConfig {
|
|
||||||
defer event.On(func(e proxy.ConfigFileChangedEvent) {
|
|
||||||
if e.ReloadingState == proxy.ReloadingStateStart {
|
|
||||||
debouncedReload()
|
|
||||||
}
|
|
||||||
})()
|
|
||||||
|
|
||||||
fmt.Println("Watching Configuration for changes")
|
if *watchConfig {
|
||||||
go func() {
|
go func() {
|
||||||
absConfigPath, err := filepath.Abs(*configPath)
|
absConfigPath, err := filepath.Abs(*configPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Printf("Error getting absolute path for watching config file: %v\n", err)
|
mainLogger.Errorf("watch-config unable to determine absolute path for watching config file: %v", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
watcher, err := fsnotify.NewWatcher()
|
mainLogger.Info("Watching configuration for changes (poll-based, 2s interval)")
|
||||||
if err != nil {
|
(&configwatcher.Watcher{
|
||||||
fmt.Printf("Error creating file watcher: %v. File watching disabled.\n", err)
|
Path: absConfigPath,
|
||||||
return
|
Interval: configwatcher.DefaultInterval,
|
||||||
}
|
OnChange: func() {
|
||||||
|
reloadProxyManager()
|
||||||
configDir := filepath.Dir(absConfigPath)
|
},
|
||||||
err = watcher.Add(configDir)
|
}).Run(watcherCtx)
|
||||||
if err != nil {
|
|
||||||
fmt.Printf("Error adding config path directory (%s) to watcher: %v. File watching disabled.", configDir, err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
defer watcher.Close()
|
|
||||||
for {
|
|
||||||
select {
|
|
||||||
case changeEvent := <-watcher.Events:
|
|
||||||
if changeEvent.Name == absConfigPath && (changeEvent.Has(fsnotify.Write) || changeEvent.Has(fsnotify.Create) || changeEvent.Has(fsnotify.Remove)) {
|
|
||||||
event.Emit(proxy.ConfigFileChangedEvent{
|
|
||||||
ReloadingState: proxy.ReloadingStateStart,
|
|
||||||
})
|
|
||||||
} else if changeEvent.Name == filepath.Join(configDir, "..data") && changeEvent.Has(fsnotify.Create) {
|
|
||||||
// the change for k8s configmap
|
|
||||||
event.Emit(proxy.ConfigFileChangedEvent{
|
|
||||||
ReloadingState: proxy.ReloadingStateStart,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
case err := <-watcher.Errors:
|
|
||||||
log.Printf("File watcher error: %v", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
// shutdown on signal
|
// Signal handling
|
||||||
go func() {
|
go func() {
|
||||||
sig := <-sigChan
|
for {
|
||||||
fmt.Printf("Received signal %v, shutting down...\n", sig)
|
sig := <-sigChan
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
|
switch sig {
|
||||||
defer cancel()
|
case syscall.SIGHUP:
|
||||||
|
mainLogger.Debug("Received SIGHUP")
|
||||||
|
reloadProxyManager()
|
||||||
|
case syscall.SIGINT, syscall.SIGTERM:
|
||||||
|
mainLogger.Debugf("Received signal %v, shutting down...", sig)
|
||||||
|
if mon != nil {
|
||||||
|
mon.Stop()
|
||||||
|
}
|
||||||
|
watcherCancel()
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
if pm, ok := srv.Handler.(*proxy.ProxyManager); ok {
|
if pm, ok := srv.Handler.(*proxy.ProxyManager); ok {
|
||||||
pm.Shutdown()
|
pm.Shutdown()
|
||||||
} else {
|
} else {
|
||||||
fmt.Println("srv.Handler is not of type *proxy.ProxyManager")
|
mainLogger.Error("srv.Handler is not of type *proxy.ProxyManager")
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := srv.Shutdown(ctx); err != nil {
|
if err := srv.Shutdown(ctx); err != nil {
|
||||||
fmt.Printf("Server shutdown error: %v\n", err)
|
mainLogger.Errorf("Server shutdown: %v", err)
|
||||||
|
}
|
||||||
|
close(exitChan)
|
||||||
|
return
|
||||||
|
default:
|
||||||
|
// do nothing on other signals
|
||||||
|
}
|
||||||
}
|
}
|
||||||
close(exitChan)
|
|
||||||
}()
|
}()
|
||||||
|
|
||||||
// Start server
|
// Start server
|
||||||
go func() {
|
go func() {
|
||||||
var err error
|
var err error
|
||||||
if useTLS {
|
if useTLS {
|
||||||
fmt.Printf("llama-swap listening with TLS on https://%s\n", *listenStr)
|
mainLogger.Infof("llama-swap listening with TLS on https://%s", *listenStr)
|
||||||
err = srv.ListenAndServeTLS(*certFile, *keyFile)
|
err = srv.ListenAndServeTLS(*certFile, *keyFile)
|
||||||
} else {
|
} else {
|
||||||
fmt.Printf("llama-swap listening on http://%s\n", *listenStr)
|
mainLogger.Infof("llama-swap listening on http://%s", *listenStr)
|
||||||
err = srv.ListenAndServe()
|
err = srv.ListenAndServe()
|
||||||
}
|
}
|
||||||
if err != nil && err != http.ErrServerClosed {
|
if err != nil && err != http.ErrServerClosed {
|
||||||
log.Fatalf("Fatal server error: %v\n", err)
|
mainLogger.Errorf("Fatal server error: %v", err)
|
||||||
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
// Wait for exit signal
|
// Wait for exit signal
|
||||||
<-exitChan
|
<-exitChan
|
||||||
}
|
}
|
||||||
|
|
||||||
func debounce(interval time.Duration, f func()) func() {
|
|
||||||
var timer *time.Timer
|
|
||||||
return func() {
|
|
||||||
if timer != nil {
|
|
||||||
timer.Stop()
|
|
||||||
}
|
|
||||||
timer = time.AfterFunc(interval, f)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
Vendored
+102
@@ -0,0 +1,102 @@
|
|||||||
|
package cache
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"sync"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Sentinel errors returned by Cache methods.
var (
	ErrExceedsMaxSize = errors.New("item exceeds maximum cache size")
	ErrNotFound       = errors.New("item not found")
)

// Cache is a mutex-guarded store of []byte values keyed by int with a total
// byte budget. When an insertion would exceed the budget, entries are evicted
// in insertion (FIFO) order until the new value fits.
type Cache struct {
	mu      sync.Mutex
	items   map[int][]byte
	order   []int // keys in insertion order, oldest first
	size    int   // sum of len() over all stored values
	maxSize int   // byte budget
}

// New returns an empty Cache whose stored values may total at most maxBytes.
func New(maxBytes int) *Cache {
	c := new(Cache)
	c.items = map[int][]byte{}
	c.order = []int{}
	c.maxSize = maxBytes
	return c
}

// Add stores data under id and makes it the newest entry.
//
// It returns ErrExceedsMaxSize, leaving the cache untouched, when data alone
// is larger than the budget. An existing value under id is replaced. The
// slice is stored as given, not copied.
func (c *Cache) Add(id int, data []byte) error {
	c.mu.Lock()
	defer c.mu.Unlock()

	incoming := len(data)
	if incoming > c.maxSize {
		return ErrExceedsMaxSize
	}

	// Replacing a key: give back the old value's bytes and queue position.
	if previous, ok := c.items[id]; ok {
		c.size -= len(previous)
		c.removeOrder(id)
	}

	// Evict oldest (FIFO) until the new value fits or nothing is left.
	for len(c.order) > 0 && c.size+incoming > c.maxSize {
		victim := c.order[0]
		c.order = c.order[1:]
		if stale, ok := c.items[victim]; ok {
			c.size -= len(stale)
			delete(c.items, victim)
		}
	}

	c.items[id] = data
	c.order = append(c.order, id)
	c.size += incoming
	return nil
}

// removeOrder deletes the first occurrence of id from the eviction queue.
// The caller must hold c.mu.
func (c *Cache) removeOrder(id int) {
	for idx := range c.order {
		if c.order[idx] != id {
			continue
		}
		c.order = append(c.order[:idx], c.order[idx+1:]...)
		return
	}
}

// Get returns the value stored under id, or ErrNotFound when there is none.
// The returned slice is the stored one, not a copy.
func (c *Cache) Get(id int) ([]byte, error) {
	c.mu.Lock()
	defer c.mu.Unlock()

	if data, ok := c.items[id]; ok {
		return data, nil
	}
	return nil, ErrNotFound
}

// Has reports whether a value is stored under id.
func (c *Cache) Has(id int) bool {
	c.mu.Lock()
	defer c.mu.Unlock()

	_, ok := c.items[id]
	return ok
}

// Size returns the total number of bytes currently stored.
func (c *Cache) Size() int {
	c.mu.Lock()
	defer c.mu.Unlock()

	return c.size
}

// Clear drops every entry and resets the byte count to zero.
func (c *Cache) Clear() {
	c.mu.Lock()
	defer c.mu.Unlock()

	c.size = 0
	c.order = c.order[:0]
	c.items = make(map[int][]byte)
}
|
||||||
Vendored
+130
@@ -0,0 +1,130 @@
|
|||||||
|
package cache
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sync"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestCache_Add(t *testing.T) {
|
||||||
|
t.Run("adds and retrieves item", func(t *testing.T) {
|
||||||
|
c := New(1024)
|
||||||
|
data := []byte("hello")
|
||||||
|
require.NoError(t, c.Add(1, data))
|
||||||
|
|
||||||
|
got, err := c.Get(1)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, data, got)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("returns error for oversized item", func(t *testing.T) {
|
||||||
|
c := New(10)
|
||||||
|
err := c.Add(1, make([]byte, 20))
|
||||||
|
assert.ErrorIs(t, err, ErrExceedsMaxSize)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("evicts oldest items to make room", func(t *testing.T) {
|
||||||
|
c := New(100)
|
||||||
|
|
||||||
|
require.NoError(t, c.Add(1, make([]byte, 40)))
|
||||||
|
require.NoError(t, c.Add(2, make([]byte, 40)))
|
||||||
|
// Adding item 3 should evict item 1
|
||||||
|
require.NoError(t, c.Add(3, make([]byte, 40)))
|
||||||
|
|
||||||
|
assert.False(t, c.Has(1))
|
||||||
|
assert.True(t, c.Has(2))
|
||||||
|
assert.True(t, c.Has(3))
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("overwrites existing key", func(t *testing.T) {
|
||||||
|
c := New(100)
|
||||||
|
require.NoError(t, c.Add(1, []byte("old")))
|
||||||
|
require.NoError(t, c.Add(1, []byte("new")))
|
||||||
|
|
||||||
|
got, err := c.Get(1)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, []byte("new"), got)
|
||||||
|
assert.Equal(t, 3, c.Size())
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCache_Get(t *testing.T) {
|
||||||
|
t.Run("returns ErrNotFound for missing key", func(t *testing.T) {
|
||||||
|
c := New(100)
|
||||||
|
_, err := c.Get(99)
|
||||||
|
assert.ErrorIs(t, err, ErrNotFound)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCache_Has(t *testing.T) {
|
||||||
|
t.Run("returns true for existing key", func(t *testing.T) {
|
||||||
|
c := New(100)
|
||||||
|
require.NoError(t, c.Add(1, []byte("data")))
|
||||||
|
assert.True(t, c.Has(1))
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("returns false for missing key", func(t *testing.T) {
|
||||||
|
c := New(100)
|
||||||
|
assert.False(t, c.Has(1))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCache_Size(t *testing.T) {
|
||||||
|
t.Run("tracks byte usage", func(t *testing.T) {
|
||||||
|
c := New(1000)
|
||||||
|
assert.Equal(t, 0, c.Size())
|
||||||
|
|
||||||
|
require.NoError(t, c.Add(1, make([]byte, 100)))
|
||||||
|
assert.Equal(t, 100, c.Size())
|
||||||
|
|
||||||
|
require.NoError(t, c.Add(2, make([]byte, 200)))
|
||||||
|
assert.Equal(t, 300, c.Size())
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("updates on eviction", func(t *testing.T) {
|
||||||
|
c := New(150)
|
||||||
|
require.NoError(t, c.Add(1, make([]byte, 100)))
|
||||||
|
require.NoError(t, c.Add(2, make([]byte, 100)))
|
||||||
|
|
||||||
|
// Item 1 should be evicted, size = 100
|
||||||
|
assert.Equal(t, 100, c.Size())
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCache_Clear(t *testing.T) {
|
||||||
|
t.Run("removes all items and resets size", func(t *testing.T) {
|
||||||
|
c := New(1000)
|
||||||
|
require.NoError(t, c.Add(1, []byte("a")))
|
||||||
|
require.NoError(t, c.Add(2, []byte("b")))
|
||||||
|
|
||||||
|
c.Clear()
|
||||||
|
|
||||||
|
assert.Equal(t, 0, c.Size())
|
||||||
|
assert.False(t, c.Has(1))
|
||||||
|
assert.False(t, c.Has(2))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCache_Concurrent(t *testing.T) {
|
||||||
|
t.Run("concurrent operations are safe", func(t *testing.T) {
|
||||||
|
c := New(10000)
|
||||||
|
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
for i := 0; i < 10; i++ {
|
||||||
|
wg.Add(1)
|
||||||
|
go func(id int) {
|
||||||
|
defer wg.Done()
|
||||||
|
for j := 0; j < 100; j++ {
|
||||||
|
key := id*100 + j
|
||||||
|
_ = c.Add(key, []byte("data"))
|
||||||
|
_, _ = c.Get(key)
|
||||||
|
_ = c.Has(key)
|
||||||
|
_ = c.Size()
|
||||||
|
}
|
||||||
|
}(i)
|
||||||
|
}
|
||||||
|
wg.Wait()
|
||||||
|
})
|
||||||
|
}
|
||||||
+10
-3
@@ -9,6 +9,7 @@ import (
|
|||||||
"runtime"
|
"runtime"
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/billziss-gh/golib/shlex"
|
"github.com/billziss-gh/golib/shlex"
|
||||||
"gopkg.in/yaml.v3"
|
"gopkg.in/yaml.v3"
|
||||||
@@ -124,6 +125,7 @@ type Config struct {
|
|||||||
LogToStdout string `yaml:"logToStdout"`
|
LogToStdout string `yaml:"logToStdout"`
|
||||||
MetricsMaxInMemory int `yaml:"metricsMaxInMemory"`
|
MetricsMaxInMemory int `yaml:"metricsMaxInMemory"`
|
||||||
CaptureBuffer int `yaml:"captureBuffer"`
|
CaptureBuffer int `yaml:"captureBuffer"`
|
||||||
|
Performance PerformanceConfig `yaml:"performance"`
|
||||||
GlobalTTL int `yaml:"globalTTL"`
|
GlobalTTL int `yaml:"globalTTL"`
|
||||||
Models map[string]ModelConfig `yaml:"models"` /* key is model ID */
|
Models map[string]ModelConfig `yaml:"models"` /* key is model ID */
|
||||||
Profiles map[string][]string `yaml:"profiles"`
|
Profiles map[string][]string `yaml:"profiles"`
|
||||||
@@ -220,6 +222,14 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
config.HealthCheckTimeout = 15
|
config.HealthCheckTimeout = 15
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Apply defaults for performance config when section is missing
|
||||||
|
if config.Performance.Every == 0 {
|
||||||
|
config.Performance.Every = 5 * time.Second
|
||||||
|
}
|
||||||
|
if err = config.Performance.Validate(); err != nil {
|
||||||
|
return Config{}, fmt.Errorf("performance: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
if config.StartPort < 1 {
|
if config.StartPort < 1 {
|
||||||
return Config{}, fmt.Errorf("startPort must be greater than 1")
|
return Config{}, fmt.Errorf("startPort must be greater than 1")
|
||||||
}
|
}
|
||||||
@@ -646,9 +656,6 @@ func validateMacro(name string, value any) error {
|
|||||||
// Validate that value is a scalar type
|
// Validate that value is a scalar type
|
||||||
switch v := value.(type) {
|
switch v := value.(type) {
|
||||||
case string:
|
case string:
|
||||||
if len(v) >= 1024 {
|
|
||||||
return fmt.Errorf("macro value for '%s' exceeds maximum length of 1024 characters", name)
|
|
||||||
}
|
|
||||||
// Check for self-reference
|
// Check for self-reference
|
||||||
macroSlug := fmt.Sprintf("${%s}", name)
|
macroSlug := fmt.Sprintf("${%s}", name)
|
||||||
if strings.Contains(v, macroSlug) {
|
if strings.Contains(v, macroSlug) {
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import (
|
|||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
)
|
)
|
||||||
@@ -229,6 +230,9 @@ groups:
|
|||||||
HealthCheckTimeout: 15,
|
HealthCheckTimeout: 15,
|
||||||
MetricsMaxInMemory: 1000,
|
MetricsMaxInMemory: 1000,
|
||||||
CaptureBuffer: 5,
|
CaptureBuffer: 5,
|
||||||
|
Performance: PerformanceConfig{
|
||||||
|
Every: 5 * time.Second,
|
||||||
|
},
|
||||||
Profiles: map[string][]string{
|
Profiles: map[string][]string{
|
||||||
"test": {"model1", "model2"},
|
"test": {"model1", "model2"},
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import (
|
|||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
)
|
)
|
||||||
@@ -218,6 +219,9 @@ groups:
|
|||||||
HealthCheckTimeout: 15,
|
HealthCheckTimeout: 15,
|
||||||
MetricsMaxInMemory: 1000,
|
MetricsMaxInMemory: 1000,
|
||||||
CaptureBuffer: 5,
|
CaptureBuffer: 5,
|
||||||
|
Performance: PerformanceConfig{
|
||||||
|
Every: 5 * time.Second,
|
||||||
|
},
|
||||||
Profiles: map[string][]string{
|
Profiles: map[string][]string{
|
||||||
"test": {"model1", "model2"},
|
"test": {"model1", "model2"},
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -0,0 +1,34 @@
|
|||||||
|
package config
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// PerformanceConfig holds configuration for system performance monitoring
|
||||||
|
type PerformanceConfig struct {
|
||||||
|
Disabled bool `yaml:"disabled"`
|
||||||
|
Every time.Duration `yaml:"every"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *PerformanceConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||||
|
type rawPerformanceConfig PerformanceConfig
|
||||||
|
defaults := rawPerformanceConfig{
|
||||||
|
Every: 5 * time.Second,
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := unmarshal(&defaults); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
*p = PerformanceConfig(defaults)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Validate checks the PerformanceConfig values and returns an error if invalid
|
||||||
|
func (p *PerformanceConfig) Validate() error {
|
||||||
|
if p.Every < 5*time.Second {
|
||||||
|
return fmt.Errorf("every must be at least 5s, got %v", p.Every)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
@@ -0,0 +1,98 @@
|
|||||||
|
package config
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestPerformanceConfig_Defaults(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
models:
|
||||||
|
model1:
|
||||||
|
cmd: path/to/cmd --port ${PORT}
|
||||||
|
`
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
// When performance section is missing, defaults should be applied
|
||||||
|
assert.False(t, config.Performance.Disabled)
|
||||||
|
assert.Equal(t, 5*time.Second, config.Performance.Every)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPerformanceConfig_CustomValues(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
performance:
|
||||||
|
enable: true
|
||||||
|
every: 30s
|
||||||
|
models:
|
||||||
|
model1:
|
||||||
|
cmd: path/to/cmd --port ${PORT}
|
||||||
|
`
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
assert.False(t, config.Performance.Disabled)
|
||||||
|
assert.Equal(t, 30*time.Second, config.Performance.Every)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPerformanceConfig_Disabled(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
performance:
|
||||||
|
disabled: true
|
||||||
|
models:
|
||||||
|
model1:
|
||||||
|
cmd: path/to/cmd --port ${PORT}
|
||||||
|
`
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
assert.True(t, config.Performance.Disabled)
|
||||||
|
// Duration defaults should still apply
|
||||||
|
assert.Equal(t, 5*time.Second, config.Performance.Every)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPerformanceConfig_PartialValues(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
performance:
|
||||||
|
every: 10s
|
||||||
|
models:
|
||||||
|
model1:
|
||||||
|
cmd: path/to/cmd --port ${PORT}
|
||||||
|
`
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
// enable should default to true
|
||||||
|
assert.False(t, config.Performance.Disabled)
|
||||||
|
assert.Equal(t, 10*time.Second, config.Performance.Every)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPerformanceConfig_InvalidEvery(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
performance:
|
||||||
|
every: 4s
|
||||||
|
models:
|
||||||
|
model1:
|
||||||
|
cmd: path/to/cmd --port ${PORT}
|
||||||
|
`
|
||||||
|
_, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.Error(t, err)
|
||||||
|
assert.Contains(t, err.Error(), "every must be at least 5s")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPerformanceConfig_ComplexDurations(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
performance:
|
||||||
|
every: 1m30s
|
||||||
|
models:
|
||||||
|
model1:
|
||||||
|
cmd: path/to/cmd --port ${PORT}
|
||||||
|
`
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
assert.Equal(t, 90*time.Second, config.Performance.Every)
|
||||||
|
}
|
||||||
@@ -0,0 +1,85 @@
|
|||||||
|
// Package configwatcher provides a simple cross-platform file watcher based
|
||||||
|
// on os.Stat polling. It works correctly inside Docker containers where the
|
||||||
|
// config file is bind-mounted as an individual file, and for k8s ConfigMap
|
||||||
|
// projections (which present the file as a symlink to an atomically swapped
|
||||||
|
// target) — both cases where inotify-based watchers are unreliable.
|
||||||
|
package configwatcher
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"io/fs"
|
||||||
|
"log"
|
||||||
|
"os"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// DefaultInterval is the polling period Run uses when Watcher.Interval is
// zero or negative.
const DefaultInterval = 2 * time.Second
|
||||||
|
|
||||||
|
type Watcher struct {
|
||||||
|
Path string
|
||||||
|
Interval time.Duration
|
||||||
|
OnChange func()
|
||||||
|
}
|
||||||
|
|
||||||
|
type snapshot struct {
|
||||||
|
exists bool
|
||||||
|
modTime time.Time
|
||||||
|
size int64
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run blocks until ctx is canceled. It polls Path on Interval and invokes
|
||||||
|
// OnChange whenever the file's modification time or size changes, or when
|
||||||
|
// the file reappears after being missing. The baseline poll establishes
|
||||||
|
// initial state and does not fire OnChange.
|
||||||
|
func (w *Watcher) Run(ctx context.Context) {
|
||||||
|
interval := w.Interval
|
||||||
|
if interval <= 0 {
|
||||||
|
interval = DefaultInterval
|
||||||
|
}
|
||||||
|
|
||||||
|
prev := stat(w.Path)
|
||||||
|
|
||||||
|
ticker := time.NewTicker(interval)
|
||||||
|
defer ticker.Stop()
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
case <-ticker.C:
|
||||||
|
cur := stat(w.Path)
|
||||||
|
if changed(prev, cur) && w.OnChange != nil {
|
||||||
|
w.OnChange()
|
||||||
|
}
|
||||||
|
prev = cur
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func stat(path string) snapshot {
|
||||||
|
fi, err := os.Stat(path)
|
||||||
|
if err != nil {
|
||||||
|
if !errors.Is(err, fs.ErrNotExist) {
|
||||||
|
log.Printf("configwatcher: stat %s: %v", path, err)
|
||||||
|
}
|
||||||
|
return snapshot{}
|
||||||
|
}
|
||||||
|
return snapshot{
|
||||||
|
exists: true,
|
||||||
|
modTime: fi.ModTime(),
|
||||||
|
size: fi.Size(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func changed(prev, cur snapshot) bool {
|
||||||
|
// Present → missing: stay quiet (likely a transient rename-style write).
|
||||||
|
// Missing → present: fire so we reload as soon as the file comes back.
|
||||||
|
if !cur.exists {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if !prev.exists {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return !prev.modTime.Equal(cur.modTime) || prev.size != cur.size
|
||||||
|
}
|
||||||
@@ -0,0 +1,191 @@
|
|||||||
|
package configwatcher
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"runtime"
|
||||||
|
"sync/atomic"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
// testInterval is the polling period given to every Watcher in these tests.
const testInterval = 25 * time.Millisecond
|
||||||
|
|
||||||
|
// startWatcher launches w.Run in a goroutine and returns a function that
|
||||||
|
// cancels the context and waits for Run to return.
|
||||||
|
func startWatcher(t *testing.T, w *Watcher) func() {
|
||||||
|
t.Helper()
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
done := make(chan struct{})
|
||||||
|
go func() {
|
||||||
|
w.Run(ctx)
|
||||||
|
close(done)
|
||||||
|
}()
|
||||||
|
return func() {
|
||||||
|
cancel()
|
||||||
|
select {
|
||||||
|
case <-done:
|
||||||
|
case <-time.After(2 * time.Second):
|
||||||
|
t.Fatal("watcher did not stop within 2s of cancel")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// waitForCount blocks until counter reaches want or timeout elapses.
|
||||||
|
func waitForCount(t *testing.T, counter *int64, want int64, timeout time.Duration) bool {
|
||||||
|
t.Helper()
|
||||||
|
deadline := time.Now().Add(timeout)
|
||||||
|
for time.Now().Before(deadline) {
|
||||||
|
if atomic.LoadInt64(counter) >= want {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
time.Sleep(5 * time.Millisecond)
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWatcher_NoFireOnBaseline(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
path := filepath.Join(dir, "config.yaml")
|
||||||
|
require.NoError(t, os.WriteFile(path, []byte("a"), 0o644))
|
||||||
|
|
||||||
|
var n int64
|
||||||
|
stop := startWatcher(t, &Watcher{
|
||||||
|
Path: path,
|
||||||
|
Interval: testInterval,
|
||||||
|
OnChange: func() { atomic.AddInt64(&n, 1) },
|
||||||
|
})
|
||||||
|
defer stop()
|
||||||
|
|
||||||
|
time.Sleep(testInterval * 5)
|
||||||
|
require.Equal(t, int64(0), atomic.LoadInt64(&n), "baseline poll must not fire")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWatcher_DetectsModTimeChange(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
path := filepath.Join(dir, "config.yaml")
|
||||||
|
require.NoError(t, os.WriteFile(path, []byte("a"), 0o644))
|
||||||
|
|
||||||
|
// Force a known baseline mtime.
|
||||||
|
base := time.Now().Add(-1 * time.Hour).Truncate(time.Second)
|
||||||
|
require.NoError(t, os.Chtimes(path, base, base))
|
||||||
|
|
||||||
|
var n int64
|
||||||
|
stop := startWatcher(t, &Watcher{
|
||||||
|
Path: path,
|
||||||
|
Interval: testInterval,
|
||||||
|
OnChange: func() { atomic.AddInt64(&n, 1) },
|
||||||
|
})
|
||||||
|
defer stop()
|
||||||
|
|
||||||
|
// Let the baseline settle.
|
||||||
|
time.Sleep(testInterval * 2)
|
||||||
|
|
||||||
|
// Bump mtime well above the baseline so low-resolution filesystems still notice.
|
||||||
|
require.NoError(t, os.Chtimes(path, base.Add(10*time.Second), base.Add(10*time.Second)))
|
||||||
|
|
||||||
|
require.True(t, waitForCount(t, &n, 1, time.Second), "callback should fire after mtime change")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWatcher_DetectsSizeChangeWithSameModTime(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
path := filepath.Join(dir, "config.yaml")
|
||||||
|
require.NoError(t, os.WriteFile(path, []byte("a"), 0o644))
|
||||||
|
|
||||||
|
fi, err := os.Stat(path)
|
||||||
|
require.NoError(t, err)
|
||||||
|
originalMtime := fi.ModTime()
|
||||||
|
|
||||||
|
var n int64
|
||||||
|
stop := startWatcher(t, &Watcher{
|
||||||
|
Path: path,
|
||||||
|
Interval: testInterval,
|
||||||
|
OnChange: func() { atomic.AddInt64(&n, 1) },
|
||||||
|
})
|
||||||
|
defer stop()
|
||||||
|
time.Sleep(testInterval * 2)
|
||||||
|
|
||||||
|
require.NoError(t, os.WriteFile(path, []byte("aaaaa"), 0o644))
|
||||||
|
// Reset mtime back to the original so size is the only signal.
|
||||||
|
require.NoError(t, os.Chtimes(path, originalMtime, originalMtime))
|
||||||
|
|
||||||
|
require.True(t, waitForCount(t, &n, 1, time.Second), "callback should fire on size change")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWatcher_SymlinkTargetSwap(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
targetA := filepath.Join(dir, "targetA")
|
||||||
|
targetB := filepath.Join(dir, "targetB")
|
||||||
|
link := filepath.Join(dir, "config.yaml")
|
||||||
|
|
||||||
|
require.NoError(t, os.WriteFile(targetA, []byte("AAAA"), 0o644))
|
||||||
|
require.NoError(t, os.WriteFile(targetB, []byte("BBBBBBBB"), 0o644))
|
||||||
|
|
||||||
|
if err := os.Symlink(targetA, link); err != nil {
|
||||||
|
if runtime.GOOS == "windows" {
|
||||||
|
t.Skipf("symlink creation requires privilege on Windows: %v", err)
|
||||||
|
}
|
||||||
|
t.Fatalf("os.Symlink: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var n int64
|
||||||
|
stop := startWatcher(t, &Watcher{
|
||||||
|
Path: link,
|
||||||
|
Interval: testInterval,
|
||||||
|
OnChange: func() { atomic.AddInt64(&n, 1) },
|
||||||
|
})
|
||||||
|
defer stop()
|
||||||
|
time.Sleep(testInterval * 2)
|
||||||
|
|
||||||
|
// Atomic symlink swap (k8s ConfigMap pattern): create new symlink at a
|
||||||
|
// temp name, then rename over the existing one.
|
||||||
|
tmpLink := filepath.Join(dir, "config.yaml.tmp")
|
||||||
|
require.NoError(t, os.Symlink(targetB, tmpLink))
|
||||||
|
require.NoError(t, os.Rename(tmpLink, link))
|
||||||
|
|
||||||
|
require.True(t, waitForCount(t, &n, 1, time.Second), "callback should fire after symlink target swap")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWatcher_FileMissingThenReturns(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
path := filepath.Join(dir, "config.yaml")
|
||||||
|
require.NoError(t, os.WriteFile(path, []byte("a"), 0o644))
|
||||||
|
|
||||||
|
var n int64
|
||||||
|
stop := startWatcher(t, &Watcher{
|
||||||
|
Path: path,
|
||||||
|
Interval: testInterval,
|
||||||
|
OnChange: func() { atomic.AddInt64(&n, 1) },
|
||||||
|
})
|
||||||
|
defer stop()
|
||||||
|
time.Sleep(testInterval * 2)
|
||||||
|
|
||||||
|
require.NoError(t, os.Remove(path))
|
||||||
|
time.Sleep(testInterval * 3)
|
||||||
|
require.Equal(t, int64(0), atomic.LoadInt64(&n), "removal alone must not fire")
|
||||||
|
|
||||||
|
require.NoError(t, os.WriteFile(path, []byte("b"), 0o644))
|
||||||
|
require.True(t, waitForCount(t, &n, 1, time.Second), "callback should fire when file returns")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWatcher_ContextCancelStopsRun(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
path := filepath.Join(dir, "config.yaml")
|
||||||
|
require.NoError(t, os.WriteFile(path, []byte("a"), 0o644))
|
||||||
|
|
||||||
|
w := &Watcher{Path: path, Interval: testInterval}
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
done := make(chan struct{})
|
||||||
|
go func() { w.Run(ctx); close(done) }()
|
||||||
|
|
||||||
|
time.Sleep(testInterval * 2)
|
||||||
|
cancel()
|
||||||
|
select {
|
||||||
|
case <-done:
|
||||||
|
case <-time.After(2 * time.Second):
|
||||||
|
t.Fatal("Run did not return within 2s of cancel")
|
||||||
|
}
|
||||||
|
}
|
||||||
+1
-10
@@ -5,8 +5,7 @@ package proxy
|
|||||||
const ProcessStateChangeEventID = 0x01
|
const ProcessStateChangeEventID = 0x01
|
||||||
const ChatCompletionStatsEventID = 0x02
|
const ChatCompletionStatsEventID = 0x02
|
||||||
const ConfigFileChangedEventID = 0x03
|
const ConfigFileChangedEventID = 0x03
|
||||||
const LogDataEventID = 0x04
|
const ActivityLogEventID = 0x05
|
||||||
const TokenMetricsEventID = 0x05
|
|
||||||
const ModelPreloadedEventID = 0x06
|
const ModelPreloadedEventID = 0x06
|
||||||
const InFlightRequestsEventID = 0x07
|
const InFlightRequestsEventID = 0x07
|
||||||
|
|
||||||
@@ -43,14 +42,6 @@ func (e ConfigFileChangedEvent) Type() uint32 {
|
|||||||
return ConfigFileChangedEventID
|
return ConfigFileChangedEventID
|
||||||
}
|
}
|
||||||
|
|
||||||
type LogDataEvent struct {
|
|
||||||
Data []byte
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e LogDataEvent) Type() uint32 {
|
|
||||||
return LogDataEventID
|
|
||||||
}
|
|
||||||
|
|
||||||
type ModelPreloadedEvent struct {
|
type ModelPreloadedEvent struct {
|
||||||
ModelName string
|
ModelName string
|
||||||
Success bool
|
Success bool
|
||||||
|
|||||||
+37
-25
@@ -8,12 +8,14 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"runtime"
|
"runtime"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/gin-gonic/gin"
|
"github.com/gin-gonic/gin"
|
||||||
|
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||||
"github.com/mostlygeek/llama-swap/proxy/config"
|
"github.com/mostlygeek/llama-swap/proxy/config"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
"github.com/tidwall/gjson"
|
"github.com/tidwall/gjson"
|
||||||
@@ -23,7 +25,7 @@ import (
|
|||||||
var (
|
var (
|
||||||
nextTestPort int = 12000
|
nextTestPort int = 12000
|
||||||
portMutex sync.Mutex
|
portMutex sync.Mutex
|
||||||
testLogger = NewLogMonitorWriter(os.Stdout)
|
testLogger = logmon.NewWriter(os.Stdout)
|
||||||
simpleResponderPath = getSimpleResponderPath()
|
simpleResponderPath = getSimpleResponderPath()
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -39,13 +41,13 @@ func TestMain(m *testing.M) {
|
|||||||
|
|
||||||
switch os.Getenv("LOG_LEVEL") {
|
switch os.Getenv("LOG_LEVEL") {
|
||||||
case "debug":
|
case "debug":
|
||||||
testLogger.SetLogLevel(LevelDebug)
|
testLogger.SetLogLevel(logmon.LevelDebug)
|
||||||
case "warn":
|
case "warn":
|
||||||
testLogger.SetLogLevel(LevelWarn)
|
testLogger.SetLogLevel(logmon.LevelWarn)
|
||||||
case "info":
|
case "info":
|
||||||
testLogger.SetLogLevel(LevelInfo)
|
testLogger.SetLogLevel(logmon.LevelInfo)
|
||||||
default:
|
default:
|
||||||
testLogger.SetLogLevel(LevelWarn)
|
testLogger.SetLogLevel(logmon.LevelWarn)
|
||||||
}
|
}
|
||||||
|
|
||||||
m.Run()
|
m.Run()
|
||||||
@@ -125,6 +127,22 @@ func injectTestHandlers(pm *ProxyManager, modelResponses map[string]string) {
|
|||||||
// newTestHandler returns an http.Handler that mimics simple-responder's API.
|
// newTestHandler returns an http.Handler that mimics simple-responder's API.
|
||||||
// It supports the endpoints that routing tests depend on, without launching
|
// It supports the endpoints that routing tests depend on, without launching
|
||||||
// any subprocess or binding any port.
|
// any subprocess or binding any port.
|
||||||
|
func respondJSON(w http.ResponseWriter, respond string, bodyBytes []byte) {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
json.NewEncoder(w).Encode(map[string]any{
|
||||||
|
"responseMessage": respond,
|
||||||
|
"h_content_length": strconv.Itoa(len(bodyBytes)),
|
||||||
|
"request_body": string(bodyBytes),
|
||||||
|
"usage": map[string]any{
|
||||||
|
"completion_tokens": 10, "prompt_tokens": 25, "total_tokens": 35,
|
||||||
|
},
|
||||||
|
"timings": map[string]any{
|
||||||
|
"prompt_n": 25, "prompt_ms": 13, "predicted_n": 10,
|
||||||
|
"predicted_ms": 17, "predicted_per_second": 10,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
func newTestHandler(respond string) http.Handler {
|
func newTestHandler(respond string) http.Handler {
|
||||||
mux := http.NewServeMux()
|
mux := http.NewServeMux()
|
||||||
|
|
||||||
@@ -170,19 +188,7 @@ func newTestHandler(respond string) http.Handler {
|
|||||||
fmt.Fprintf(w, "event: message\ndata: [DONE]\n\n")
|
fmt.Fprintf(w, "event: message\ndata: [DONE]\n\n")
|
||||||
flusher.Flush()
|
flusher.Flush()
|
||||||
} else {
|
} else {
|
||||||
w.Header().Set("Content-Type", "application/json")
|
respondJSON(w, respond, bodyBytes)
|
||||||
json.NewEncoder(w).Encode(map[string]any{
|
|
||||||
"responseMessage": respond,
|
|
||||||
"h_content_length": r.Header.Get("Content-Length"),
|
|
||||||
"request_body": string(bodyBytes),
|
|
||||||
"usage": map[string]any{
|
|
||||||
"completion_tokens": 10, "prompt_tokens": 25, "total_tokens": 35,
|
|
||||||
},
|
|
||||||
"timings": map[string]any{
|
|
||||||
"prompt_n": 25, "prompt_ms": 13, "predicted_n": 10,
|
|
||||||
"predicted_ms": 17, "predicted_per_second": 10,
|
|
||||||
},
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -198,15 +204,21 @@ func newTestHandler(respond string) http.Handler {
|
|||||||
})
|
})
|
||||||
|
|
||||||
mux.HandleFunc("/v1/completions", func(w http.ResponseWriter, r *http.Request) {
|
mux.HandleFunc("/v1/completions", func(w http.ResponseWriter, r *http.Request) {
|
||||||
w.Header().Set("Content-Type", "application/json")
|
bodyBytes, _ := io.ReadAll(r.Body)
|
||||||
json.NewEncoder(w).Encode(map[string]any{
|
respondJSON(w, respond, bodyBytes)
|
||||||
"responseMessage": respond,
|
|
||||||
"usage": map[string]any{
|
|
||||||
"completion_tokens": 10, "prompt_tokens": 25, "total_tokens": 35,
|
|
||||||
},
|
|
||||||
})
|
|
||||||
})
|
})
|
||||||
|
|
||||||
|
for _, path := range []string{
|
||||||
|
"/chat/completions", "/completions",
|
||||||
|
"/responses", "/messages", "/messages/count_tokens",
|
||||||
|
"/embeddings", "/rerank", "/reranking",
|
||||||
|
} {
|
||||||
|
mux.HandleFunc(path, func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
bodyBytes, _ := io.ReadAll(r.Body)
|
||||||
|
respondJSON(w, respond, bodyBytes)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
mux.HandleFunc("/completion", func(w http.ResponseWriter, r *http.Request) {
|
mux.HandleFunc("/completion", func(w http.ResponseWriter, r *http.Request) {
|
||||||
w.Header().Set("Content-Type", "application/json")
|
w.Header().Set("Content-Type", "application/json")
|
||||||
json.NewEncoder(w).Encode(map[string]any{
|
json.NewEncoder(w).Encode(map[string]any{
|
||||||
|
|||||||
+5
-4
@@ -7,6 +7,7 @@ import (
|
|||||||
"sort"
|
"sort"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
|
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||||
"github.com/mostlygeek/llama-swap/proxy/config"
|
"github.com/mostlygeek/llama-swap/proxy/config"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -145,8 +146,8 @@ type Matrix struct {
|
|||||||
solver *MatrixSolver
|
solver *MatrixSolver
|
||||||
processes map[string]*Process // all processes keyed by real model name
|
processes map[string]*Process // all processes keyed by real model name
|
||||||
config config.Config
|
config config.Config
|
||||||
proxyLogger *LogMonitor
|
proxyLogger *logmon.Monitor
|
||||||
upstreamLogger *LogMonitor
|
upstreamLogger *logmon.Monitor
|
||||||
|
|
||||||
// inflight tracks ProxyRequest calls that have released m.Lock but may
|
// inflight tracks ProxyRequest calls that have released m.Lock but may
|
||||||
// not yet have incremented Process.inFlightRequests. A concurrent
|
// not yet have incremented Process.inFlightRequests. A concurrent
|
||||||
@@ -165,10 +166,10 @@ type Matrix struct {
|
|||||||
|
|
||||||
// NewMatrix creates a Matrix from config. It creates a Process for every
|
// NewMatrix creates a Matrix from config. It creates a Process for every
|
||||||
// model defined in the config (any model can run alone even if not in a set).
|
// model defined in the config (any model can run alone even if not in a set).
|
||||||
func NewMatrix(cfg config.Config, proxyLogger, upstreamLogger *LogMonitor) *Matrix {
|
func NewMatrix(cfg config.Config, proxyLogger, upstreamLogger *logmon.Monitor) *Matrix {
|
||||||
processes := make(map[string]*Process)
|
processes := make(map[string]*Process)
|
||||||
for modelID, modelConfig := range cfg.Models {
|
for modelID, modelConfig := range cfg.Models {
|
||||||
processLogger := NewLogMonitorWriter(upstreamLogger)
|
processLogger := logmon.NewWriter(upstreamLogger)
|
||||||
process := NewProcess(modelID, cfg.HealthCheckTimeout, modelConfig, processLogger, proxyLogger)
|
process := NewProcess(modelID, cfg.HealthCheckTimeout, modelConfig, processLogger, proxyLogger)
|
||||||
processes[modelID] = process
|
processes[modelID] = process
|
||||||
}
|
}
|
||||||
|
|||||||
+316
-219
@@ -12,9 +12,13 @@ import (
|
|||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/fxamacker/cbor/v2"
|
||||||
"github.com/gin-gonic/gin"
|
"github.com/gin-gonic/gin"
|
||||||
"github.com/klauspost/compress/zstd"
|
"github.com/klauspost/compress/zstd"
|
||||||
"github.com/mostlygeek/llama-swap/event"
|
"github.com/mostlygeek/llama-swap/event"
|
||||||
|
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||||
|
"github.com/mostlygeek/llama-swap/internal/ring"
|
||||||
|
"github.com/mostlygeek/llama-swap/proxy/cache"
|
||||||
"github.com/tidwall/gjson"
|
"github.com/tidwall/gjson"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -42,37 +46,53 @@ var zstdDecPool = &sync.Pool{
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
// compressCapture marshals a ReqRespCapture to JSON and compresses it with zstd.
|
// compressCapture marshals a ReqRespCapture to CBOR and compresses it with zstd.
|
||||||
// Returns compressed bytes and the original JSON byte count for logging.
|
// Returns compressed bytes and the original CBOR byte count for logging.
|
||||||
func compressCapture(c *ReqRespCapture) ([]byte, int, error) {
|
func compressCapture(c *ReqRespCapture) ([]byte, int, error) {
|
||||||
jsonBytes, err := json.Marshal(c)
|
cborBytes, err := cbor.Marshal(c)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, 0, fmt.Errorf("marshal capture: %w", err)
|
return nil, 0, fmt.Errorf("marshal capture: %w", err)
|
||||||
}
|
}
|
||||||
enc := zstdEncPool.Get().(*zstd.Encoder)
|
zenc := zstdEncPool.Get().(*zstd.Encoder)
|
||||||
defer zstdEncPool.Put(enc)
|
defer zstdEncPool.Put(zenc)
|
||||||
return enc.EncodeAll(jsonBytes, nil), len(jsonBytes), nil
|
return zenc.EncodeAll(cborBytes, nil), len(cborBytes), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// decompressCapture decompresses zstd-compressed JSON and returns it.
|
// decompressCapture decompresses zstd-compressed CBOR and unmarshals it into a ReqRespCapture.
|
||||||
func decompressCapture(data []byte) ([]byte, error) {
|
func decompressCapture(data []byte) (*ReqRespCapture, error) {
|
||||||
dec := zstdDecPool.Get().(*zstd.Decoder)
|
dec := zstdDecPool.Get().(*zstd.Decoder)
|
||||||
defer zstdDecPool.Put(dec)
|
defer zstdDecPool.Put(dec)
|
||||||
return dec.DecodeAll(data, nil)
|
cborBytes, err := dec.DecodeAll(data, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("decompress capture: %w", err)
|
||||||
|
}
|
||||||
|
var capture ReqRespCapture
|
||||||
|
if err := cbor.Unmarshal(cborBytes, &capture); err != nil {
|
||||||
|
return nil, fmt.Errorf("unmarshal capture: %w", err)
|
||||||
|
}
|
||||||
|
return &capture, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// TokenMetrics represents parsed token statistics from llama-server logs
|
// TokenMetrics holds token usage and performance metrics
|
||||||
type TokenMetrics struct {
|
type TokenMetrics struct {
|
||||||
ID int `json:"id"`
|
CachedTokens int `json:"cache_tokens"`
|
||||||
Timestamp time.Time `json:"timestamp"`
|
InputTokens int `json:"input_tokens"`
|
||||||
Model string `json:"model"`
|
OutputTokens int `json:"output_tokens"`
|
||||||
CachedTokens int `json:"cache_tokens"`
|
PromptPerSecond float64 `json:"prompt_per_second"`
|
||||||
InputTokens int `json:"input_tokens"`
|
TokensPerSecond float64 `json:"tokens_per_second"`
|
||||||
OutputTokens int `json:"output_tokens"`
|
}
|
||||||
PromptPerSecond float64 `json:"prompt_per_second"`
|
|
||||||
TokensPerSecond float64 `json:"tokens_per_second"`
|
// ActivityLogEntry is one activity-log record: model, request path, response status and content type, token metrics, duration, and whether a capture is stored
|
||||||
DurationMs int `json:"duration_ms"`
|
type ActivityLogEntry struct {
|
||||||
HasCapture bool `json:"has_capture"`
|
ID int `json:"id"`
|
||||||
|
Timestamp time.Time `json:"timestamp"`
|
||||||
|
Model string `json:"model"`
|
||||||
|
ReqPath string `json:"req_path"`
|
||||||
|
RespContentType string `json:"resp_content_type"`
|
||||||
|
RespStatusCode int `json:"resp_status_code"`
|
||||||
|
Tokens TokenMetrics `json:"tokens"`
|
||||||
|
DurationMs int `json:"duration_ms"`
|
||||||
|
HasCapture bool `json:"has_capture"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type ReqRespCapture struct {
|
type ReqRespCapture struct {
|
||||||
@@ -84,168 +104,179 @@ type ReqRespCapture struct {
|
|||||||
RespBody []byte `json:"resp_body"`
|
RespBody []byte `json:"resp_body"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// TokenMetricsEvent represents a token metrics event
|
// ActivityLogEvent is the event payload wrapping a single ActivityLogEntry
|
||||||
type TokenMetricsEvent struct {
|
type ActivityLogEvent struct {
|
||||||
Metrics TokenMetrics
|
Metrics ActivityLogEntry
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e TokenMetricsEvent) Type() uint32 {
|
func (e ActivityLogEvent) Type() uint32 {
|
||||||
return TokenMetricsEventID // defined in events.go
|
return ActivityLogEventID // defined in events.go
|
||||||
}
|
}
|
||||||
|
|
||||||
// metricsMonitor parses llama-server output for token statistics
|
// metricsMonitor parses llama-server output for token statistics
|
||||||
type metricsMonitor struct {
|
type metricsMonitor struct {
|
||||||
mu sync.RWMutex
|
mu sync.RWMutex
|
||||||
metrics []TokenMetrics
|
metrics ring.Buffer[ActivityLogEntry]
|
||||||
maxMetrics int
|
nextID int
|
||||||
nextID int
|
logger *logmon.Monitor
|
||||||
logger *LogMonitor
|
|
||||||
|
|
||||||
// capture fields
|
// capture fields
|
||||||
enableCaptures bool
|
enableCaptures bool
|
||||||
captures map[int][]byte // zstd-compressed JSON of ReqRespCapture
|
captureCache *cache.Cache // zstd-compressed CBOR of ReqRespCapture
|
||||||
captureOrder []int // track insertion order for FIFO eviction
|
|
||||||
captureSize int // current total compressed size in bytes
|
|
||||||
maxCaptureSize int // max bytes for captures (uncompressed)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// newMetricsMonitor creates a new metricsMonitor. captureBufferMB is the
|
// newMetricsMonitor creates a new metricsMonitor. captureBufferMB is the
|
||||||
// capture buffer size in megabytes; 0 disables captures.
|
// capture buffer size in megabytes; 0 disables captures.
|
||||||
func newMetricsMonitor(logger *LogMonitor, maxMetrics int, captureBufferMB int) *metricsMonitor {
|
func newMetricsMonitor(logger *logmon.Monitor, maxMetrics int, captureBufferMB int) *metricsMonitor {
|
||||||
return &metricsMonitor{
|
mm := &metricsMonitor{
|
||||||
logger: logger,
|
logger: logger,
|
||||||
maxMetrics: maxMetrics,
|
metrics: ring.NewBuffer[ActivityLogEntry](maxMetrics),
|
||||||
enableCaptures: captureBufferMB > 0,
|
enableCaptures: captureBufferMB > 0,
|
||||||
captures: make(map[int][]byte),
|
|
||||||
captureOrder: make([]int, 0),
|
|
||||||
captureSize: 0,
|
|
||||||
maxCaptureSize: captureBufferMB * 1024 * 1024,
|
|
||||||
}
|
}
|
||||||
|
if captureBufferMB > 0 {
|
||||||
|
mm.captureCache = cache.New(captureBufferMB * 1024 * 1024)
|
||||||
|
}
|
||||||
|
return mm
|
||||||
}
|
}
|
||||||
|
|
||||||
// addMetrics adds a new metric to the collection and publishes an event.
|
// queueMetrics adds a new metric to the collection without emitting an event.
|
||||||
// Returns the assigned metric ID.
|
// Returns the assigned metric ID. Call emitMetric after capture setup.
|
||||||
func (mp *metricsMonitor) addMetrics(metric TokenMetrics) int {
|
func (mp *metricsMonitor) queueMetrics(metric ActivityLogEntry) int {
|
||||||
mp.mu.Lock()
|
mp.mu.Lock()
|
||||||
defer mp.mu.Unlock()
|
defer mp.mu.Unlock()
|
||||||
|
|
||||||
metric.ID = mp.nextID
|
metric.ID = mp.nextID
|
||||||
mp.nextID++
|
mp.nextID++
|
||||||
mp.metrics = append(mp.metrics, metric)
|
mp.metrics.Push(metric)
|
||||||
if len(mp.metrics) > mp.maxMetrics {
|
|
||||||
mp.metrics = mp.metrics[len(mp.metrics)-mp.maxMetrics:]
|
|
||||||
}
|
|
||||||
event.Emit(TokenMetricsEvent{Metrics: metric})
|
|
||||||
return metric.ID
|
return metric.ID
|
||||||
}
|
}
|
||||||
|
|
||||||
// addCapture adds a new capture to the buffer with size-based eviction.
|
// emitMetric publishes an ActivityLogEvent for the given metric.
|
||||||
// Captures are skipped if enableCaptures is false or if compressed data exceeds maxCaptureSize.
|
func (mp *metricsMonitor) emitMetric(metric ActivityLogEntry) {
|
||||||
func (mp *metricsMonitor) addCapture(capture ReqRespCapture) {
|
event.Emit(ActivityLogEvent{Metrics: metric})
|
||||||
|
}
|
||||||
|
|
||||||
|
// addCapture compresses and stores a capture in the cache.
|
||||||
|
// Returns true if the capture was stored, false otherwise.
|
||||||
|
func (mp *metricsMonitor) addCapture(capture ReqRespCapture) bool {
|
||||||
if !mp.enableCaptures {
|
if !mp.enableCaptures {
|
||||||
return
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
compressed, uncompressedBytes, err := compressCapture(&capture)
|
compressed, uncompressedBytes, err := compressCapture(&capture)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
mp.logger.Warnf("failed to compress capture: %v, skipping", err)
|
mp.logger.Warnf("failed to compress capture: %v, skipping", err)
|
||||||
return
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
captureSize := len(compressed)
|
if err := mp.captureCache.Add(capture.ID, compressed); err != nil {
|
||||||
if captureSize > mp.maxCaptureSize {
|
mp.logger.Warnf("capture %d too large (%d bytes), skipping: %v", capture.ID, len(compressed), err)
|
||||||
mp.logger.Warnf("compressed capture size %d exceeds max %d, skipping", captureSize, mp.maxCaptureSize)
|
return false
|
||||||
return
|
|
||||||
}
|
}
|
||||||
|
|
||||||
compressionRatio := (1 - float64(captureSize)/float64(uncompressedBytes)) * 100
|
compressionRatio := (1 - float64(len(compressed))/float64(uncompressedBytes)) * 100
|
||||||
|
|
||||||
mp.mu.Lock()
|
|
||||||
defer mp.mu.Unlock()
|
|
||||||
|
|
||||||
// Evict oldest (FIFO) until room available for the compressed data
|
|
||||||
for mp.captureSize+captureSize > mp.maxCaptureSize && len(mp.captureOrder) > 0 {
|
|
||||||
oldestID := mp.captureOrder[0]
|
|
||||||
mp.captureOrder = mp.captureOrder[1:]
|
|
||||||
if evicted, exists := mp.captures[oldestID]; exists {
|
|
||||||
l := len(evicted)
|
|
||||||
mp.captureSize -= l
|
|
||||||
delete(mp.captures, oldestID)
|
|
||||||
mp.logger.Debugf("Capture %d evicted to make space: %d bytes", oldestID, l)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
mp.captures[capture.ID] = compressed
|
|
||||||
mp.captureOrder = append(mp.captureOrder, capture.ID)
|
|
||||||
mp.captureSize += captureSize
|
|
||||||
|
|
||||||
mp.logger.Debugf("Capture %d compressed and saved: %d bytes -> %d bytes (%.1f%% compression)", capture.ID, uncompressedBytes, len(compressed), compressionRatio)
|
mp.logger.Debugf("Capture %d compressed and saved: %d bytes -> %d bytes (%.1f%% compression)", capture.ID, uncompressedBytes, len(compressed), compressionRatio)
|
||||||
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
// getCompressedBytes returns the raw compressed bytes for a capture by ID.
|
// getCompressedBytes returns the raw compressed bytes for a capture by ID.
|
||||||
func (mp *metricsMonitor) getCompressedBytes(id int) ([]byte, bool) {
|
func (mp *metricsMonitor) getCompressedBytes(id int) ([]byte, bool) {
|
||||||
mp.mu.RLock()
|
if mp.captureCache == nil {
|
||||||
defer mp.mu.RUnlock()
|
return nil, false
|
||||||
|
}
|
||||||
data, exists := mp.captures[id]
|
data, err := mp.captureCache.Get(id)
|
||||||
return data, exists
|
if err != nil {
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
return data, true
|
||||||
}
|
}
|
||||||
|
|
||||||
// getCaptureByID returns decompressed capture bytes if found and decompress=true.
|
// getCaptureByID decompresses and unmarshals a capture by ID.
|
||||||
// If decompress=false, returns the raw zstd-compressed bytes.
|
// Returns nil if the capture is not found or decompression fails.
|
||||||
// Returns nil if the capture is not found.
|
func (mp *metricsMonitor) getCaptureByID(id int) *ReqRespCapture {
|
||||||
func (mp *metricsMonitor) getCaptureByID(id int, decompress bool) []byte {
|
if mp.captureCache == nil {
|
||||||
mp.mu.RLock()
|
return nil
|
||||||
defer mp.mu.RUnlock()
|
}
|
||||||
|
data, exists := mp.getCompressedBytes(id)
|
||||||
data, exists := mp.captures[id]
|
|
||||||
if !exists {
|
if !exists {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if !decompress {
|
capture, err := decompressCapture(data)
|
||||||
return data
|
|
||||||
}
|
|
||||||
|
|
||||||
decompressed, err := decompressCapture(data)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
mp.logger.Warnf("failed to decompress capture %d: %v", id, err)
|
mp.logger.Warnf("failed to decompress capture %d: %v", id, err)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
return decompressed
|
return capture
|
||||||
}
|
}
|
||||||
|
|
||||||
// getMetrics returns a copy of the current metrics
|
// getMetrics returns a copy of the current metrics with HasCapture resolved from cache.
|
||||||
func (mp *metricsMonitor) getMetrics() []TokenMetrics {
|
func (mp *metricsMonitor) getMetrics() []ActivityLogEntry {
|
||||||
mp.mu.RLock()
|
mp.mu.RLock()
|
||||||
defer mp.mu.RUnlock()
|
defer mp.mu.RUnlock()
|
||||||
|
|
||||||
result := make([]TokenMetrics, len(mp.metrics))
|
result := mp.metrics.Slice()
|
||||||
copy(result, mp.metrics)
|
if result == nil {
|
||||||
|
return []ActivityLogEntry{}
|
||||||
|
}
|
||||||
|
if mp.captureCache != nil {
|
||||||
|
for i := range result {
|
||||||
|
result[i].HasCapture = mp.captureCache.Has(result[i].ID)
|
||||||
|
}
|
||||||
|
}
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
// getMetricsJSON returns metrics as JSON
|
// getMetricsJSON returns metrics as JSON with HasCapture resolved from cache.
|
||||||
func (mp *metricsMonitor) getMetricsJSON() ([]byte, error) {
|
func (mp *metricsMonitor) getMetricsJSON() ([]byte, error) {
|
||||||
mp.mu.RLock()
|
mp.mu.RLock()
|
||||||
defer mp.mu.RUnlock()
|
defer mp.mu.RUnlock()
|
||||||
return json.Marshal(mp.metrics)
|
|
||||||
|
result := mp.metrics.Slice()
|
||||||
|
if result == nil {
|
||||||
|
return json.Marshal([]ActivityLogEntry{})
|
||||||
|
}
|
||||||
|
if mp.captureCache != nil {
|
||||||
|
for i := range result {
|
||||||
|
result[i].HasCapture = mp.captureCache.Has(result[i].ID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return json.Marshal(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
// wrapHandler wraps the proxy handler to extract token metrics
|
// Capture field flags for controlling what is saved in ReqRespCapture.
|
||||||
|
type captureFields uint
|
||||||
|
|
||||||
|
const (
|
||||||
|
captureNone captureFields = 1 << iota
|
||||||
|
captureReqHeaders
|
||||||
|
captureReqBody
|
||||||
|
captureRespHeaders
|
||||||
|
captureRespBody
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
captureReqAll = captureReqHeaders | captureReqBody
|
||||||
|
captureRespAll = captureRespHeaders | captureRespBody
|
||||||
|
captureAll = captureReqAll | captureRespAll
|
||||||
|
)
|
||||||
|
|
||||||
|
// wrapHandler wraps the proxy handler to extract token metrics.
|
||||||
|
// captureFields controls what is saved in the ReqRespCapture using bitwise flags.
|
||||||
// if wrapHandler returns an error it is safe to assume that no
|
// if wrapHandler returns an error it is safe to assume that no
|
||||||
// data was sent to the client
|
// data was sent to the client
|
||||||
func (mp *metricsMonitor) wrapHandler(
|
func (mp *metricsMonitor) wrapHandler(
|
||||||
modelID string,
|
modelID string,
|
||||||
writer gin.ResponseWriter,
|
writer gin.ResponseWriter,
|
||||||
request *http.Request,
|
request *http.Request,
|
||||||
|
captureFields captureFields,
|
||||||
next func(modelID string, w http.ResponseWriter, r *http.Request) error,
|
next func(modelID string, w http.ResponseWriter, r *http.Request) error,
|
||||||
) error {
|
) error {
|
||||||
// Capture request body and headers if captures enabled
|
// Capture request body and headers if captures enabled
|
||||||
var reqBody []byte
|
var reqBody []byte
|
||||||
var reqHeaders map[string]string
|
var reqHeaders map[string]string
|
||||||
if mp.enableCaptures {
|
if mp.enableCaptures && (captureFields&captureReqBody) != 0 {
|
||||||
if request.Body != nil {
|
if request.Body != nil {
|
||||||
var err error
|
var err error
|
||||||
reqBody, err = io.ReadAll(request.Body)
|
reqBody, err = io.ReadAll(request.Body)
|
||||||
@@ -255,6 +286,8 @@ func (mp *metricsMonitor) wrapHandler(
|
|||||||
request.Body.Close()
|
request.Body.Close()
|
||||||
request.Body = io.NopCloser(bytes.NewBuffer(reqBody))
|
request.Body = io.NopCloser(bytes.NewBuffer(reqBody))
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
if mp.enableCaptures && (captureFields&captureReqHeaders) != 0 {
|
||||||
reqHeaders = make(map[string]string)
|
reqHeaders = make(map[string]string)
|
||||||
for key, values := range request.Header {
|
for key, values := range request.Header {
|
||||||
if len(values) > 0 {
|
if len(values) > 0 {
|
||||||
@@ -278,22 +311,28 @@ func (mp *metricsMonitor) wrapHandler(
|
|||||||
// after this point we have to assume that data was sent to the client
|
// after this point we have to assume that data was sent to the client
|
||||||
// and we can only log errors but not send them to clients
|
// and we can only log errors but not send them to clients
|
||||||
|
|
||||||
if recorder.Status() != http.StatusOK {
|
// Initialize default metrics - recorded for every request
|
||||||
mp.logger.Warnf("metrics skipped, HTTP status=%d, path=%s", recorder.Status(), request.URL.Path)
|
tm := ActivityLogEntry{
|
||||||
return nil
|
Timestamp: time.Now(),
|
||||||
|
Model: modelID,
|
||||||
|
ReqPath: request.URL.Path,
|
||||||
|
RespContentType: recorder.Header().Get("Content-Type"),
|
||||||
|
RespStatusCode: recorder.Status(),
|
||||||
|
DurationMs: int(time.Since(recorder.StartTime()).Milliseconds()),
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize default metrics - these will always be recorded
|
if recorder.Status() != http.StatusOK {
|
||||||
tm := TokenMetrics{
|
mp.logger.Warnf("non-200 response, recording partial metrics: status=%d, path=%s", recorder.Status(), request.URL.Path)
|
||||||
Timestamp: time.Now(),
|
tm.ID = mp.queueMetrics(tm)
|
||||||
Model: modelID,
|
mp.emitMetric(tm)
|
||||||
DurationMs: int(time.Since(recorder.StartTime()).Milliseconds()),
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
body := recorder.body.Bytes()
|
body := recorder.body.Bytes()
|
||||||
if len(body) == 0 {
|
if len(body) == 0 {
|
||||||
mp.logger.Warn("metrics: empty body, recording minimal metrics")
|
mp.logger.Warn("metrics: empty body, recording minimal metrics")
|
||||||
mp.addMetrics(tm)
|
tm.ID = mp.queueMetrics(tm)
|
||||||
|
mp.emitMetric(tm)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -303,7 +342,8 @@ func (mp *metricsMonitor) wrapHandler(
|
|||||||
body, err = decompressBody(body, encoding)
|
body, err = decompressBody(body, encoding)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
mp.logger.Warnf("metrics: decompression failed: %v, path=%s, recording minimal metrics", err, request.URL.Path)
|
mp.logger.Warnf("metrics: decompression failed: %v, path=%s, recording minimal metrics", err, request.URL.Path)
|
||||||
mp.addMetrics(tm)
|
tm.ID = mp.queueMetrics(tm)
|
||||||
|
mp.emitMetric(tm)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -311,7 +351,8 @@ func (mp *metricsMonitor) wrapHandler(
|
|||||||
if parsed, err := processStreamingResponse(modelID, recorder.StartTime(), body); err != nil {
|
if parsed, err := processStreamingResponse(modelID, recorder.StartTime(), body); err != nil {
|
||||||
mp.logger.Warnf("error processing streaming response: %v, path=%s, recording minimal metrics", err, request.URL.Path)
|
mp.logger.Warnf("error processing streaming response: %v, path=%s, recording minimal metrics", err, request.URL.Path)
|
||||||
} else {
|
} else {
|
||||||
tm = parsed
|
tm.Tokens = parsed.Tokens
|
||||||
|
tm.DurationMs = parsed.DurationMs
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if gjson.ValidBytes(body) {
|
if gjson.ValidBytes(body) {
|
||||||
@@ -331,7 +372,8 @@ func (mp *metricsMonitor) wrapHandler(
|
|||||||
if parsedMetrics, err := parseMetrics(modelID, recorder.StartTime(), usage, timings); err != nil {
|
if parsedMetrics, err := parseMetrics(modelID, recorder.StartTime(), usage, timings); err != nil {
|
||||||
mp.logger.Warnf("error parsing metrics: %v, path=%s, recording minimal metrics", err, request.URL.Path)
|
mp.logger.Warnf("error parsing metrics: %v, path=%s, recording minimal metrics", err, request.URL.Path)
|
||||||
} else {
|
} else {
|
||||||
tm = parsedMetrics
|
tm.Tokens = parsedMetrics.Tokens
|
||||||
|
tm.DurationMs = parsedMetrics.DurationMs
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@@ -342,155 +384,214 @@ func (mp *metricsMonitor) wrapHandler(
|
|||||||
// Build capture if enabled and determine if it will be stored
|
// Build capture if enabled and determine if it will be stored
|
||||||
var capture *ReqRespCapture
|
var capture *ReqRespCapture
|
||||||
if mp.enableCaptures {
|
if mp.enableCaptures {
|
||||||
respHeaders := make(map[string]string)
|
var respHeaders map[string]string
|
||||||
for key, values := range recorder.Header() {
|
var respBody []byte
|
||||||
if len(values) > 0 {
|
if (captureFields & captureRespHeaders) != 0 {
|
||||||
respHeaders[key] = values[0]
|
respHeaders = make(map[string]string)
|
||||||
|
for key, values := range recorder.Header() {
|
||||||
|
if len(values) > 0 {
|
||||||
|
respHeaders[key] = values[0]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
redactHeaders(respHeaders)
|
||||||
|
delete(respHeaders, "Content-Encoding")
|
||||||
|
}
|
||||||
|
if (captureFields & captureRespBody) != 0 {
|
||||||
|
respBody = body
|
||||||
}
|
}
|
||||||
redactHeaders(respHeaders)
|
|
||||||
delete(respHeaders, "Content-Encoding")
|
|
||||||
capture = &ReqRespCapture{
|
capture = &ReqRespCapture{
|
||||||
ReqPath: request.URL.Path,
|
ReqPath: request.URL.Path,
|
||||||
ReqHeaders: reqHeaders,
|
ReqHeaders: reqHeaders,
|
||||||
ReqBody: reqBody,
|
ReqBody: reqBody,
|
||||||
RespHeaders: respHeaders,
|
RespHeaders: respHeaders,
|
||||||
RespBody: body,
|
RespBody: respBody,
|
||||||
}
|
|
||||||
compressed, _, err := compressCapture(capture)
|
|
||||||
if err == nil && len(compressed) <= mp.maxCaptureSize {
|
|
||||||
tm.HasCapture = true
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
metricID := mp.addMetrics(tm)
|
metricID := mp.queueMetrics(tm)
|
||||||
|
tm.ID = metricID
|
||||||
|
|
||||||
// Store capture if enabled
|
// Store capture if enabled
|
||||||
if capture != nil {
|
if capture != nil {
|
||||||
capture.ID = metricID
|
capture.ID = metricID
|
||||||
mp.addCapture(*capture)
|
if mp.addCapture(*capture) {
|
||||||
|
tm.HasCapture = true
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
mp.emitMetric(tm)
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func processStreamingResponse(modelID string, start time.Time, body []byte) (TokenMetrics, error) {
|
// usagePaths lists the JSON paths where a per-event usage object can live.
|
||||||
// Iterate **backwards** through the body looking for the data payload with
|
// v1/chat/completions puts it at top-level "usage"; v1/responses nests under
|
||||||
// usage data. This avoids allocating a slice of all lines via bytes.Split.
|
// "response.usage"; v1/messages emits it at "message.usage" on message_start
|
||||||
|
// and at "usage" on message_delta.
|
||||||
|
var usagePaths = []string{"usage", "response.usage", "message.usage"}
|
||||||
|
|
||||||
// Start from the end of the body and scan backwards for newlines
|
// extractUsageTokens reads input/output/cached token counts from a usage
|
||||||
pos := len(body)
|
// gjson.Result, handling the field-name differences across endpoints.
|
||||||
for pos > 0 {
|
// cached returns -1 when the field is absent. ok is true when at least one
|
||||||
// Find the previous newline (or start of body)
|
// field was present.
|
||||||
lineStart := bytes.LastIndexByte(body[:pos], '\n')
|
func extractUsageTokens(usage gjson.Result) (input, output, cached int64, ok bool) {
|
||||||
if lineStart == -1 {
|
cached = -1
|
||||||
lineStart = 0
|
if !usage.Exists() {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if v := usage.Get("prompt_tokens"); v.Exists() {
|
||||||
|
// v1/chat/completions
|
||||||
|
input = v.Int()
|
||||||
|
ok = true
|
||||||
|
} else if v := usage.Get("input_tokens"); v.Exists() {
|
||||||
|
// v1/messages, v1/responses
|
||||||
|
input = v.Int()
|
||||||
|
ok = true
|
||||||
|
}
|
||||||
|
|
||||||
|
if v := usage.Get("completion_tokens"); v.Exists() {
|
||||||
|
// v1/chat/completions
|
||||||
|
output = v.Int()
|
||||||
|
ok = true
|
||||||
|
} else if v := usage.Get("output_tokens"); v.Exists() {
|
||||||
|
// v1/messages, v1/responses
|
||||||
|
output = v.Int()
|
||||||
|
ok = true
|
||||||
|
}
|
||||||
|
|
||||||
|
if v := usage.Get("cache_read_input_tokens"); v.Exists() {
|
||||||
|
// v1/messages (Anthropic)
|
||||||
|
cached = v.Int()
|
||||||
|
ok = true
|
||||||
|
} else if v := usage.Get("input_tokens_details.cached_tokens"); v.Exists() {
|
||||||
|
// v1/responses (OpenAI Responses API)
|
||||||
|
cached = v.Int()
|
||||||
|
ok = true
|
||||||
|
} else if v := usage.Get("prompt_tokens_details.cached_tokens"); v.Exists() {
|
||||||
|
// v1/chat/completions (OpenAI cache hits)
|
||||||
|
cached = v.Int()
|
||||||
|
ok = true
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func processStreamingResponse(modelID string, start time.Time, body []byte) (ActivityLogEntry, error) {
|
||||||
|
// Walk SSE "data:" lines forward, merging usage info from every event.
|
||||||
|
// Different endpoints split usage across events:
|
||||||
|
// - v1/chat/completions: usage on the final chunk before [DONE]
|
||||||
|
// - v1/responses: usage on response.completed (response.usage)
|
||||||
|
// - v1/messages: input + cache on message_start (message.usage),
|
||||||
|
// output_tokens on message_delta (usage)
|
||||||
|
// We take the latest informative value per field so all three are covered.
|
||||||
|
|
||||||
|
var (
|
||||||
|
inputTokens, outputTokens int64
|
||||||
|
cachedTokens int64 = -1
|
||||||
|
hasAny bool
|
||||||
|
timings gjson.Result
|
||||||
|
)
|
||||||
|
|
||||||
|
prefix := []byte("data:")
|
||||||
|
for offset := 0; offset < len(body); {
|
||||||
|
nl := bytes.IndexByte(body[offset:], '\n')
|
||||||
|
var line []byte
|
||||||
|
if nl == -1 {
|
||||||
|
line = body[offset:]
|
||||||
|
offset = len(body)
|
||||||
} else {
|
} else {
|
||||||
lineStart++ // Move past the newline
|
line = body[offset : offset+nl]
|
||||||
|
offset += nl + 1
|
||||||
}
|
}
|
||||||
|
|
||||||
line := bytes.TrimSpace(body[lineStart:pos])
|
line = bytes.TrimSpace(line)
|
||||||
pos = lineStart - 1 // Move position before the newline for next iteration
|
if len(line) == 0 || !bytes.HasPrefix(line, prefix) {
|
||||||
|
|
||||||
if len(line) == 0 {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
// SSE payload always follows "data:"
|
|
||||||
prefix := []byte("data:")
|
|
||||||
if !bytes.HasPrefix(line, prefix) {
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
data := bytes.TrimSpace(line[len(prefix):])
|
data := bytes.TrimSpace(line[len(prefix):])
|
||||||
|
if len(data) == 0 || bytes.Equal(data, []byte("[DONE]")) {
|
||||||
if len(data) == 0 {
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
if !gjson.ValidBytes(data) {
|
||||||
if bytes.Equal(data, []byte("[DONE]")) {
|
|
||||||
// [DONE] line itself contains nothing of interest.
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
parsed := gjson.ParseBytes(data)
|
||||||
|
|
||||||
if gjson.ValidBytes(data) {
|
for _, path := range usagePaths {
|
||||||
parsed := gjson.ParseBytes(data)
|
u := parsed.Get(path)
|
||||||
usage := parsed.Get("usage")
|
if !u.Exists() {
|
||||||
timings := parsed.Get("timings")
|
continue
|
||||||
|
|
||||||
// v1/responses format nests usage under response.usage
|
|
||||||
if !usage.Exists() {
|
|
||||||
usage = parsed.Get("response.usage")
|
|
||||||
}
|
}
|
||||||
|
i, o, c, ok := extractUsageTokens(u)
|
||||||
if usage.Exists() || timings.Exists() {
|
if !ok {
|
||||||
return parseMetrics(modelID, start, usage, timings)
|
continue
|
||||||
}
|
}
|
||||||
|
hasAny = true
|
||||||
|
// Take the latest non-zero value so message_start's input_tokens
|
||||||
|
// is preserved when message_delta's usage omits it, and vice versa
|
||||||
|
// for output_tokens.
|
||||||
|
if i > 0 {
|
||||||
|
inputTokens = i
|
||||||
|
}
|
||||||
|
if o > 0 {
|
||||||
|
outputTokens = o
|
||||||
|
}
|
||||||
|
if c >= 0 {
|
||||||
|
cachedTokens = c
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if t := parsed.Get("timings"); t.Exists() {
|
||||||
|
timings = t
|
||||||
|
hasAny = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return TokenMetrics{}, fmt.Errorf("no valid JSON data found in stream")
|
if !hasAny {
|
||||||
|
return ActivityLogEntry{}, fmt.Errorf("no valid JSON data found in stream")
|
||||||
|
}
|
||||||
|
|
||||||
|
return buildMetrics(modelID, start, inputTokens, outputTokens, cachedTokens, timings), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func parseMetrics(modelID string, start time.Time, usage, timings gjson.Result) (TokenMetrics, error) {
|
func parseMetrics(modelID string, start time.Time, usage, timings gjson.Result) (ActivityLogEntry, error) {
|
||||||
|
input, output, cached, _ := extractUsageTokens(usage)
|
||||||
|
return buildMetrics(modelID, start, input, output, cached, timings), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// buildMetrics composes an ActivityLogEntry from accumulated token counts and
|
||||||
|
// optional llama-server timings (which override input/output and provide rates).
|
||||||
|
func buildMetrics(modelID string, start time.Time, inputTokens, outputTokens, cachedTokens int64, timings gjson.Result) ActivityLogEntry {
|
||||||
wallDurationMs := int(time.Since(start).Milliseconds())
|
wallDurationMs := int(time.Since(start).Milliseconds())
|
||||||
|
durationMs := wallDurationMs
|
||||||
// default values
|
|
||||||
cachedTokens := -1 // unknown or missing data
|
|
||||||
outputTokens := 0
|
|
||||||
inputTokens := 0
|
|
||||||
|
|
||||||
// timings data
|
|
||||||
tokensPerSecond := -1.0
|
tokensPerSecond := -1.0
|
||||||
promptPerSecond := -1.0
|
promptPerSecond := -1.0
|
||||||
durationMs := wallDurationMs
|
|
||||||
|
|
||||||
if usage.Exists() {
|
|
||||||
if pt := usage.Get("prompt_tokens"); pt.Exists() {
|
|
||||||
// v1/chat/completions
|
|
||||||
inputTokens = int(pt.Int())
|
|
||||||
} else if it := usage.Get("input_tokens"); it.Exists() {
|
|
||||||
// v1/messages
|
|
||||||
inputTokens = int(it.Int())
|
|
||||||
}
|
|
||||||
|
|
||||||
if ct := usage.Get("completion_tokens"); ct.Exists() {
|
|
||||||
// v1/chat/completions
|
|
||||||
outputTokens = int(ct.Int())
|
|
||||||
} else if ot := usage.Get("output_tokens"); ot.Exists() {
|
|
||||||
outputTokens = int(ot.Int())
|
|
||||||
}
|
|
||||||
|
|
||||||
if ct := usage.Get("cache_read_input_tokens"); ct.Exists() {
|
|
||||||
cachedTokens = int(ct.Int())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// use llama-server's timing data for tok/sec and duration as it is more accurate
|
|
||||||
if timings.Exists() {
|
if timings.Exists() {
|
||||||
inputTokens = int(timings.Get("prompt_n").Int())
|
inputTokens = timings.Get("prompt_n").Int()
|
||||||
outputTokens = int(timings.Get("predicted_n").Int())
|
outputTokens = timings.Get("predicted_n").Int()
|
||||||
promptPerSecond = timings.Get("prompt_per_second").Float()
|
promptPerSecond = timings.Get("prompt_per_second").Float()
|
||||||
tokensPerSecond = timings.Get("predicted_per_second").Float()
|
tokensPerSecond = timings.Get("predicted_per_second").Float()
|
||||||
timingsDurationMs := int(timings.Get("prompt_ms").Float() + timings.Get("predicted_ms").Float())
|
timingsDurationMs := int(timings.Get("prompt_ms").Float() + timings.Get("predicted_ms").Float())
|
||||||
if timingsDurationMs > durationMs {
|
if timingsDurationMs > durationMs {
|
||||||
durationMs = timingsDurationMs
|
durationMs = timingsDurationMs
|
||||||
}
|
}
|
||||||
|
|
||||||
if cachedValue := timings.Get("cache_n"); cachedValue.Exists() {
|
if cachedValue := timings.Get("cache_n"); cachedValue.Exists() {
|
||||||
cachedTokens = int(cachedValue.Int())
|
cachedTokens = cachedValue.Int()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return TokenMetrics{
|
return ActivityLogEntry{
|
||||||
Timestamp: time.Now(),
|
Timestamp: time.Now(),
|
||||||
Model: modelID,
|
Model: modelID,
|
||||||
CachedTokens: cachedTokens,
|
Tokens: TokenMetrics{
|
||||||
InputTokens: inputTokens,
|
CachedTokens: int(cachedTokens),
|
||||||
OutputTokens: outputTokens,
|
InputTokens: int(inputTokens),
|
||||||
PromptPerSecond: promptPerSecond,
|
OutputTokens: int(outputTokens),
|
||||||
TokensPerSecond: tokensPerSecond,
|
PromptPerSecond: promptPerSecond,
|
||||||
DurationMs: durationMs,
|
TokensPerSecond: tokensPerSecond,
|
||||||
}, nil
|
},
|
||||||
|
DurationMs: durationMs,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// decompressBody decompresses the body based on Content-Encoding header
|
// decompressBody decompresses the body based on Content-Encoding header
|
||||||
@@ -527,15 +628,11 @@ func newBodyCopier(w gin.ResponseWriter) *responseBodyCopier {
|
|||||||
ResponseWriter: w,
|
ResponseWriter: w,
|
||||||
body: bodyBuffer,
|
body: bodyBuffer,
|
||||||
tee: io.MultiWriter(w, bodyBuffer),
|
tee: io.MultiWriter(w, bodyBuffer),
|
||||||
|
start: time.Now(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (w *responseBodyCopier) Write(b []byte) (int, error) {
|
func (w *responseBodyCopier) Write(b []byte) (int, error) {
|
||||||
if w.start.IsZero() {
|
|
||||||
w.start = time.Now()
|
|
||||||
}
|
|
||||||
|
|
||||||
// Single write operation that writes to both the response and buffer
|
|
||||||
return w.tee.Write(b)
|
return w.tee.Write(b)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
+462
-175
File diff suppressed because it is too large
Load Diff
+2
-1
@@ -10,6 +10,7 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||||
"github.com/mostlygeek/llama-swap/proxy/config"
|
"github.com/mostlygeek/llama-swap/proxy/config"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -24,7 +25,7 @@ type PeerProxy struct {
|
|||||||
proxyMap map[string]*peerProxyMember
|
proxyMap map[string]*peerProxyMember
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewPeerProxy(peers config.PeerDictionaryConfig, proxyLogger *LogMonitor) (*PeerProxy, error) {
|
func NewPeerProxy(peers config.PeerDictionaryConfig, proxyLogger *logmon.Monitor) (*PeerProxy, error) {
|
||||||
proxyMap := make(map[string]*peerProxyMember)
|
proxyMap := make(map[string]*peerProxyMember)
|
||||||
|
|
||||||
// Sort peer IDs for consistent iteration order
|
// Sort peer IDs for consistent iteration order
|
||||||
|
|||||||
+7
-6
@@ -18,6 +18,7 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/mostlygeek/llama-swap/event"
|
"github.com/mostlygeek/llama-swap/event"
|
||||||
|
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||||
"github.com/mostlygeek/llama-swap/proxy/config"
|
"github.com/mostlygeek/llama-swap/proxy/config"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -53,8 +54,8 @@ type Process struct {
|
|||||||
// closed when command exits
|
// closed when command exits
|
||||||
cmdWaitChan chan struct{}
|
cmdWaitChan chan struct{}
|
||||||
|
|
||||||
processLogger *LogMonitor
|
processLogger *logmon.Monitor
|
||||||
proxyLogger *LogMonitor
|
proxyLogger *logmon.Monitor
|
||||||
|
|
||||||
healthCheckTimeout int
|
healthCheckTimeout int
|
||||||
healthCheckLoopInterval time.Duration
|
healthCheckLoopInterval time.Duration
|
||||||
@@ -84,7 +85,7 @@ type Process struct {
|
|||||||
failedStartCount int
|
failedStartCount int
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewProcess(ID string, healthCheckTimeout int, config config.ModelConfig, processLogger *LogMonitor, proxyLogger *LogMonitor) *Process {
|
func NewProcess(ID string, healthCheckTimeout int, config config.ModelConfig, processLogger *logmon.Monitor, proxyLogger *logmon.Monitor) *Process {
|
||||||
concurrentLimit := 10
|
concurrentLimit := 10
|
||||||
if config.ConcurrencyLimit > 0 {
|
if config.ConcurrencyLimit > 0 {
|
||||||
concurrentLimit = config.ConcurrencyLimit
|
concurrentLimit = config.ConcurrencyLimit
|
||||||
@@ -149,7 +150,7 @@ func NewProcess(ID string, healthCheckTimeout int, config config.ModelConfig, pr
|
|||||||
}
|
}
|
||||||
|
|
||||||
// LogMonitor returns the log monitor associated with the process.
|
// LogMonitor returns the log monitor associated with the process.
|
||||||
func (p *Process) LogMonitor() *LogMonitor {
|
func (p *Process) LogMonitor() *logmon.Monitor {
|
||||||
return p.processLogger
|
return p.processLogger
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -303,7 +304,7 @@ func (p *Process) start() error {
|
|||||||
return fmt.Errorf("process was already starting but wound up in state %v", state)
|
return fmt.Errorf("process was already starting but wound up in state %v", state)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
return fmt.Errorf("processes was in state %v when start() was called", curState)
|
return fmt.Errorf("process was in state %v when start() was called", curState)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
return fmt.Errorf("failed to set Process state to starting: current state: %v, error: %v", curState, err)
|
return fmt.Errorf("failed to set Process state to starting: current state: %v, error: %v", curState, err)
|
||||||
@@ -726,7 +727,7 @@ func (p *Process) cmdStopUpstreamProcess() error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Logger returns the logger for this process.
|
// Logger returns the logger for this process.
|
||||||
func (p *Process) Logger() *LogMonitor {
|
func (p *Process) Logger() *logmon.Monitor {
|
||||||
return p.processLogger
|
return p.processLogger
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -11,20 +11,21 @@ import (
|
|||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||||
"github.com/mostlygeek/llama-swap/proxy/config"
|
"github.com/mostlygeek/llama-swap/proxy/config"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
debugLogger = NewLogMonitorWriter(os.Stdout)
|
debugLogger = logmon.NewWriter(os.Stdout)
|
||||||
)
|
)
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
// flip to help with debugging tests
|
// flip to help with debugging tests
|
||||||
if false {
|
if false {
|
||||||
debugLogger.SetLogLevel(LevelDebug)
|
debugLogger.SetLogLevel(logmon.LevelDebug)
|
||||||
} else {
|
} else {
|
||||||
debugLogger.SetLogLevel(LevelError)
|
debugLogger.SetLogLevel(logmon.LevelError)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -585,7 +586,7 @@ func TestProcess_CustomTimeouts(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
debugLogger := NewLogMonitorWriter(io.Discard)
|
debugLogger := logmon.NewWriter(io.Discard)
|
||||||
process := NewProcess("test-model", 30, modelConfig, debugLogger, debugLogger)
|
process := NewProcess("test-model", 30, modelConfig, debugLogger, debugLogger)
|
||||||
|
|
||||||
// Verify the process was created successfully
|
// Verify the process was created successfully
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import (
|
|||||||
"slices"
|
"slices"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
|
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||||
"github.com/mostlygeek/llama-swap/proxy/config"
|
"github.com/mostlygeek/llama-swap/proxy/config"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -18,8 +19,8 @@ type ProcessGroup struct {
|
|||||||
exclusive bool
|
exclusive bool
|
||||||
persistent bool
|
persistent bool
|
||||||
|
|
||||||
proxyLogger *LogMonitor
|
proxyLogger *logmon.Monitor
|
||||||
upstreamLogger *LogMonitor
|
upstreamLogger *logmon.Monitor
|
||||||
|
|
||||||
// map of current processes
|
// map of current processes
|
||||||
processes map[string]*Process
|
processes map[string]*Process
|
||||||
@@ -42,7 +43,7 @@ type ProcessGroup struct {
|
|||||||
testDelayFastPath func()
|
testDelayFastPath func()
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewProcessGroup(id string, config config.Config, proxyLogger *LogMonitor, upstreamLogger *LogMonitor) *ProcessGroup {
|
func NewProcessGroup(id string, config config.Config, proxyLogger *logmon.Monitor, upstreamLogger *logmon.Monitor) *ProcessGroup {
|
||||||
groupConfig, ok := config.Groups[id]
|
groupConfig, ok := config.Groups[id]
|
||||||
if !ok {
|
if !ok {
|
||||||
panic("Unable to find configuration for group id: " + id)
|
panic("Unable to find configuration for group id: " + id)
|
||||||
@@ -62,7 +63,7 @@ func NewProcessGroup(id string, config config.Config, proxyLogger *LogMonitor, u
|
|||||||
// Create a Process for each member in the group
|
// Create a Process for each member in the group
|
||||||
for _, modelID := range groupConfig.Members {
|
for _, modelID := range groupConfig.Members {
|
||||||
modelConfig, modelID, _ := pg.config.FindConfig(modelID)
|
modelConfig, modelID, _ := pg.config.FindConfig(modelID)
|
||||||
processLogger := NewLogMonitorWriter(upstreamLogger)
|
processLogger := logmon.NewWriter(upstreamLogger)
|
||||||
process := NewProcess(modelID, pg.config.HealthCheckTimeout, modelConfig, processLogger, pg.proxyLogger)
|
process := NewProcess(modelID, pg.config.HealthCheckTimeout, modelConfig, processLogger, pg.proxyLogger)
|
||||||
pg.processes[modelID] = process
|
pg.processes[modelID] = process
|
||||||
}
|
}
|
||||||
|
|||||||
+388
-311
@@ -17,6 +17,8 @@ import (
|
|||||||
|
|
||||||
"github.com/gin-gonic/gin"
|
"github.com/gin-gonic/gin"
|
||||||
"github.com/mostlygeek/llama-swap/event"
|
"github.com/mostlygeek/llama-swap/event"
|
||||||
|
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||||
|
"github.com/mostlygeek/llama-swap/internal/perf"
|
||||||
"github.com/mostlygeek/llama-swap/proxy/config"
|
"github.com/mostlygeek/llama-swap/proxy/config"
|
||||||
"github.com/tidwall/gjson"
|
"github.com/tidwall/gjson"
|
||||||
"github.com/tidwall/sjson"
|
"github.com/tidwall/sjson"
|
||||||
@@ -69,11 +71,12 @@ type ProxyManager struct {
|
|||||||
ginEngine *gin.Engine
|
ginEngine *gin.Engine
|
||||||
|
|
||||||
// logging
|
// logging
|
||||||
proxyLogger *LogMonitor
|
proxyLogger *logmon.Monitor
|
||||||
upstreamLogger *LogMonitor
|
upstreamLogger *logmon.Monitor
|
||||||
muxLogger *LogMonitor
|
muxLogger *logmon.Monitor
|
||||||
|
|
||||||
metricsMonitor *metricsMonitor
|
metricsMonitor *metricsMonitor
|
||||||
|
perfMonitor *perf.Monitor
|
||||||
|
|
||||||
processGroups map[string]*ProcessGroup
|
processGroups map[string]*ProcessGroup
|
||||||
|
|
||||||
@@ -98,27 +101,27 @@ type ProxyManager struct {
|
|||||||
func New(proxyConfig config.Config) *ProxyManager {
|
func New(proxyConfig config.Config) *ProxyManager {
|
||||||
// set up loggers
|
// set up loggers
|
||||||
|
|
||||||
var muxLogger, upstreamLogger, proxyLogger *LogMonitor
|
var muxLogger, upstreamLogger, proxyLogger *logmon.Monitor
|
||||||
switch proxyConfig.LogToStdout {
|
switch proxyConfig.LogToStdout {
|
||||||
case config.LogToStdoutNone:
|
case config.LogToStdoutNone:
|
||||||
muxLogger = NewLogMonitorWriter(io.Discard)
|
muxLogger = logmon.NewWriter(io.Discard)
|
||||||
upstreamLogger = NewLogMonitorWriter(io.Discard)
|
upstreamLogger = logmon.NewWriter(io.Discard)
|
||||||
proxyLogger = NewLogMonitorWriter(io.Discard)
|
proxyLogger = logmon.NewWriter(io.Discard)
|
||||||
case config.LogToStdoutBoth:
|
case config.LogToStdoutBoth:
|
||||||
muxLogger = NewLogMonitorWriter(os.Stdout)
|
muxLogger = logmon.NewWriter(os.Stdout)
|
||||||
upstreamLogger = NewLogMonitorWriter(muxLogger)
|
upstreamLogger = logmon.NewWriter(muxLogger)
|
||||||
proxyLogger = NewLogMonitorWriter(muxLogger)
|
proxyLogger = logmon.NewWriter(muxLogger)
|
||||||
case config.LogToStdoutUpstream:
|
case config.LogToStdoutUpstream:
|
||||||
muxLogger = NewLogMonitorWriter(os.Stdout)
|
muxLogger = logmon.NewWriter(os.Stdout)
|
||||||
upstreamLogger = NewLogMonitorWriter(muxLogger)
|
upstreamLogger = logmon.NewWriter(muxLogger)
|
||||||
proxyLogger = NewLogMonitorWriter(io.Discard)
|
proxyLogger = logmon.NewWriter(io.Discard)
|
||||||
default:
|
default:
|
||||||
// same as config.LogToStdoutProxy
|
// same as config.LogToStdoutProxy
|
||||||
// helpful because some old tests create a config.Config directly and it
|
// helpful because some old tests create a config.Config directly and it
|
||||||
// may not have LogToStdout set explicitly
|
// may not have LogToStdout set explicitly
|
||||||
muxLogger = NewLogMonitorWriter(os.Stdout)
|
muxLogger = logmon.NewWriter(os.Stdout)
|
||||||
upstreamLogger = NewLogMonitorWriter(io.Discard)
|
upstreamLogger = logmon.NewWriter(io.Discard)
|
||||||
proxyLogger = NewLogMonitorWriter(muxLogger)
|
proxyLogger = logmon.NewWriter(muxLogger)
|
||||||
}
|
}
|
||||||
|
|
||||||
if proxyConfig.LogRequests {
|
if proxyConfig.LogRequests {
|
||||||
@@ -127,20 +130,20 @@ func New(proxyConfig config.Config) *ProxyManager {
|
|||||||
|
|
||||||
switch strings.ToLower(strings.TrimSpace(proxyConfig.LogLevel)) {
|
switch strings.ToLower(strings.TrimSpace(proxyConfig.LogLevel)) {
|
||||||
case "debug":
|
case "debug":
|
||||||
proxyLogger.SetLogLevel(LevelDebug)
|
proxyLogger.SetLogLevel(logmon.LevelDebug)
|
||||||
upstreamLogger.SetLogLevel(LevelDebug)
|
upstreamLogger.SetLogLevel(logmon.LevelDebug)
|
||||||
case "info":
|
case "info":
|
||||||
proxyLogger.SetLogLevel(LevelInfo)
|
proxyLogger.SetLogLevel(logmon.LevelInfo)
|
||||||
upstreamLogger.SetLogLevel(LevelInfo)
|
upstreamLogger.SetLogLevel(logmon.LevelInfo)
|
||||||
case "warn":
|
case "warn":
|
||||||
proxyLogger.SetLogLevel(LevelWarn)
|
proxyLogger.SetLogLevel(logmon.LevelWarn)
|
||||||
upstreamLogger.SetLogLevel(LevelWarn)
|
upstreamLogger.SetLogLevel(logmon.LevelWarn)
|
||||||
case "error":
|
case "error":
|
||||||
proxyLogger.SetLogLevel(LevelError)
|
proxyLogger.SetLogLevel(logmon.LevelError)
|
||||||
upstreamLogger.SetLogLevel(LevelError)
|
upstreamLogger.SetLogLevel(logmon.LevelError)
|
||||||
default:
|
default:
|
||||||
proxyLogger.SetLogLevel(LevelInfo)
|
proxyLogger.SetLogLevel(logmon.LevelInfo)
|
||||||
upstreamLogger.SetLogLevel(LevelInfo)
|
upstreamLogger.SetLogLevel(logmon.LevelInfo)
|
||||||
}
|
}
|
||||||
|
|
||||||
// see: https://go.dev/src/time/format.go
|
// see: https://go.dev/src/time/format.go
|
||||||
@@ -271,13 +274,17 @@ func (pm *ProxyManager) setupGinEngine() {
|
|||||||
|
|
||||||
pm.ginEngine.Use(func(c *gin.Context) {
|
pm.ginEngine.Use(func(c *gin.Context) {
|
||||||
|
|
||||||
// don't log the Wake on Lan proxy health check
|
for _, prefix := range []string{
|
||||||
if c.Request.URL.Path == "/wol-health" {
|
"/wol-health",
|
||||||
c.Next()
|
"/api/performance",
|
||||||
return
|
"/metrics",
|
||||||
|
} {
|
||||||
|
if strings.HasPrefix(c.Request.URL.Path, prefix) {
|
||||||
|
c.Next()
|
||||||
|
return
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start timer
|
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
|
|
||||||
// capture these because /upstream/:model rewrites them in c.Next()
|
// capture these because /upstream/:model rewrites them in c.Next()
|
||||||
@@ -285,12 +292,9 @@ func (pm *ProxyManager) setupGinEngine() {
|
|||||||
method := c.Request.Method
|
method := c.Request.Method
|
||||||
path := c.Request.URL.Path
|
path := c.Request.URL.Path
|
||||||
|
|
||||||
// Process request
|
|
||||||
c.Next()
|
c.Next()
|
||||||
|
|
||||||
// Stop timer
|
|
||||||
duration := time.Since(start)
|
duration := time.Since(start)
|
||||||
|
|
||||||
statusCode := c.Writer.Status()
|
statusCode := c.Writer.Status()
|
||||||
bodySize := c.Writer.Size()
|
bodySize := c.Writer.Size()
|
||||||
|
|
||||||
@@ -332,41 +336,87 @@ func (pm *ProxyManager) setupGinEngine() {
|
|||||||
|
|
||||||
// Set up routes using the Gin engine
|
// Set up routes using the Gin engine
|
||||||
// Protected routes use pm.apiKeyAuth() middleware
|
// Protected routes use pm.apiKeyAuth() middleware
|
||||||
pm.ginEngine.POST("/v1/chat/completions", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
llmHandler := pm.mkProxyJSONHandler(captureAll)
|
||||||
pm.ginEngine.POST("/v1/responses", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/v1/chat/completions", pm.apiKeyAuth(), pm.trackInflight(), llmHandler)
|
||||||
|
pm.ginEngine.POST("/v1/responses", pm.apiKeyAuth(), pm.trackInflight(), llmHandler)
|
||||||
// Support legacy /v1/completions api, see issue #12
|
// Support legacy /v1/completions api, see issue #12
|
||||||
pm.ginEngine.POST("/v1/completions", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/v1/completions", pm.apiKeyAuth(), pm.trackInflight(), llmHandler)
|
||||||
// Support anthropic /v1/messages (added https://github.com/ggml-org/llama.cpp/pull/17570)
|
// Support anthropic /v1/messages (added https://github.com/ggml-org/llama.cpp/pull/17570)
|
||||||
pm.ginEngine.POST("/v1/messages", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/v1/messages", pm.apiKeyAuth(), pm.trackInflight(), llmHandler)
|
||||||
// Support anthropic count_tokens API (Also added in the above PR)
|
// Support anthropic count_tokens API (Also added in the above PR)
|
||||||
pm.ginEngine.POST("/v1/messages/count_tokens", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/v1/messages/count_tokens", pm.apiKeyAuth(), pm.trackInflight(), llmHandler)
|
||||||
|
|
||||||
// Support embeddings and reranking
|
// Support embeddings and reranking
|
||||||
pm.ginEngine.POST("/v1/embeddings", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/v1/embeddings", pm.apiKeyAuth(), pm.trackInflight(), llmHandler)
|
||||||
|
|
||||||
// llama-server's /reranking endpoint + aliases
|
// llama-server's /reranking endpoint + aliases
|
||||||
pm.ginEngine.POST("/reranking", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/reranking", pm.apiKeyAuth(), pm.trackInflight(), llmHandler)
|
||||||
pm.ginEngine.POST("/rerank", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/rerank", pm.apiKeyAuth(), pm.trackInflight(), llmHandler)
|
||||||
pm.ginEngine.POST("/v1/rerank", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/v1/rerank", pm.apiKeyAuth(), pm.trackInflight(), llmHandler)
|
||||||
pm.ginEngine.POST("/v1/reranking", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/v1/reranking", pm.apiKeyAuth(), pm.trackInflight(), llmHandler)
|
||||||
|
|
||||||
|
// Unversioned API endpoints, see issue #728
|
||||||
|
pm.ginEngine.POST("/v/chat/completions", pm.apiKeyAuth(), pm.trackInflight(), llmHandler)
|
||||||
|
pm.ginEngine.POST("/v/responses", pm.apiKeyAuth(), pm.trackInflight(), llmHandler)
|
||||||
|
pm.ginEngine.POST("/v/completions", pm.apiKeyAuth(), pm.trackInflight(), llmHandler)
|
||||||
|
pm.ginEngine.POST("/v/messages", pm.apiKeyAuth(), pm.trackInflight(), llmHandler)
|
||||||
|
pm.ginEngine.POST("/v/messages/count_tokens", pm.apiKeyAuth(), pm.trackInflight(), llmHandler)
|
||||||
|
pm.ginEngine.POST("/v/embeddings", pm.apiKeyAuth(), pm.trackInflight(), llmHandler)
|
||||||
|
pm.ginEngine.POST("/v/rerank", pm.apiKeyAuth(), pm.trackInflight(), llmHandler)
|
||||||
|
pm.ginEngine.POST("/v/reranking", pm.apiKeyAuth(), pm.trackInflight(), llmHandler)
|
||||||
|
|
||||||
// llama-server's /infill endpoint for code infilling
|
// llama-server's /infill endpoint for code infilling
|
||||||
pm.ginEngine.POST("/infill", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/infill", pm.apiKeyAuth(), pm.trackInflight(), llmHandler)
|
||||||
|
|
||||||
// llama-server's /completion endpoint
|
// llama-server's /completion endpoint
|
||||||
pm.ginEngine.POST("/completion", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/completion", pm.apiKeyAuth(), pm.trackInflight(), llmHandler)
|
||||||
|
|
||||||
// Support audio/speech endpoint
|
// Support audio/speech endpoint
|
||||||
pm.ginEngine.POST("/v1/audio/speech", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
pm.ginEngine.POST(
|
||||||
pm.ginEngine.POST("/v1/audio/voices", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
"/v1/audio/speech",
|
||||||
|
pm.apiKeyAuth(),
|
||||||
|
pm.trackInflight(),
|
||||||
|
pm.mkProxyJSONHandler(captureReqAll|captureRespHeaders),
|
||||||
|
)
|
||||||
|
pm.ginEngine.POST(
|
||||||
|
"/v1/audio/voices",
|
||||||
|
pm.apiKeyAuth(),
|
||||||
|
pm.trackInflight(),
|
||||||
|
pm.mkProxyJSONHandler(captureReqHeaders|captureRespAll),
|
||||||
|
)
|
||||||
pm.ginEngine.GET("/v1/audio/voices", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyGETModelHandler)
|
pm.ginEngine.GET("/v1/audio/voices", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyGETModelHandler)
|
||||||
pm.ginEngine.POST("/v1/audio/transcriptions", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyOAIPostFormHandler)
|
|
||||||
pm.ginEngine.POST("/v1/images/generations", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
pm.ginEngine.POST(
|
||||||
pm.ginEngine.POST("/v1/images/edits", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyOAIPostFormHandler)
|
"/v1/audio/transcriptions",
|
||||||
|
pm.apiKeyAuth(),
|
||||||
|
pm.trackInflight(),
|
||||||
|
pm.mkPostFormHandler(captureReqHeaders|captureRespHeaders|captureRespBody),
|
||||||
|
)
|
||||||
|
pm.ginEngine.POST(
|
||||||
|
"/v1/images/generations",
|
||||||
|
pm.apiKeyAuth(),
|
||||||
|
pm.trackInflight(),
|
||||||
|
pm.mkProxyJSONHandler(captureReqAll|captureRespHeaders),
|
||||||
|
)
|
||||||
|
|
||||||
|
pm.ginEngine.POST(
|
||||||
|
"/v1/images/edits",
|
||||||
|
pm.apiKeyAuth(),
|
||||||
|
pm.trackInflight(),
|
||||||
|
pm.mkPostFormHandler(captureReqHeaders|captureRespHeaders),
|
||||||
|
)
|
||||||
|
|
||||||
// sd.cpp /sdapi/v1 endpoints
|
// sd.cpp /sdapi/v1 endpoints
|
||||||
pm.ginEngine.POST("/sdapi/v1/txt2img", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
pm.ginEngine.POST("/sdapi/v1/txt2img",
|
||||||
pm.ginEngine.POST("/sdapi/v1/img2img", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyInferenceHandler)
|
pm.apiKeyAuth(),
|
||||||
|
pm.trackInflight(),
|
||||||
|
pm.mkProxyJSONHandler(captureReqAll|captureRespHeaders),
|
||||||
|
)
|
||||||
|
pm.ginEngine.POST("/sdapi/v1/img2img",
|
||||||
|
pm.apiKeyAuth(),
|
||||||
|
pm.trackInflight(),
|
||||||
|
pm.mkProxyJSONHandler(captureReqHeaders|captureRespHeaders),
|
||||||
|
)
|
||||||
pm.ginEngine.GET("/sdapi/v1/loras", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyGETModelHandler)
|
pm.ginEngine.GET("/sdapi/v1/loras", pm.apiKeyAuth(), pm.trackInflight(), pm.proxyGETModelHandler)
|
||||||
|
|
||||||
pm.ginEngine.GET("/v1/models", pm.apiKeyAuth(), pm.listModelsHandler)
|
pm.ginEngine.GET("/v1/models", pm.apiKeyAuth(), pm.listModelsHandler)
|
||||||
@@ -393,6 +443,8 @@ func (pm *ProxyManager) setupGinEngine() {
|
|||||||
c.String(http.StatusOK, "OK")
|
c.String(http.StatusOK, "OK")
|
||||||
})
|
})
|
||||||
|
|
||||||
|
pm.ginEngine.GET("/metrics", pm.prometheusMetricsHandler)
|
||||||
|
|
||||||
// see cmd/wol-proxy/wol-proxy.go, not logged
|
// see cmd/wol-proxy/wol-proxy.go, not logged
|
||||||
pm.ginEngine.GET("/wol-health", func(c *gin.Context) {
|
pm.ginEngine.GET("/wol-health", func(c *gin.Context) {
|
||||||
c.String(http.StatusOK, "OK")
|
c.String(http.StatusOK, "OK")
|
||||||
@@ -647,7 +699,7 @@ func (pm *ProxyManager) proxyToUpstream(c *gin.Context) {
|
|||||||
searchModelName, modelID, remainingPath, modelFound := pm.findModelInPath(upstreamPath)
|
searchModelName, modelID, remainingPath, modelFound := pm.findModelInPath(upstreamPath)
|
||||||
|
|
||||||
if !modelFound {
|
if !modelFound {
|
||||||
pm.sendErrorResponse(c, http.StatusBadRequest, "model id required in path")
|
pm.sendErrorResponse(c, http.StatusNotFound, "model not found")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -686,7 +738,7 @@ func (pm *ProxyManager) proxyToUpstream(c *gin.Context) {
|
|||||||
|
|
||||||
// attempt to record metrics if it is a POST request
|
// attempt to record metrics if it is a POST request
|
||||||
if pm.metricsMonitor != nil && c.Request.Method == "POST" {
|
if pm.metricsMonitor != nil && c.Request.Method == "POST" {
|
||||||
if err := pm.metricsMonitor.wrapHandler(modelID, c.Writer, c.Request, handler); err != nil {
|
if err := pm.metricsMonitor.wrapHandler(modelID, c.Writer, c.Request, captureNone, handler); err != nil {
|
||||||
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error proxying metrics wrapped request: %s", err.Error()))
|
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error proxying metrics wrapped request: %s", err.Error()))
|
||||||
pm.proxyLogger.Errorf("Error proxying wrapped upstream request for model %s, path=%s", modelID, originalPath)
|
pm.proxyLogger.Errorf("Error proxying wrapped upstream request for model %s, path=%s", modelID, originalPath)
|
||||||
return
|
return
|
||||||
@@ -700,280 +752,299 @@ func (pm *ProxyManager) proxyToUpstream(c *gin.Context) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (pm *ProxyManager) proxyInferenceHandler(c *gin.Context) {
|
func (pm *ProxyManager) mkProxyJSONHandler(cf captureFields) func(*gin.Context) {
|
||||||
bodyBytes, err := io.ReadAll(c.Request.Body)
|
return func(c *gin.Context) {
|
||||||
if err != nil {
|
bodyBytes, err := io.ReadAll(c.Request.Body)
|
||||||
pm.sendErrorResponse(c, http.StatusBadRequest, "could not ready request body")
|
if err != nil {
|
||||||
return
|
pm.sendErrorResponse(c, http.StatusBadRequest, "could not ready request body")
|
||||||
}
|
return
|
||||||
|
|
||||||
requestedModel := gjson.GetBytes(bodyBytes, "model").String()
|
|
||||||
if requestedModel == "" {
|
|
||||||
pm.sendErrorResponse(c, http.StatusBadRequest, "missing or invalid 'model' key")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Look for a matching local model first
|
|
||||||
var nextHandler func(modelID string, w http.ResponseWriter, r *http.Request) error
|
|
||||||
|
|
||||||
modelID, found := pm.config.RealModelName(requestedModel)
|
|
||||||
if found {
|
|
||||||
var localHandler func(string, http.ResponseWriter, *http.Request) error
|
|
||||||
if pm.matrix != nil {
|
|
||||||
localHandler = pm.matrix.ProxyRequest
|
|
||||||
} else {
|
|
||||||
processGroup, err := pm.swapProcessGroup(modelID)
|
|
||||||
if err != nil {
|
|
||||||
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error swapping process group: %s", err.Error()))
|
|
||||||
return
|
|
||||||
}
|
|
||||||
localHandler = processGroup.ProxyRequest
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// issue #69 allow custom model names to be sent to upstream
|
requestedModel := gjson.GetBytes(bodyBytes, "model").String()
|
||||||
useModelName := pm.config.Models[modelID].UseModelName
|
if requestedModel == "" {
|
||||||
if useModelName != "" {
|
pm.sendErrorResponse(c, http.StatusBadRequest, "missing or invalid 'model' key")
|
||||||
bodyBytes, err = sjson.SetBytes(bodyBytes, "model", useModelName)
|
return
|
||||||
if err != nil {
|
|
||||||
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error rewriting model name in JSON: %s", err.Error()))
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// issue #174 strip parameters from the JSON body
|
// Look for a matching local model first
|
||||||
stripParams, err := pm.config.Models[modelID].Filters.SanitizedStripParams()
|
var nextHandler func(modelID string, w http.ResponseWriter, r *http.Request) error
|
||||||
if err != nil { // just log it and continue
|
|
||||||
pm.proxyLogger.Errorf("Error sanitizing strip params string: %s, %s", pm.config.Models[modelID].Filters.StripParams, err.Error())
|
modelID, found := pm.config.RealModelName(requestedModel)
|
||||||
} else {
|
if found {
|
||||||
|
var localHandler func(string, http.ResponseWriter, *http.Request) error
|
||||||
|
if pm.matrix != nil {
|
||||||
|
localHandler = pm.matrix.ProxyRequest
|
||||||
|
} else {
|
||||||
|
processGroup, err := pm.swapProcessGroup(modelID)
|
||||||
|
if err != nil {
|
||||||
|
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error swapping process group: %s", err.Error()))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
localHandler = processGroup.ProxyRequest
|
||||||
|
}
|
||||||
|
|
||||||
|
// issue #69 allow custom model names to be sent to upstream
|
||||||
|
useModelName := pm.config.Models[modelID].UseModelName
|
||||||
|
if useModelName != "" {
|
||||||
|
bodyBytes, err = sjson.SetBytes(bodyBytes, "model", useModelName)
|
||||||
|
if err != nil {
|
||||||
|
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error rewriting model name in JSON: %s", err.Error()))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// issue #174 strip parameters from the JSON body
|
||||||
|
stripParams, err := pm.config.Models[modelID].Filters.SanitizedStripParams()
|
||||||
|
if err != nil { // just log it and continue
|
||||||
|
pm.proxyLogger.Errorf("Error sanitizing strip params string: %s, %s", pm.config.Models[modelID].Filters.StripParams, err.Error())
|
||||||
|
} else {
|
||||||
|
for _, param := range stripParams {
|
||||||
|
pm.proxyLogger.Debugf("<%s> stripping param: %s", modelID, param)
|
||||||
|
bodyBytes, err = sjson.DeleteBytes(bodyBytes, param)
|
||||||
|
if err != nil {
|
||||||
|
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error deleting parameter %s from request", param))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// issue #453 set/override parameters in the JSON body
|
||||||
|
setParams, setParamKeys := pm.config.Models[modelID].Filters.SanitizedSetParams()
|
||||||
|
for _, key := range setParamKeys {
|
||||||
|
pm.proxyLogger.Debugf("<%s> setting param: %s", modelID, key)
|
||||||
|
bodyBytes, err = sjson.SetBytes(bodyBytes, key, setParams[key])
|
||||||
|
if err != nil {
|
||||||
|
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error setting parameter %s in request", key))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// setParamsByID: set params based on the requested model ID (runs after setParams, can override it)
|
||||||
|
setParamsByIDParams, setParamsByIDKeys := pm.config.Models[modelID].Filters.SanitizedSetParamsByID(requestedModel)
|
||||||
|
for _, key := range setParamsByIDKeys {
|
||||||
|
pm.proxyLogger.Debugf("<%s> setting param by id: %s", requestedModel, key)
|
||||||
|
bodyBytes, err = sjson.SetBytes(bodyBytes, key, setParamsByIDParams[key])
|
||||||
|
if err != nil {
|
||||||
|
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error setting parameter %s in request", key))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pm.proxyLogger.Debugf("ProxyManager using local Process for model: %s", requestedModel)
|
||||||
|
nextHandler = localHandler
|
||||||
|
} else if pm.peerProxy != nil && pm.peerProxy.HasPeerModel(requestedModel) {
|
||||||
|
pm.proxyLogger.Debugf("ProxyManager using ProxyPeer for model: %s", requestedModel)
|
||||||
|
modelID = requestedModel
|
||||||
|
|
||||||
|
// issue #453 apply filters for peer requests
|
||||||
|
peerFilters := pm.peerProxy.GetPeerFilters(requestedModel)
|
||||||
|
|
||||||
|
// Apply stripParams - remove specified parameters from request
|
||||||
|
stripParams := peerFilters.SanitizedStripParams()
|
||||||
for _, param := range stripParams {
|
for _, param := range stripParams {
|
||||||
pm.proxyLogger.Debugf("<%s> stripping param: %s", modelID, param)
|
pm.proxyLogger.Debugf("<%s> stripping param: %s", requestedModel, param)
|
||||||
bodyBytes, err = sjson.DeleteBytes(bodyBytes, param)
|
bodyBytes, err = sjson.DeleteBytes(bodyBytes, param)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error deleting parameter %s from request", param))
|
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error stripping parameter %s from request", param))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Apply setParams - set/override specified parameters in request
|
||||||
|
setParams, setParamKeys := peerFilters.SanitizedSetParams()
|
||||||
|
for _, key := range setParamKeys {
|
||||||
|
pm.proxyLogger.Debugf("<%s> setting param: %s", requestedModel, key)
|
||||||
|
bodyBytes, err = sjson.SetBytes(bodyBytes, key, setParams[key])
|
||||||
|
if err != nil {
|
||||||
|
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error setting parameter %s in request", key))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
nextHandler = pm.peerProxy.ProxyRequest
|
||||||
|
}
|
||||||
|
|
||||||
|
if nextHandler == nil {
|
||||||
|
pm.sendErrorResponse(c, http.StatusBadRequest, fmt.Sprintf("could not find suitable inference handler for %s", requestedModel))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
c.Request.Body = io.NopCloser(bytes.NewBuffer(bodyBytes))
|
||||||
|
|
||||||
|
// dechunk it as we already have all the body bytes see issue #11
|
||||||
|
c.Request.Header.Del("transfer-encoding")
|
||||||
|
c.Request.Header.Set("content-length", strconv.Itoa(len(bodyBytes)))
|
||||||
|
c.Request.ContentLength = int64(len(bodyBytes))
|
||||||
|
|
||||||
|
// issue #728 support versionless API requests
|
||||||
|
if strings.HasPrefix(c.Request.URL.Path, "/v/") {
|
||||||
|
c.Request.URL.Path = strings.TrimPrefix(c.Request.URL.Path, "/v")
|
||||||
|
}
|
||||||
|
|
||||||
|
// issue #366 extract values that downstream handlers may need
|
||||||
|
isStreaming := gjson.GetBytes(bodyBytes, "stream").Bool()
|
||||||
|
ctx := context.WithValue(c.Request.Context(), proxyCtxKey("streaming"), isStreaming)
|
||||||
|
ctx = context.WithValue(ctx, proxyCtxKey("model"), modelID)
|
||||||
|
c.Request = c.Request.WithContext(ctx)
|
||||||
|
|
||||||
|
if pm.metricsMonitor != nil && c.Request.Method == "POST" {
|
||||||
|
if err := pm.metricsMonitor.wrapHandler(modelID, c.Writer, c.Request, cf, nextHandler); err != nil {
|
||||||
|
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error proxying metrics wrapped request: %s", err.Error()))
|
||||||
|
pm.proxyLogger.Errorf("Error Proxying Metrics Wrapped Request model %s", modelID)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if err := nextHandler(modelID, c.Writer, c.Request); err != nil {
|
||||||
|
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error proxying request: %s", err.Error()))
|
||||||
|
pm.proxyLogger.Errorf("Error Proxying Request for model %s", modelID)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// mkPostFormHandler creates a POST form handler for inference backends
|
||||||
|
// with a custom captureFields to filter out large binary requests or responses.
|
||||||
|
func (pm *ProxyManager) mkPostFormHandler(cf captureFields) func(*gin.Context) {
|
||||||
|
return func(c *gin.Context) {
|
||||||
|
// Parse multipart form
|
||||||
|
if err := c.Request.ParseMultipartForm(32 << 20); err != nil { // 32MB max memory, larger files go to tmp disk
|
||||||
|
pm.sendErrorResponse(c, http.StatusBadRequest, fmt.Sprintf("error parsing multipart form: %s", err.Error()))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get model parameter from the form
|
||||||
|
requestedModel := c.Request.FormValue("model")
|
||||||
|
if requestedModel == "" {
|
||||||
|
pm.sendErrorResponse(c, http.StatusBadRequest, "missing or invalid 'model' parameter in form data")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Look for a matching local model first, then check peers
|
||||||
|
var nextHandler func(modelID string, w http.ResponseWriter, r *http.Request) error
|
||||||
|
var useModelName string
|
||||||
|
|
||||||
|
modelID, found := pm.config.RealModelName(requestedModel)
|
||||||
|
if found {
|
||||||
|
if pm.matrix != nil {
|
||||||
|
nextHandler = pm.matrix.ProxyRequest
|
||||||
|
} else {
|
||||||
|
processGroup, err := pm.swapProcessGroup(modelID)
|
||||||
|
if err != nil {
|
||||||
|
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error swapping process group: %s", err.Error()))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
nextHandler = processGroup.ProxyRequest
|
||||||
|
}
|
||||||
|
|
||||||
|
useModelName = pm.config.Models[modelID].UseModelName
|
||||||
|
pm.proxyLogger.Debugf("ProxyManager using local Process for model: %s", requestedModel)
|
||||||
|
} else if pm.peerProxy != nil && pm.peerProxy.HasPeerModel(requestedModel) {
|
||||||
|
pm.proxyLogger.Debugf("ProxyManager using ProxyPeer for model: %s", requestedModel)
|
||||||
|
modelID = requestedModel
|
||||||
|
nextHandler = pm.peerProxy.ProxyRequest
|
||||||
|
}
|
||||||
|
|
||||||
|
if nextHandler == nil {
|
||||||
|
pm.sendErrorResponse(c, http.StatusBadRequest, fmt.Sprintf("could not find suitable handler for %s", requestedModel))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// We need to reconstruct the multipart form in any case since the body is consumed
|
||||||
|
// Create a new buffer for the reconstructed request
|
||||||
|
var requestBuffer bytes.Buffer
|
||||||
|
multipartWriter := multipart.NewWriter(&requestBuffer)
|
||||||
|
|
||||||
|
// Copy all form values
|
||||||
|
for key, values := range c.Request.MultipartForm.Value {
|
||||||
|
for _, value := range values {
|
||||||
|
fieldValue := value
|
||||||
|
// If this is the model field and we have a profile, use just the model name
|
||||||
|
if key == "model" {
|
||||||
|
// # issue #69 allow custom model names to be sent to upstream
|
||||||
|
if useModelName != "" {
|
||||||
|
fieldValue = useModelName
|
||||||
|
} else {
|
||||||
|
fieldValue = requestedModel
|
||||||
|
}
|
||||||
|
}
|
||||||
|
field, err := multipartWriter.CreateFormField(key)
|
||||||
|
if err != nil {
|
||||||
|
pm.sendErrorResponse(c, http.StatusInternalServerError, "error recreating form field")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if _, err = field.Write([]byte(fieldValue)); err != nil {
|
||||||
|
pm.sendErrorResponse(c, http.StatusInternalServerError, "error writing form field")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// issue #453 set/override parameters in the JSON body
|
// Copy all files from the original request
|
||||||
setParams, setParamKeys := pm.config.Models[modelID].Filters.SanitizedSetParams()
|
for key, fileHeaders := range c.Request.MultipartForm.File {
|
||||||
for _, key := range setParamKeys {
|
for _, fileHeader := range fileHeaders {
|
||||||
pm.proxyLogger.Debugf("<%s> setting param: %s", modelID, key)
|
formFile, err := multipartWriter.CreateFormFile(key, fileHeader.Filename)
|
||||||
bodyBytes, err = sjson.SetBytes(bodyBytes, key, setParams[key])
|
if err != nil {
|
||||||
if err != nil {
|
pm.sendErrorResponse(c, http.StatusInternalServerError, "error recreating form file")
|
||||||
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error setting parameter %s in request", key))
|
return
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// setParamsByID: set params based on the requested model ID (runs after setParams, can override it)
|
|
||||||
setParamsByIDParams, setParamsByIDKeys := pm.config.Models[modelID].Filters.SanitizedSetParamsByID(requestedModel)
|
|
||||||
for _, key := range setParamsByIDKeys {
|
|
||||||
pm.proxyLogger.Debugf("<%s> setting param by id: %s", requestedModel, key)
|
|
||||||
bodyBytes, err = sjson.SetBytes(bodyBytes, key, setParamsByIDParams[key])
|
|
||||||
if err != nil {
|
|
||||||
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error setting parameter %s in request", key))
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pm.proxyLogger.Debugf("ProxyManager using local Process for model: %s", requestedModel)
|
|
||||||
nextHandler = localHandler
|
|
||||||
} else if pm.peerProxy != nil && pm.peerProxy.HasPeerModel(requestedModel) {
|
|
||||||
pm.proxyLogger.Debugf("ProxyManager using ProxyPeer for model: %s", requestedModel)
|
|
||||||
modelID = requestedModel
|
|
||||||
|
|
||||||
// issue #453 apply filters for peer requests
|
|
||||||
peerFilters := pm.peerProxy.GetPeerFilters(requestedModel)
|
|
||||||
|
|
||||||
// Apply stripParams - remove specified parameters from request
|
|
||||||
stripParams := peerFilters.SanitizedStripParams()
|
|
||||||
for _, param := range stripParams {
|
|
||||||
pm.proxyLogger.Debugf("<%s> stripping param: %s", requestedModel, param)
|
|
||||||
bodyBytes, err = sjson.DeleteBytes(bodyBytes, param)
|
|
||||||
if err != nil {
|
|
||||||
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error stripping parameter %s from request", param))
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Apply setParams - set/override specified parameters in request
|
|
||||||
setParams, setParamKeys := peerFilters.SanitizedSetParams()
|
|
||||||
for _, key := range setParamKeys {
|
|
||||||
pm.proxyLogger.Debugf("<%s> setting param: %s", requestedModel, key)
|
|
||||||
bodyBytes, err = sjson.SetBytes(bodyBytes, key, setParams[key])
|
|
||||||
if err != nil {
|
|
||||||
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error setting parameter %s in request", key))
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
nextHandler = pm.peerProxy.ProxyRequest
|
|
||||||
}
|
|
||||||
|
|
||||||
if nextHandler == nil {
|
|
||||||
pm.sendErrorResponse(c, http.StatusBadRequest, fmt.Sprintf("could not find suitable inference handler for %s", requestedModel))
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
c.Request.Body = io.NopCloser(bytes.NewBuffer(bodyBytes))
|
|
||||||
|
|
||||||
// dechunk it as we already have all the body bytes see issue #11
|
|
||||||
c.Request.Header.Del("transfer-encoding")
|
|
||||||
c.Request.Header.Set("content-length", strconv.Itoa(len(bodyBytes)))
|
|
||||||
c.Request.ContentLength = int64(len(bodyBytes))
|
|
||||||
|
|
||||||
// issue #366 extract values that downstream handlers may need
|
|
||||||
isStreaming := gjson.GetBytes(bodyBytes, "stream").Bool()
|
|
||||||
ctx := context.WithValue(c.Request.Context(), proxyCtxKey("streaming"), isStreaming)
|
|
||||||
ctx = context.WithValue(ctx, proxyCtxKey("model"), modelID)
|
|
||||||
c.Request = c.Request.WithContext(ctx)
|
|
||||||
|
|
||||||
if pm.metricsMonitor != nil && c.Request.Method == "POST" {
|
|
||||||
if err := pm.metricsMonitor.wrapHandler(modelID, c.Writer, c.Request, nextHandler); err != nil {
|
|
||||||
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error proxying metrics wrapped request: %s", err.Error()))
|
|
||||||
pm.proxyLogger.Errorf("Error Proxying Metrics Wrapped Request model %s", modelID)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if err := nextHandler(modelID, c.Writer, c.Request); err != nil {
|
|
||||||
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error proxying request: %s", err.Error()))
|
|
||||||
pm.proxyLogger.Errorf("Error Proxying Request for model %s", modelID)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (pm *ProxyManager) proxyOAIPostFormHandler(c *gin.Context) {
|
|
||||||
// Parse multipart form
|
|
||||||
if err := c.Request.ParseMultipartForm(32 << 20); err != nil { // 32MB max memory, larger files go to tmp disk
|
|
||||||
pm.sendErrorResponse(c, http.StatusBadRequest, fmt.Sprintf("error parsing multipart form: %s", err.Error()))
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get model parameter from the form
|
|
||||||
requestedModel := c.Request.FormValue("model")
|
|
||||||
if requestedModel == "" {
|
|
||||||
pm.sendErrorResponse(c, http.StatusBadRequest, "missing or invalid 'model' parameter in form data")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Look for a matching local model first, then check peers
|
|
||||||
var nextHandler func(modelID string, w http.ResponseWriter, r *http.Request) error
|
|
||||||
var useModelName string
|
|
||||||
|
|
||||||
modelID, found := pm.config.RealModelName(requestedModel)
|
|
||||||
if found {
|
|
||||||
if pm.matrix != nil {
|
|
||||||
nextHandler = pm.matrix.ProxyRequest
|
|
||||||
} else {
|
|
||||||
processGroup, err := pm.swapProcessGroup(modelID)
|
|
||||||
if err != nil {
|
|
||||||
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error swapping process group: %s", err.Error()))
|
|
||||||
return
|
|
||||||
}
|
|
||||||
nextHandler = processGroup.ProxyRequest
|
|
||||||
}
|
|
||||||
|
|
||||||
useModelName = pm.config.Models[modelID].UseModelName
|
|
||||||
pm.proxyLogger.Debugf("ProxyManager using local Process for model: %s", requestedModel)
|
|
||||||
} else if pm.peerProxy != nil && pm.peerProxy.HasPeerModel(requestedModel) {
|
|
||||||
pm.proxyLogger.Debugf("ProxyManager using ProxyPeer for model: %s", requestedModel)
|
|
||||||
modelID = requestedModel
|
|
||||||
nextHandler = pm.peerProxy.ProxyRequest
|
|
||||||
}
|
|
||||||
|
|
||||||
if nextHandler == nil {
|
|
||||||
pm.sendErrorResponse(c, http.StatusBadRequest, fmt.Sprintf("could not find suitable handler for %s", requestedModel))
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// We need to reconstruct the multipart form in any case since the body is consumed
|
|
||||||
// Create a new buffer for the reconstructed request
|
|
||||||
var requestBuffer bytes.Buffer
|
|
||||||
multipartWriter := multipart.NewWriter(&requestBuffer)
|
|
||||||
|
|
||||||
// Copy all form values
|
|
||||||
for key, values := range c.Request.MultipartForm.Value {
|
|
||||||
for _, value := range values {
|
|
||||||
fieldValue := value
|
|
||||||
// If this is the model field and we have a profile, use just the model name
|
|
||||||
if key == "model" {
|
|
||||||
// # issue #69 allow custom model names to be sent to upstream
|
|
||||||
if useModelName != "" {
|
|
||||||
fieldValue = useModelName
|
|
||||||
} else {
|
|
||||||
fieldValue = requestedModel
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
field, err := multipartWriter.CreateFormField(key)
|
|
||||||
if err != nil {
|
|
||||||
pm.sendErrorResponse(c, http.StatusInternalServerError, "error recreating form field")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if _, err = field.Write([]byte(fieldValue)); err != nil {
|
|
||||||
pm.sendErrorResponse(c, http.StatusInternalServerError, "error writing form field")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Copy all files from the original request
|
file, err := fileHeader.Open()
|
||||||
for key, fileHeaders := range c.Request.MultipartForm.File {
|
if err != nil {
|
||||||
for _, fileHeader := range fileHeaders {
|
pm.sendErrorResponse(c, http.StatusInternalServerError, "error opening uploaded file")
|
||||||
formFile, err := multipartWriter.CreateFormFile(key, fileHeader.Filename)
|
return
|
||||||
if err != nil {
|
}
|
||||||
pm.sendErrorResponse(c, http.StatusInternalServerError, "error recreating form file")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
file, err := fileHeader.Open()
|
if _, err = io.Copy(formFile, file); err != nil {
|
||||||
if err != nil {
|
file.Close()
|
||||||
pm.sendErrorResponse(c, http.StatusInternalServerError, "error opening uploaded file")
|
pm.sendErrorResponse(c, http.StatusInternalServerError, "error copying file data")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if _, err = io.Copy(formFile, file); err != nil {
|
|
||||||
file.Close()
|
file.Close()
|
||||||
pm.sendErrorResponse(c, http.StatusInternalServerError, "error copying file data")
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close the multipart writer to finalize the form
|
||||||
|
if err := multipartWriter.Close(); err != nil {
|
||||||
|
pm.sendErrorResponse(c, http.StatusInternalServerError, "error finalizing multipart form")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a new request with the reconstructed form data
|
||||||
|
modifiedReq, err := http.NewRequestWithContext(
|
||||||
|
c.Request.Context(),
|
||||||
|
c.Request.Method,
|
||||||
|
c.Request.URL.String(),
|
||||||
|
&requestBuffer,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
pm.sendErrorResponse(c, http.StatusInternalServerError, "error creating modified request")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy the headers from the original request
|
||||||
|
modifiedReq.Header = c.Request.Header.Clone()
|
||||||
|
modifiedReq.Header.Set("Content-Type", multipartWriter.FormDataContentType())
|
||||||
|
|
||||||
|
// set the content length of the body
|
||||||
|
modifiedReq.Header.Set("Content-Length", strconv.Itoa(requestBuffer.Len()))
|
||||||
|
modifiedReq.ContentLength = int64(requestBuffer.Len())
|
||||||
|
|
||||||
|
// Use the modified request for proxying
|
||||||
|
if pm.metricsMonitor != nil {
|
||||||
|
if err := pm.metricsMonitor.wrapHandler(modelID, c.Writer, modifiedReq, cf, nextHandler); err != nil {
|
||||||
|
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error proxying request: %s", err.Error()))
|
||||||
|
pm.proxyLogger.Errorf("Error Proxying Request for model %s", modelID)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if err := nextHandler(modelID, c.Writer, modifiedReq); err != nil {
|
||||||
|
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error proxying request: %s", err.Error()))
|
||||||
|
pm.proxyLogger.Errorf("Error Proxying Request for model %s", modelID)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
file.Close()
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Close the multipart writer to finalize the form
|
|
||||||
if err := multipartWriter.Close(); err != nil {
|
|
||||||
pm.sendErrorResponse(c, http.StatusInternalServerError, "error finalizing multipart form")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create a new request with the reconstructed form data
|
|
||||||
modifiedReq, err := http.NewRequestWithContext(
|
|
||||||
c.Request.Context(),
|
|
||||||
c.Request.Method,
|
|
||||||
c.Request.URL.String(),
|
|
||||||
&requestBuffer,
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
pm.sendErrorResponse(c, http.StatusInternalServerError, "error creating modified request")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Copy the headers from the original request
|
|
||||||
modifiedReq.Header = c.Request.Header.Clone()
|
|
||||||
modifiedReq.Header.Set("Content-Type", multipartWriter.FormDataContentType())
|
|
||||||
|
|
||||||
// set the content length of the body
|
|
||||||
modifiedReq.Header.Set("Content-Length", strconv.Itoa(requestBuffer.Len()))
|
|
||||||
modifiedReq.ContentLength = int64(requestBuffer.Len())
|
|
||||||
|
|
||||||
// Use the modified request for proxying
|
|
||||||
if err := nextHandler(modelID, c.Writer, modifiedReq); err != nil {
|
|
||||||
pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error proxying request: %s", err.Error()))
|
|
||||||
pm.proxyLogger.Errorf("Error Proxying Request for model %s", modelID)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (pm *ProxyManager) proxyGETModelHandler(c *gin.Context) {
|
func (pm *ProxyManager) proxyGETModelHandler(c *gin.Context) {
|
||||||
@@ -1102,7 +1173,7 @@ func (pm *ProxyManager) listRunningProcessesHandler(context *gin.Context) {
|
|||||||
if process, ok := pm.matrix.GetProcess(modelID); ok {
|
if process, ok := pm.matrix.GetProcess(modelID); ok {
|
||||||
runningProcesses = append(runningProcesses, gin.H{
|
runningProcesses = append(runningProcesses, gin.H{
|
||||||
"model": process.ID,
|
"model": process.ID,
|
||||||
"state": process.state,
|
"state": process.CurrentState(),
|
||||||
"cmd": process.config.Cmd,
|
"cmd": process.config.Cmd,
|
||||||
"proxy": process.config.Proxy,
|
"proxy": process.config.Proxy,
|
||||||
"ttl": process.config.UnloadAfter,
|
"ttl": process.config.UnloadAfter,
|
||||||
@@ -1117,7 +1188,7 @@ func (pm *ProxyManager) listRunningProcessesHandler(context *gin.Context) {
|
|||||||
if process.CurrentState() == StateReady {
|
if process.CurrentState() == StateReady {
|
||||||
runningProcesses = append(runningProcesses, gin.H{
|
runningProcesses = append(runningProcesses, gin.H{
|
||||||
"model": process.ID,
|
"model": process.ID,
|
||||||
"state": process.state,
|
"state": process.CurrentState(),
|
||||||
"cmd": process.config.Cmd,
|
"cmd": process.config.Cmd,
|
||||||
"proxy": process.config.Proxy,
|
"proxy": process.config.Proxy,
|
||||||
"ttl": process.config.UnloadAfter,
|
"ttl": process.config.UnloadAfter,
|
||||||
@@ -1153,3 +1224,9 @@ func (pm *ProxyManager) SetVersion(buildDate string, commit string, version stri
|
|||||||
pm.commit = commit
|
pm.commit = commit
|
||||||
pm.version = version
|
pm.version = version
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (pm *ProxyManager) SetPerfMonitor(m *perf.Monitor) {
|
||||||
|
pm.Lock()
|
||||||
|
defer pm.Unlock()
|
||||||
|
pm.perfMonitor = m
|
||||||
|
}
|
||||||
|
|||||||
+63
-20
@@ -8,9 +8,11 @@ import (
|
|||||||
"sort"
|
"sort"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/gin-gonic/gin"
|
"github.com/gin-gonic/gin"
|
||||||
"github.com/mostlygeek/llama-swap/event"
|
"github.com/mostlygeek/llama-swap/event"
|
||||||
|
"github.com/mostlygeek/llama-swap/internal/perf"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Model struct {
|
type Model struct {
|
||||||
@@ -32,6 +34,7 @@ func addApiHandlers(pm *ProxyManager) {
|
|||||||
apiGroup.POST("/models/unload/*model", pm.apiUnloadSingleModelHandler)
|
apiGroup.POST("/models/unload/*model", pm.apiUnloadSingleModelHandler)
|
||||||
apiGroup.GET("/events", pm.apiSendEvents)
|
apiGroup.GET("/events", pm.apiSendEvents)
|
||||||
apiGroup.GET("/metrics", pm.apiGetMetrics)
|
apiGroup.GET("/metrics", pm.apiGetMetrics)
|
||||||
|
apiGroup.GET("/performance", pm.apiGetPerformance)
|
||||||
apiGroup.GET("/version", pm.apiGetVersion)
|
apiGroup.GET("/version", pm.apiGetVersion)
|
||||||
apiGroup.GET("/captures/:id", pm.apiGetCapture)
|
apiGroup.GET("/captures/:id", pm.apiGetCapture)
|
||||||
}
|
}
|
||||||
@@ -158,7 +161,7 @@ func (pm *ProxyManager) apiSendEvents(c *gin.Context) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
sendMetrics := func(metrics []TokenMetrics) {
|
sendMetrics := func(metrics []ActivityLogEntry) {
|
||||||
jsonData, err := json.Marshal(metrics)
|
jsonData, err := json.Marshal(metrics)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
select {
|
select {
|
||||||
@@ -205,8 +208,8 @@ func (pm *ProxyManager) apiSendEvents(c *gin.Context) {
|
|||||||
/**
|
/**
|
||||||
* Send Metrics data
|
* Send Metrics data
|
||||||
*/
|
*/
|
||||||
defer event.On(func(e TokenMetricsEvent) {
|
defer event.On(func(e ActivityLogEvent) {
|
||||||
sendMetrics([]TokenMetrics{e.Metrics})
|
sendMetrics([]ActivityLogEntry{e.Metrics})
|
||||||
})()
|
})()
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -247,6 +250,56 @@ func (pm *ProxyManager) apiGetMetrics(c *gin.Context) {
|
|||||||
c.Data(http.StatusOK, "application/json", jsonData)
|
c.Data(http.StatusOK, "application/json", jsonData)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (pm *ProxyManager) prometheusMetricsHandler(c *gin.Context) {
|
||||||
|
if pm.perfMonitor == nil {
|
||||||
|
c.String(http.StatusServiceUnavailable, "# performance monitor not available\n")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
pm.perfMonitor.MetricsHandler().ServeHTTP(c.Writer, c.Request)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (pm *ProxyManager) apiGetPerformance(c *gin.Context) {
|
||||||
|
if pm.perfMonitor == nil {
|
||||||
|
c.JSON(http.StatusServiceUnavailable, gin.H{"error": "performance monitor not available"})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
sysStats, gpuStats := pm.perfMonitor.Current()
|
||||||
|
|
||||||
|
var after time.Time
|
||||||
|
if afterStr := c.Query("after"); afterStr != "" {
|
||||||
|
ts, err := time.Parse(time.RFC3339, afterStr)
|
||||||
|
if err != nil {
|
||||||
|
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid 'after' timestamp, use RFC3339 format"})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
after = ts
|
||||||
|
}
|
||||||
|
|
||||||
|
if !after.IsZero() {
|
||||||
|
filtered := make([]perf.SysStat, 0, len(sysStats))
|
||||||
|
for _, s := range sysStats {
|
||||||
|
if s.Timestamp.After(after) {
|
||||||
|
filtered = append(filtered, s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sysStats = filtered
|
||||||
|
|
||||||
|
filteredGpu := make([]perf.GpuStat, 0, len(gpuStats))
|
||||||
|
for _, g := range gpuStats {
|
||||||
|
if g.Timestamp.After(after) {
|
||||||
|
filteredGpu = append(filteredGpu, g)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
gpuStats = filteredGpu
|
||||||
|
}
|
||||||
|
|
||||||
|
c.JSON(http.StatusOK, gin.H{
|
||||||
|
"sys_stats": sysStats,
|
||||||
|
"gpu_stats": gpuStats,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
func (pm *ProxyManager) apiUnloadSingleModelHandler(c *gin.Context) {
|
func (pm *ProxyManager) apiUnloadSingleModelHandler(c *gin.Context) {
|
||||||
requestedModel := strings.TrimPrefix(c.Param("model"), "/")
|
requestedModel := strings.TrimPrefix(c.Param("model"), "/")
|
||||||
realModelName, found := pm.config.RealModelName(requestedModel)
|
realModelName, found := pm.config.RealModelName(requestedModel)
|
||||||
@@ -290,26 +343,16 @@ func (pm *ProxyManager) apiGetCapture(c *gin.Context) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
data, exists := pm.metricsMonitor.getCompressedBytes(id)
|
capture := pm.metricsMonitor.getCaptureByID(id)
|
||||||
if !exists {
|
if capture == nil || (capture.ReqPath == "" && capture.ReqHeaders == nil && capture.ReqBody == nil && capture.RespHeaders == nil && capture.RespBody == nil) {
|
||||||
c.JSON(http.StatusNotFound, gin.H{"error": "capture not found"})
|
c.JSON(http.StatusNotFound, gin.H{"error": "capture not found"})
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
c.Header("Vary", "Accept-Encoding")
|
jsonBytes, err := json.Marshal(capture)
|
||||||
|
if err != nil {
|
||||||
// ¯\_(ツ)_/¯ quality weights are too fancy for us anyway
|
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to marshal capture"})
|
||||||
hasZstd := strings.Contains(c.GetHeader("Accept-Encoding"), "zstd")
|
return
|
||||||
|
|
||||||
if hasZstd {
|
|
||||||
c.Header("Content-Encoding", "zstd")
|
|
||||||
c.Data(http.StatusOK, "application/json", data)
|
|
||||||
} else {
|
|
||||||
decompressed, err := decompressCapture(data)
|
|
||||||
if err != nil {
|
|
||||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to decompress capture"})
|
|
||||||
return
|
|
||||||
}
|
|
||||||
c.Data(http.StatusOK, "application/json", decompressed)
|
|
||||||
}
|
}
|
||||||
|
c.Data(http.StatusOK, "application/json", jsonBytes)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/gin-gonic/gin"
|
"github.com/gin-gonic/gin"
|
||||||
|
"github.com/mostlygeek/llama-swap/internal/logmon"
|
||||||
)
|
)
|
||||||
|
|
||||||
func (pm *ProxyManager) sendLogsHandlers(c *gin.Context) {
|
func (pm *ProxyManager) sendLogsHandlers(c *gin.Context) {
|
||||||
@@ -32,6 +33,13 @@ func (pm *ProxyManager) streamLogsHandler(c *gin.Context) {
|
|||||||
c.Header("X-Accel-Buffering", "no")
|
c.Header("X-Accel-Buffering", "no")
|
||||||
|
|
||||||
logMonitorId := strings.TrimPrefix(c.Param("logMonitorID"), "/")
|
logMonitorId := strings.TrimPrefix(c.Param("logMonitorID"), "/")
|
||||||
|
|
||||||
|
// Handle case where query string might be included in the parameter
|
||||||
|
// (can happen with catch-all routes on some versions/setups)
|
||||||
|
if idx := strings.Index(logMonitorId, "?"); idx != -1 {
|
||||||
|
logMonitorId = logMonitorId[:idx]
|
||||||
|
}
|
||||||
|
|
||||||
logger, err := pm.getLogger(logMonitorId)
|
logger, err := pm.getLogger(logMonitorId)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
c.String(http.StatusBadRequest, err.Error())
|
c.String(http.StatusBadRequest, err.Error())
|
||||||
@@ -82,7 +90,7 @@ func (pm *ProxyManager) streamLogsHandler(c *gin.Context) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// getLogger searches for the appropriate logger based on the logMonitorId
|
// getLogger searches for the appropriate logger based on the logMonitorId
|
||||||
func (pm *ProxyManager) getLogger(logMonitorId string) (*LogMonitor, error) {
|
func (pm *ProxyManager) getLogger(logMonitorId string) (*logmon.Monitor, error) {
|
||||||
switch logMonitorId {
|
switch logMonitorId {
|
||||||
case "":
|
case "":
|
||||||
// maintain the default
|
// maintain the default
|
||||||
@@ -100,6 +108,12 @@ func (pm *ProxyManager) getLogger(logMonitorId string) (*LogMonitor, error) {
|
|||||||
return process.Logger(), nil
|
return process.Logger(), nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// also check the matrix when processGroups doesn't contain the model
|
||||||
|
if pm.matrix != nil {
|
||||||
|
if process, found := pm.matrix.GetProcess(name); found {
|
||||||
|
return process.Logger(), nil
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil, fmt.Errorf("invalid logger. Use 'proxy', 'upstream' or a model's ID")
|
return nil, fmt.Errorf("invalid logger. Use 'proxy', 'upstream' or a model's ID")
|
||||||
|
|||||||
@@ -0,0 +1,173 @@
|
|||||||
|
package proxy
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"net/http/httptest"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/mostlygeek/llama-swap/proxy/config"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestLogMonitorIdQueryParameterStripping(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
input string
|
||||||
|
expected string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "upstream without query param",
|
||||||
|
input: "upstream",
|
||||||
|
expected: "upstream",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "upstream with query param",
|
||||||
|
input: "upstream?no-history",
|
||||||
|
expected: "upstream",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "proxy with multiple query params",
|
||||||
|
input: "proxy?no-history&foo=bar",
|
||||||
|
expected: "proxy",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "model with slash and query param",
|
||||||
|
input: "author/model?no-history",
|
||||||
|
expected: "author/model",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
// Simulate the query parameter stripping logic
|
||||||
|
logMonitorId := tt.input
|
||||||
|
if idx := strings.Index(logMonitorId, "?"); idx != -1 {
|
||||||
|
logMonitorId = logMonitorId[:idx]
|
||||||
|
}
|
||||||
|
|
||||||
|
if logMonitorId != tt.expected {
|
||||||
|
t.Errorf("Query parameter stripping failed: got %q, want %q", logMonitorId, tt.expected)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestProxyManager_GetLogger_ProcessGroups verifies getLogger resolves the
|
||||||
|
// well-known "proxy"/"upstream" loggers and a model ID managed by processGroups.
|
||||||
|
func TestProxyManager_GetLogger_ProcessGroups(t *testing.T) {
|
||||||
|
cfg := testConfigFromYAML(t, `
|
||||||
|
healthCheckTimeout: 15
|
||||||
|
logLevel: error
|
||||||
|
models:
|
||||||
|
model1:
|
||||||
|
cmd: {{RESPONDER}} --port ${PORT} --silent --respond model1
|
||||||
|
`)
|
||||||
|
pm := New(cfg)
|
||||||
|
defer pm.StopProcesses(StopImmediately)
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
id string
|
||||||
|
wantErr bool
|
||||||
|
}{
|
||||||
|
{"proxy", false},
|
||||||
|
{"upstream", false},
|
||||||
|
{"model1", false},
|
||||||
|
{"does-not-exist", true},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.id, func(t *testing.T) {
|
||||||
|
logger, err := pm.getLogger(tt.id)
|
||||||
|
if tt.wantErr {
|
||||||
|
require.Error(t, err)
|
||||||
|
assert.Contains(t, err.Error(), "invalid logger")
|
||||||
|
} else {
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.NotNil(t, logger)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestProxyManager_GetLogger_Matrix verifies that getLogger can resolve a model
|
||||||
|
// ID when the proxy is configured with a swap matrix (pm.processGroups is empty
|
||||||
|
// for matrix-managed models).
|
||||||
|
func TestProxyManager_GetLogger_Matrix(t *testing.T) {
|
||||||
|
cfg := config.Config{
|
||||||
|
HealthCheckTimeout: 15,
|
||||||
|
Models: map[string]config.ModelConfig{
|
||||||
|
"model1": getTestSimpleResponderConfig("model1"),
|
||||||
|
"model2": getTestSimpleResponderConfig("model2"),
|
||||||
|
},
|
||||||
|
ExpandedSets: []config.ExpandedSet{
|
||||||
|
{SetName: "s1", Models: []string{"model1", "model2"}},
|
||||||
|
},
|
||||||
|
Matrix: &config.MatrixConfig{},
|
||||||
|
}
|
||||||
|
|
||||||
|
pm := New(cfg)
|
||||||
|
defer pm.StopProcesses(StopImmediately)
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
id string
|
||||||
|
wantErr bool
|
||||||
|
}{
|
||||||
|
{"proxy", false},
|
||||||
|
{"upstream", false},
|
||||||
|
{"model1", false},
|
||||||
|
{"model2", false},
|
||||||
|
{"does-not-exist", true},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.id, func(t *testing.T) {
|
||||||
|
logger, err := pm.getLogger(tt.id)
|
||||||
|
if tt.wantErr {
|
||||||
|
require.Error(t, err)
|
||||||
|
assert.Contains(t, err.Error(), "invalid logger")
|
||||||
|
} else {
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.NotNil(t, logger)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestProxyManager_StreamLogs_Matrix verifies that /logs/stream/<modelID>
|
||||||
|
// returns 200 (not 400) for a model managed by the swap matrix.
|
||||||
|
func TestProxyManager_StreamLogs_Matrix(t *testing.T) {
|
||||||
|
cfg := config.Config{
|
||||||
|
HealthCheckTimeout: 15,
|
||||||
|
Models: map[string]config.ModelConfig{
|
||||||
|
"matrix-model": getTestSimpleResponderConfig("matrix-model"),
|
||||||
|
},
|
||||||
|
ExpandedSets: []config.ExpandedSet{
|
||||||
|
{SetName: "s1", Models: []string{"matrix-model"}},
|
||||||
|
},
|
||||||
|
Matrix: &config.MatrixConfig{},
|
||||||
|
}
|
||||||
|
|
||||||
|
pm := New(cfg)
|
||||||
|
defer pm.StopProcesses(StopImmediately)
|
||||||
|
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
req := httptest.NewRequest("GET", "/logs/stream/matrix-model", nil)
|
||||||
|
req = req.WithContext(ctx)
|
||||||
|
rec := CreateTestResponseRecorder()
|
||||||
|
|
||||||
|
done := make(chan struct{})
|
||||||
|
go func() {
|
||||||
|
defer close(done)
|
||||||
|
pm.ServeHTTP(rec, req)
|
||||||
|
}()
|
||||||
|
|
||||||
|
<-ctx.Done()
|
||||||
|
<-done
|
||||||
|
|
||||||
|
assert.Equal(t, 200, rec.Code)
|
||||||
|
}
|
||||||
@@ -1721,3 +1721,161 @@ models:
|
|||||||
assert.Contains(t, w.Body.String(), "could not find suitable handler")
|
assert.Contains(t, w.Body.String(), "could not find suitable handler")
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestProxyManager_AudioTranscriptionCapture(t *testing.T) {
|
||||||
|
cfg := testConfigFromYAML(t, `
|
||||||
|
healthCheckTimeout: 15
|
||||||
|
logLevel: error
|
||||||
|
captureBuffer: 5
|
||||||
|
models:
|
||||||
|
TheExpectedModel:
|
||||||
|
cmd: {{RESPONDER}} --port ${PORT} --silent --respond TheExpectedModel
|
||||||
|
`)
|
||||||
|
|
||||||
|
proxy := New(cfg)
|
||||||
|
defer proxy.StopProcesses(StopWaitForInflightRequest)
|
||||||
|
injectTestHandlers(proxy, nil)
|
||||||
|
|
||||||
|
var b bytes.Buffer
|
||||||
|
w := multipart.NewWriter(&b)
|
||||||
|
|
||||||
|
fw, err := w.CreateFormField("model")
|
||||||
|
assert.NoError(t, err)
|
||||||
|
_, err = fw.Write([]byte("TheExpectedModel"))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
fw, err = w.CreateFormFile("file", "test.mp3")
|
||||||
|
assert.NoError(t, err)
|
||||||
|
_, err = fw.Write([]byte("test audio content"))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
w.Close()
|
||||||
|
|
||||||
|
req := httptest.NewRequest("POST", "/v1/audio/transcriptions", &b)
|
||||||
|
req.Header.Set("Content-Type", w.FormDataContentType())
|
||||||
|
req.Header.Set("Authorization", "Bearer mysecret")
|
||||||
|
req.Header.Set("X-Custom-Req", "req-value")
|
||||||
|
rec := CreateTestResponseRecorder()
|
||||||
|
proxy.ServeHTTP(rec, req)
|
||||||
|
|
||||||
|
assert.Equal(t, http.StatusOK, rec.Code)
|
||||||
|
|
||||||
|
// Verify capture exists
|
||||||
|
metrics := proxy.metricsMonitor.getMetrics()
|
||||||
|
assert.Equal(t, 1, len(metrics))
|
||||||
|
assert.True(t, metrics[0].HasCapture)
|
||||||
|
|
||||||
|
capture := proxy.metricsMonitor.getCaptureByID(metrics[0].ID)
|
||||||
|
assert.NotNil(t, capture)
|
||||||
|
|
||||||
|
// Should capture request headers (sensitive ones redacted)
|
||||||
|
assert.NotEmpty(t, capture.ReqHeaders)
|
||||||
|
assert.Equal(t, "[REDACTED]", capture.ReqHeaders["Authorization"])
|
||||||
|
assert.Equal(t, "req-value", capture.ReqHeaders["X-Custom-Req"])
|
||||||
|
|
||||||
|
// Should capture response headers
|
||||||
|
assert.NotNil(t, capture.RespHeaders)
|
||||||
|
|
||||||
|
// Should NOT capture request bodies but get response bodies (text
|
||||||
|
assert.Nil(t, capture.ReqBody)
|
||||||
|
assert.NotNil(t, capture.RespBody)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestProxyManager_VersionlessEndpoints_LocalModel(t *testing.T) {
|
||||||
|
cfg := testConfigFromYAML(t, `
|
||||||
|
healthCheckTimeout: 15
|
||||||
|
logLevel: error
|
||||||
|
models:
|
||||||
|
model1:
|
||||||
|
cmd: {{RESPONDER}} --port ${PORT} --silent --respond model1
|
||||||
|
`)
|
||||||
|
|
||||||
|
proxy := New(cfg)
|
||||||
|
defer proxy.StopProcesses(StopWaitForInflightRequest)
|
||||||
|
injectTestHandlers(proxy, nil)
|
||||||
|
|
||||||
|
endpoints := []string{
|
||||||
|
"/v/chat/completions",
|
||||||
|
"/v/responses",
|
||||||
|
"/v/completions",
|
||||||
|
"/v/embeddings",
|
||||||
|
"/v/rerank",
|
||||||
|
"/v/reranking",
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, endpoint := range endpoints {
|
||||||
|
t.Run(endpoint, func(t *testing.T) {
|
||||||
|
reqBody := `{"model":"model1"}`
|
||||||
|
req := httptest.NewRequest("POST", endpoint, bytes.NewBufferString(reqBody))
|
||||||
|
w := CreateTestResponseRecorder()
|
||||||
|
proxy.ServeHTTP(w, req)
|
||||||
|
assert.Equal(t, http.StatusOK, w.Code)
|
||||||
|
assert.Contains(t, w.Body.String(), "model1")
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Run("/v/messages", func(t *testing.T) {
|
||||||
|
reqBody := `{"model":"model1","messages":[{"role":"user","content":"hi"}]}`
|
||||||
|
req := httptest.NewRequest("POST", "/v/messages", bytes.NewBufferString(reqBody))
|
||||||
|
w := CreateTestResponseRecorder()
|
||||||
|
proxy.ServeHTTP(w, req)
|
||||||
|
assert.Equal(t, http.StatusOK, w.Code)
|
||||||
|
assert.Contains(t, w.Body.String(), "model1")
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestProxyManager_VersionlessEndpoints_PeerModel(t *testing.T) {
|
||||||
|
peerServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
fmt.Fprintf(w, `{"endpoint":"%s","model":"peer-model"}`, r.URL.Path)
|
||||||
|
}))
|
||||||
|
defer peerServer.Close()
|
||||||
|
|
||||||
|
cfg := testConfigFromYAML(t, fmt.Sprintf(`
|
||||||
|
healthCheckTimeout: 15
|
||||||
|
logLevel: error
|
||||||
|
peers:
|
||||||
|
test-peer:
|
||||||
|
proxy: %s
|
||||||
|
models:
|
||||||
|
- peer-model
|
||||||
|
models:
|
||||||
|
local-model:
|
||||||
|
cmd: {{RESPONDER}} --port ${PORT} --silent --respond local-model
|
||||||
|
`, peerServer.URL))
|
||||||
|
|
||||||
|
proxy := New(cfg)
|
||||||
|
defer proxy.StopProcesses(StopImmediately)
|
||||||
|
|
||||||
|
endpoints := []struct {
|
||||||
|
path string
|
||||||
|
wantSuffix string
|
||||||
|
}{
|
||||||
|
{"/v/chat/completions", "/chat/completions"},
|
||||||
|
{"/v/responses", "/responses"},
|
||||||
|
{"/v/completions", "/completions"},
|
||||||
|
{"/v/embeddings", "/embeddings"},
|
||||||
|
{"/v/rerank", "/rerank"},
|
||||||
|
{"/v/reranking", "/reranking"},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, ep := range endpoints {
|
||||||
|
t.Run(ep.path, func(t *testing.T) {
|
||||||
|
reqBody := `{"model":"peer-model"}`
|
||||||
|
req := httptest.NewRequest("POST", ep.path, bytes.NewBufferString(reqBody))
|
||||||
|
w := CreateTestResponseRecorder()
|
||||||
|
proxy.ServeHTTP(w, req)
|
||||||
|
assert.Equal(t, http.StatusOK, w.Code)
|
||||||
|
assert.Contains(t, w.Body.String(), ep.wantSuffix)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Run("/v/messages", func(t *testing.T) {
|
||||||
|
reqBody := `{"model":"peer-model","messages":[{"role":"user","content":"hi"}]}`
|
||||||
|
req := httptest.NewRequest("POST", "/v/messages", bytes.NewBufferString(reqBody))
|
||||||
|
w := CreateTestResponseRecorder()
|
||||||
|
proxy.ServeHTTP(w, req)
|
||||||
|
assert.Equal(t, http.StatusOK, w.Code)
|
||||||
|
assert.Contains(t, w.Body.String(), "/messages")
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|||||||
Generated
+30
-11
@@ -8,6 +8,7 @@
|
|||||||
"name": "ui-svelte",
|
"name": "ui-svelte",
|
||||||
"version": "0.0.0",
|
"version": "0.0.0",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
"chart.js": "4.5.1",
|
||||||
"highlight.js": "^11.11.1",
|
"highlight.js": "^11.11.1",
|
||||||
"katex": "^0.16.28",
|
"katex": "^0.16.28",
|
||||||
"lucide-svelte": "^0.563.0",
|
"lucide-svelte": "^0.563.0",
|
||||||
@@ -120,6 +121,12 @@
|
|||||||
"@jridgewell/sourcemap-codec": "^1.4.14"
|
"@jridgewell/sourcemap-codec": "^1.4.14"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@kurkle/color": {
|
||||||
|
"version": "0.3.4",
|
||||||
|
"resolved": "https://registry.npmjs.org/@kurkle/color/-/color-0.3.4.tgz",
|
||||||
|
"integrity": "sha512-M5UknZPHRu3DEDWoipU6sE8PdkZ6Z/S+v4dD+Ke8IaNlpdSQah50lz1KtcFBa2vsdOnwbbnxJwVM4wty6udA5w==",
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
"node_modules/@napi-rs/wasm-runtime": {
|
"node_modules/@napi-rs/wasm-runtime": {
|
||||||
"version": "1.1.3",
|
"version": "1.1.3",
|
||||||
"resolved": "https://registry.npmjs.org/@napi-rs/wasm-runtime/-/wasm-runtime-1.1.3.tgz",
|
"resolved": "https://registry.npmjs.org/@napi-rs/wasm-runtime/-/wasm-runtime-1.1.3.tgz",
|
||||||
@@ -1096,6 +1103,18 @@
|
|||||||
"url": "https://github.com/sponsors/wooorm"
|
"url": "https://github.com/sponsors/wooorm"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/chart.js": {
|
||||||
|
"version": "4.5.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/chart.js/-/chart.js-4.5.1.tgz",
|
||||||
|
"integrity": "sha512-GIjfiT9dbmHRiYi6Nl2yFCq7kkwdkp1W/lp2J99rX0yo9tgJGn3lKQATztIjb5tVtevcBtIdICNWqlq5+E8/Pw==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"@kurkle/color": "^0.3.0"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"pnpm": ">=8"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/chokidar": {
|
"node_modules/chokidar": {
|
||||||
"version": "4.0.3",
|
"version": "4.0.3",
|
||||||
"resolved": "https://registry.npmjs.org/chokidar/-/chokidar-4.0.3.tgz",
|
"resolved": "https://registry.npmjs.org/chokidar/-/chokidar-4.0.3.tgz",
|
||||||
@@ -1208,9 +1227,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/devalue": {
|
"node_modules/devalue": {
|
||||||
"version": "5.6.4",
|
"version": "5.8.1",
|
||||||
"resolved": "https://registry.npmjs.org/devalue/-/devalue-5.6.4.tgz",
|
"resolved": "https://registry.npmjs.org/devalue/-/devalue-5.8.1.tgz",
|
||||||
"integrity": "sha512-Gp6rDldRsFh/7XuouDbxMH3Mx8GMCcgzIb1pDTvNyn8pZGQ22u+Wa+lGV9dQCltFQ7uVw0MhRyb8XDskNFOReA==",
|
"integrity": "sha512-4CXDYRBGqN+57wVJkuXBYmpAVUSg3L6JAQa/DFqm238G73E1wuyc/JhGQJzN7vUf/CMphYau2zXbfWzDR5aTEw==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
},
|
},
|
||||||
@@ -2788,9 +2807,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/postcss": {
|
"node_modules/postcss": {
|
||||||
"version": "8.5.8",
|
"version": "8.5.12",
|
||||||
"resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.8.tgz",
|
"resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.12.tgz",
|
||||||
"integrity": "sha512-OW/rX8O/jXnm82Ey1k44pObPtdblfiuWnrd8X7GJ7emImCOstunGbXUpp7HdBrFQX6rJzn3sPT397Wp5aCwCHg==",
|
"integrity": "sha512-W62t/Se6rA0Az3DfCL0AqJwXuKwBeYg6nOaIgzP+xZ7N5BFCI7DYi1qs6ygUYT6rvfi6t9k65UMLJC+PHZpDAA==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"funding": [
|
"funding": [
|
||||||
{
|
{
|
||||||
@@ -3068,9 +3087,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/svelte": {
|
"node_modules/svelte": {
|
||||||
"version": "5.53.11",
|
"version": "5.55.7",
|
||||||
"resolved": "https://registry.npmjs.org/svelte/-/svelte-5.53.11.tgz",
|
"resolved": "https://registry.npmjs.org/svelte/-/svelte-5.55.7.tgz",
|
||||||
"integrity": "sha512-GYmqRjRhJYLQBonfdfGAt28gkfWEShrtXKGXcFGneXi502aBE+I1dJcs/YQriByvP6xqXRz/OdBGC6tfvUQHyQ==",
|
"integrity": "sha512-ymI5ykLPwIHW839E053FQbI1G+jnRFJEw3Kv5Y4njixVWywQBx+NUFpkkKyk5LIb36Fg9DVXSYpqiGekLD0hyw==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
@@ -3083,9 +3102,9 @@
|
|||||||
"aria-query": "5.3.1",
|
"aria-query": "5.3.1",
|
||||||
"axobject-query": "^4.1.0",
|
"axobject-query": "^4.1.0",
|
||||||
"clsx": "^2.1.1",
|
"clsx": "^2.1.1",
|
||||||
"devalue": "^5.6.3",
|
"devalue": "^5.8.1",
|
||||||
"esm-env": "^1.2.1",
|
"esm-env": "^1.2.1",
|
||||||
"esrap": "^2.2.2",
|
"esrap": "^2.2.4",
|
||||||
"is-reference": "^3.0.3",
|
"is-reference": "^3.0.3",
|
||||||
"locate-character": "^3.0.0",
|
"locate-character": "^3.0.0",
|
||||||
"magic-string": "^0.30.11",
|
"magic-string": "^0.30.11",
|
||||||
|
|||||||
@@ -35,6 +35,7 @@
|
|||||||
"remark-math": "^6.0.0",
|
"remark-math": "^6.0.0",
|
||||||
"remark-parse": "^11.0.0",
|
"remark-parse": "^11.0.0",
|
||||||
"remark-rehype": "^11.1.2",
|
"remark-rehype": "^11.1.2",
|
||||||
|
"chart.js": "4.5.1",
|
||||||
"svelte-spa-router": "^4.0.1",
|
"svelte-spa-router": "^4.0.1",
|
||||||
"unified": "^11.0.5",
|
"unified": "^11.0.5",
|
||||||
"unist-util-visit": "^5.1.0"
|
"unist-util-visit": "^5.1.0"
|
||||||
|
|||||||
@@ -5,10 +5,11 @@
|
|||||||
import LogViewer from "./routes/LogViewer.svelte";
|
import LogViewer from "./routes/LogViewer.svelte";
|
||||||
import Models from "./routes/Models.svelte";
|
import Models from "./routes/Models.svelte";
|
||||||
import Activity from "./routes/Activity.svelte";
|
import Activity from "./routes/Activity.svelte";
|
||||||
|
import Performance from "./routes/Performance.svelte";
|
||||||
import Playground from "./routes/Playground.svelte";
|
import Playground from "./routes/Playground.svelte";
|
||||||
import PlaygroundStub from "./routes/PlaygroundStub.svelte";
|
import PlaygroundStub from "./routes/PlaygroundStub.svelte";
|
||||||
import { enableAPIEvents } from "./stores/api";
|
import { enableAPIEvents } from "./stores/api";
|
||||||
import { initScreenWidth, isDarkMode, appTitle, connectionState } from "./stores/theme";
|
import { initScreenWidth, initSystemThemeListener, isDarkMode, appTitle, connectionState } from "./stores/theme";
|
||||||
import { currentRoute } from "./stores/route";
|
import { currentRoute } from "./stores/route";
|
||||||
|
|
||||||
const routes = {
|
const routes = {
|
||||||
@@ -16,6 +17,7 @@
|
|||||||
"/models": Models,
|
"/models": Models,
|
||||||
"/logs": LogViewer,
|
"/logs": LogViewer,
|
||||||
"/activity": Activity,
|
"/activity": Activity,
|
||||||
|
"/performance": Performance,
|
||||||
"*": PlaygroundStub,
|
"*": PlaygroundStub,
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -35,10 +37,12 @@
|
|||||||
|
|
||||||
onMount(() => {
|
onMount(() => {
|
||||||
const cleanupScreenWidth = initScreenWidth();
|
const cleanupScreenWidth = initScreenWidth();
|
||||||
|
const cleanupSystemTheme = initSystemThemeListener();
|
||||||
enableAPIEvents(true);
|
enableAPIEvents(true);
|
||||||
|
|
||||||
return () => {
|
return () => {
|
||||||
cleanupScreenWidth();
|
cleanupScreenWidth();
|
||||||
|
cleanupSystemTheme();
|
||||||
enableAPIEvents(false);
|
enableAPIEvents(false);
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -0,0 +1,97 @@
|
|||||||
|
<script lang="ts">
|
||||||
|
import { inFlightRequests, metrics } from "../stores/api";
|
||||||
|
import { persistentStore } from "../stores/persistent";
|
||||||
|
import { calculateHistogramData } from "../lib/histogram";
|
||||||
|
import TokenHistogram from "./TokenHistogram.svelte";
|
||||||
|
|
||||||
|
const nf = new Intl.NumberFormat();
|
||||||
|
const histogramCollapsed = persistentStore<boolean>("activity-histogram-collapsed", false);
|
||||||
|
|
||||||
|
let stats = $derived.by(() => {
|
||||||
|
const totalRequests = $metrics.length;
|
||||||
|
const totalInputTokens = $metrics.reduce((sum, m) => sum + m.tokens.input_tokens, 0);
|
||||||
|
const totalOutputTokens = $metrics.reduce((sum, m) => sum + m.tokens.output_tokens, 0);
|
||||||
|
const totalCacheTokens = $metrics.reduce((sum, m) => sum + m.tokens.cache_tokens, 0);
|
||||||
|
|
||||||
|
const promptPerSecond = $metrics.filter((m) => m.tokens.prompt_per_second > 0).map((m) => m.tokens.prompt_per_second);
|
||||||
|
|
||||||
|
const tokensPerSecond = $metrics.filter((m) => m.tokens.tokens_per_second > 0).map((m) => m.tokens.tokens_per_second);
|
||||||
|
|
||||||
|
const promptHistogramData =
|
||||||
|
promptPerSecond.length > 0 ? calculateHistogramData(promptPerSecond) : null;
|
||||||
|
|
||||||
|
const genHistogramData =
|
||||||
|
tokensPerSecond.length > 0 ? calculateHistogramData(tokensPerSecond) : null;
|
||||||
|
|
||||||
|
return {
|
||||||
|
totalRequests,
|
||||||
|
totalInputTokens,
|
||||||
|
totalOutputTokens,
|
||||||
|
totalCacheTokens,
|
||||||
|
inFlightRequests: $inFlightRequests,
|
||||||
|
promptHistogramData,
|
||||||
|
genHistogramData,
|
||||||
|
};
|
||||||
|
});
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<div class="card relative p-3">
|
||||||
|
<button
|
||||||
|
class="absolute top-2 right-2 w-6 h-6 flex items-center justify-center rounded-full border border-gray-300 dark:border-gray-600 text-gray-400 dark:text-gray-500 hover:text-gray-600 dark:hover:text-gray-300 hover:border-gray-400 dark:hover:border-gray-400 transition-colors"
|
||||||
|
onclick={() => ($histogramCollapsed = !$histogramCollapsed)}
|
||||||
|
title={$histogramCollapsed ? "Show histograms" : "Hide histograms"}
|
||||||
|
>
|
||||||
|
{#if $histogramCollapsed}
|
||||||
|
<svg class="w-3.5 h-3.5" viewBox="0 0 16 16" fill="currentColor">
|
||||||
|
<path d="M4.5 6l3.5 4 3.5-4H4.5z" />
|
||||||
|
</svg>
|
||||||
|
{:else}
|
||||||
|
<svg class="w-3 h-3" viewBox="0 0 16 16" fill="currentColor">
|
||||||
|
<path d="M3.5 3.5l9 9M12.5 3.5l-9 9" stroke="currentColor" stroke-width="2" stroke-linecap="round" fill="none" />
|
||||||
|
</svg>
|
||||||
|
{/if}
|
||||||
|
</button>
|
||||||
|
{#if !$histogramCollapsed}
|
||||||
|
<div class="flex flex-col sm:flex-row gap-6 mb-3">
|
||||||
|
<div class="w-full sm:w-1/2 min-w-0">
|
||||||
|
<div class="text-sm font-medium text-gray-500 dark:text-gray-400 mb-1">Prompt Processing</div>
|
||||||
|
{#if stats.promptHistogramData}
|
||||||
|
<TokenHistogram
|
||||||
|
data={stats.promptHistogramData}
|
||||||
|
unit="prompt tokens/sec"
|
||||||
|
colorClass="text-amber-500 dark:text-amber-400"
|
||||||
|
/>
|
||||||
|
{:else}
|
||||||
|
<div class="py-6 text-center text-sm text-gray-500 dark:text-gray-400">No prompt speed data yet</div>
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
|
<div class="w-full sm:w-1/2 min-w-0">
|
||||||
|
<div class="text-sm font-medium text-gray-500 dark:text-gray-400 mb-1">Token Generation</div>
|
||||||
|
{#if stats.genHistogramData}
|
||||||
|
<TokenHistogram data={stats.genHistogramData} unit="tokens/sec" />
|
||||||
|
{:else}
|
||||||
|
<div class="py-6 text-center text-sm text-gray-500 dark:text-gray-400">No generation speed data yet</div>
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
<div class="grid grid-cols-4 gap-x-6 gap-y-1 text-sm">
|
||||||
|
<div class="text-xs uppercase tracking-wider text-gray-500 dark:text-gray-400">Requests</div>
|
||||||
|
<div class="text-xs uppercase tracking-wider text-gray-500 dark:text-gray-400">Cached</div>
|
||||||
|
<div class="text-xs uppercase tracking-wider text-gray-500 dark:text-gray-400">Processed</div>
|
||||||
|
<div class="text-xs uppercase tracking-wider text-gray-500 dark:text-gray-400">Generated</div>
|
||||||
|
<div class="text-sm text-gray-700 dark:text-gray-300">
|
||||||
|
<span class="font-semibold">{nf.format(stats.totalRequests)}</span> completed,
|
||||||
|
<span class="font-semibold">{nf.format(stats.inFlightRequests)}</span> waiting
|
||||||
|
</div>
|
||||||
|
<div class="text-sm text-gray-700 dark:text-gray-300">
|
||||||
|
<span class="font-semibold">{nf.format(stats.totalCacheTokens)}</span> tokens
|
||||||
|
</div>
|
||||||
|
<div class="text-sm text-gray-700 dark:text-gray-300">
|
||||||
|
<span class="font-semibold">{nf.format(stats.totalInputTokens)}</span> tokens
|
||||||
|
</div>
|
||||||
|
<div class="text-sm text-gray-700 dark:text-gray-300">
|
||||||
|
<span class="font-semibold">{nf.format(stats.totalOutputTokens)}</span> tokens
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
@@ -106,6 +106,7 @@
|
|||||||
const delta = parsed.choices?.[0]?.delta;
|
const delta = parsed.choices?.[0]?.delta;
|
||||||
if (delta?.content) result.content += delta.content;
|
if (delta?.content) result.content += delta.content;
|
||||||
if (delta?.reasoning_content) result.reasoning += delta.reasoning_content;
|
if (delta?.reasoning_content) result.reasoning += delta.reasoning_content;
|
||||||
|
if (delta?.reasoning) result.reasoning += delta.reasoning;
|
||||||
} catch {
|
} catch {
|
||||||
// skip unparseable lines
|
// skip unparseable lines
|
||||||
}
|
}
|
||||||
@@ -426,6 +427,14 @@
|
|||||||
<button onclick={() => dialogEl?.close()} class="btn"> Close </button>
|
<button onclick={() => dialogEl?.close()} class="btn"> Close </button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
{:else}
|
||||||
|
<div class="flex flex-col items-center justify-center p-12">
|
||||||
|
<p class="text-lg text-txtsecondary">Capture not found</p>
|
||||||
|
<p class="text-sm text-txtsecondary mt-1">The capture may have expired or was never recorded.</p>
|
||||||
|
<div class="mt-4">
|
||||||
|
<button onclick={() => dialogEl?.close()} class="btn">Close</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
{/if}
|
{/if}
|
||||||
</dialog>
|
</dialog>
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
import { link } from "svelte-spa-router";
|
import { link } from "svelte-spa-router";
|
||||||
import { screenWidth, toggleTheme, isDarkMode, appTitle, isNarrow } from "../stores/theme";
|
import { screenWidth, toggleTheme, themeMode, appTitle, isNarrow } from "../stores/theme";
|
||||||
import { currentRoute } from "../stores/route";
|
import { currentRoute } from "../stores/route";
|
||||||
import { playgroundActivity } from "../stores/playgroundActivity";
|
import { playgroundActivity } from "../stores/playgroundActivity";
|
||||||
import ConnectionStatus from "./ConnectionStatus.svelte";
|
import ConnectionStatus from "./ConnectionStatus.svelte";
|
||||||
@@ -50,7 +50,7 @@
|
|||||||
<a
|
<a
|
||||||
href="/"
|
href="/"
|
||||||
use:link
|
use:link
|
||||||
class="p-1 whitespace-nowrap {isActive('/', $currentRoute) ? 'font-semibold' : ''} {$playgroundActivity ? 'activity-link' : 'text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100'}"
|
class="p-1 whitespace-nowrap {isActive('/', $currentRoute) ? 'font-semibold underline underline-offset-4' : ''} {$playgroundActivity ? 'activity-link' : 'text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100'}"
|
||||||
>
|
>
|
||||||
Playground
|
Playground
|
||||||
</a>
|
</a>
|
||||||
@@ -59,6 +59,8 @@
|
|||||||
use:link
|
use:link
|
||||||
class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap"
|
class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap"
|
||||||
class:font-semibold={isActive("/models", $currentRoute)}
|
class:font-semibold={isActive("/models", $currentRoute)}
|
||||||
|
class:underline={isActive("/models", $currentRoute)}
|
||||||
|
class:underline-offset-4={isActive("/models", $currentRoute)}
|
||||||
>
|
>
|
||||||
Models
|
Models
|
||||||
</a>
|
</a>
|
||||||
@@ -67,6 +69,8 @@
|
|||||||
use:link
|
use:link
|
||||||
class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap"
|
class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap"
|
||||||
class:font-semibold={isActive("/activity", $currentRoute)}
|
class:font-semibold={isActive("/activity", $currentRoute)}
|
||||||
|
class:underline={isActive("/activity", $currentRoute)}
|
||||||
|
class:underline-offset-4={isActive("/activity", $currentRoute)}
|
||||||
>
|
>
|
||||||
Activity
|
Activity
|
||||||
</a>
|
</a>
|
||||||
@@ -75,22 +79,39 @@
|
|||||||
use:link
|
use:link
|
||||||
class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap"
|
class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap"
|
||||||
class:font-semibold={isActive("/logs", $currentRoute)}
|
class:font-semibold={isActive("/logs", $currentRoute)}
|
||||||
|
class:underline={isActive("/logs", $currentRoute)}
|
||||||
|
class:underline-offset-4={isActive("/logs", $currentRoute)}
|
||||||
>
|
>
|
||||||
Logs
|
Logs
|
||||||
</a>
|
</a>
|
||||||
<button onclick={toggleTheme} title="Toggle theme">
|
<a
|
||||||
{#if $isDarkMode}
|
href="/performance"
|
||||||
|
use:link
|
||||||
|
class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1 whitespace-nowrap"
|
||||||
|
class:font-semibold={isActive("/performance", $currentRoute)}
|
||||||
|
class:underline={isActive("/performance", $currentRoute)}
|
||||||
|
class:underline-offset-4={isActive("/performance", $currentRoute)}
|
||||||
|
>
|
||||||
|
Performance
|
||||||
|
</a>
|
||||||
|
<button onclick={toggleTheme} title="Toggle theme (current: {$themeMode})">
|
||||||
|
{#if $themeMode === "system"}
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-5 h-5">
|
||||||
|
<path d="M0,9c0-.552,.448-1,1-1H3.108c.147-.874,.472-1.721,1.006-2.471l-1.478-1.478c-.391-.391-.391-1.023,0-1.414s1.023-.391,1.414,0l1.478,1.478c.751-.534,1.598-.859,2.471-1.006V1c0-.552,.448-1,1-1s1,.448,1,1V3.108c.874,.147,1.725,.466,2.477,1.001l1.473-1.473c.391-.391,1.023-.391,1.414,0s.391,1.023,0,1.414L3.963,15.45c-.195,.195-.451,.293-.707,.293s-.512-.098-.707-.293c-.391-.391-.391-1.023,0-1.414l1.56-1.56c-.535-.751-.854-1.602-1.001-2.477H1c-.552,0-1-.448-1-1ZM23.707,.293c-.391-.391-1.023-.391-1.414,0L.293,22.293c-.391,.391-.391,1.023,0,1.414,.195,.195,.451,.293,.707,.293s.512-.098,.707-.293L23.707,1.707c.391-.391,.391-1.023,0-1.414Zm-.283,10.954c.32-.15,.538-.458,.572-.81,.034-.353-.121-.696-.407-.904-.858-.625-1.833-1.066-2.897-1.315-.335-.078-.69,.022-.934,.267l-8.392,8.391c-.244,.244-.345,.597-.267,.933,.843,3.646,4.047,6.191,7.792,6.191,1.695,0,3.32-.53,4.697-1.533,.286-.208,.441-.553,.407-.904-.034-.353-.251-.66-.572-.811-1.842-.861-3.033-2.727-3.033-4.752s1.19-3.891,3.033-4.753Z"/>
|
||||||
|
</svg>
|
||||||
|
{:else if $themeMode === "light"}
|
||||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-5 h-5">
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-5 h-5">
|
||||||
<path
|
<path
|
||||||
fill-rule="evenodd"
|
fill-rule="evenodd"
|
||||||
d="M9.528 1.718a.75.75 0 0 1 .162.819A8.97 8.97 0 0 0 9 6a9 9 0 0 0 9 9 8.97 8.97 0 0 0 3.463-.69.75.75 0 0 1 .981.98 10.503 10.503 0 0 1-9.694 6.46c-5.799 0-10.5-4.7-10.5-10.5 0-4.368 2.667-8.112 6.46-9.694a.75.75 0 0 1 .818.162Z"
|
d="M12 2.25a.75.75 0 0 1 .75.75v2.25a.75.75 0 0 1-1.5 0V3a.75.75 0 0 1 .75-.75ZM7.5 12a4.5 4.5 0 1 1 9 0 4.5 4.5 0 0 1-9 0ZM18.894 6.166a.75.75 0 0 0-1.06-1.06l-1.591 1.59a.75.75 0 1 0 1.06 1.061l1.591-1.59ZM21.75 12a.75.75 0 0 1-.75.75h-2.25a.75.75 0 0 1 0-1.5H21a.75.75 0 0 1 .75.75ZM17.834 18.894a.75.75 0 0 0 1.06-1.06l-1.59-1.591a.75.75 0 1 0-1.061 1.06l1.591 1.591ZM12 18a.75.75 0 0 1 .75.75V21a.75.75 0 0 1-1.5 0v-2.25A.75.75 0 0 1 12 18ZM7.758 17.303a.75.75 0 0 0-1.061-1.06l-1.591 1.59a.75.75 0 0 0 1.06 1.061l1.591-1.59ZM6 12a.75.75 0 0 1-.75.75H3a.75.75 0 0 1 0-1.5h2.25A.75.75 0 0 1 6 12ZM6.697 7.757a.75.75 0 0 0 1.06-1.06l-1.59-1.591a.75.75 0 0 0-1.061 1.06l1.59 1.591Z"
|
||||||
clip-rule="evenodd"
|
|
||||||
/>
|
/>
|
||||||
</svg>
|
</svg>
|
||||||
{:else}
|
{:else}
|
||||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-5 h-5">
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-5 h-5">
|
||||||
<path
|
<path
|
||||||
d="M12 2.25a.75.75 0 0 1 .75.75v2.25a.75.75 0 0 1-1.5 0V3a.75.75 0 0 1 .75-.75ZM7.5 12a4.5 4.5 0 1 1 9 0 4.5 4.5 0 0 1-9 0ZM18.894 6.166a.75.75 0 0 0-1.06-1.06l-1.591 1.59a.75.75 0 1 0 1.06 1.061l1.591-1.59ZM21.75 12a.75.75 0 0 1-.75.75h-2.25a.75.75 0 0 1 0-1.5H21a.75.75 0 0 1 .75.75ZM17.834 18.894a.75.75 0 0 0 1.06-1.06l-1.59-1.591a.75.75 0 1 0-1.061 1.06l1.591 1.591ZM12 18a.75.75 0 0 1 .75.75V21a.75.75 0 0 1-1.5 0v-2.25A.75.75 0 0 1 12 18ZM7.758 17.303a.75.75 0 0 0-1.061-1.06l-1.591 1.59a.75.75 0 0 0 1.06 1.061l1.591-1.59ZM6 12a.75.75 0 0 1-.75.75H3a.75.75 0 0 1 0-1.5h2.25A.75.75 0 0 1 6 12ZM6.697 7.757a.75.75 0 0 0 1.06-1.06l-1.59-1.591a.75.75 0 0 0-1.061 1.06l1.59 1.591Z"
|
fill-rule="evenodd"
|
||||||
|
d="M9.528 1.718a.75.75 0 0 1 .162.819A8.97 8.97 0 0 0 9 6a9 9 0 0 0 9 9 8.97 8.97 0 0 0 3.463-.69.75.75 0 0 1 .981.98 10.503 10.503 0 0 1-9.694 6.46c-5.799 0-10.5-4.7-10.5-10.5 0-4.368 2.667-8.112 6.46-9.694a.75.75 0 0 1 .818.162Z"
|
||||||
|
clip-rule="evenodd"
|
||||||
/>
|
/>
|
||||||
</svg>
|
</svg>
|
||||||
{/if}
|
{/if}
|
||||||
|
|||||||
@@ -0,0 +1,148 @@
|
|||||||
|
<script lang="ts">
|
||||||
|
import { onMount } from "svelte";
|
||||||
|
import { Chart, registerables } from "chart.js";
|
||||||
|
import { isDarkMode } from "../stores/theme";
|
||||||
|
|
||||||
|
Chart.register(...registerables);
|
||||||
|
|
||||||
|
interface Dataset {
|
||||||
|
label: string;
|
||||||
|
data: number[];
|
||||||
|
borderColor: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface Props {
|
||||||
|
title: string;
|
||||||
|
labels: string[];
|
||||||
|
datasets: Dataset[];
|
||||||
|
yMin?: number;
|
||||||
|
yMax?: number;
|
||||||
|
yLabel?: string;
|
||||||
|
showLegend?: boolean;
|
||||||
|
}
|
||||||
|
|
||||||
|
let { title, labels, datasets, yMin, yMax, yLabel, showLegend = true }: Props = $props();
|
||||||
|
|
||||||
|
let canvas: HTMLCanvasElement;
|
||||||
|
let chart: Chart;
|
||||||
|
|
||||||
|
function getChartColors(dark: boolean) {
|
||||||
|
return {
|
||||||
|
grid: dark ? "rgba(255,255,255,0.08)" : "rgba(0,0,0,0.08)",
|
||||||
|
tick: dark ? "#9ca3af" : "#6b7280",
|
||||||
|
legend: dark ? "#d1d5db" : "#374151",
|
||||||
|
tooltipBg: dark ? "#1f2937" : "#ffffff",
|
||||||
|
tooltipText: dark ? "#f3f4f6" : "#111827",
|
||||||
|
tooltipBorder: dark ? "#374151" : "#e5e7eb",
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
function buildOptions(dark: boolean) {
|
||||||
|
const colors = getChartColors(dark);
|
||||||
|
return {
|
||||||
|
responsive: true,
|
||||||
|
maintainAspectRatio: false,
|
||||||
|
animation: false as const,
|
||||||
|
interaction: {
|
||||||
|
mode: "index" as const,
|
||||||
|
intersect: false,
|
||||||
|
},
|
||||||
|
plugins: {
|
||||||
|
legend: {
|
||||||
|
display: showLegend,
|
||||||
|
position: "top" as const,
|
||||||
|
labels: {
|
||||||
|
color: colors.legend,
|
||||||
|
usePointStyle: true,
|
||||||
|
pointStyle: "circle" as const,
|
||||||
|
padding: 12,
|
||||||
|
font: { size: 11 },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
title: {
|
||||||
|
display: true,
|
||||||
|
text: title,
|
||||||
|
color: colors.legend,
|
||||||
|
font: { size: 14, weight: "bold" as const },
|
||||||
|
},
|
||||||
|
tooltip: {
|
||||||
|
backgroundColor: colors.tooltipBg,
|
||||||
|
titleColor: colors.tooltipText,
|
||||||
|
bodyColor: colors.tooltipText,
|
||||||
|
borderColor: colors.tooltipBorder,
|
||||||
|
borderWidth: 1,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
scales: {
|
||||||
|
x: {
|
||||||
|
bounds: "data" as const,
|
||||||
|
offset: false,
|
||||||
|
ticks: { color: colors.tick, maxRotation: 0, font: { size: 10 }, maxTicksLimit: 10 },
|
||||||
|
grid: { color: colors.grid },
|
||||||
|
},
|
||||||
|
y: {
|
||||||
|
min: yMin,
|
||||||
|
max: yMax,
|
||||||
|
ticks: { color: colors.tick, font: { size: 10 } },
|
||||||
|
grid: { color: colors.grid },
|
||||||
|
title: yLabel
|
||||||
|
? { display: true, text: yLabel, color: colors.tick }
|
||||||
|
: undefined,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
onMount(() => {
|
||||||
|
chart = new Chart(canvas, {
|
||||||
|
type: "line",
|
||||||
|
data: {
|
||||||
|
labels: [...labels],
|
||||||
|
datasets: datasets.map((ds) => ({
|
||||||
|
label: ds.label,
|
||||||
|
data: [...ds.data],
|
||||||
|
borderColor: ds.borderColor,
|
||||||
|
backgroundColor: ds.borderColor + "20",
|
||||||
|
borderWidth: 1.5,
|
||||||
|
pointRadius: 0,
|
||||||
|
tension: 0.4,
|
||||||
|
fill: false,
|
||||||
|
})),
|
||||||
|
},
|
||||||
|
options: buildOptions($isDarkMode),
|
||||||
|
});
|
||||||
|
|
||||||
|
return () => {
|
||||||
|
chart.destroy();
|
||||||
|
};
|
||||||
|
});
|
||||||
|
|
||||||
|
$effect(() => {
|
||||||
|
if (!chart) return;
|
||||||
|
const _dark = $isDarkMode;
|
||||||
|
chart.options = buildOptions(_dark);
|
||||||
|
chart.update("none");
|
||||||
|
});
|
||||||
|
|
||||||
|
$effect(() => {
|
||||||
|
if (!chart) return;
|
||||||
|
const _l = labels;
|
||||||
|
const _d = datasets;
|
||||||
|
chart.data.labels = [..._l];
|
||||||
|
chart.data.datasets = _d.map((ds) => ({
|
||||||
|
label: ds.label,
|
||||||
|
data: [...ds.data],
|
||||||
|
borderColor: ds.borderColor,
|
||||||
|
backgroundColor: ds.borderColor + "20",
|
||||||
|
borderWidth: 1.5,
|
||||||
|
pointRadius: 0,
|
||||||
|
tension: 0.4,
|
||||||
|
fill: false,
|
||||||
|
}));
|
||||||
|
chart.update("none");
|
||||||
|
});
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<div class="card p-4 h-[300px]">
|
||||||
|
<canvas bind:this={canvas}></canvas>
|
||||||
|
</div>
|
||||||
@@ -1,167 +0,0 @@
|
|||||||
<script lang="ts">
|
|
||||||
import { inFlightRequests, metrics } from "../stores/api";
|
|
||||||
import TokenHistogram from "./TokenHistogram.svelte";
|
|
||||||
|
|
||||||
interface HistogramData {
|
|
||||||
bins: number[];
|
|
||||||
min: number;
|
|
||||||
max: number;
|
|
||||||
binSize: number;
|
|
||||||
p99: number;
|
|
||||||
p95: number;
|
|
||||||
p50: number;
|
|
||||||
}
|
|
||||||
|
|
||||||
let stats = $derived.by(() => {
|
|
||||||
const totalRequests = $metrics.length;
|
|
||||||
if (totalRequests === 0) {
|
|
||||||
return {
|
|
||||||
totalRequests: 0,
|
|
||||||
totalInputTokens: 0,
|
|
||||||
totalOutputTokens: 0,
|
|
||||||
inFlightRequests: $inFlightRequests,
|
|
||||||
tokenStats: { p99: "0", p95: "0", p50: "0" },
|
|
||||||
histogramData: null,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
const totalInputTokens = $metrics.reduce((sum, m) => sum + m.input_tokens, 0);
|
|
||||||
const totalOutputTokens = $metrics.reduce((sum, m) => sum + m.output_tokens, 0);
|
|
||||||
|
|
||||||
// Calculate token statistics using output_tokens and duration_ms
|
|
||||||
const validMetrics = $metrics.filter((m) => m.duration_ms > 0 && m.output_tokens > 0);
|
|
||||||
if (validMetrics.length === 0) {
|
|
||||||
return {
|
|
||||||
totalRequests,
|
|
||||||
totalInputTokens,
|
|
||||||
totalOutputTokens,
|
|
||||||
inFlightRequests: $inFlightRequests,
|
|
||||||
tokenStats: { p99: "0", p95: "0", p50: "0" },
|
|
||||||
histogramData: null,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
// Calculate tokens/second for each valid metric
|
|
||||||
const tokensPerSecond = validMetrics.map((m) => m.output_tokens / (m.duration_ms / 1000));
|
|
||||||
|
|
||||||
// Sort for percentile calculation
|
|
||||||
const sortedTokensPerSecond = [...tokensPerSecond].sort((a, b) => a - b);
|
|
||||||
|
|
||||||
const p99 = sortedTokensPerSecond[Math.floor(sortedTokensPerSecond.length * 0.99)];
|
|
||||||
const p95 = sortedTokensPerSecond[Math.floor(sortedTokensPerSecond.length * 0.95)];
|
|
||||||
const p50 = sortedTokensPerSecond[Math.floor(sortedTokensPerSecond.length * 0.5)];
|
|
||||||
|
|
||||||
// Create histogram data
|
|
||||||
const min = Math.min(...tokensPerSecond);
|
|
||||||
const max = Math.max(...tokensPerSecond);
|
|
||||||
const binCount = Math.min(30, Math.max(10, Math.floor(tokensPerSecond.length / 5)));
|
|
||||||
const binSize = (max - min) / binCount;
|
|
||||||
|
|
||||||
const bins = Array(binCount).fill(0);
|
|
||||||
tokensPerSecond.forEach((value) => {
|
|
||||||
const binIndex = Math.min(Math.floor((value - min) / binSize), binCount - 1);
|
|
||||||
bins[binIndex]++;
|
|
||||||
});
|
|
||||||
|
|
||||||
const histogramData: HistogramData = {
|
|
||||||
bins,
|
|
||||||
min,
|
|
||||||
max,
|
|
||||||
binSize,
|
|
||||||
p99,
|
|
||||||
p95,
|
|
||||||
p50,
|
|
||||||
};
|
|
||||||
|
|
||||||
return {
|
|
||||||
totalRequests,
|
|
||||||
totalInputTokens,
|
|
||||||
totalOutputTokens,
|
|
||||||
inFlightRequests: $inFlightRequests,
|
|
||||||
tokenStats: {
|
|
||||||
p99: p99.toFixed(2),
|
|
||||||
p95: p95.toFixed(2),
|
|
||||||
p50: p50.toFixed(2),
|
|
||||||
},
|
|
||||||
histogramData,
|
|
||||||
};
|
|
||||||
});
|
|
||||||
|
|
||||||
const nf = new Intl.NumberFormat();
|
|
||||||
</script>
|
|
||||||
|
|
||||||
<div class="card">
|
|
||||||
<div class="rounded-lg overflow-hidden border border-card-border-inner">
|
|
||||||
<table class="min-w-full divide-y divide-card-border-inner">
|
|
||||||
<thead class="bg-secondary">
|
|
||||||
<tr>
|
|
||||||
<th class="px-4 py-3 text-left text-xs font-semibold uppercase tracking-wider text-txtmain">Requests</th>
|
|
||||||
<th class="px-4 py-3 text-left text-xs font-semibold uppercase tracking-wider text-txtmain border-l border-card-border-inner">
|
|
||||||
Processed
|
|
||||||
</th>
|
|
||||||
<th class="px-4 py-3 text-left text-xs font-semibold uppercase tracking-wider text-txtmain border-l border-card-border-inner">
|
|
||||||
Generated
|
|
||||||
</th>
|
|
||||||
<th class="px-4 py-3 text-left text-xs font-semibold uppercase tracking-wider text-txtmain border-l border-card-border-inner">
|
|
||||||
Token Stats (tokens/sec)
|
|
||||||
</th>
|
|
||||||
</tr>
|
|
||||||
</thead>
|
|
||||||
|
|
||||||
<tbody class="bg-surface divide-y divide-card-border-inner">
|
|
||||||
<tr class="hover:bg-secondary">
|
|
||||||
<td class="px-4 py-4 text-sm font-semibold text-gray-900 dark:text-white">
|
|
||||||
<div class="flex flex-col gap-1">
|
|
||||||
<span class="text-xs font-medium text-gray-500 dark:text-gray-400">Completed: {nf.format(stats.totalRequests)}</span>
|
|
||||||
<span class="text-xs font-medium text-gray-500 dark:text-gray-400">Waiting: {nf.format(stats.inFlightRequests)}</span>
|
|
||||||
</div>
|
|
||||||
</td>
|
|
||||||
|
|
||||||
<td class="px-4 py-4 text-sm text-gray-700 dark:text-gray-300 border-l border-gray-200 dark:border-white/10">
|
|
||||||
<div class="flex items-center gap-2">
|
|
||||||
<span class="text-sm font-medium">{nf.format(stats.totalInputTokens)}</span>
|
|
||||||
<span class="text-xs text-gray-500 dark:text-gray-400">tokens</span>
|
|
||||||
</div>
|
|
||||||
</td>
|
|
||||||
|
|
||||||
<td class="px-4 py-4 text-sm text-gray-700 dark:text-gray-300 border-l border-gray-200 dark:border-white/10">
|
|
||||||
<div class="flex items-center gap-2">
|
|
||||||
<span class="text-sm font-medium">{nf.format(stats.totalOutputTokens)}</span>
|
|
||||||
<span class="text-xs text-gray-500 dark:text-gray-400">tokens</span>
|
|
||||||
</div>
|
|
||||||
</td>
|
|
||||||
|
|
||||||
<td class="px-4 py-4 border-l border-gray-200 dark:border-white/10">
|
|
||||||
<div class="space-y-3">
|
|
||||||
<div class="grid grid-cols-3 gap-2 items-center">
|
|
||||||
<div class="text-center">
|
|
||||||
<div class="text-xs text-gray-500 dark:text-gray-400">P50</div>
|
|
||||||
<div class="mt-1 inline-block rounded-full bg-gray-100 dark:bg-white/5 px-3 py-1 text-sm font-semibold text-gray-800 dark:text-white">
|
|
||||||
{stats.tokenStats.p50}
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div class="text-center">
|
|
||||||
<div class="text-xs text-gray-500 dark:text-gray-400">P95</div>
|
|
||||||
<div class="mt-1 inline-block rounded-full bg-gray-100 dark:bg-white/5 px-3 py-1 text-sm font-semibold text-gray-800 dark:text-white">
|
|
||||||
{stats.tokenStats.p95}
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div class="text-center">
|
|
||||||
<div class="text-xs text-gray-500 dark:text-gray-400">P99</div>
|
|
||||||
<div class="mt-1 inline-block rounded-full bg-gray-100 dark:bg-white/5 px-3 py-1 text-sm font-semibold text-gray-800 dark:text-white">
|
|
||||||
{stats.tokenStats.p99}
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
{#if stats.histogramData}
|
|
||||||
<TokenHistogram data={stats.histogramData} />
|
|
||||||
{/if}
|
|
||||||
</div>
|
|
||||||
</td>
|
|
||||||
</tr>
|
|
||||||
</tbody>
|
|
||||||
</table>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
@@ -1,23 +1,19 @@
|
|||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
interface HistogramData {
|
import type { HistogramData } from "../lib/types";
|
||||||
bins: number[];
|
|
||||||
min: number;
|
|
||||||
max: number;
|
|
||||||
binSize: number;
|
|
||||||
p99: number;
|
|
||||||
p95: number;
|
|
||||||
p50: number;
|
|
||||||
}
|
|
||||||
|
|
||||||
interface Props {
|
let {
|
||||||
|
data,
|
||||||
|
unit = "tokens/sec",
|
||||||
|
colorClass = "text-blue-500 dark:text-blue-400",
|
||||||
|
}: {
|
||||||
data: HistogramData;
|
data: HistogramData;
|
||||||
}
|
unit?: string;
|
||||||
|
colorClass?: string;
|
||||||
|
} = $props();
|
||||||
|
|
||||||
let { data }: Props = $props();
|
const height = 250;
|
||||||
|
const padding = { top: 30, right: 20, bottom: 40, left: 75 };
|
||||||
const height = 120;
|
const viewBoxWidth = 1200;
|
||||||
const padding = { top: 10, right: 15, bottom: 25, left: 45 };
|
|
||||||
const viewBoxWidth = 600;
|
|
||||||
const chartWidth = viewBoxWidth - padding.left - padding.right;
|
const chartWidth = viewBoxWidth - padding.left - padding.right;
|
||||||
const chartHeight = height - padding.top - padding.bottom;
|
const chartHeight = height - padding.top - padding.bottom;
|
||||||
|
|
||||||
@@ -43,6 +39,24 @@
|
|||||||
opacity="0.3"
|
opacity="0.3"
|
||||||
/>
|
/>
|
||||||
|
|
||||||
|
<!-- Y-axis ticks and labels -->
|
||||||
|
{#each [0, 0.5, 1] as fraction}
|
||||||
|
{@const tickCount = Math.round(maxCount * fraction)}
|
||||||
|
{@const tickY = height - padding.bottom - fraction * chartHeight}
|
||||||
|
<line
|
||||||
|
x1={padding.left - 8}
|
||||||
|
y1={tickY}
|
||||||
|
x2={padding.left}
|
||||||
|
y2={tickY}
|
||||||
|
stroke="currentColor"
|
||||||
|
stroke-width="1"
|
||||||
|
opacity="0.4"
|
||||||
|
/>
|
||||||
|
<text x={padding.left - 10} y={tickY + 10} font-size="26" fill="currentColor" opacity="0.8" text-anchor="end">
|
||||||
|
{tickCount}
|
||||||
|
</text>
|
||||||
|
{/each}
|
||||||
|
|
||||||
<!-- X-axis -->
|
<!-- X-axis -->
|
||||||
<line
|
<line
|
||||||
x1={padding.left}
|
x1={padding.left}
|
||||||
@@ -69,9 +83,9 @@
|
|||||||
height={barHeight}
|
height={barHeight}
|
||||||
fill="currentColor"
|
fill="currentColor"
|
||||||
opacity="0.6"
|
opacity="0.6"
|
||||||
class="text-blue-500 dark:text-blue-400 hover:opacity-90 transition-opacity cursor-pointer"
|
class="{colorClass} hover:opacity-90 transition-opacity cursor-pointer"
|
||||||
/>
|
/>
|
||||||
<title>{`${binStart.toFixed(1)} - ${binEnd.toFixed(1)} tokens/sec\nCount: ${count}`}</title>
|
<title>{`${binStart.toFixed(1)} - ${binEnd.toFixed(1)} ${unit}\nCount: ${count}`}</title>
|
||||||
</g>
|
</g>
|
||||||
{/each}
|
{/each}
|
||||||
|
|
||||||
@@ -113,17 +127,19 @@
|
|||||||
/>
|
/>
|
||||||
|
|
||||||
<!-- X-axis labels -->
|
<!-- X-axis labels -->
|
||||||
<text x={padding.left} y={height - 5} font-size="10" fill="currentColor" opacity="0.6" text-anchor="start">
|
<text x={padding.left} y={height - 8} font-size="26" fill="currentColor" opacity="0.8" text-anchor="start">
|
||||||
{data.min.toFixed(1)}
|
{data.min.toFixed(1)}
|
||||||
</text>
|
</text>
|
||||||
|
|
||||||
<text x={viewBoxWidth - padding.right} y={height - 5} font-size="10" fill="currentColor" opacity="0.6" text-anchor="end">
|
<text
|
||||||
|
x={viewBoxWidth - padding.right}
|
||||||
|
y={height - 8}
|
||||||
|
font-size="26"
|
||||||
|
fill="currentColor"
|
||||||
|
opacity="0.8"
|
||||||
|
text-anchor="end"
|
||||||
|
>
|
||||||
{data.max.toFixed(1)}
|
{data.max.toFixed(1)}
|
||||||
</text>
|
</text>
|
||||||
|
|
||||||
<!-- X-axis label -->
|
|
||||||
<text x={padding.left + chartWidth / 2} y={height - 2} font-size="10" fill="currentColor" opacity="0.6" text-anchor="middle">
|
|
||||||
Tokens/Second Distribution
|
|
||||||
</text>
|
|
||||||
</svg>
|
</svg>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
import { models } from "../../stores/api";
|
import { models } from "../../stores/api";
|
||||||
import { persistentStore } from "../../stores/persistent";
|
import { persistentStore } from "../../stores/persistent";
|
||||||
import { streamChatCompletion } from "../../lib/chatApi";
|
import { streamChatCompletion, type Endpoint } from "../../lib/chatApi";
|
||||||
import { playgroundStores } from "../../stores/playgroundActivity";
|
import { playgroundStores } from "../../stores/playgroundActivity";
|
||||||
import type { ChatMessage, ContentPart } from "../../lib/types";
|
import type { ChatMessage, ContentPart } from "../../lib/types";
|
||||||
import ChatMessageComponent from "./ChatMessage.svelte";
|
import ChatMessageComponent from "./ChatMessage.svelte";
|
||||||
@@ -11,6 +11,8 @@
|
|||||||
const selectedModelStore = persistentStore<string>("playground-selected-model", "");
|
const selectedModelStore = persistentStore<string>("playground-selected-model", "");
|
||||||
const systemPromptStore = persistentStore<string>("playground-system-prompt", "");
|
const systemPromptStore = persistentStore<string>("playground-system-prompt", "");
|
||||||
const temperatureStore = persistentStore<number>("playground-temperature", 0.7);
|
const temperatureStore = persistentStore<number>("playground-temperature", 0.7);
|
||||||
|
const endpointStore = persistentStore<Endpoint>("playground-endpoint", "v1/chat/completions");
|
||||||
|
const maxTokensStore = persistentStore<number>("playground-max-tokens", 4096);
|
||||||
|
|
||||||
function loadMessages(): ChatMessage[] {
|
function loadMessages(): ChatMessage[] {
|
||||||
try {
|
try {
|
||||||
@@ -142,7 +144,7 @@
|
|||||||
$selectedModelStore,
|
$selectedModelStore,
|
||||||
apiMessages,
|
apiMessages,
|
||||||
abortController.signal,
|
abortController.signal,
|
||||||
{ temperature: $temperatureStore }
|
{ temperature: $temperatureStore, endpoint: $endpointStore, max_tokens: $maxTokensStore }
|
||||||
);
|
);
|
||||||
|
|
||||||
for await (const chunk of stream) {
|
for await (const chunk of stream) {
|
||||||
@@ -319,6 +321,19 @@
|
|||||||
<!-- Settings panel -->
|
<!-- Settings panel -->
|
||||||
{#if showSettings}
|
{#if showSettings}
|
||||||
<div class="shrink-0 mb-4 p-4 bg-surface border border-gray-200 dark:border-white/10 rounded">
|
<div class="shrink-0 mb-4 p-4 bg-surface border border-gray-200 dark:border-white/10 rounded">
|
||||||
|
<div class="mb-4">
|
||||||
|
<label class="block text-sm font-medium mb-1" for="endpoint">Endpoint</label>
|
||||||
|
<select
|
||||||
|
id="endpoint"
|
||||||
|
class="w-full px-3 py-2 rounded border border-gray-200 dark:border-white/10 bg-card focus:outline-none focus:ring-2 focus:ring-primary"
|
||||||
|
bind:value={$endpointStore}
|
||||||
|
disabled={isStreaming}
|
||||||
|
>
|
||||||
|
<option value="v1/chat/completions">/v1/chat/completions</option>
|
||||||
|
<option value="v1/messages">/v1/messages</option>
|
||||||
|
<option value="v1/responses">/v1/responses</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
<div class="mb-4">
|
<div class="mb-4">
|
||||||
<label class="block text-sm font-medium mb-1" for="system-prompt">System Prompt</label>
|
<label class="block text-sm font-medium mb-1" for="system-prompt">System Prompt</label>
|
||||||
<textarea
|
<textarea
|
||||||
@@ -330,7 +345,7 @@
|
|||||||
disabled={isStreaming}
|
disabled={isStreaming}
|
||||||
></textarea>
|
></textarea>
|
||||||
</div>
|
</div>
|
||||||
<div>
|
<div class="mb-4">
|
||||||
<label class="block text-sm font-medium mb-1" for="temperature">
|
<label class="block text-sm font-medium mb-1" for="temperature">
|
||||||
Temperature: {$temperatureStore.toFixed(2)}
|
Temperature: {$temperatureStore.toFixed(2)}
|
||||||
</label>
|
</label>
|
||||||
@@ -349,6 +364,18 @@
|
|||||||
<span>Creative (2)</span>
|
<span>Creative (2)</span>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
<div>
|
||||||
|
<label class="block text-sm font-medium mb-1" for="max-tokens">Max Tokens</label>
|
||||||
|
<input
|
||||||
|
id="max-tokens"
|
||||||
|
type="number"
|
||||||
|
min="1"
|
||||||
|
class="w-full px-3 py-2 rounded border border-gray-200 dark:border-white/10 bg-card focus:outline-none focus:ring-2 focus:ring-primary"
|
||||||
|
bind:value={$maxTokensStore}
|
||||||
|
disabled={isStreaming}
|
||||||
|
/>
|
||||||
|
<p class="text-xs text-txtsecondary mt-1">Required for /v1/messages.</p>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
{/if}
|
{/if}
|
||||||
|
|
||||||
|
|||||||
+266
-42
@@ -1,4 +1,6 @@
|
|||||||
import type { ChatMessage, ChatCompletionRequest } from "./types";
|
import type { ChatMessage, ContentPart } from "./types";
|
||||||
|
|
||||||
|
/** API paths a chat request can be sent to, written without the leading slash. */
export type Endpoint = "v1/chat/completions" | "v1/messages" | "v1/responses";
|
||||||
|
|
||||||
export interface StreamChunk {
|
export interface StreamChunk {
|
||||||
content: string;
|
content: string;
|
||||||
@@ -8,9 +10,126 @@ export interface StreamChunk {
|
|||||||
|
|
||||||
export interface ChatOptions {
|
export interface ChatOptions {
|
||||||
temperature?: number;
|
temperature?: number;
|
||||||
|
endpoint?: Endpoint;
|
||||||
|
max_tokens?: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
function parseSSELine(line: string): StreamChunk | null {
|
/**
 * Splits a base64 `data:` URL into its media type and base64 payload.
 *
 * Optional `;key=value` parameters between the media type and the `;base64`
 * marker (for example `;charset=utf-8`) are accepted and discarded, so the
 * returned `media_type` is always the bare type.
 *
 * @param url - The data URL to parse, e.g. `data:image/png;base64,iVBORw0...`.
 * @returns The media type and the raw base64 payload (may be empty).
 * @throws Error when `url` is not a base64-encoded data URL.
 */
function parseDataUrl(url: string): { media_type: string; data: string } {
  // Group 1: media type. Non-capturing group: optional parameters.
  // Group 2: payload; [\s\S] lets it span line breaks, which `.` would not.
  const match = /^data:([^;,]+)(?:;[^;,]+)*;base64,([\s\S]*)$/i.exec(url);
  if (!match) {
    throw new Error("Image is not a base64 data URL");
  }
  const [, media_type = "", data = ""] = match;
  return { media_type, data };
}
|
||||||
|
|
||||||
|
/**
 * Separates system messages from the rest of a conversation.
 *
 * The text of every system message is collected in order and joined with a
 * blank line. For system messages whose content is a list of parts, only the
 * `text` parts contribute; other parts are ignored.
 *
 * @param messages - Conversation in its original order.
 * @returns `system`: the combined system text (empty string when there is none);
 *          `rest`: all non-system messages, order preserved.
 */
function splitSystemMessages(messages: ChatMessage[]): { system: string; rest: ChatMessage[] } {
  const systemParts: string[] = [];
  const rest: ChatMessage[] = [];

  for (const msg of messages) {
    if (msg.role !== "system") {
      rest.push(msg);
      continue;
    }
    if (typeof msg.content === "string") {
      systemParts.push(msg.content);
      continue;
    }
    for (const part of msg.content) {
      if (part.type === "text") systemParts.push(part.text);
    }
  }

  return { system: systemParts.join("\n\n"), rest };
}
|
||||||
|
|
||||||
|
/**
 * Builds the streaming request body for the `v1/chat/completions` endpoint.
 *
 * Messages are forwarded with only their `role` and `content`. `temperature`
 * is always set as a key (an `undefined` value disappears on
 * `JSON.stringify`), and `max_tokens` is only included when it is truthy.
 *
 * @param model - Model identifier to send.
 * @param messages - Conversation, passed through unchanged apart from field selection.
 * @param options - Optional sampling settings.
 * @returns A plain object ready to be JSON-serialised.
 */
function buildChatCompletionsBody(model: string, messages: ChatMessage[], options?: ChatOptions): object {
  return {
    model,
    messages: messages.map((m) => ({ role: m.role, content: m.content })),
    stream: true,
    temperature: options?.temperature,
    // A missing or zero limit means "let the server decide".
    ...(options?.max_tokens ? { max_tokens: options.max_tokens } : {}),
  };
}
|
||||||
|
|
||||||
|
/**
 * Builds the streaming request body for the `v1/messages` endpoint.
 *
 * System messages are lifted out of the conversation into a top-level
 * `system` string. Multi-part content is converted to content blocks: text
 * parts become `text` blocks, and image parts become base64 `image` blocks.
 * Image parts on assistant messages are dropped.
 *
 * `max_tokens` is always sent. A missing, zero, negative or NaN value falls
 * back to 4096, matching the sibling builders, which treat a falsy limit as
 * "not set".
 *
 * @param model - Model identifier to send.
 * @param messages - Conversation, possibly containing system messages.
 * @param options - Optional sampling settings.
 * @returns A plain object ready to be JSON-serialised.
 * @throws Error (from `parseDataUrl`) when an image part is not a base64 data URL.
 */
function buildMessagesBody(model: string, messages: ChatMessage[], options?: ChatOptions): object {
  const { system, rest } = splitSystemMessages(messages);

  const mapped = rest.map((m) => {
    if (typeof m.content === "string") {
      return { role: m.role, content: m.content };
    }
    const blocks: object[] = [];
    for (const part of m.content) {
      if (part.type === "text") {
        blocks.push({ type: "text", text: part.text });
      } else if (m.role !== "assistant") {
        const { media_type, data } = parseDataUrl(part.image_url.url);
        blocks.push({ type: "image", source: { type: "base64", media_type, data } });
      }
    }
    return { role: m.role, content: blocks };
  });

  const requested = options?.max_tokens;
  const body: Record<string, unknown> = {
    model,
    messages: mapped,
    stream: true,
    // `NaN > 0` is false, so a cleared numeric input also falls back.
    max_tokens: typeof requested === "number" && requested > 0 ? requested : 4096,
  };
  if (system) body.system = system;
  if (options?.temperature !== undefined) body.temperature = options.temperature;
  return body;
}
|
||||||
|
|
||||||
|
/**
 * Builds the streaming request body for the `v1/responses` endpoint.
 *
 * System messages are lifted into a top-level `instructions` string. Every
 * remaining message becomes an `input` item whose content is always a list of
 * parts: text is tagged `output_text` for assistant messages and `input_text`
 * otherwise, and image parts become `input_image` entries carrying the
 * original URL.
 *
 * @param model - Model identifier to send.
 * @param messages - Conversation, possibly containing system messages.
 * @param options - Optional sampling settings; a truthy `max_tokens` is sent
 *                  as `max_output_tokens`.
 * @returns A plain object ready to be JSON-serialised.
 */
function buildResponsesBody(model: string, messages: ChatMessage[], options?: ChatOptions): object {
  const { system, rest } = splitSystemMessages(messages);

  const input = rest.map((m) => {
    // The text part type depends only on who authored the message.
    const textType = m.role === "assistant" ? "output_text" : "input_text";
    if (typeof m.content === "string") {
      return { role: m.role, content: [{ type: textType, text: m.content }] };
    }
    const content = m.content.map((part: ContentPart) =>
      part.type === "text"
        ? { type: textType, text: part.text }
        : { type: "input_image", image_url: part.image_url.url }
    );
    return { role: m.role, content };
  });

  const body: Record<string, unknown> = { model, input, stream: true };
  if (system) body.instructions = system;
  if (options?.temperature !== undefined) body.temperature = options.temperature;
  if (options?.max_tokens) body.max_output_tokens = options.max_tokens;
  return body;
}
|
||||||
|
|
||||||
|
/**
 * Picks the request URL and body format for the chosen endpoint.
 *
 * The URL is the endpoint path with a leading slash. Anything other than
 * `v1/messages` or `v1/responses` is built as a chat-completions request.
 *
 * @param endpoint - Which API flavour to target.
 * @param model - Model identifier to send.
 * @param messages - Conversation to send.
 * @param options - Optional sampling settings forwarded to the body builder.
 * @returns The URL to POST to and the not-yet-serialised body.
 */
function buildRequest(
  endpoint: Endpoint,
  model: string,
  messages: ChatMessage[],
  options?: ChatOptions
): { url: string; body: object } {
  const url = "/" + endpoint;
  switch (endpoint) {
    case "v1/messages":
      return { url, body: buildMessagesBody(model, messages, options) };
    case "v1/responses":
      return { url, body: buildResponsesBody(model, messages, options) };
    case "v1/chat/completions":
    default:
      return { url, body: buildChatCompletionsBody(model, messages, options) };
  }
}
|
||||||
|
|
||||||
|
function parseChatCompletionsLine(line: string): StreamChunk | null {
|
||||||
const trimmed = line.trim();
|
const trimmed = line.trim();
|
||||||
if (!trimmed || !trimmed.startsWith("data: ")) {
|
if (!trimmed || !trimmed.startsWith("data: ")) {
|
||||||
return null;
|
return null;
|
||||||
@@ -25,7 +144,7 @@ function parseSSELine(line: string): StreamChunk | null {
|
|||||||
const parsed = JSON.parse(data);
|
const parsed = JSON.parse(data);
|
||||||
const delta = parsed.choices?.[0]?.delta;
|
const delta = parsed.choices?.[0]?.delta;
|
||||||
const content = delta?.content || "";
|
const content = delta?.content || "";
|
||||||
const reasoning_content = delta?.reasoning_content || "";
|
const reasoning_content = delta?.reasoning_content || delta?.reasoning || "";
|
||||||
|
|
||||||
if (content || reasoning_content) {
|
if (content || reasoning_content) {
|
||||||
return { content, reasoning_content, done: false };
|
return { content, reasoning_content, done: false };
|
||||||
@@ -36,25 +155,158 @@ function parseSSELine(line: string): StreamChunk | null {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Turns a chat-completions response stream into `StreamChunk`s.
 *
 * Bytes are decoded incrementally and split on `\n`. Each complete line is
 * handed to `parseChatCompletionsLine`, and lines it rejects (returns `null`
 * for) are skipped. The generator stops right after yielding a chunk marked
 * `done`. When the stream ends without one, any unterminated final line is
 * parsed too and yielded unless it is itself a `done` marker.
 *
 * @param reader - Reader over the response body; it is not released here.
 * @returns An async generator of parsed chunks in arrival order.
 */
async function* parseChatCompletionsStream(
  reader: ReadableStreamDefaultReader<Uint8Array>
): AsyncGenerator<StreamChunk> {
  const decoder = new TextDecoder();
  let buffer = "";

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;

    buffer += decoder.decode(value, { stream: true });
    const lines = buffer.split("\n");
    // The last piece may be an incomplete line; keep it for the next read.
    buffer = lines.pop() ?? "";

    for (const line of lines) {
      const chunk = parseChatCompletionsLine(line);
      if (!chunk) continue;
      yield chunk;
      if (chunk.done) return;
    }
  }

  // Flush bytes the decoder was still holding for an unfinished character.
  buffer += decoder.decode();
  const trailing = parseChatCompletionsLine(buffer);
  if (trailing && !trailing.done) {
    yield trailing;
  }
}
|
||||||
|
|
||||||
|
/**
 * Parses one server-sent-event block (the lines between two blank lines).
 *
 * Comment lines (starting with `:`) and empty lines are skipped, and a
 * trailing `\r` on any line is removed. The last `event:` field wins, and
 * multiple `data:` fields are joined with `\n`. Field values are trimmed.
 *
 * @param block - Raw text of a single event, without the terminating blank line.
 * @returns The event name and data, or `null` when the block has neither.
 */
function parseSSEEventBlock(block: string): { event: string; data: string } | null {
  let event = "";
  const dataLines: string[] = [];
  for (const rawLine of block.split("\n")) {
    const line = rawLine.replace(/\r$/, "");
    if (!line || line.startsWith(":")) continue;
    if (line.startsWith("event:")) {
      event = line.slice(6).trim();
    } else if (line.startsWith("data:")) {
      dataLines.push(line.slice(5).trim());
    }
  }
  if (dataLines.length === 0 && !event) return null;
  return { event, data: dataLines.join("\n") };
}

/**
 * Reads a byte stream and yields each complete server-sent event in it.
 *
 * CRLF line endings are normalised to LF before looking for the blank line
 * that ends an event. Without this, a stream delimited by `\r\n\r\n` never
 * contains `\n\n` and no event would ever be produced. An event still
 * unterminated when the stream ends is discarded.
 */
async function* readSSEEventBlocks(
  reader: ReadableStreamDefaultReader<Uint8Array>
): AsyncGenerator<{ event: string; data: string }> {
  const decoder = new TextDecoder();
  let buffer = "";

  while (true) {
    const { done, value } = await reader.read();
    if (done) return;

    // A lone "\r" at the end of the buffer is left alone: it pairs up with
    // the "\n" at the start of the next read and is normalised then.
    buffer = (buffer + decoder.decode(value, { stream: true })).replace(/\r\n/g, "\n");
    const blocks = buffer.split("\n\n");
    // The last piece is an event still being received.
    buffer = blocks.pop() ?? "";

    for (const block of blocks) {
      const parsed = parseSSEEventBlock(block);
      if (parsed) yield parsed;
    }
  }
}

/**
 * Turns a `v1/messages` event stream into `StreamChunk`s.
 *
 * Only `content_block_delta` events produce output: a `text_delta` becomes
 * `content`, and a `thinking_delta` becomes `reasoning_content`. A
 * `message_stop` event yields a final `done` chunk and ends the generator.
 * Events whose data is not valid JSON are ignored.
 *
 * @param reader - Reader over the response body; it is not released here.
 */
async function* parseMessagesStream(
  reader: ReadableStreamDefaultReader<Uint8Array>
): AsyncGenerator<StreamChunk> {
  for await (const { event, data } of readSSEEventBlocks(reader)) {
    if (event === "message_stop") {
      yield { content: "", done: true };
      return;
    }
    if (event !== "content_block_delta" || !data) continue;

    let delta: { type?: string; text?: string; thinking?: string } | null | undefined;
    try {
      delta = JSON.parse(data)?.delta;
    } catch {
      continue; // ignore malformed event
    }
    if (!delta) continue;

    if (delta.type === "text_delta" && delta.text) {
      yield { content: delta.text, done: false };
    } else if (delta.type === "thinking_delta" && delta.thinking) {
      yield { content: "", reasoning_content: delta.thinking, done: false };
    }
  }
}

/**
 * Turns a `v1/responses` event stream into `StreamChunk`s.
 *
 * `response.output_text.delta` events become `content`, and
 * `response.reasoning_summary_text.delta` events become `reasoning_content`.
 * A `response.completed` event yields a final `done` chunk and ends the
 * generator. Events whose data is not valid JSON are ignored.
 *
 * @param reader - Reader over the response body; it is not released here.
 */
async function* parseResponsesStream(
  reader: ReadableStreamDefaultReader<Uint8Array>
): AsyncGenerator<StreamChunk> {
  for await (const { event, data } of readSSEEventBlocks(reader)) {
    if (event === "response.completed") {
      yield { content: "", done: true };
      return;
    }
    if (!data) continue;

    const isText = event === "response.output_text.delta";
    const isReasoning = event === "response.reasoning_summary_text.delta";
    if (!isText && !isReasoning) continue;

    let delta: string | undefined;
    try {
      delta = JSON.parse(data)?.delta;
    } catch {
      continue; // ignore malformed event
    }
    if (!delta) continue;

    if (isText) {
      yield { content: delta, done: false };
    } else {
      yield { content: "", reasoning_content: delta, done: false };
    }
  }
}
|
||||||
|
|
||||||
|
/**
 * Selects the stream parser that matches the endpoint the request was sent to.
 *
 * Anything other than `v1/messages` or `v1/responses` is parsed as a
 * chat-completions stream.
 *
 * @param endpoint - Endpoint the response came from.
 * @param reader - Reader over the response body.
 * @returns An async generator of parsed chunks.
 */
function parseStream(
  endpoint: Endpoint,
  reader: ReadableStreamDefaultReader<Uint8Array>
): AsyncGenerator<StreamChunk> {
  switch (endpoint) {
    case "v1/messages":
      return parseMessagesStream(reader);
    case "v1/responses":
      return parseResponsesStream(reader);
    case "v1/chat/completions":
    default:
      return parseChatCompletionsStream(reader);
  }
}
|
||||||
|
|
||||||
export async function* streamChatCompletion(
|
export async function* streamChatCompletion(
|
||||||
model: string,
|
model: string,
|
||||||
messages: ChatMessage[],
|
messages: ChatMessage[],
|
||||||
signal?: AbortSignal,
|
signal?: AbortSignal,
|
||||||
options?: ChatOptions
|
options?: ChatOptions
|
||||||
): AsyncGenerator<StreamChunk> {
|
): AsyncGenerator<StreamChunk> {
|
||||||
const request: ChatCompletionRequest = {
|
const endpoint = options?.endpoint ?? "v1/chat/completions";
|
||||||
model,
|
const { url, body } = buildRequest(endpoint, model, messages, options);
|
||||||
messages,
|
|
||||||
stream: true,
|
|
||||||
temperature: options?.temperature,
|
|
||||||
};
|
|
||||||
|
|
||||||
const response = await fetch("/v1/chat/completions", {
|
const response = await fetch(url, {
|
||||||
method: "POST",
|
method: "POST",
|
||||||
headers: {
|
headers: {
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
},
|
},
|
||||||
body: JSON.stringify(request),
|
body: JSON.stringify(body),
|
||||||
signal,
|
signal,
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -68,39 +320,11 @@ export async function* streamChatCompletion(
|
|||||||
throw new Error("Response body is not readable");
|
throw new Error("Response body is not readable");
|
||||||
}
|
}
|
||||||
|
|
||||||
const decoder = new TextDecoder();
|
|
||||||
let buffer = "";
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
while (true) {
|
for await (const chunk of parseStream(endpoint, reader)) {
|
||||||
const { done, value } = await reader.read();
|
yield chunk;
|
||||||
|
if (chunk.done) return;
|
||||||
if (done) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
buffer += decoder.decode(value, { stream: true });
|
|
||||||
const lines = buffer.split("\n");
|
|
||||||
buffer = lines.pop() || "";
|
|
||||||
|
|
||||||
for (const line of lines) {
|
|
||||||
const result = parseSSELine(line);
|
|
||||||
if (result?.done) {
|
|
||||||
yield result;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (result) {
|
|
||||||
yield result;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Process any remaining buffer
|
|
||||||
const result = parseSSELine(buffer);
|
|
||||||
if (result && !result.done) {
|
|
||||||
yield result;
|
|
||||||
}
|
|
||||||
|
|
||||||
yield { content: "", done: true };
|
yield { content: "", done: true };
|
||||||
} finally {
|
} finally {
|
||||||
reader.releaseLock();
|
reader.releaseLock();
|
||||||
|
|||||||
@@ -0,0 +1,167 @@
|
|||||||
|
import { describe, it, expect } from "vitest";
|
||||||
|
import { calculateHistogramData } from "./histogram";
|
||||||
|
|
||||||
|
describe("calculateHistogramData", () => {
  /** Adds up all bin counts so a test can check that every value landed in exactly one bin. */
  const totalCount = (bins: number[]): number => bins.reduce((sum, count) => sum + count, 0);

  describe("edge cases", () => {
    it("returns null for empty input", () => {
      expect(calculateHistogramData([])).toBeNull();
    });

    it("handles single value", () => {
      const result = calculateHistogramData([42]);
      expect(result).not.toBeNull();
      expect(result!.bins).toEqual([1]);
      expect(result!.min).toBe(42);
      expect(result!.max).toBe(42);
      expect(result!.binSize).toBe(0);
      expect(result!.p50).toBe(42);
      expect(result!.p95).toBe(42);
      expect(result!.p99).toBe(42);
    });

    it("handles all identical values", () => {
      const result = calculateHistogramData([10, 10, 10, 10, 10]);
      expect(result).not.toBeNull();
      expect(result!.bins).toEqual([5]);
      expect(result!.min).toBe(10);
      expect(result!.max).toBe(10);
      expect(result!.binSize).toBe(0);
    });

    it("handles two distinct values", () => {
      const result = calculateHistogramData([10, 20]);
      expect(result).not.toBeNull();
      expect(result!.min).toBe(10);
      expect(result!.max).toBe(20);
      expect(result!.p50).toBe(15);
      expect(totalCount(result!.bins)).toBe(2);
    });
  });

  describe("bin distribution", () => {
    it("bins sum to total number of values", () => {
      const values = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
      const result = calculateHistogramData(values);
      expect(result).not.toBeNull();
      expect(totalCount(result!.bins)).toBe(values.length);
    });

    it("distributes uniform values across bins", () => {
      const values = Array.from({ length: 100 }, (_, i) => i);
      const result = calculateHistogramData(values);
      expect(result).not.toBeNull();
      expect(result!.bins.length).toBe(8);
      expect(totalCount(result!.bins)).toBe(100);
    });

    it("places values in correct bins", () => {
      // Range 1..9 over 3 bins: bin width 8/3, so 1 -> bin 0, 5 -> bin 1, 9 -> last bin.
      const values = [1, 1, 1, 5, 5, 9, 9, 9];
      const result = calculateHistogramData(values, { minBins: 3, maxBins: 3 });
      expect(result).not.toBeNull();
      expect(result!.bins.length).toBe(3);
      expect(result!.bins).toEqual([3, 2, 3]);
      expect(totalCount(result!.bins)).toBe(8);
    });

    it("handles skewed distribution", () => {
      const values = [1, 1, 1, 1, 1, 100];
      const result = calculateHistogramData(values);
      expect(result).not.toBeNull();
      expect(totalCount(result!.bins)).toBe(6);
    });
  });

  describe("percentiles", () => {
    it("calculates correct p50 for even-length array", () => {
      const result = calculateHistogramData([1, 2, 3, 4]);
      expect(result).not.toBeNull();
      expect(result!.p50).toBe(2.5);
    });

    it("calculates correct p50 for odd-length array", () => {
      const result = calculateHistogramData([1, 2, 3, 4, 5]);
      expect(result).not.toBeNull();
      expect(result!.p50).toBe(3);
    });

    it("calculates p99 with interpolation", () => {
      const values = Array.from({ length: 100 }, (_, i) => i + 1);
      const result = calculateHistogramData(values);
      expect(result).not.toBeNull();
      expect(result!.p99).toBeCloseTo(99.01);
    });

    it("calculates p95 with interpolation", () => {
      const values = Array.from({ length: 100 }, (_, i) => i + 1);
      const result = calculateHistogramData(values);
      expect(result).not.toBeNull();
      expect(result!.p95).toBeCloseTo(95.05);
    });

    it("percentiles are monotonically increasing", () => {
      // Seeded linear congruential generator: pseudo-random input that is the
      // same on every run, so a failure can be reproduced.
      let seed = 42;
      const nextRandom = (): number => {
        seed = (seed * 1664525 + 1013904223) % 4294967296;
        return seed / 4294967296;
      };
      const values = Array.from({ length: 200 }, () => nextRandom() * 100);
      const result = calculateHistogramData(values);
      expect(result).not.toBeNull();
      expect(result!.p50).toBeLessThanOrEqual(result!.p95);
      expect(result!.p95).toBeLessThanOrEqual(result!.p99);
    });
  });

  describe("bin count adaptation", () => {
    it("uses minimum bins for small datasets", () => {
      // n=8: sturges=4, clamped up to minBins=5
      const values = Array.from({ length: 8 }, (_, i) => i);
      const result = calculateHistogramData(values);
      expect(result!.bins.length).toBe(5);
    });

    it("scales bins with dataset size", () => {
      // n=100: sturges=8
      const values = Array.from({ length: 100 }, (_, i) => i);
      const result = calculateHistogramData(values);
      expect(result!.bins.length).toBe(8);
    });

    it("caps bins at maximum", () => {
      // n=1000: sturges=11, clamped down to maxBins=10
      const values = Array.from({ length: 1000 }, (_, i) => i);
      const result = calculateHistogramData(values, { minBins: 5, maxBins: 10 });
      expect(result!.bins.length).toBe(10);
    });

    it("respects custom options", () => {
      // n=100: sturges=8, within [minBins=5, maxBins=10]
      const values = Array.from({ length: 100 }, (_, i) => i);
      const result = calculateHistogramData(values, { minBins: 5, maxBins: 10 });
      expect(result!.bins.length).toBe(8);
    });
  });

  describe("min and max", () => {
    it("correctly identifies min and max", () => {
      const result = calculateHistogramData([5, 3, 8, 1, 9, 2]);
      expect(result!.min).toBe(1);
      expect(result!.max).toBe(9);
    });

    it("handles negative values", () => {
      const result = calculateHistogramData([-10, -5, 0, 5, 10]);
      expect(result!.min).toBe(-10);
      expect(result!.max).toBe(10);
    });

    it("handles floating point values", () => {
      const result = calculateHistogramData([1.5, 2.7, 3.14, 0.5, 4.99]);
      expect(result!.min).toBe(0.5);
      expect(result!.max).toBe(4.99);
    });
  });
});
|
||||||
@@ -0,0 +1,71 @@
|
|||||||
|
import type { HistogramData } from "./types";
|
||||||
|
|
||||||
|
/** Limits on how many bins `calculateHistogramData` may create. */
export interface HistogramOptions {
  /** Lower limit for the computed bin count; treated as 5 when omitted. */
  minBins?: number;
  /** Upper limit for the computed bin count; treated as 20 when omitted. */
  maxBins?: number;
}
|
||||||
|
|
||||||
|
/** Bin limits used when a caller passes no options to `calculateHistogramData`. */
const DEFAULT_OPTIONS: HistogramOptions = {
  minBins: 5,
  maxBins: 20,
};
|
||||||
|
|
||||||
|
/**
 * Returns the `p`-th percentile of an ascending-sorted array, using linear
 * interpolation between the two nearest ranks.
 *
 * @param sorted - Values already sorted in ascending order.
 * @param p - Percentile to compute; values outside 0..100 are clamped to that range.
 * @returns The interpolated value, `0` for an empty array, or the sole element
 *          of a one-element array.
 */
function percentile(sorted: number[], p: number): number {
  if (sorted.length === 0) return 0;
  if (sorted.length === 1) return sorted[0];

  // Clamping keeps the rank inside the array; otherwise p > 100 or p < 0
  // would read past either end and return NaN.
  const clamped = Math.min(100, Math.max(0, p));
  const rank = (clamped / 100) * (sorted.length - 1);
  const lower = Math.floor(rank);
  const upper = Math.ceil(rank);
  const fraction = rank - lower;

  return sorted[lower] + fraction * (sorted[upper] - sorted[lower]);
}
|
||||||
|
|
||||||
|
/**
 * Builds equal-width histogram bins plus p50/p95/p99 for a list of numbers.
 *
 * The bin count follows Sturges' rule (`ceil(log2(n)) + 1`), limited to the
 * range given by `options`. When every value is the same, a single bin holding
 * all values is returned with `binSize` 0.
 *
 * Non-finite inputs (`NaN`, `±Infinity`) are ignored, because they cannot be
 * ordered or assigned to a bin. Bin limits are rounded down to whole numbers
 * and raised to at least 1, so a fractional or zero limit cannot produce an
 * invalid bin array.
 *
 * @param values - Samples in any order; the array is not modified.
 * @param options - Optional lower/upper limits for the bin count.
 * @returns Histogram data, or `null` when there are no finite values.
 */
export function calculateHistogramData(
  values: number[],
  options: HistogramOptions = DEFAULT_OPTIONS,
): HistogramData | null {
  // `filter` returns a fresh array, so sorting it does not touch the caller's data.
  const sorted = values.filter((v) => Number.isFinite(v)).sort((a, b) => a - b);
  if (sorted.length === 0) return null;

  const min = sorted[0];
  const max = sorted[sorted.length - 1];

  const p50 = percentile(sorted, 50);
  const p95 = percentile(sorted, 95);
  const p99 = percentile(sorted, 99);

  if (min === max) {
    // Zero-width range: dividing by the range below would be meaningless.
    return { bins: [sorted.length], min, max, binSize: 0, p50, p95, p99 };
  }

  const { minBins = 5, maxBins = 20 } = options;
  const toBinLimit = (limit: number, fallback: number): number =>
    Number.isNaN(limit) ? fallback : Math.max(1, Math.floor(limit));

  const sturges = Math.ceil(Math.log2(sorted.length)) + 1;
  const binCount = Math.min(toBinLimit(maxBins, 20), Math.max(toBinLimit(minBins, 5), sturges));
  const binSize = (max - min) / binCount;

  const bins = new Array<number>(binCount).fill(0);
  for (const value of sorted) {
    // The maximum value computes to index `binCount`; fold it into the last bin.
    const binIndex = Math.min(Math.floor((value - min) / binSize), binCount - 1);
    bins[binIndex]++;
  }

  return { bins, min, max, binSize, p50, p95, p99 };
}
|
||||||
@@ -12,15 +12,22 @@ export interface Model {
|
|||||||
aliases?: string[];
|
aliases?: string[];
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface Metrics {
|
export interface TokenMetrics {
|
||||||
id: number;
|
|
||||||
timestamp: string;
|
|
||||||
model: string;
|
|
||||||
cache_tokens: number;
|
cache_tokens: number;
|
||||||
input_tokens: number;
|
input_tokens: number;
|
||||||
output_tokens: number;
|
output_tokens: number;
|
||||||
prompt_per_second: number;
|
prompt_per_second: number;
|
||||||
tokens_per_second: number;
|
tokens_per_second: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface ActivityLogEntry {
|
||||||
|
id: number;
|
||||||
|
timestamp: string;
|
||||||
|
model: string;
|
||||||
|
req_path: string;
|
||||||
|
resp_content_type: string;
|
||||||
|
resp_status_code: number;
|
||||||
|
tokens: TokenMetrics;
|
||||||
duration_ms: number;
|
duration_ms: number;
|
||||||
has_capture: boolean;
|
has_capture: boolean;
|
||||||
}
|
}
|
||||||
@@ -43,11 +50,61 @@ export interface InFlightStats {
|
|||||||
total: number;
|
total: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export interface NetIOStat {
|
||||||
|
name: string;
|
||||||
|
bytes_recv: number;
|
||||||
|
bytes_sent: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface SysStat {
|
||||||
|
timestamp: string;
|
||||||
|
cpu_util_per_core: number[];
|
||||||
|
mem_total_mb: number;
|
||||||
|
mem_used_mb: number;
|
||||||
|
mem_free_mb: number;
|
||||||
|
swap_total_mb: number;
|
||||||
|
swap_used_mb: number;
|
||||||
|
load_avg_1: number;
|
||||||
|
load_avg_5: number;
|
||||||
|
load_avg_15: number;
|
||||||
|
net_io: NetIOStat[];
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface GpuStat {
|
||||||
|
timestamp: string;
|
||||||
|
id: number;
|
||||||
|
name: string;
|
||||||
|
uuid: string;
|
||||||
|
temp_c: number;
|
||||||
|
vram_temp_c: number;
|
||||||
|
gpu_util_pct: number;
|
||||||
|
mem_util_pct: number;
|
||||||
|
mem_used_mb: number;
|
||||||
|
mem_total_mb: number;
|
||||||
|
fan_speed_pct: number;
|
||||||
|
power_draw_w: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface PerformanceResponse {
|
||||||
|
sys_stats: SysStat[];
|
||||||
|
gpu_stats: GpuStat[];
|
||||||
|
}
|
||||||
|
|
||||||
export interface APIEventEnvelope {
|
export interface APIEventEnvelope {
|
||||||
type: "modelStatus" | "logData" | "metrics" | "inflight";
|
type: "modelStatus" | "logData" | "metrics" | "inflight" | "perfsys" | "perfgpu";
|
||||||
data: string;
|
data: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export interface HistogramData {
|
||||||
|
bins: number[];
|
||||||
|
min: number;
|
||||||
|
max: number;
|
||||||
|
binSize: number;
|
||||||
|
p99: number;
|
||||||
|
p95: number;
|
||||||
|
p50: number;
|
||||||
|
}
|
||||||
|
|
||||||
export interface VersionInfo {
|
export interface VersionInfo {
|
||||||
build_date: string;
|
build_date: string;
|
||||||
commit: string;
|
commit: string;
|
||||||
|
|||||||
@@ -1,9 +1,89 @@
|
|||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
import { metrics, getCapture } from "../stores/api";
|
import { metrics, getCapture } from "../stores/api";
|
||||||
|
import ActivityStats from "../components/ActivityStats.svelte";
|
||||||
import Tooltip from "../components/Tooltip.svelte";
|
import Tooltip from "../components/Tooltip.svelte";
|
||||||
import CaptureDialog from "../components/CaptureDialog.svelte";
|
import CaptureDialog from "../components/CaptureDialog.svelte";
|
||||||
|
import { persistentStore } from "../stores/persistent";
|
||||||
|
import { onMount } from "svelte";
|
||||||
import type { ReqRespCapture } from "../lib/types";
|
import type { ReqRespCapture } from "../lib/types";
|
||||||
|
|
||||||
|
type ColumnKey =
|
||||||
|
| "id"
|
||||||
|
| "time"
|
||||||
|
| "model"
|
||||||
|
| "req_path"
|
||||||
|
| "resp_status_code"
|
||||||
|
| "resp_content_type"
|
||||||
|
| "cached"
|
||||||
|
| "prompt"
|
||||||
|
| "generated"
|
||||||
|
| "prompt_speed"
|
||||||
|
| "gen_speed"
|
||||||
|
| "duration"
|
||||||
|
| "capture";
|
||||||
|
|
||||||
|
interface ColumnDef {
|
||||||
|
key: ColumnKey;
|
||||||
|
label: string;
|
||||||
|
defaultVisible: boolean;
|
||||||
|
}
|
||||||
|
|
||||||
|
const columns: ColumnDef[] = [
|
||||||
|
{ key: "id", label: "ID", defaultVisible: true },
|
||||||
|
{ key: "time", label: "Time", defaultVisible: true },
|
||||||
|
{ key: "model", label: "Model", defaultVisible: true },
|
||||||
|
{ key: "req_path", label: "Path", defaultVisible: false },
|
||||||
|
{ key: "resp_status_code", label: "Status", defaultVisible: false },
|
||||||
|
{ key: "resp_content_type", label: "Content-Type", defaultVisible: false },
|
||||||
|
{ key: "cached", label: "Cached", defaultVisible: true },
|
||||||
|
{ key: "prompt", label: "Prompt", defaultVisible: true },
|
||||||
|
{ key: "generated", label: "Generated", defaultVisible: true },
|
||||||
|
{ key: "prompt_speed", label: "Prompt Speed", defaultVisible: true },
|
||||||
|
{ key: "gen_speed", label: "Gen Speed", defaultVisible: true },
|
||||||
|
{ key: "duration", label: "Duration", defaultVisible: true },
|
||||||
|
{ key: "capture", label: "Capture", defaultVisible: true },
|
||||||
|
];
|
||||||
|
|
||||||
|
const defaultVisibleKeys = columns.filter((c) => c.defaultVisible).map((c) => c.key);
|
||||||
|
|
||||||
|
const visibleColumns = persistentStore<ColumnKey[]>(
|
||||||
|
"activity-columns",
|
||||||
|
defaultVisibleKeys
|
||||||
|
);
|
||||||
|
|
||||||
|
let columnsMenuOpen = $state(false);
|
||||||
|
let dropdownContainer: HTMLDivElement | null = null;
|
||||||
|
|
||||||
|
// While the page is mounted, close the column picker on Escape or on any
// click that lands outside the dropdown container.
onMount(() => {
  const closeOnEscape = (event: KeyboardEvent) => {
    if (columnsMenuOpen && event.key === "Escape") {
      columnsMenuOpen = false;
    }
  };

  const closeOnOutsideClick = (event: MouseEvent) => {
    if (!columnsMenuOpen || !dropdownContainer) return;
    const clickedInside = dropdownContainer.contains(event.target as Node);
    if (!clickedInside) {
      columnsMenuOpen = false;
    }
  };

  document.addEventListener("keydown", closeOnEscape);
  document.addEventListener("click", closeOnOutsideClick);

  // Cleanup: detach both document-level listeners.
  return () => {
    document.removeEventListener("keydown", closeOnEscape);
    document.removeEventListener("click", closeOnOutsideClick);
  };
});
|
||||||
|
|
||||||
|
/**
 * Shows or hides one table column by updating the `visibleColumns` store.
 *
 * A hidden column is appended to the visible list. A visible column is
 * removed, except when it is the only one left: the table always keeps at
 * least one column.
 */
function toggleColumn(key: ColumnKey) {
  const shown = $visibleColumns;

  if (!shown.includes(key)) {
    visibleColumns.set([...shown, key]);
    return;
  }

  // Refuse to hide the last remaining column.
  if (shown.length <= 1) return;

  visibleColumns.set(shown.filter((k) => k !== key));
}
|
||||||
|
|
||||||
function formatSpeed(speed: number): string {
|
function formatSpeed(speed: number): string {
|
||||||
return speed < 0 ? "unknown" : speed.toFixed(2) + " t/s";
|
return speed < 0 ? "unknown" : speed.toFixed(2) + " t/s";
|
||||||
}
|
}
|
||||||
@@ -49,10 +129,8 @@
|
|||||||
loadingCaptureId = id;
|
loadingCaptureId = id;
|
||||||
const capture = await getCapture(id);
|
const capture = await getCapture(id);
|
||||||
loadingCaptureId = null;
|
loadingCaptureId = null;
|
||||||
if (capture) {
|
selectedCapture = capture;
|
||||||
selectedCapture = capture;
|
dialogOpen = true;
|
||||||
dialogOpen = true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function closeDialog() {
|
function closeDialog() {
|
||||||
@@ -62,64 +140,160 @@
|
|||||||
</script>
|
</script>
|
||||||
|
|
||||||
<div class="p-2">
|
<div class="p-2">
|
||||||
<h1 class="text-2xl font-bold">Activity</h1>
|
<div class="mt-4 mb-4">
|
||||||
|
<ActivityStats />
|
||||||
|
</div>
|
||||||
|
|
||||||
{#if $metrics.length === 0}
|
<div class="card overflow-auto relative min-h-[30rem]">
|
||||||
<div class="text-center py-8">
|
<div class="flex justify-end px-4" bind:this={dropdownContainer}>
|
||||||
<p class="text-gray-600">No metrics data available</p>
|
<div class="relative">
|
||||||
|
<button
|
||||||
|
class="w-8 h-8 flex items-center justify-center rounded hover:bg-secondary-hover transition-colors"
|
||||||
|
onclick={() => (columnsMenuOpen = !columnsMenuOpen)}
|
||||||
|
title="Select columns"
|
||||||
|
>
|
||||||
|
<svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
|
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 6V4m0 2a2 2 0 100 4m0-4a2 2 0 110 4m-6 8a2 2 0 100-4m0 4a2 2 0 110-4m0 4v2m0-6V4m6 6v10m6-2a2 2 0 100-4m0 4a2 2 0 110-4m0 4v2m0-6V4"></path>
|
||||||
|
</svg>
|
||||||
|
</button>
|
||||||
|
{#if columnsMenuOpen}
|
||||||
|
<div class="absolute right-0 top-full mt-1 bg-surface border border-gray-200 dark:border-white/10 rounded shadow-lg z-10 py-1 min-w-[16rem]">
|
||||||
|
<div class="px-3 py-2 text-xs font-medium uppercase tracking-wider text-gray-500 dark:text-gray-400 border-b border-gray-200 dark:border-white/10">
|
||||||
|
Columns
|
||||||
|
</div>
|
||||||
|
{#each columns as col (col.key)}
|
||||||
|
<label
|
||||||
|
class="flex items-center gap-2 px-3 py-1.5 text-sm cursor-pointer hover:bg-secondary-hover transition-colors"
|
||||||
|
>
|
||||||
|
<input
|
||||||
|
type="checkbox"
|
||||||
|
checked={$visibleColumns.includes(col.key)}
|
||||||
|
onchange={() => toggleColumn(col.key)}
|
||||||
|
class="rounded"
|
||||||
|
/>
|
||||||
|
{col.label}
|
||||||
|
</label>
|
||||||
|
{/each}
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
{:else}
|
|
||||||
<div class="card overflow-auto">
|
<table class="min-w-full divide-y">
|
||||||
<table class="min-w-full divide-y">
|
<thead class="border-gray-200 dark:border-white/10">
|
||||||
<thead class="border-gray-200 dark:border-white/10">
|
<tr class="text-left text-xs uppercase tracking-wider">
|
||||||
<tr class="text-left text-xs uppercase tracking-wider">
|
{#if $visibleColumns.includes("id")}
|
||||||
<th class="px-6 py-3">ID</th>
|
<th class="px-6 py-3">ID</th>
|
||||||
|
{/if}
|
||||||
|
{#if $visibleColumns.includes("time")}
|
||||||
<th class="px-6 py-3">Time</th>
|
<th class="px-6 py-3">Time</th>
|
||||||
|
{/if}
|
||||||
|
{#if $visibleColumns.includes("model")}
|
||||||
<th class="px-6 py-3">Model</th>
|
<th class="px-6 py-3">Model</th>
|
||||||
|
{/if}
|
||||||
|
{#if $visibleColumns.includes("req_path")}
|
||||||
|
<th class="px-6 py-3">Path</th>
|
||||||
|
{/if}
|
||||||
|
{#if $visibleColumns.includes("resp_status_code")}
|
||||||
|
<th class="px-6 py-3">Status</th>
|
||||||
|
{/if}
|
||||||
|
{#if $visibleColumns.includes("resp_content_type")}
|
||||||
|
<th class="px-6 py-3">Content-Type</th>
|
||||||
|
{/if}
|
||||||
|
{#if $visibleColumns.includes("cached")}
|
||||||
<th class="px-6 py-3">
|
<th class="px-6 py-3">
|
||||||
Cached <Tooltip content="prompt tokens from cache" />
|
Cached <Tooltip content="prompt tokens from cache" />
|
||||||
</th>
|
</th>
|
||||||
|
{/if}
|
||||||
|
{#if $visibleColumns.includes("prompt")}
|
||||||
<th class="px-6 py-3">
|
<th class="px-6 py-3">
|
||||||
Prompt <Tooltip content="new prompt tokens processed" />
|
Prompt <Tooltip content="new prompt tokens processed" />
|
||||||
</th>
|
</th>
|
||||||
|
{/if}
|
||||||
|
{#if $visibleColumns.includes("generated")}
|
||||||
<th class="px-6 py-3">Generated</th>
|
<th class="px-6 py-3">Generated</th>
|
||||||
<th class="px-6 py-3">Prompt Processing</th>
|
{/if}
|
||||||
<th class="px-6 py-3">Generation Speed</th>
|
{#if $visibleColumns.includes("prompt_speed")}
|
||||||
|
<th class="px-6 py-3">Prompt Speed</th>
|
||||||
|
{/if}
|
||||||
|
{#if $visibleColumns.includes("gen_speed")}
|
||||||
|
<th class="px-6 py-3">Gen Speed</th>
|
||||||
|
{/if}
|
||||||
|
{#if $visibleColumns.includes("duration")}
|
||||||
<th class="px-6 py-3">Duration</th>
|
<th class="px-6 py-3">Duration</th>
|
||||||
|
{/if}
|
||||||
|
{#if $visibleColumns.includes("capture")}
|
||||||
<th class="px-6 py-3">Capture</th>
|
<th class="px-6 py-3">Capture</th>
|
||||||
|
{/if}
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody class="divide-y">
|
||||||
|
{#if sortedMetrics.length === 0}
|
||||||
|
<tr>
|
||||||
|
<td colspan={$visibleColumns.length} class="px-6 py-8 text-center text-sm text-gray-500 dark:text-gray-400">
|
||||||
|
No activity recorded
|
||||||
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
</thead>
|
{:else}
|
||||||
<tbody class="divide-y">
|
|
||||||
{#each sortedMetrics as metric (metric.id)}
|
{#each sortedMetrics as metric (metric.id)}
|
||||||
<tr class="whitespace-nowrap text-sm border-gray-200 dark:border-white/10">
|
<tr class="whitespace-nowrap text-sm border-gray-200 dark:border-white/10">
|
||||||
<td class="px-4 py-4">{metric.id + 1}</td>
|
{#if $visibleColumns.includes("id")}
|
||||||
<td class="px-6 py-4">{formatRelativeTime(metric.timestamp)}</td>
|
<td class="px-4 py-4">{metric.id + 1}</td>
|
||||||
<td class="px-6 py-4">{metric.model}</td>
|
{/if}
|
||||||
<td class="px-6 py-4">{metric.cache_tokens > 0 ? metric.cache_tokens.toLocaleString() : "-"}</td>
|
{#if $visibleColumns.includes("time")}
|
||||||
<td class="px-6 py-4">{metric.input_tokens.toLocaleString()}</td>
|
<td class="px-6 py-4">{formatRelativeTime(metric.timestamp)}</td>
|
||||||
<td class="px-6 py-4">{metric.output_tokens.toLocaleString()}</td>
|
{/if}
|
||||||
<td class="px-6 py-4">{formatSpeed(metric.prompt_per_second)}</td>
|
{#if $visibleColumns.includes("model")}
|
||||||
<td class="px-6 py-4">{formatSpeed(metric.tokens_per_second)}</td>
|
<td class="px-6 py-4">{metric.model}</td>
|
||||||
<td class="px-6 py-4">{formatDuration(metric.duration_ms)}</td>
|
{/if}
|
||||||
<td class="px-6 py-4">
|
{#if $visibleColumns.includes("req_path")}
|
||||||
{#if metric.has_capture}
|
<td class="px-6 py-4">{metric.req_path || "-"}</td>
|
||||||
<button
|
{/if}
|
||||||
onclick={() => viewCapture(metric.id)}
|
{#if $visibleColumns.includes("resp_status_code")}
|
||||||
disabled={loadingCaptureId === metric.id}
|
<td class="px-6 py-4">{metric.resp_status_code || "-"}</td>
|
||||||
class="btn btn--sm"
|
{/if}
|
||||||
>
|
{#if $visibleColumns.includes("resp_content_type")}
|
||||||
{loadingCaptureId === metric.id ? "..." : "View"}
|
<td class="px-6 py-4">{metric.resp_content_type || "-"}</td>
|
||||||
</button>
|
{/if}
|
||||||
{:else}
|
{#if $visibleColumns.includes("cached")}
|
||||||
<span class="text-txtsecondary">-</span>
|
<td class="px-6 py-4">{metric.tokens.cache_tokens > 0 ? metric.tokens.cache_tokens.toLocaleString() : "-"}</td>
|
||||||
{/if}
|
{/if}
|
||||||
</td>
|
{#if $visibleColumns.includes("prompt")}
|
||||||
|
<td class="px-6 py-4">{metric.tokens.input_tokens.toLocaleString()}</td>
|
||||||
|
{/if}
|
||||||
|
{#if $visibleColumns.includes("generated")}
|
||||||
|
<td class="px-6 py-4">{metric.tokens.output_tokens.toLocaleString()}</td>
|
||||||
|
{/if}
|
||||||
|
{#if $visibleColumns.includes("prompt_speed")}
|
||||||
|
<td class="px-6 py-4">{formatSpeed(metric.tokens.prompt_per_second)}</td>
|
||||||
|
{/if}
|
||||||
|
{#if $visibleColumns.includes("gen_speed")}
|
||||||
|
<td class="px-6 py-4">{formatSpeed(metric.tokens.tokens_per_second)}</td>
|
||||||
|
{/if}
|
||||||
|
{#if $visibleColumns.includes("duration")}
|
||||||
|
<td class="px-6 py-4">{formatDuration(metric.duration_ms)}</td>
|
||||||
|
{/if}
|
||||||
|
{#if $visibleColumns.includes("capture")}
|
||||||
|
<td class="px-6 py-4">
|
||||||
|
{#if metric.has_capture}
|
||||||
|
<button
|
||||||
|
onclick={() => viewCapture(metric.id)}
|
||||||
|
disabled={loadingCaptureId === metric.id}
|
||||||
|
class="btn btn--sm"
|
||||||
|
>
|
||||||
|
{loadingCaptureId === metric.id ? "..." : "View"}
|
||||||
|
</button>
|
||||||
|
{:else}
|
||||||
|
<span class="text-txtsecondary">-</span>
|
||||||
|
{/if}
|
||||||
|
</td>
|
||||||
|
{/if}
|
||||||
</tr>
|
</tr>
|
||||||
{/each}
|
{/each}
|
||||||
</tbody>
|
{/if}
|
||||||
</table>
|
</tbody>
|
||||||
</div>
|
</table>
|
||||||
{/if}
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<CaptureDialog capture={selectedCapture} open={dialogOpen} onclose={closeDialog} />
|
<CaptureDialog capture={selectedCapture} open={dialogOpen} onclose={closeDialog} />
|
||||||
|
|||||||
@@ -10,49 +10,35 @@
|
|||||||
const viewModeStore = persistentStore<ViewMode>("logviewer-view-mode", "panels");
|
const viewModeStore = persistentStore<ViewMode>("logviewer-view-mode", "panels");
|
||||||
|
|
||||||
let direction = $derived<"horizontal" | "vertical">(
|
let direction = $derived<"horizontal" | "vertical">(
|
||||||
$screenWidth === "xs" || $screenWidth === "sm" ? "vertical" : "horizontal"
|
$screenWidth === "xs" || $screenWidth === "sm" ? "vertical" : "horizontal",
|
||||||
);
|
);
|
||||||
|
|
||||||
function cycleViewMode(): void {
|
|
||||||
const modes: ViewMode[] = ["panels", "proxy", "upstream"];
|
|
||||||
const currentIndex = modes.indexOf($viewModeStore);
|
|
||||||
const nextIndex = (currentIndex + 1) % modes.length;
|
|
||||||
viewModeStore.set(modes[nextIndex]);
|
|
||||||
}
|
|
||||||
|
|
||||||
function getViewModeIcon(mode: ViewMode): string {
|
|
||||||
switch (mode) {
|
|
||||||
case "proxy":
|
|
||||||
return "P";
|
|
||||||
case "upstream":
|
|
||||||
return "U";
|
|
||||||
case "panels":
|
|
||||||
return "⊞";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function getViewModeLabel(mode: ViewMode): string {
|
|
||||||
switch (mode) {
|
|
||||||
case "proxy":
|
|
||||||
return "Proxy";
|
|
||||||
case "upstream":
|
|
||||||
return "Upstream";
|
|
||||||
case "panels":
|
|
||||||
return "Panels";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<div class="flex flex-col h-full w-full gap-2">
|
<div class="flex flex-col h-full w-full gap-2">
|
||||||
<div class="flex items-center gap-2">
|
<div class="flex items-center gap-1">
|
||||||
<button
|
<button
|
||||||
onclick={cycleViewMode}
|
onclick={() => viewModeStore.set("panels")}
|
||||||
class="btn flex items-center gap-2 text-sm"
|
class:btn={true}
|
||||||
title="Toggle view mode"
|
class:bg-primary={$viewModeStore === "panels"}
|
||||||
aria-label="Toggle view mode: {getViewModeLabel($viewModeStore)}"
|
class:text-btn-primary-text={$viewModeStore === "panels"}
|
||||||
>
|
>
|
||||||
<span class="font-mono font-bold">{getViewModeIcon($viewModeStore)}</span>
|
Both
|
||||||
<span>{getViewModeLabel($viewModeStore)}</span>
|
</button>
|
||||||
|
<button
|
||||||
|
onclick={() => viewModeStore.set("proxy")}
|
||||||
|
class:btn={true}
|
||||||
|
class:bg-primary={$viewModeStore === "proxy"}
|
||||||
|
class:text-btn-primary-text={$viewModeStore === "proxy"}
|
||||||
|
>
|
||||||
|
Proxy
|
||||||
|
</button>
|
||||||
|
<button
|
||||||
|
onclick={() => viewModeStore.set("upstream")}
|
||||||
|
class:btn={true}
|
||||||
|
class:bg-primary={$viewModeStore === "upstream"}
|
||||||
|
class:text-btn-primary-text={$viewModeStore === "upstream"}
|
||||||
|
>
|
||||||
|
Upstream
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,6 @@
|
|||||||
import { isNarrow } from "../stores/theme";
|
import { isNarrow } from "../stores/theme";
|
||||||
import { upstreamLogs } from "../stores/api";
|
import { upstreamLogs } from "../stores/api";
|
||||||
import ModelsPanel from "../components/ModelsPanel.svelte";
|
import ModelsPanel from "../components/ModelsPanel.svelte";
|
||||||
import StatsPanel from "../components/StatsPanel.svelte";
|
|
||||||
import LogPanel from "../components/LogPanel.svelte";
|
import LogPanel from "../components/LogPanel.svelte";
|
||||||
import ResizablePanels from "../components/ResizablePanels.svelte";
|
import ResizablePanels from "../components/ResizablePanels.svelte";
|
||||||
|
|
||||||
@@ -14,13 +13,6 @@
|
|||||||
<ModelsPanel />
|
<ModelsPanel />
|
||||||
{/snippet}
|
{/snippet}
|
||||||
{#snippet rightPanel()}
|
{#snippet rightPanel()}
|
||||||
<div class="flex flex-col h-full space-y-4">
|
<LogPanel id="modelsupstream" title="Upstream Logs" logData={$upstreamLogs} />
|
||||||
{#if direction === "horizontal"}
|
|
||||||
<StatsPanel />
|
|
||||||
{/if}
|
|
||||||
<div class="flex-1 min-h-0">
|
|
||||||
<LogPanel id="modelsupstream" title="Upstream Logs" logData={$upstreamLogs} />
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
{/snippet}
|
{/snippet}
|
||||||
</ResizablePanels>
|
</ResizablePanels>
|
||||||
|
|||||||
@@ -0,0 +1,508 @@
|
|||||||
|
<script lang="ts">
|
||||||
|
import { onMount } from "svelte";
|
||||||
|
import { fetchPerformance } from "../stores/api";
|
||||||
|
import { persistentStore } from "../stores/persistent";
|
||||||
|
import type { SysStat, GpuStat } from "../lib/types";
|
||||||
|
import PerformanceChart from "../components/PerformanceChart.svelte";
|
||||||
|
|
||||||
|
const COLORS = [
|
||||||
|
"#3b82f6",
|
||||||
|
"#ef4444",
|
||||||
|
"#10b981",
|
||||||
|
"#f59e0b",
|
||||||
|
"#8b5cf6",
|
||||||
|
"#ec4899",
|
||||||
|
"#06b6d4",
|
||||||
|
"#84cc16",
|
||||||
|
"#f97316",
|
||||||
|
"#14b8a6",
|
||||||
|
"#a855f7",
|
||||||
|
"#e11d48",
|
||||||
|
"#0ea5e9",
|
||||||
|
"#eab308",
|
||||||
|
"#d946ef",
|
||||||
|
"#22d3ee",
|
||||||
|
];
|
||||||
|
|
||||||
|
const WINDOWS = [
|
||||||
|
{ label: "5 min", ms: 5 * 60 * 1000 },
|
||||||
|
{ label: "15 min", ms: 15 * 60 * 1000 },
|
||||||
|
{ label: "1 hr", ms: 60 * 60 * 1000 },
|
||||||
|
] as const;
|
||||||
|
|
||||||
|
const INTERVALS = [
|
||||||
|
{ label: "Off", ms: 0 },
|
||||||
|
{ label: "5s", ms: 5000 },
|
||||||
|
{ label: "10s", ms: 10000 },
|
||||||
|
{ label: "30s", ms: 30000 },
|
||||||
|
{ label: "60s", ms: 60000 },
|
||||||
|
] as const;
|
||||||
|
|
||||||
|
let selectedWindow = persistentStore("perf-window", 0);
|
||||||
|
let selectedInterval = persistentStore("perf-refresh-interval", 0);
|
||||||
|
let sysData = $state<SysStat[]>([]);
|
||||||
|
let gpuData = $state<GpuStat[]>([]);
|
||||||
|
let refreshing = $state(false);
|
||||||
|
|
||||||
|
let pollTimer: ReturnType<typeof setInterval> | null = null;
|
||||||
|
let visible = $state(true);
|
||||||
|
let mounted = $state(false);
|
||||||
|
|
||||||
|
/**
 * Returns the epoch-millisecond lower bound of the selected time window
 * (now minus the window length).
 *
 * The selected index is read from a persisted store, so it may not match the
 * current WINDOWS list (NOTE(review): assumes persistentStore restores values
 * saved by an earlier session — confirm). An index with no matching entry
 * falls back to the first window instead of throwing on `undefined.ms`.
 */
function cutoffTime(): number {
  const windowMs = WINDOWS[$selectedWindow]?.ms ?? WINDOWS[0].ms;
  return Date.now() - windowMs;
}
|
||||||
|
|
||||||
|
/**
 * Formats the offset between a timestamp and a reference instant as a short
 * signed label for chart axes.
 *
 * @param ts      Timestamp string parseable by `Date`.
 * @param refTime Reference instant in epoch milliseconds.
 * @returns "0s" when the rounded offset is zero, "-45s" / "+45s" below one
 *          minute, "-3m" for whole minutes, "-3:07" otherwise, and "?" when
 *          the offset cannot be computed (unparsable timestamp or non-finite
 *          reference).
 */
function formatDelta(ts: string, refTime: number): string {
  const diffSec = Math.round((new Date(ts).getTime() - refTime) / 1000);

  // An unparsable timestamp would otherwise render as "+NaN:NaN".
  if (!Number.isFinite(diffSec)) return "?";

  // Zero is neither before nor after the reference; avoid printing "-0s".
  if (diffSec === 0) return "0s";

  const sign = diffSec < 0 ? "-" : "+";
  const absSec = Math.abs(diffSec);
  if (absSec < 60) return `${sign}${absSec}s`;

  const min = Math.floor(absSec / 60);
  const sec = absSec % 60;
  if (sec === 0) return `${sign}${min}m`;
  return `${sign}${min}:${sec.toString().padStart(2, "0")}`;
}
|
||||||
|
|
||||||
|
// X-axis labels for the system charts: one label per filtered sample,
// expressed as an offset from the newest sample in the window.
const sysLabels = $derived.by(() => {
  const samples = filteredSysStats;
  const count = samples.length;
  if (count === 0) {
    return [];
  }
  const newestMs = new Date(samples[count - 1].timestamp).getTime();
  return samples.map(({ timestamp }) => formatDelta(timestamp, newestMs));
});
|
||||||
|
|
||||||
|
/**
 * Fetches the performance history without a "since" timestamp and replaces
 * both sample buffers with the response. A falsy response leaves the current
 * data untouched; a missing list in the response is stored as empty.
 */
async function loadAll() {
  const resp = await fetchPerformance();
  if (!resp) return;

  const { sys_stats, gpu_stats } = resp;
  sysData = sys_stats ?? [];
  gpuData = gpu_stats ?? [];
}
|
||||||
|
|
||||||
|
/**
 * Appends `incoming` samples to `existing`, skipping any that are not newer
 * than the newest stored sample, and drops samples older than `maxAgeMs`.
 *
 * Age is measured from the earlier of "now" and the newest sample, so the
 * newest sample is always retained even if its clock runs ahead of or behind
 * the local one. Returns `existing` itself when nothing new arrived.
 */
function mergeSamples<T extends { timestamp: string }>(existing: T[], incoming: T[], maxAgeMs: number): T[] {
  const toMs = (s: T) => new Date(s.timestamp).getTime();

  const lastMs = existing.length > 0 ? toMs(existing[existing.length - 1]) : NaN;
  // With no usable "newest" timestamp there is nothing to compare against.
  const fresh = Number.isNaN(lastMs) ? incoming : incoming.filter((s) => toMs(s) > lastMs);
  if (fresh.length === 0) return existing;

  const merged = [...existing, ...fresh];
  const newestMs = toMs(merged[merged.length - 1]);
  if (Number.isNaN(newestMs)) return merged;

  const oldestKeptMs = Math.min(Date.now(), newestMs) - maxAgeMs;
  return merged.filter((s) => toMs(s) >= oldestKeptMs);
}

/**
 * Fetches samples recorded since the newest stored system sample and merges
 * them into the buffers.
 *
 * Two safeguards over a plain append:
 *  - Samples that are not newer than what is already stored are skipped, so
 *    overlapping requests (a manual refresh racing a poll tick) do not append
 *    the same samples twice.
 *    NOTE(review): assumes all GPU rows for one instant arrive in the same
 *    response — confirm against the API.
 *  - Samples older than the largest selectable window are dropped, so the
 *    buffers stay bounded during long polling sessions.
 */
async function loadIncremental() {
  const lastTs = sysData.length > 0 ? sysData[sysData.length - 1].timestamp : undefined;
  const resp = await fetchPerformance(lastTs);
  if (!resp) return;

  const maxAgeMs = Math.max(...WINDOWS.map((w) => w.ms));
  sysData = mergeSamples(sysData, resp.sys_stats ?? [], maxAgeMs);
  gpuData = mergeSamples(gpuData, resp.gpu_stats ?? [], maxAgeMs);
}
|
||||||
|
|
||||||
|
/**
 * (Re)starts the auto-refresh timer using the currently selected interval.
 *
 * Any running timer is cleared first. An interval of 0 ("Off") leaves
 * polling stopped. The selected index is read from a persisted store, so an
 * index with no matching INTERVALS entry is treated as "Off" instead of
 * throwing on `undefined.ms`.
 */
function startPolling() {
  stopPolling();

  const ms = INTERVALS[$selectedInterval]?.ms ?? 0;
  if (ms <= 0) return;

  pollTimer = setInterval(() => {
    // Ticks are skipped while the tab is hidden.
    if (visible) {
      // Fire-and-forget: the timer callback does not await the refresh.
      void loadIncremental();
    }
  }, ms);
}
|
||||||
|
|
||||||
|
/** Cancels the auto-refresh timer, if one is running, and forgets its handle. */
function stopPolling() {
  if (!pollTimer) return;

  clearInterval(pollTimer);
  pollTimer = null;
}
|
||||||
|
|
||||||
|
/**
 * `visibilitychange` handler: pauses polling while the tab is hidden and, when
 * it becomes visible again, reloads the full history before resuming polling.
 */
function handleVisibility() {
  visible = !document.hidden;

  if (!(visible && mounted)) {
    stopPolling();
    return;
  }

  loadAll().then(() => {
    // The tab may have been hidden again, or the component unmounted, while
    // the reload was in flight. Starting the timer then would undo the
    // stopPolling() that already ran.
    if (visible && mounted) {
      startPolling();
    }
  });
}
|
||||||
|
|
||||||
|
/**
 * Stores the chosen refresh-interval index and, when the page is mounted and
 * visible, restarts the poll timer so the new interval applies immediately.
 */
function handleIntervalChange(i: number) {
  selectedInterval.set(i);

  const canPoll = visible && mounted;
  if (canPoll) {
    startPolling();
  }
}
|
||||||
|
|
||||||
|
/**
 * Handler for the refresh button: runs one incremental load while
 * `refreshing` is set (the button is disabled for that duration).
 *
 * The flag is cleared in `finally`, so a rejected load cannot leave the button
 * disabled; the rejection itself still propagates to the caller.
 */
async function manualRefresh() {
  refreshing = true;
  try {
    await loadIncremental();
  } finally {
    refreshing = false;
  }
}
|
||||||
|
|
||||||
|
$effect(() => {
|
||||||
|
return () => {
|
||||||
|
stopPolling();
|
||||||
|
};
|
||||||
|
});
|
||||||
|
|
||||||
|
// Component lifecycle: subscribe to tab-visibility changes, load the full
// history once, then begin polling. The returned cleanup undoes all of it.
onMount(() => {
  mounted = true;
  document.addEventListener("visibilitychange", handleVisibility);

  loadAll().then(() => {
    // If the component was destroyed while the initial load was in flight,
    // cleanup has already run; starting a timer now would leave an interval
    // that nothing ever clears.
    if (mounted) {
      startPolling();
    }
  });

  return () => {
    mounted = false;
    stopPolling();
    document.removeEventListener("visibilitychange", handleVisibility);
  };
});
|
||||||
|
|
||||||
|
// --- System charts (filtered by time window) ---
|
||||||
|
|
||||||
|
// System samples that fall inside the selected time window. The cutoff is
// computed once per evaluation so every sample is compared against the same
// instant, rather than calling Date.now() again for each element.
const filteredSysStats = $derived.by(() => {
  const cutoff = cutoffTime();
  return sysData.filter((s) => new Date(s.timestamp).getTime() >= cutoff);
});
|
||||||
|
|
||||||
|
// One chart series per CPU core. The core count is taken from the first
// sample in the window; colors cycle through COLORS.
const cpuDatasets = $derived.by(() => {
  const stats = filteredSysStats;
  if (stats.length === 0) return [];

  const coreCount = stats[0].cpu_util_per_core.length;
  return Array.from({ length: coreCount }, (_, core) => ({
    label: `Core ${core}`,
    data: stats.map((sample) => sample.cpu_util_per_core[core]),
    borderColor: COLORS[core % COLORS.length],
  }));
});
|
||||||
|
|
||||||
|
// Two chart series: memory and swap usage as percentages of their totals.
// Swap is reported as 0 when the sample has no swap configured.
const memSwapDatasets = $derived.by(() => {
  const stats = filteredSysStats;
  if (stats.length === 0) return [];

  const memoryPct = stats.map((sample) => (sample.mem_used_mb / sample.mem_total_mb) * 100);
  const swapPct = stats.map((sample) => {
    if (sample.swap_total_mb > 0) {
      return (sample.swap_used_mb / sample.swap_total_mb) * 100;
    }
    return 0;
  });

  return [
    { label: "Memory Used %", data: memoryPct, borderColor: "#3b82f6" },
    { label: "Swap Used %", data: swapPct, borderColor: "#8b5cf6" },
  ];
});
|
||||||
|
|
||||||
|
// Memory / swap figures from the newest sample in the window, with usage
// percentages formatted to one decimal place. `swap_used_pct` is null when
// the sample reports no swap; the whole value is null when the window is empty.
const latestMemSwap = $derived.by(() => {
  const stats = filteredSysStats;
  if (stats.length === 0) return null;

  const { mem_total_mb, mem_used_mb, swap_total_mb, swap_used_mb } = stats[stats.length - 1];
  const asPercent = (used: number, total: number) => ((used / total) * 100).toFixed(1);

  return {
    mem_total_mb,
    mem_used_mb,
    mem_used_pct: asPercent(mem_used_mb, mem_total_mb),
    swap_total_mb,
    swap_used_mb,
    swap_used_pct: swap_total_mb > 0 ? asPercent(swap_used_mb, swap_total_mb) : null,
  };
});
|
||||||
|
|
||||||
|
// Chart series for the 1, 5 and 15 minute load averages.
const loadDatasets = $derived.by(() => {
  const stats = filteredSysStats;
  if (stats.length === 0) return [];

  // Label, source field and line color of each series, in display order.
  const series = [
    { label: "1 min", field: "load_avg_1", borderColor: "#10b981" },
    { label: "5 min", field: "load_avg_5", borderColor: "#f59e0b" },
    { label: "15 min", field: "load_avg_15", borderColor: "#ef4444" },
  ] as const;

  return series.map(({ label, field, borderColor }) => ({
    label,
    data: stats.map((sample) => sample[field]),
    borderColor,
  }));
});
|
||||||
|
|
||||||
|
// Per-interface network throughput in Mbit/s, derived from the difference
// between consecutive cumulative byte counters. Each interface contributes an
// "in" and an "out" series; the result has one point fewer than the sample
// list because the first sample has no predecessor.
const netBandwidthDatasets = $derived.by(() => {
  const stats = filteredSysStats;
  if (stats.length < 2) return [];

  // Union of interface names seen anywhere in the window, sorted for a
  // stable series order.
  const ifaceNames = new Set<string>();
  for (const s of stats) {
    for (const n of s.net_io ?? []) {
      ifaceNames.add(n.name);
    }
  }

  const interfaces = [...ifaceNames].sort();
  if (interfaces.length === 0) return [];

  // A counter that decreases between samples (reset or wrap-around) would
  // otherwise plot as a negative rate. Report 0, matching how samples with
  // missing data are plotted below.
  const toMbps = (deltaBytes: number, dtSec: number) => Math.max(0, ((deltaBytes / dtSec) * 8) / 1_000_000);

  const datasets: { label: string; data: number[]; borderColor: string }[] = [];
  let colorIdx = 0;

  for (const iface of interfaces) {
    const recvData: number[] = [];
    const sentData: number[] = [];

    for (let i = 1; i < stats.length; i++) {
      const prev = stats[i - 1];
      const curr = stats[i];
      const prevIO = (prev.net_io ?? []).find((n) => n.name === iface);
      const currIO = (curr.net_io ?? []).find((n) => n.name === iface);
      const dtMs = new Date(curr.timestamp).getTime() - new Date(prev.timestamp).getTime();

      // No rate can be computed when the interface is missing from either
      // sample or the timestamps do not advance.
      if (!prevIO || !currIO || dtMs <= 0) {
        recvData.push(0);
        sentData.push(0);
        continue;
      }

      const dtSec = dtMs / 1000;
      recvData.push(toMbps(currIO.bytes_recv - prevIO.bytes_recv, dtSec));
      sentData.push(toMbps(currIO.bytes_sent - prevIO.bytes_sent, dtSec));
    }

    datasets.push({
      label: `${iface} in`,
      data: recvData,
      borderColor: COLORS[colorIdx % COLORS.length],
    });
    colorIdx++;
    datasets.push({
      label: `${iface} out`,
      data: sentData,
      borderColor: COLORS[colorIdx % COLORS.length],
    });
    colorIdx++;
  }

  return datasets;
});
|
||||||
|
|
||||||
|
// X-axis labels for the bandwidth chart. Rates are computed between
// consecutive samples, so the first sample has no data point and no label.
const netBandwidthLabels = $derived.by(() => {
  const stats = filteredSysStats;
  if (stats.length < 2) return [];

  const [, ...afterFirst] = stats;
  const newestMs = new Date(afterFirst[afterFirst.length - 1].timestamp).getTime();
  return afterFirst.map(({ timestamp }) => formatDelta(timestamp, newestMs));
});
|
||||||
|
|
||||||
|
// --- GPU charts (filtered by time window) ---
|
||||||
|
|
||||||
|
// GPU samples that fall inside the selected time window. The cutoff is
// computed once per evaluation so every sample is compared against the same
// instant, rather than calling Date.now() again for each element.
const filteredGpuStats = $derived.by(() => {
  const cutoff = cutoffTime();
  return gpuData.filter((g) => new Date(g.timestamp).getTime() >= cutoff);
});
|
||||||
|
|
||||||
|
const hasGpuData = $derived(gpuData.length > 0);
|
||||||
|
|
||||||
|
// X-axis labels for the GPU charts: offsets from the newest GPU sample, with
// repeated label strings collapsed to their first occurrence (the sample list
// can hold several rows that format to the same offset).
const gpuLabels = $derived.by(() => {
  const stats = filteredGpuStats;
  if (stats.length === 0) return [];

  const newestMs = new Date(stats[stats.length - 1].timestamp).getTime();
  // A Set iterates in insertion order, so first-seen order is preserved.
  const unique = new Set(stats.map((g) => formatDelta(g.timestamp, newestMs)));
  return [...unique];
});
|
||||||
|
|
||||||
|
/**
 * Groups GPU samples by GPU id and returns one chart series per GPU for the
 * requested numeric field.
 *
 * @param stats Samples to plot, in the order they should appear.
 * @param field Which numeric GpuStat field to chart.
 * @returns Series in first-seen GPU order. Each is labelled with the name
 *          from that GPU's first sample (or "GPU <id>" when the name is
 *          empty) and colored by cycling through COLORS.
 */
function buildGpuDatasets(
  stats: GpuStat[],
  field: keyof Pick<GpuStat, "gpu_util_pct" | "mem_util_pct" | "temp_c" | "vram_temp_c" | "power_draw_w">,
) {
  if (stats.length === 0) return [];

  const seriesById = new Map<number, { name: string; values: number[] }>();
  for (const sample of stats) {
    let series = seriesById.get(sample.id);
    if (series === undefined) {
      series = { name: sample.name, values: [] };
      seriesById.set(sample.id, series);
    }
    series.values.push(sample[field]);
  }

  // Map iteration follows insertion order, so `position` is the first-seen rank.
  return Array.from(seriesById, ([id, series], position) => ({
    label: series.name || `GPU ${id}`,
    data: series.values,
    borderColor: COLORS[position % COLORS.length],
  }));
}
|
||||||
|
|
||||||
|
// Per-metric chart series, all built from the time-window-filtered GPU samples.
type GpuMetricField = Parameters<typeof buildGpuDatasets>[1];
const gpuSeriesFor = (field: GpuMetricField) => buildGpuDatasets(filteredGpuStats, field);

const gpuUtilDatasets = $derived(gpuSeriesFor("gpu_util_pct"));
const gpuMemDatasets = $derived(gpuSeriesFor("mem_util_pct"));
const gpuTempDatasets = $derived(gpuSeriesFor("temp_c"));
const gpuVramTempDatasets = $derived(gpuSeriesFor("vram_temp_c"));
const gpuPowerDatasets = $derived(gpuSeriesFor("power_draw_w"));

// True when at least one filtered sample reports a positive VRAM temperature.
const hasVramTemp = $derived(filteredGpuStats.some(({ vram_temp_c }) => vram_temp_c > 0));
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<div class="space-y-6">
|
||||||
|
<div class="flex items-center justify-between">
|
||||||
|
<h2 class="text-xl font-semibold text-txtmain">Performance (Experimental)</h2>
|
||||||
|
<div class="flex items-center gap-4">
|
||||||
|
<div class="flex items-center gap-1">
|
||||||
|
{#each WINDOWS as win, i}
|
||||||
|
<button
|
||||||
|
class="btn btn--sm"
|
||||||
|
class:bg-primary={$selectedWindow === i}
|
||||||
|
class:text-btn-primary-text={$selectedWindow === i}
|
||||||
|
onclick={() => ($selectedWindow = i)}
|
||||||
|
>
|
||||||
|
{win.label}
|
||||||
|
</button>
|
||||||
|
{/each}
|
||||||
|
</div>
|
||||||
|
<div class="flex items-center gap-1">
|
||||||
|
<span class="text-xs text-txtsecondary mr-1">Refresh:</span>
|
||||||
|
{#each INTERVALS as intv, i}
|
||||||
|
<button
|
||||||
|
class="btn btn--sm"
|
||||||
|
class:bg-primary={$selectedInterval === i}
|
||||||
|
class:text-btn-primary-text={$selectedInterval === i}
|
||||||
|
onclick={() => handleIntervalChange(i)}
|
||||||
|
>
|
||||||
|
{intv.label}
|
||||||
|
</button>
|
||||||
|
{/each}
|
||||||
|
</div>
|
||||||
|
<button class="btn btn--sm p-1" title="Refresh" onclick={manualRefresh} disabled={refreshing}>
|
||||||
|
<svg
|
||||||
|
xmlns="http://www.w3.org/2000/svg"
|
||||||
|
viewBox="0 0 24 24"
|
||||||
|
fill="none"
|
||||||
|
stroke="currentColor"
|
||||||
|
stroke-width="2"
|
||||||
|
stroke-linecap="round"
|
||||||
|
stroke-linejoin="round"
|
||||||
|
class="w-4 h-4"
|
||||||
|
class:animate-spin={refreshing}
|
||||||
|
>
|
||||||
|
<path d="M21 12a9 9 0 0 0-9-9 9.75 9.75 0 0 0-6.74 2.74L3 8" />
|
||||||
|
<path d="M3 3v5h5" />
|
||||||
|
<path d="M3 12a9 9 0 0 0 9 9 9.75 9.75 0 0 0 6.74-2.74L21 16" />
|
||||||
|
<path d="M16 16h5v5" />
|
||||||
|
</svg>
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<p class="text-sm text-txtsecondary">
|
||||||
|
This is an experimental feature. Please see <a class="underline hover:text-txtmain" href="https://github.com/mostlygeek/llama-swap/issues/596">issue 596</a> for instructions.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<!-- GPU Section -->
|
||||||
|
<section class="space-y-4">
|
||||||
|
<h3 class="text-lg font-medium text-txtmain">GPU</h3>
|
||||||
|
{#if !hasGpuData}
|
||||||
|
<p class="text-txtsecondary card p-4">No GPU data available</p>
|
||||||
|
{:else}
|
||||||
|
<div class="grid grid-cols-1 lg:grid-cols-2 gap-4">
|
||||||
|
<PerformanceChart
|
||||||
|
title="GPU Utilization (%)"
|
||||||
|
labels={gpuLabels}
|
||||||
|
datasets={gpuUtilDatasets}
|
||||||
|
yMin={0}
|
||||||
|
yMax={100}
|
||||||
|
yLabel="%"
|
||||||
|
/>
|
||||||
|
<PerformanceChart
|
||||||
|
title="GPU Memory Utilization (%)"
|
||||||
|
labels={gpuLabels}
|
||||||
|
datasets={gpuMemDatasets}
|
||||||
|
yMin={0}
|
||||||
|
yMax={100}
|
||||||
|
yLabel="%"
|
||||||
|
/>
|
||||||
|
<PerformanceChart
|
||||||
|
title="GPU Temperature (°C)"
|
||||||
|
labels={gpuLabels}
|
||||||
|
datasets={gpuTempDatasets}
|
||||||
|
yMin={0}
|
||||||
|
yLabel="°C"
|
||||||
|
/>
|
||||||
|
{#if hasVramTemp}
|
||||||
|
<PerformanceChart
|
||||||
|
title="GPU VRAM Temperature (°C)"
|
||||||
|
labels={gpuLabels}
|
||||||
|
datasets={gpuVramTempDatasets}
|
||||||
|
yMin={0}
|
||||||
|
yLabel="°C"
|
||||||
|
/>
|
||||||
|
{/if}
|
||||||
|
<PerformanceChart
|
||||||
|
title="GPU Power Draw (W)"
|
||||||
|
labels={gpuLabels}
|
||||||
|
datasets={gpuPowerDatasets}
|
||||||
|
yMin={0}
|
||||||
|
yLabel="W"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<!-- System Section -->
|
||||||
|
<section class="space-y-4">
|
||||||
|
<h3 class="text-lg font-medium text-txtmain">System</h3>
|
||||||
|
<div class="grid grid-cols-1 lg:grid-cols-2 gap-4">
|
||||||
|
<PerformanceChart
|
||||||
|
title="CPU Utilization (%)"
|
||||||
|
labels={sysLabels}
|
||||||
|
datasets={cpuDatasets}
|
||||||
|
yMin={0}
|
||||||
|
yMax={100}
|
||||||
|
yLabel="%"
|
||||||
|
showLegend={false}
|
||||||
|
/>
|
||||||
|
<div>
|
||||||
|
<PerformanceChart
|
||||||
|
title="Memory & Swap Usage (%)"
|
||||||
|
labels={sysLabels}
|
||||||
|
datasets={memSwapDatasets}
|
||||||
|
yMin={0}
|
||||||
|
yMax={100}
|
||||||
|
yLabel="%"
|
||||||
|
/>
|
||||||
|
{#if latestMemSwap}
|
||||||
|
<div class="flex items-center justify-center gap-4 text-xs text-txtsecondary mt-1 px-4">
|
||||||
|
<span
|
||||||
|
>Mem: <span class="text-txtmain font-medium"
|
||||||
|
>{latestMemSwap.mem_used_mb.toLocaleString()} / {latestMemSwap.mem_total_mb.toLocaleString()} MB ({latestMemSwap.mem_used_pct}%)</span
|
||||||
|
></span
|
||||||
|
>
|
||||||
|
{#if latestMemSwap.swap_used_pct !== null}
|
||||||
|
<span
|
||||||
|
>Swap: <span class="text-txtmain font-medium"
|
||||||
|
>{latestMemSwap.swap_used_mb.toLocaleString()} / {latestMemSwap.swap_total_mb.toLocaleString()} MB ({latestMemSwap.swap_used_pct}%)</span
|
||||||
|
></span
|
||||||
|
>
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
|
<PerformanceChart title="Load Average" labels={sysLabels} datasets={loadDatasets} yMin={0} />
|
||||||
|
{#if netBandwidthDatasets.length > 0}
|
||||||
|
<PerformanceChart
|
||||||
|
title="Network Bandwidth (Mbit/s)"
|
||||||
|
labels={netBandwidthLabels}
|
||||||
|
datasets={netBandwidthDatasets}
|
||||||
|
yMin={0}
|
||||||
|
yLabel="Mbit/s"
|
||||||
|
showLegend={false}
|
||||||
|
/>
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
</div>
|
||||||
@@ -1,5 +1,14 @@
|
|||||||
import { writable } from "svelte/store";
|
import { writable } from "svelte/store";
|
||||||
import type { Model, Metrics, VersionInfo, LogData, APIEventEnvelope, ReqRespCapture, InFlightStats } from "../lib/types";
|
import type {
|
||||||
|
Model,
|
||||||
|
ActivityLogEntry,
|
||||||
|
VersionInfo,
|
||||||
|
LogData,
|
||||||
|
APIEventEnvelope,
|
||||||
|
ReqRespCapture,
|
||||||
|
InFlightStats,
|
||||||
|
PerformanceResponse,
|
||||||
|
} from "../lib/types";
|
||||||
import { connectionState } from "./theme";
|
import { connectionState } from "./theme";
|
||||||
|
|
||||||
const LOG_LENGTH_LIMIT = 1024 * 100; /* 100KB of log data */
|
const LOG_LENGTH_LIMIT = 1024 * 100; /* 100KB of log data */
|
||||||
@@ -8,7 +17,7 @@ const LOG_LENGTH_LIMIT = 1024 * 100; /* 100KB of log data */
|
|||||||
export const models = writable<Model[]>([]);
|
export const models = writable<Model[]>([]);
|
||||||
export const proxyLogs = writable<string>("");
|
export const proxyLogs = writable<string>("");
|
||||||
export const upstreamLogs = writable<string>("");
|
export const upstreamLogs = writable<string>("");
|
||||||
export const metrics = writable<Metrics[]>([]);
|
export const metrics = writable<ActivityLogEntry[]>([]);
|
||||||
export const inFlightRequests = writable<number>(0);
|
export const inFlightRequests = writable<number>(0);
|
||||||
export const versionInfo = writable<VersionInfo>({
|
export const versionInfo = writable<VersionInfo>({
|
||||||
build_date: "unknown",
|
build_date: "unknown",
|
||||||
@@ -62,7 +71,7 @@ export function enableAPIEvents(enabled: boolean): void {
|
|||||||
const newModels = JSON.parse(message.data) as Model[];
|
const newModels = JSON.parse(message.data) as Model[];
|
||||||
// Sort models by name and id
|
// Sort models by name and id
|
||||||
newModels.sort((a, b) => {
|
newModels.sort((a, b) => {
|
||||||
return (a.name + a.id).localeCompare(b.name + b.id, undefined, { numeric : true} );
|
return (a.name + a.id).localeCompare(b.name + b.id, undefined, { numeric: true });
|
||||||
});
|
});
|
||||||
models.set(newModels);
|
models.set(newModels);
|
||||||
break;
|
break;
|
||||||
@@ -82,7 +91,7 @@ export function enableAPIEvents(enabled: boolean): void {
|
|||||||
}
|
}
|
||||||
|
|
||||||
case "metrics": {
|
case "metrics": {
|
||||||
const newMetrics = JSON.parse(message.data) as Metrics[];
|
const newMetrics = JSON.parse(message.data) as ActivityLogEntry[];
|
||||||
metrics.update((prevMetrics) => [...newMetrics, ...prevMetrics]);
|
metrics.update((prevMetrics) => [...newMetrics, ...prevMetrics]);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -196,3 +205,17 @@ export async function getCapture(id: number): Promise<ReqRespCapture | null> {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Fetches performance data from `/api/performance`.
 *
 * @param after - Optional value sent, URL-encoded, as the `after` query
 *   parameter. Omitted from the URL when empty or undefined.
 * @returns The parsed JSON body, or `null` when the request throws or the
 *   response status is not OK. The failure is logged to the console.
 */
export async function fetchPerformance(after?: string): Promise<PerformanceResponse | null> {
  try {
    // Built inside the try block: encodeURIComponent can throw on malformed
    // input, and that must also resolve to null rather than reject.
    let endpoint = "/api/performance";
    if (after) {
      endpoint = `/api/performance?after=${encodeURIComponent(after)}`;
    }

    const res = await fetch(endpoint);
    if (res.ok) {
      return await res.json();
    }
    throw new Error(`HTTP error! status: ${res.status}`);
  } catch (err) {
    console.error("Failed to fetch performance data:", err);
    return null;
  }
}
|
||||||
|
|||||||
@@ -2,10 +2,50 @@ import { writable, derived } from "svelte/store";
|
|||||||
import { persistentStore } from "./persistent";
|
import { persistentStore } from "./persistent";
|
||||||
import type { ScreenWidth } from "../lib/types";
|
import type { ScreenWidth } from "../lib/types";
|
||||||
|
|
||||||
|
export type ThemeMode = "light" | "dark" | "system";

/**
 * Picks the starting theme mode, migrating the legacy `"theme"` localStorage
 * entry when one is present.
 *
 * The legacy entry held a JSON value that is interpreted by truthiness:
 * truthy maps to `"dark"`, falsy to `"light"`. The entry is removed as part of
 * the migration.
 *
 * @returns `"dark"` or `"light"` when a legacy entry was found and parsed;
 *   `"system"` when there is no `window`, no legacy entry, or reading/parsing
 *   the entry failed (the error is logged to the console).
 */
function getInitialThemeMode(): ThemeMode {
  if (typeof window === "undefined") return "system";

  try {
    const saved = localStorage.getItem("theme");
    if (saved === null) return "system";

    // Remove the legacy key before parsing, so an unparseable value is cleaned
    // up as well instead of being re-read and logged on every load.
    localStorage.removeItem("theme");

    const legacyIsDark: unknown = JSON.parse(saved);
    return legacyIsDark ? "dark" : "light";
  } catch (e) {
    console.error("Error parsing stored theme", e);
  }
  return "system";
}
|
||||||
|
|
||||||
// Persistent stores
|
// Persistent stores
|
||||||
export const isDarkMode = persistentStore<boolean>("theme", false);
|
export const themeMode = persistentStore<ThemeMode>("theme-mode", getInitialThemeMode());
|
||||||
export const appTitle = persistentStore<string>("app-title", "llama-swap");
|
export const appTitle = persistentStore<string>("app-title", "llama-swap");
|
||||||
|
|
||||||
|
const prefersDarkQuery = "(prefers-color-scheme: dark)";

/**
 * Reports whether the `prefers-color-scheme: dark` media query currently
 * matches.
 *
 * @returns `false` when there is no `window` or `window.matchMedia` is not a
 *   function; otherwise the query's `matches` value.
 */
function getSystemPrefersDark(): boolean {
  if (typeof window === "undefined") return false;
  if (typeof window.matchMedia !== "function") return false;
  return window.matchMedia(prefersDarkQuery).matches;
}
|
||||||
|
|
||||||
|
// Raw OS-level dark preference; not exported from this module.
const systemPrefersDark = writable(getSystemPrefersDark());

// Effective dark-mode flag: follows the OS preference while the mode is
// "system", otherwise it is true only for an explicit "dark" mode.
export const isDarkMode = derived([themeMode, systemPrefersDark], ([mode, osPrefersDark]) =>
  mode === "system" ? osPrefersDark : mode === "dark",
);
|
||||||
|
|
||||||
// Non-persistent stores
|
// Non-persistent stores
|
||||||
export const screenWidth = writable<ScreenWidth>("md");
|
export const screenWidth = writable<ScreenWidth>("md");
|
||||||
export const connectionState = writable<"connected" | "connecting" | "disconnected">("disconnected");
|
export const connectionState = writable<"connected" | "connecting" | "disconnected">("disconnected");
|
||||||
@@ -17,9 +57,15 @@ export const isNarrow = derived(screenWidth, ($screenWidth) => {
|
|||||||
|
|
||||||
// Function to toggle theme
|
// Function to toggle theme
|
||||||
/**
 * Advances the stored theme mode one step through the cycle
 * system → light → dark → system. Any value other than "system" or "light"
 * moves to "system".
 */
export function toggleTheme(): void {
  themeMode.update((current) => {
    switch (current) {
      case "system":
        return "light";
      case "light":
        return "dark";
      default:
        return "system";
    }
  });
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// Function to check and update screen width
|
// Function to check and update screen width
|
||||||
export function checkScreenWidth(): void {
|
export function checkScreenWidth(): void {
|
||||||
const innerWidth = window.innerWidth;
|
const innerWidth = window.innerWidth;
|
||||||
@@ -51,3 +97,17 @@ export function initScreenWidth(): () => void {
|
|||||||
window.removeEventListener("resize", checkScreenWidth);
|
window.removeEventListener("resize", checkScreenWidth);
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Initialize system theme listener
|
||||||
|
/**
 * Keeps `systemPrefersDark` in sync with the `prefersDarkQuery` media query.
 *
 * The store is set to the query's current state immediately and again on
 * every `change` event.
 *
 * @returns A cleanup function that removes the `change` listener; a no-op
 *   when there is no `window` or `window.matchMedia` is not a function.
 */
export function initSystemThemeListener(): () => void {
  const canMatchMedia = typeof window !== "undefined" && typeof window.matchMedia === "function";
  if (!canMatchMedia) {
    return () => {};
  }

  const darkSchemeQuery = window.matchMedia(prefersDarkQuery);
  const syncPreference = (event: MediaQueryListEvent) => {
    systemPrefersDark.set(event.matches);
  };

  // Seed the store with the current state before listening for changes.
  systemPrefersDark.set(darkSchemeQuery.matches);
  darkSchemeQuery.addEventListener("change", syncPreference);

  return () => darkSchemeQuery.removeEventListener("change", syncPreference);
}
|
||||||
|
|||||||
@@ -26,13 +26,15 @@ export default defineConfig({
|
|||||||
assetsDir: "assets",
|
assetsDir: "assets",
|
||||||
},
|
},
|
||||||
server: {
|
server: {
|
||||||
proxy: {
|
// yes very insecure but who's running this thing
|
||||||
"/api": "http://localhost:8080", // Proxy API calls to Go backend during development
|
// on the public internet for dev?! haha.
|
||||||
"/logs": "http://localhost:8080",
|
host: "0.0.0.0",
|
||||||
"/upstream": "http://localhost:8080",
|
allowedHosts: true,
|
||||||
"/unload": "http://localhost:8080",
|
proxy: Object.fromEntries(
|
||||||
"/v1": "http://localhost:8080",
|
["/api", "/logs", "/upstream", "/unload", "/v1", "/sdapi"].map((path) => [
|
||||||
"/sdapi": "http://localhost:8080",
|
path,
|
||||||
},
|
process.env.LLAMA_SWAP_URL ?? "http://localhost:8080",
|
||||||
|
]),
|
||||||
|
),
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|||||||
Reference in New Issue
Block a user